Home > testing > unit_tests > hmmerTests.m

hmmerTests

PURPOSE ^

run this test case with the command

SYNOPSIS ^

function tests = hmmerTests

DESCRIPTION ^

run this test case with the command
results = runtests('hmmerTests.m')

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 %run this test case with the command
0002 %results = runtests('hmmerTests.m')
0003 function tests = hmmerTests
0004 tests = functiontests(localfunctions);
0005 end
0006 
0007 function testHmmer(testCase)
0008 %This unit test comprises the functionality test for HMMER in RAVEN:
0009 % 1. Check of parsed HMMER results against the expected.
0010 %
0011 % NOTE: as hmm and HMMER results files are time-specific, no checks for
0012 % these files existence are done. Also, due to the way HMMER is utilized in
0013 % getKEGGModelForOrganism (HMMER result files can be parsed only once all
0014 % required hmm files are generated), the code segment involving HMMER
0015 % results parsing is pasted in this test function. Should the parsing problems
0016 % occur in the results processing, the code modifications shall be done in
0017 % this function and getKEGGModelForOrganism respectively.
0018 
0019 %%
0020 %Get the directory for RAVEN Toolbox
0021 [ST, I]=dbstack('-completenames');
0022 ravenPath=fileparts(fileparts(fileparts(ST(I).file)));
0023 
0024 %Generate temporary names for working directory and outFile
0025 tmpDIR=tempname;
0026 outFile=tempname;
0027 
0028 %Create a temporary folder and copy multi-FASTA file there
0029 [~, ~]=system(['mkdir "' tmpDIR '"']);
0030 
0031 sourceDir = fileparts(which(mfilename));
0032 copyfile(fullfile(sourceDir,'test_data','yeast_galactosidases.fa'),tmpDIR);
0033 copyfile(fullfile(sourceDir,'test_data','human_galactosidases.fa'),tmpDIR);
0034 
0035 
0036 %Identify the operating system
0037 if isunix
0038     if ismac
0039         binEnd='.mac';
0040     else
0041         binEnd='';
0042     end
0043 elseif ispc
0044     wslPath.tmpDIR    = getWSLpath(tmpDIR);
0045     wslPath.hmmbuild  = getWSLpath(fullfile(ravenPath,'software','hmmer','hmmbuild'));
0046     wslPath.hmmsearch = getWSLpath(fullfile(ravenPath,'software','hmmer','hmmsearch'));
0047     filesep='/';
0048 else
0049     dispEM('Unknown OS, exiting.')
0050     return
0051 end
0052 
0053 %Create empty structures needed for HMMER results
0054 actHmmResult.genes = {};
0055 actHmmResult.scores = [];
0056 
0057 %Create structures that contain expected HMMER results
0058 expHmmResult.genes = {'sp|P41947|MEL6_YEASX','sp|P41946|MEL5_YEASX', 'sp|P41945|MEL2_YEASX', 'sp|P04824|MEL1_YEASX'};
0059 expHmmResult.scores = [10^-250, 10^-250, 10^-250, 10^-250];
0060 
0061 %Run HMMER multi-threaded to use all logical cores assigned to MATLAB
0062 cores = evalc('feature(''numcores'')');
0063 cores = strsplit(cores, 'MATLAB was assigned: ');
0064 cores = regexp(cores{2},'^\d*','match');
0065 cores = cores{1};
0066 
0067 %%
0068 %Train a hidden Markov model
0069 if ismac || isunix
0070     [~, ~]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(tmpDIR,'human_galactosidases.hmm') '" "' fullfile(tmpDIR,'yeast_galactosidases.fa') '"']);
0071 else
0072     [~, ~]=system(['wsl "' wslPath.hmmbuild '" --cpu "' num2str(cores) '" "' wslPath.tmpDIR '/human_galactosidases.hmm" "' wslPath.tmpDIR '/yeast_galactosidases.fa"']);
0073 end
0074 
0075 %Run a homology search against the newly-trained HMM
0076 if ismac || isunix
0077     [~, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(tmpDIR,'human_galactosidases.hmm') '" "' fullfile(tmpDIR,'yeast_galactosidases.fa') '"']);
0078 else
0079     [~, output]=system(['wsl "' wslPath.hmmsearch '" --cpu "' num2str(cores) '" "' wslPath.tmpDIR '/human_galactosidases.hmm" "' wslPath.tmpDIR '/yeast_galactosidases.fa"']);
0080 end
0081 
0082 %Save the output to a file
0083 fid=fopen(outFile,'w');
0084 fwrite(fid,output);
0085 fclose(fid);
0086 
0087 %%
0088 %Parse the results
0089 geneCounter=0;
0090 fid=fopen(outFile,'r');
0091 beginMatches=false;
0092 while 1
0093     %Get the next line
0094     tline = fgetl(fid);
0095     
0096     %Abort at end of file
0097     if ~ischar(tline)
0098         break;
0099     end
0100     
0101     if and(beginMatches,strcmp(tline,'  ------ inclusion threshold ------'))
0102         break;
0103     end
0104     
0105     if beginMatches==false
0106         %This is how the listing of matches begins
0107         if any(strfind(tline,'E-value '))
0108             %Read one more line that is only padding
0109             tline = fgetl(fid);
0110             beginMatches=true;
0111         end
0112     else
0113         %If matches should be read
0114         if ~strcmp(tline,'   [No hits detected that satisfy reporting thresholds]') && ~isempty(tline)
0115             elements=regexp(tline,' ','split');
0116             elements=elements(cellfun(@any,elements));
0117             
0118             %Check if the match is below the treshhold
0119             score=str2double(elements{1});
0120             gene=elements{9};
0121             if score<=10^-50
0122                 %If the score is exactly 0, change it to a very
0123                 %small value to avoid NaN
0124                 if score==0
0125                     score=10^-250;
0126                 end
0127                 %Check if the gene is added already and, is so, get
0128                 %the best score for it
0129                 geneCounter=geneCounter+1;
0130                 actHmmResult.genes{geneCounter}=gene;
0131                 actHmmResult.scores(geneCounter)=score;
0132             end
0133         else
0134             break;
0135         end
0136     end
0137 end
0138 fclose(fid);
0139 
0140 %Remove the old tempfiles
0141 delete([outFile '*']);
0142 
0143 %Remove temporary folder, since testing is finished
0144 [~, ~]=system(['rm "' tmpDIR '" -r']);
0145 
0146 %%
0147 %Test 1a: Check if HMMER results match the expected ones
0148 verifyEqual(testCase,actHmmResult,expHmmResult);
0149 
0150 %Test 1b: Modify actual HMMER results structure and check if test fails
0151 actHmmResult.score(2)=1;
0152 verifyNotEqual(testCase,actHmmResult,expHmmResult);
0153 end

Generated by m2html © 2005