Home > testing > unit_tests > hmmerTests.m

hmmerTests

PURPOSE ^

run this test case with the command

SYNOPSIS ^

function tests = hmmerTests

DESCRIPTION ^

run this test case with the command
results = runtests('hmmerTests.m')

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 %run this test case with the command
0002 %results = runtests('hmmerTests.m')
0003 function tests = hmmerTests
0004 tests = functiontests(localfunctions);
0005 end
0006 
0007 function testHmmer(testCase)
0008 %This unit test comprises the functionality test for HMMER in RAVEN:
0009 % 1. Check of parsed HMMER results against the expected.
0010 %
0011 % NOTE: as hmm and HMMER results files are time-specific, no checks for
0012 % these files existence are done. Also, due to the way HMMER is utilized in
0013 % getKEGGModelForOrganism (HMMER result files can be parsed only once all
0014 % required hmm files are generated), the code segment involving HMMER
0015 % results parsing is pasted in this test function. Should the parsing problems
0016 % occur in the results processing, the code modifications shall be done in
0017 % this function and getKEGGModelForOrganism respectively.
0018 
0019 %%
0020 %Get the directory for RAVEN Toolbox
0021 [ST, I]=dbstack('-completenames');
0022 ravenPath=fileparts(fileparts(fileparts(ST(I).file)));
0023 
0024 %Identify the operating system
0025 if isunix
0026     if ismac
0027         binEnd='.mac';
0028     else
0029         binEnd='';
0030     end
0031 elseif ispc
0032     binEnd='.exe';
0033 else
0034     dispEM('Unknown OS, exiting.')
0035     return
0036 end
0037 
0038 %Create empty structures needed for HMMER results
0039 actHmmResult.genes = {};
0040 actHmmResult.scores = [];
0041 
0042 %Create structures that contain expected HMMER results
0043 expHmmResult.genes = {'sp|P41947|MEL6_YEASX','sp|P41946|MEL5_YEASX', 'sp|P41945|MEL2_YEASX', 'sp|P04824|MEL1_YEASX'};
0044 expHmmResult.scores = [10^-250, 10^-250, 10^-250, 10^-250];
0045 
0046 %Generate temporary names for working directory and outFile
0047 tmpDIR=tempname;
0048 outFile=tempname;
0049 
0050 %Run HMMER multi-threaded to use all logical cores assigned to MATLAB
0051 cores = evalc('feature(''numcores'')');
0052 cores = strsplit(cores, 'MATLAB was assigned: ');
0053 cores = regexp(cores{2},'^\d*','match');
0054 cores = cores{1};
0055 
0056 %Create a temporary folder and copy multi-FASTA file there
0057 [~, ~]=system(['mkdir "' tmpDIR '"']);
0058 
0059 sourceDir = fileparts(which(mfilename));
0060 copyfile(fullfile(sourceDir,'test_data','yeast_galactosidases.fa'),tmpDIR);
0061 copyfile(fullfile(sourceDir,'test_data','human_galactosidases.fa'),tmpDIR);
0062 
0063 %%
0064 %Train a hidden Markov model
0065 [~, ~]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(tmpDIR,'human_galactosidases.hmm') '" "' fullfile(tmpDIR,'yeast_galactosidases.fa') '"']);
0066 
0067 %Run a homology search against the newly-trained HMM
0068 [~, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(tmpDIR,'human_galactosidases.hmm') '" "' fullfile(tmpDIR,'yeast_galactosidases.fa') '"']);
0069 
0070 %Save the output to a file
0071 fid=fopen(outFile,'w');
0072 fwrite(fid,output);
0073 fclose(fid);
0074 
0075 %%
0076 %Parse the results
0077 geneCounter=0;
0078 fid=fopen(outFile,'r');
0079 beginMatches=false;
0080 while 1
0081     %Get the next line
0082     tline = fgetl(fid);
0083     
0084     %Abort at end of file
0085     if ~ischar(tline)
0086         break;
0087     end
0088     
0089     if and(beginMatches,strcmp(tline,'  ------ inclusion threshold ------'))
0090         break;
0091     end
0092     
0093     if beginMatches==false
0094         %This is how the listing of matches begins
0095         if any(strfind(tline,'E-value '))
0096             %Read one more line that is only padding
0097             tline = fgetl(fid);
0098             beginMatches=true;
0099         end
0100     else
0101         %If matches should be read
0102         if ~strcmp(tline,'   [No hits detected that satisfy reporting thresholds]') && ~isempty(tline)
0103             elements=regexp(tline,' ','split');
0104             elements=elements(cellfun(@any,elements));
0105             
0106             %Check if the match is below the treshhold
0107             score=str2double(elements{1});
0108             gene=elements{9};
0109             if score<=10^-50
0110                 %If the score is exactly 0, change it to a very
0111                 %small value to avoid NaN
0112                 if score==0
0113                     score=10^-250;
0114                 end
0115                 %Check if the gene is added already and, is so, get
0116                 %the best score for it
0117                 geneCounter=geneCounter+1;
0118                 actHmmResult.genes{geneCounter}=gene;
0119                 actHmmResult.scores(geneCounter)=score;
0120             end
0121         else
0122             break;
0123         end
0124     end
0125 end
0126 fclose(fid);
0127 
0128 %Remove the old tempfiles
0129 delete([outFile '*']);
0130 
0131 %Remove temporary folder, since testing is finished
0132 [~, ~]=system(['rm "' tmpDIR '" -r']);
0133 
0134 %%
0135 %Test 1a: Check if HMMER results match the expected ones
0136 verifyEqual(testCase,actHmmResult,expHmmResult);
0137 
0138 %Test 1b: Modify actual HMMER results structure and check if test fails
0139 actHmmResult.score(2)=1;
0140 verifyNotEqual(testCase,actHmmResult,expHmmResult);
0141 end

Generated by m2html © 2005