0001 function [blastStructure,blastReport]=getBlast(organismID,fastaFile,...
0002 modelIDs,refFastaFiles,developMode,hideVerbose)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039 if nargin<5
0040 developMode = false;
0041 end
0042 if nargin<6
0043 hideVerbose = false;
0044 end
0045
0046
0047 organismID=convertCharArray(organismID);
0048 fastaFile=convertCharArray(fastaFile);
0049 modelIDs=convertCharArray(modelIDs);
0050 refFastaFiles=convertCharArray(refFastaFiles);
0051
0052
0053 blastStructure=[];
0054 blastReport.dbHashes.phr={};
0055 blastReport.dbHashes.pot={};
0056 blastReport.dbHashes.psq={};
0057 blastReport.dbHashes.pto={};
0058 blastReport.blastTxtOutput={};
0059
0060
0061 ravenPath=findRAVENroot();
0062
0063
0064 tmpDB=tempname;
0065 outFile=tempname;
0066
0067
0068
0069 if isrow(refFastaFiles)
0070 files=horzcat(fastaFile,refFastaFiles);
0071 else
0072 files=vertcat(fastaFile,refFastaFiles);
0073 end
0074
0075 files=checkFileExistence(files,2);
0076 fastaFile = files(1);
0077 refFastaFiles = files(2:end);
0078
0079
0080 if isunix
0081 if ismac
0082 binEnd='.mac';
0083 else
0084 binEnd='';
0085 end
0086 elseif ispc
0087 binEnd='.exe';
0088 setenv('BLASTDB_LMDB_MAP_SIZE','1000000');
0089 else
0090 dispEM('Unknown OS, exiting.')
0091 return
0092 end
0093
0094
0095 cores = evalc('feature(''numcores'')');
0096 cores = strsplit(cores, 'MATLAB was assigned: ');
0097 cores = regexp(cores{2},'^\d*','match');
0098 cores = cores{1};
0099
0100
0101
0102 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in ' fastaFile{1} ' -out "' fullfile(tmpDB, 'tmpDB') '" -dbtype prot']);
0103 if developMode
0104 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.phr'));
0105 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pot'));
0106 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.psq'));
0107 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pto'));
0108 end
0109 if status~=0
0110 EM=['makeblastdb did not run successfully, error: ', num2str(status)];
0111 dispEM(EM,true);
0112 end
0113
0114 for i=1:numel(refFastaFiles)
0115 if ~hideVerbose
0116 fprintf(['BLASTing "' modelIDs{i} '" against "' organismID{1} '"..\n']);
0117 end
0118 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query ' refFastaFiles{i} ' -out "' outFile '_' num2str(i) '" -db "' fullfile(tmpDB, 'tmpDB') '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']);
0119 if developMode
0120 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile '_' num2str(i)]);
0121 end
0122 if status~=0
0123 EM=['blastp did not run successfully, error: ', num2str(status)];
0124 dispEM(EM,true);
0125 end
0126 end
0127 delete([tmpDB filesep 'tmpDB*']);
0128
0129
0130
0131 for i=1:numel(refFastaFiles)
0132 if ~hideVerbose
0133 fprintf(['BLASTing "' organismID{1} '" against "' modelIDs{i} '"..\n']);
0134 end
0135 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in ' refFastaFiles{i} ' -out "' fullfile(tmpDB, 'tmpDB') '" -dbtype prot']);
0136 if status~=0
0137 EM=['makeblastdb did not run successfully, error: ', num2str(status)];
0138 dispEM(EM,true);
0139 end
0140 [status, ~]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query ' fastaFile{1} ' -out "' outFile '_r' num2str(i) '" -db "' fullfile(tmpDB, 'tmpDB') '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']);
0141 if developMode
0142 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.phr'));
0143 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pot'));
0144 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.psq'));
0145 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pto'));
0146 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile '_r' num2str(i)]);
0147 end
0148 if status~=0
0149 EM=['blastp did not run successfully, error: ', num2str(status)];
0150 dispEM(EM,true);
0151 end
0152 delete([tmpDB filesep 'tmpDB*']);
0153 end
0154
0155
0156 for i=1:numel(refFastaFiles)*2
0157 tempStruct=[];
0158 if i<=numel(refFastaFiles)
0159 tempStruct.fromId=modelIDs{i};
0160 tempStruct.toId=organismID{1};
0161 A=readtable([outFile '_' num2str(i)],'Delimiter',',','Format','%s%s%f%f%f%f%f');
0162 else
0163 tempStruct.fromId=organismID{1};
0164 tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
0165 A=readtable([outFile '_r' num2str(i-numel(refFastaFiles))],'Delimiter',',','Format','%s%s%f%f%f%f%f');
0166 end
0167 tempStruct.fromGenes=A{:,1};
0168 tempStruct.toGenes=A{:,2};
0169 tempStruct.evalue=table2array(A(:,3));
0170 tempStruct.identity=table2array(A(:,4));
0171 tempStruct.aligLen=table2array(A(:,5));
0172 tempStruct.bitscore=table2array(A(:,6));
0173 tempStruct.ppos=table2array(A(:,7));
0174 blastStructure=[blastStructure tempStruct];
0175 end
0176
0177
0178 delete([outFile '*']);
0179
0180 delete(files{:})
0181 end