0001 function [blastStructure,blastReport]=getBlast(organismID,fastaFile,...
0002 modelIDs,refFastaFiles,developMode,hideVerbose)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039 if nargin<5
0040 developMode = false;
0041 end
0042 if nargin<6
0043 hideVerbose = false;
0044 end
0045
0046
0047 organismID=convertCharArray(organismID);
0048 fastaFile=convertCharArray(fastaFile);
0049 modelIDs=convertCharArray(modelIDs);
0050 refFastaFiles=convertCharArray(refFastaFiles);
0051
0052
0053 blastStructure=[];
0054 blastReport.dbHashes.phr={};
0055 blastReport.dbHashes.pot={};
0056 blastReport.dbHashes.psq={};
0057 blastReport.dbHashes.pto={};
0058 blastReport.blastTxtOutput={};
0059
0060
0061 ravenPath=findRAVENroot();
0062
0063
0064 tmpDB=tempname;
0065 outFile=tempname;
0066 if ispc && contains(tmpDB,' ')
0067 warning(['MATLAB assigned ''%s'' as temporary file path, but it '...
0068 'contains a space character, which is not compatible with '...
0069 'BLAST. Instead, a temporary folder will be initiated at '...
0070 '''C:\\tempRAVEN\\'' which should manually be removed when '...
0071 'finished.'],tmpDB)
0072 tmpDB=tempname('C:\tempRAVEN');
0073 outFile=tempname('C:\tempRAVEN');
0074 end
0075
0076
0077
0078 if isrow(refFastaFiles)
0079 files=horzcat(fastaFile,refFastaFiles);
0080 else
0081 files=vertcat(fastaFile,refFastaFiles);
0082 end
0083
0084 files=checkFileExistence(files,2);
0085 fastaFile = files(1);
0086 refFastaFiles = files(2:end);
0087
0088
0089 if isunix
0090 if ismac
0091 binEnd='.mac';
0092 else
0093 binEnd='';
0094 end
0095 elseif ispc
0096 binEnd='.exe';
0097 setenv('BLASTDB_LMDB_MAP_SIZE','1000000');
0098 else
0099 dispEM('Unknown OS, exiting.')
0100 return
0101 end
0102
0103
0104 cores = evalc('feature(''numcores'')');
0105 cores = strsplit(cores, 'MATLAB was assigned: ');
0106 cores = regexp(cores{2},'^\d*','match');
0107 cores = cores{1};
0108
0109
0110
0111 [status, message]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in "' fastaFile{1} '" -out "' fullfile(tmpDB, 'tmpDB') '" -dbtype prot']);
0112 if developMode
0113 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.phr'));
0114 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pot'));
0115 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.psq'));
0116 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pto'));
0117 end
0118 if status~=0
0119 error('makeblastdb did not run successfully, error:\n%s',strip(message))
0120 end
0121
0122 for i=1:numel(refFastaFiles)
0123 if ~hideVerbose
0124 fprintf(['BLASTing "' modelIDs{i} '" against "' organismID{1} '"..\n']);
0125 end
0126 [status, message]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query "' refFastaFiles{i} '" -out "' outFile '_' num2str(i) '" -db "' fullfile(tmpDB, 'tmpDB') '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']);
0127 if developMode
0128 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile '_' num2str(i)]);
0129 end
0130 if status~=0
0131 error('blastp did not run successfully, error:\n%s',strip(message))
0132 end
0133 end
0134 delete([tmpDB filesep 'tmpDB*']);
0135
0136
0137
0138 for i=1:numel(refFastaFiles)
0139 if ~hideVerbose
0140 fprintf(['BLASTing "' organismID{1} '" against "' modelIDs{i} '"..\n']);
0141 end
0142 [status, message]=system(['"' fullfile(ravenPath,'software','blast+',['makeblastdb' binEnd]) '" -in "' refFastaFiles{i} '" -out "' fullfile(tmpDB, 'tmpDB') '" -dbtype prot']);
0143 if status~=0
0144 error('makeblastdb did not run successfully, error:\n%s',strip(message))
0145 end
0146 [status, message]=system(['"' fullfile(ravenPath,'software','blast+',['blastp' binEnd]) '" -query "' fastaFile{1} '" -out "' outFile '_r' num2str(i) '" -db "' fullfile(tmpDB, 'tmpDB') '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length bitscore ppos" -num_threads "' cores '"']);
0147 if developMode
0148 blastReport.dbHashes.phr{numel(blastReport.dbHashes.phr)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.phr'));
0149 blastReport.dbHashes.pot{numel(blastReport.dbHashes.pot)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pot'));
0150 blastReport.dbHashes.psq{numel(blastReport.dbHashes.psq)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.psq'));
0151 blastReport.dbHashes.pto{numel(blastReport.dbHashes.pto)+1}=getMD5Hash(fullfile(tmpDB, 'tmpDB.pto'));
0152 blastReport.blastTxtOutput{numel(blastReport.blastTxtOutput)+1}=importdata([outFile '_r' num2str(i)]);
0153 end
0154 if status~=0
0155 error('blastp did not run successfully, error:\n%s',strip(message))
0156 end
0157 delete([tmpDB filesep 'tmpDB*']);
0158 end
0159
0160
0161 for i=1:numel(refFastaFiles)*2
0162 tempStruct=[];
0163 if i<=numel(refFastaFiles)
0164 tempStruct.fromId=modelIDs{i};
0165 tempStruct.toId=organismID{1};
0166 A=readtable([outFile '_' num2str(i)],'Delimiter',',','Format','%s%s%f%f%f%f%f');
0167 else
0168 tempStruct.fromId=organismID{1};
0169 tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
0170 A=readtable([outFile '_r' num2str(i-numel(refFastaFiles))],'Delimiter',',','Format','%s%s%f%f%f%f%f');
0171 end
0172 tempStruct.fromGenes=A{:,1};
0173 tempStruct.toGenes=A{:,2};
0174 tempStruct.evalue=table2array(A(:,3));
0175 tempStruct.identity=table2array(A(:,4));
0176 tempStruct.aligLen=table2array(A(:,5));
0177 tempStruct.bitscore=table2array(A(:,6));
0178 tempStruct.ppos=table2array(A(:,7));
0179 blastStructure=[blastStructure tempStruct];
0180 end
0181
0182
0183 delete([outFile '*']);
0184
0185 delete(files{:})
0186 end