0001 function [blastStructure,diamondReport]=getDiamond(organismID,fastaFile,...
0002 modelIDs,refFastaFiles,developMode,hideVerbose)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040 if nargin<5
0041 developMode = false;
0042 end
0043 if nargin<6
0044 hideVerbose = false;
0045 end
0046
0047
0048 organismID=convertCharArray(organismID);
0049 fastaFile=convertCharArray(fastaFile);
0050 modelIDs=convertCharArray(modelIDs);
0051 refFastaFiles=convertCharArray(refFastaFiles);
0052
0053
0054 blastStructure=[];
0055 diamondReport.dbHashes={};
0056 diamondReport.diamondTxtOutput={};
0057
0058
0059 ravenPath=findRAVENroot();
0060
0061
0062 tmpDB=tempname;
0063 outFile=tempname;
0064
0065
0066
0067 if isrow(refFastaFiles)
0068 files=horzcat(fastaFile,refFastaFiles);
0069 else
0070 files=vertcat(fastaFile,refFastaFiles);
0071 end
0072
0073 files=checkFileExistence(files,2);
0074 fastaFile = files(1);
0075 refFastaFiles = files(2:end);
0076
0077
0078 if isunix
0079 if ismac
0080 binEnd='.mac';
0081 else
0082 binEnd='';
0083 end
0084 elseif ispc
0085 binEnd='.exe';
0086 else
0087 dispEM('Unknown OS, exiting.')
0088 return
0089 end
0090
0091
0092 cores = evalc('feature(''numcores'')');
0093 cores = strsplit(cores, 'MATLAB was assigned: ');
0094 cores = regexp(cores{2},'^\d*','match');
0095 cores = cores{1};
0096
0097
0098
0099 [status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' fastaFile{1} '" --db "' fullfile(tmpDB) '"']);
0100 if developMode
0101 diamondReport.dbHashes{numel(diamondReport.dbHashes)+1} = char(regexp(message,'[a-f0-9]{32}','match'));
0102 end
0103 if status~=0
0104 error('DIAMOND makedb did not run successfully, error:\n%s',strip(message))
0105 end
0106
0107 for i=1:numel(refFastaFiles)
0108 if ~hideVerbose
0109 fprintf(['Running DIAMOND blastp with "' modelIDs{i} '" against "' organismID{1} '"..\n']);
0110 end
0111 [status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' refFastaFiles{i} '" --out "' outFile '_' num2str(i) '" --db "' fullfile(tmpDB) '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores ]);
0112 if developMode
0113 diamondReport.diamondTxtOutput{numel(diamondReport.diamondTxtOutput)+1}=importdata([outFile '_' num2str(i)]);
0114 end
0115 if status~=0
0116 error('DIAMOND blastp did not run successfully, error:\n%s',strip(message))
0117 end
0118 end
0119 delete([tmpDB filesep 'tmpDB*']);
0120
0121
0122
0123 for i=1:numel(refFastaFiles)
0124 if ~hideVerbose
0125 fprintf(['Running DIAMOND blastp with "' organismID{1} '" against "' modelIDs{i} '"..\n']);
0126 end
0127 [status, message1]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' refFastaFiles{i} '" --db "' fullfile(tmpDB) '"']);
0128 if status~=0
0129 error('DIAMOND makedb did not run successfully, error:\n%s',strip(message1))
0130 end
0131 [status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' fastaFile{1} '" --out "' outFile '_r' num2str(i) '" --db "' fullfile(tmpDB) '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores]);
0132 if developMode
0133 diamondReport.dbHashes{numel(diamondReport.dbHashes)+1} = char(regexp(message1,'[a-f0-9]{32}','match'));
0134 diamondReport.diamondTxtOutput{numel(diamondReport.diamondTxtOutput)+1}=importdata([outFile '_r' num2str(i)]);
0135 end
0136 if status~=0
0137 error('DIAMOND blastp did not run successfully, error:\n%s',strip(message))
0138 end
0139 delete([tmpDB filesep 'tmpDB*']);
0140 end
0141
0142
0143 for i=1:numel(refFastaFiles)*2
0144 tempStruct=[];
0145 if i<=numel(refFastaFiles)
0146 tempStruct.fromId=modelIDs{i};
0147 tempStruct.toId=organismID{1};
0148 A=readtable([outFile '_' num2str(i)],'Delimiter','\t','Format','%s%s%f%f%f%f%f');
0149 else
0150 tempStruct.fromId=organismID{1};
0151 tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
0152 A=readtable([outFile '_r' num2str(i-numel(refFastaFiles))],'Delimiter','\t','Format','%s%s%f%f%f%f%f');
0153 end
0154 tempStruct.fromGenes=A{:,1};
0155 tempStruct.toGenes=A{:,2};
0156 tempStruct.evalue=table2array(A(:,3));
0157 tempStruct.identity=table2array(A(:,4));
0158 tempStruct.aligLen=table2array(A(:,5));
0159 tempStruct.bitscore=table2array(A(:,6));
0160 tempStruct.ppos=table2array(A(:,7));
0161 blastStructure=[blastStructure tempStruct];
0162 end
0163
0164
0165 delete([outFile '*']);
0166
0167 delete(files{:})
0168 end