0001 function [blastStructure,diamondReport]=getDiamond(organismID,fastaFile,...
0002 modelIDs,refFastaFiles,developMode,hideVerbose)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040 if nargin<5
0041 developMode = false;
0042 end
0043 if nargin<6
0044 hideVerbose = false;
0045 end
0046
0047
0048 organismID=convertCharArray(organismID);
0049 fastaFile=convertCharArray(fastaFile);
0050 modelIDs=convertCharArray(modelIDs);
0051 refFastaFiles=convertCharArray(refFastaFiles);
0052
0053
0054 blastStructure=[];
0055 diamondReport.dbHashes={};
0056 diamondReport.diamondTxtOutput={};
0057
0058
0059 ravenPath=findRAVENroot();
0060
0061
0062 tmpDB=tempname;
0063 outFile=tempname;
0064
0065
0066
0067 if isrow(refFastaFiles)
0068 files=horzcat(fastaFile,refFastaFiles);
0069 else
0070 files=vertcat(fastaFile,refFastaFiles);
0071 end
0072
0073 files=checkFileExistence(files,2);
0074 fastaFile = files(1);
0075 refFastaFiles = files(2:end);
0076
0077
0078 if isunix
0079 if ismac
0080 binEnd='.mac';
0081 else
0082 binEnd='';
0083 end
0084 elseif ispc
0085 binEnd='.exe';
0086 else
0087 dispEM('Unknown OS, exiting.')
0088 return
0089 end
0090
0091
0092 cores = evalc('feature(''numcores'')');
0093 cores = strsplit(cores, 'MATLAB was assigned: ');
0094 cores = regexp(cores{2},'^\d*','match');
0095 cores = cores{1};
0096
0097
0098
0099 [status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' fastaFile{1} '" --db "' fullfile(tmpDB) '"']);
0100 if developMode
0101 diamondReport.dbHashes{numel(diamondReport.dbHashes)+1} = char(regexp(message,'[a-f0-9]{32}','match'));
0102 end
0103 if status~=0
0104 EM=['DIAMOND makedb did not run successfully, error: ', num2str(status)];
0105 dispEM(EM,true);
0106 end
0107
0108 for i=1:numel(refFastaFiles)
0109 if ~hideVerbose
0110 fprintf(['Running DIAMOND blastp with "' modelIDs{i} '" against "' organismID{1} '"..\n']);
0111 end
0112 [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' refFastaFiles{i} '" --out "' outFile '_' num2str(i) '" --db "' fullfile(tmpDB) '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores ]);
0113 if developMode
0114 diamondReport.diamondTxtOutput{numel(diamondReport.diamondTxtOutput)+1}=importdata([outFile '_' num2str(i)]);
0115 end
0116 if status~=0
0117 EM=['DIAMOND blastp did not run successfully, error: ', num2str(status)];
0118 dispEM(EM,true);
0119 end
0120 end
0121 delete([tmpDB filesep 'tmpDB*']);
0122
0123
0124
0125 for i=1:numel(refFastaFiles)
0126 if ~hideVerbose
0127 fprintf(['Running DIAMOND blastp with "' organismID{1} '" against "' modelIDs{i} '"..\n']);
0128 end
0129 [status, message]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" makedb --in "' refFastaFiles{i} '" --db "' fullfile(tmpDB) '"']);
0130 if status~=0
0131 EM=['DIAMOND makedb did not run successfully, error: ', num2str(status)];
0132 dispEM(EM,true);
0133 end
0134 [status, ~]=system(['"' fullfile(ravenPath,'software','diamond',['diamond' binEnd]) '" blastp --query "' fastaFile{1} '" --out "' outFile '_r' num2str(i) '" --db "' fullfile(tmpDB) '" --more-sensitive --outfmt 6 qseqid sseqid evalue pident length bitscore ppos --threads ' cores]);
0135 if developMode
0136 diamondReport.dbHashes{numel(diamondReport.dbHashes)+1} = char(regexp(message,'[a-f0-9]{32}','match'));
0137 diamondReport.diamondTxtOutput{numel(diamondReport.diamondTxtOutput)+1}=importdata([outFile '_r' num2str(i)]);
0138 end
0139 if status~=0
0140 EM=['DIAMOND blastp did not run successfully, error: ', num2str(status)];
0141 dispEM(EM,true);
0142 end
0143 delete([tmpDB filesep 'tmpDB*']);
0144 end
0145
0146
0147 for i=1:numel(refFastaFiles)*2
0148 tempStruct=[];
0149 if i<=numel(refFastaFiles)
0150 tempStruct.fromId=modelIDs{i};
0151 tempStruct.toId=organismID{1};
0152 A=readtable([outFile '_' num2str(i)],'Delimiter','\t','Format','%s%s%f%f%f%f%f');
0153 else
0154 tempStruct.fromId=organismID{1};
0155 tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
0156 A=readtable([outFile '_r' num2str(i-numel(refFastaFiles))],'Delimiter','\t','Format','%s%s%f%f%f%f%f');
0157 end
0158 tempStruct.fromGenes=A{:,1};
0159 tempStruct.toGenes=A{:,2};
0160 tempStruct.evalue=table2array(A(:,3));
0161 tempStruct.identity=table2array(A(:,4));
0162 tempStruct.aligLen=table2array(A(:,5));
0163 tempStruct.bitscore=table2array(A(:,6));
0164 tempStruct.ppos=table2array(A(:,7));
0165 blastStructure=[blastStructure tempStruct];
0166 end
0167
0168
0169 delete([outFile '*']);
0170
0171 delete(files{:})
0172 end