0001 function [model, metacycBlastStruct] = getMetaCycModelForOrganism(organismID,fastaFile,...
0002 keepTransportRxns,keepUnbalanced,keepUndetermined,minScore,minPositives,useDiamond,metacycBlastStruct)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040 organismID=char(organismID);
0041 if nargin<2
0042 EM='No query protein fasta file is specified';
0043 dispEM(EM);
0044 else
0045 fastaFile=char(fastaFile);
0046 end
0047 if nargin<3
0048 keepTransportRxns=false;
0049 end
0050 if nargin<4
0051 keepUnbalanced=false;
0052 end
0053 if nargin<5
0054 keepUndetermined=false;
0055 end
0056 if nargin<6
0057 minScore=100;
0058 end
0059 if nargin<7
0060 minPositives=45;
0061 end
0062 if nargin<8
0063 useDiamond=true;
0064 end
0065 if nargin<9
0066 metacycBlastStruct=[];
0067 end
0068
0069 if isempty(fastaFile)
0070 error('*** The query FASTA filename cannot be empty! ***');
0071 else
0072 fprintf('\nChecking existence of query FASTA file... ');
0073
0074 fastaFile=checkFileExistence(fastaFile,2);
0075 fprintf('done\n');
0076 end
0077
0078
0079 metaCycModel=getModelFromMetaCyc([],keepTransportRxns,keepUnbalanced,keepUndetermined);
0080 fprintf('The full MetaCyc model loaded\n');
0081
0082
0083 model.id=organismID;
0084 model.name='Generated by homology with MetaCyc database';
0085 model.rxns=metaCycModel.rxns;
0086 model.rxnNames=metaCycModel.rxnNames;
0087 model.eccodes=metaCycModel.eccodes;
0088 model.subSystems=metaCycModel.subSystems;
0089 model.rxnMiriams=metaCycModel.rxnMiriams;
0090 model.rxnReferences=metaCycModel.rxnReferences;
0091 model.lb=metaCycModel.lb;
0092 model.ub=metaCycModel.ub;
0093 model.rev=metaCycModel.rev;
0094 model.c=metaCycModel.c;
0095 model.equations=metaCycModel.equations;
0096
0097
0098 ravenPath=findRAVENroot();
0099
0100
0101 if isempty(metacycBlastStruct)
0102 if useDiamond
0103 blastStruc=getDiamond(organismID,fastaFile,{'MetaCyc'},fullfile(ravenPath,'external','metacyc','protseq.fsa'));
0104 else
0105 blastStruc=getBlast(organismID,fastaFile,{'MetaCyc'},fullfile(ravenPath,'external','metacyc','protseq.fsa'));
0106 end
0107
0108 blastStructure=blastStruc(2);
0109 metacycBlastStruct=blastStructure;
0110 else
0111 blastStructure=metacycBlastStruct;
0112 end
0113
0114
0115 indexes=blastStructure.bitscore>=minScore & blastStructure.ppos>=minPositives;
0116 blastStructure.toGenes(~indexes)=[];
0117 blastStructure.fromGenes(~indexes)=[];
0118 blastStructure.evalue(~indexes)=[];
0119 blastStructure.aligLen(~indexes)=[];
0120 blastStructure.identity(~indexes)=[];
0121 blastStructure.bitscore(~indexes)=[];
0122 blastStructure.ppos(~indexes)=[];
0123 fprintf('Completed searching against MetaCyc protein sequences.\n');
0124
0125
0126 model.genes=cell(100000,1);
0127 model.proteins=cell(100000,1);
0128 model.bitscore=zeros(100000,1);
0129 model.ppos=zeros(100000,1);
0130 num=1;
0131
0132
0133 for i=1:numel(blastStructure.fromGenes)
0134 if num==1
0135 model.genes(num)=blastStructure.fromGenes(i);
0136 model.proteins(num)=strrep(blastStructure.toGenes(i), 'gnl|META|', '');
0137 model.bitscore(num)=blastStructure.bitscore(i);
0138 model.ppos(num)=blastStructure.ppos(i);
0139 num=num+1;
0140 lastGene=blastStructure.fromGenes(i);
0141 else
0142 if ~isequal(lastGene,blastStructure.fromGenes(i))
0143 model.genes(num)=blastStructure.fromGenes(i);
0144 model.proteins(num)=strrep(blastStructure.toGenes(i), 'gnl|META|', '');
0145 model.bitscore(num)=blastStructure.bitscore(i);
0146 model.ppos(num)=blastStructure.ppos(i);
0147 num=num+1;
0148 lastGene=blastStructure.fromGenes(i);
0149 else
0150 if model.bitscore(num)<blastStructure.bitscore(i)
0151 model.bitscore(num)=blastStructure.bitscore(i);
0152 model.proteins(num)=strrep(blastStructure.toGenes(i), 'gnl|META|', '');
0153 model.ppos(num)=blastStructure.ppos(i);
0154 end
0155 end
0156 end
0157 end
0158 model.genes=model.genes(1:num-1);
0159 model.proteins=model.proteins(1:num-1);
0160 model.bitscore=model.bitscore(1:num-1);
0161 model.ppos=model.ppos(1:num-1);
0162
0163
0164
0165 [hits, indexes]=ismember(model.proteins,metaCycModel.genes);
0166 found = find(hits);
0167 model.genes=model.genes(found);
0168
0169
0170 model.rxnGeneMat=sparse(metaCycModel.rxnGeneMat(:,indexes(found)));
0171
0172
0173 hasGenes=any(model.rxnGeneMat,2);
0174 model=removeReactions(model,~hasGenes,true);
0175
0176
0177
0178 rxnNum=numel(model.rxns);
0179 model.rxnConfidenceScores=NaN(rxnNum,1);
0180 model.rxnConfidenceScores(:)=2;
0181 model.grRules=cell(rxnNum,1);
0182
0183 for j=1:rxnNum
0184 model.grRules{j}='';
0185 I=find(model.rxnGeneMat(j,:));
0186 for k=1:numel(I)
0187 if isempty(model.grRules{j})
0188 model.grRules(j)=model.genes(I(k));
0189
0190 else
0191 model.grRules(j)=strcat(model.grRules(j),{' or '},model.genes(I(k)));
0192
0193
0194 end
0195 end
0196 end
0197
0198 model.genes=model.genes(any(model.rxnGeneMat));
0199
0200
0201 [S, mets, badRxns]=constructS(model.equations);
0202 model.S=S;
0203 model.mets=mets;
0204
0205
0206
0207
0208 metaCycMets=getMetsFromMetaCyc([]);
0209
0210
0211 [a, b]=ismember(model.mets,metaCycMets.mets);
0212 a=find(a);
0213 b=b(a);
0214
0215 if ~isfield(model,'metNames')
0216 model.metNames=cell(numel(model.mets),1);
0217 model.metNames(:)={''};
0218 end
0219 model.metNames(a)=metaCycMets.metNames(b);
0220
0221 if ~isfield(model,'metFormulas')
0222 model.metFormulas=cell(numel(model.mets),1);
0223 model.metFormulas(:)={''};
0224 end
0225 model.metFormulas(a)=metaCycMets.metFormulas(b);
0226
0227 if ~isfield(model,'metCharges')
0228 model.metCharges=zeros(numel(model.mets),1);
0229 end
0230 model.metCharges(a)=metaCycMets.metCharges(b);
0231
0232 if ~isfield(model,'b')
0233 model.b=zeros(numel(model.mets),1);
0234 end
0235
0236
0237 if ~isfield(model,'inchis')
0238 model.inchis=cell(numel(model.mets),1);
0239 model.inchis(:)={''};
0240 end
0241 model.inchis(a)=metaCycMets.inchis(b);
0242
0243 if ~isfield(model,'metMiriams')
0244 model.metMiriams=cell(numel(model.mets),1);
0245 end
0246 model.metMiriams(a)=metaCycMets.metMiriams(b);
0247
0248
0249
0250 model.comps={'s'};
0251 model.compNames={'System'};
0252 model.metComps=ones(numel(model.mets),1);
0253
0254
0255
0256 I=cellfun(@isempty,model.metNames);
0257 model.metNames(I)=model.mets(I);
0258
0259
0260 model=rmfield(model,{'proteins','bitscore','ppos'});
0261
0262
0263 [grRules,rxnGeneMat] = standardizeGrRules(model,false);
0264 model.grRules = grRules;
0265 model.rxnGeneMat = rxnGeneMat;
0266
0267 delete(fastaFile)
0268 end