0001 function [model, metacycBlastStruct] = getMetaCycModelForOrganism(organismID,fastaFile,...
0002     keepTransportRxns,keepUnbalanced,keepUndetermined,minScore,minPositives,useDiamond,metacycBlastStruct)
0003 
0004 
0005 
0006 
0007 
0008 
0009 
0010 
0011 
0012 
0013 
0014 
0015 
0016 
0017 
0018 
0019 
0020 
0021 
0022 
0023 
0024 
0025 
0026 
0027 
0028 
0029 
0030 
0031 
0032 
0033 
0034 
0035 
0036 
0037 
0038 
0039 
0040 organismID=char(organismID);
0041 if nargin<2
0042     EM='No query protein fasta file is specified';
0043     dispEM(EM);
0044 else
0045     fastaFile=char(fastaFile);
0046 end
0047 if nargin<3
0048     keepTransportRxns=false;
0049 end
0050 if nargin<4
0051     keepUnbalanced=false;
0052 end
0053 if nargin<5
0054     keepUndetermined=false;
0055 end
0056 if nargin<6
0057     minScore=100;
0058 end
0059 if nargin<7
0060     minPositives=45;
0061 end
0062 if nargin<8
0063     useDiamond=true;
0064 end
0065 if nargin<9
0066     metacycBlastStruct=[];
0067 end
0068 
0069 if isempty(fastaFile)
0070     error('*** The query FASTA filename cannot be empty! ***');
0071 else
0072     fprintf('\nChecking existence of query FASTA file... ');
0073     
0074     fastaFile=checkFileExistence(fastaFile,2); 
0075     fprintf('done\n');
0076 end
0077 
0078 
0079 metaCycModel=getModelFromMetaCyc([],keepTransportRxns,keepUnbalanced,keepUndetermined);
0080 fprintf('The full MetaCyc model loaded\n');
0081 
0082 
0083 model.id=organismID;
0084 model.name='Generated by homology with MetaCyc database';
0085 model.rxns=metaCycModel.rxns;
0086 model.rxnNames=metaCycModel.rxnNames;
0087 model.eccodes=metaCycModel.eccodes;
0088 model.subSystems=metaCycModel.subSystems;
0089 model.rxnMiriams=metaCycModel.rxnMiriams;
0090 model.rxnReferences=metaCycModel.rxnReferences;
0091 model.lb=metaCycModel.lb;
0092 model.ub=metaCycModel.ub;
0093 model.rev=metaCycModel.rev;
0094 model.c=metaCycModel.c;
0095 model.equations=metaCycModel.equations;
0096 
0097 
0098 ravenPath=findRAVENroot();
0099 
0100 
0101 if isempty(metacycBlastStruct)
0102     if useDiamond
0103         blastStruc=getDiamond(organismID,fastaFile,{'MetaCyc'},fullfile(ravenPath,'external','metacyc','protseq.fsa'));
0104     else
0105         blastStruc=getBlast(organismID,fastaFile,{'MetaCyc'},fullfile(ravenPath,'external','metacyc','protseq.fsa'));
0106     end
0107     
0108     blastStructure=blastStruc(2);
0109     metacycBlastStruct=blastStructure;
0110 else
0111     blastStructure=metacycBlastStruct;
0112 end
0113 
0114 
0115 indexes=blastStructure.bitscore>=minScore & blastStructure.ppos>=minPositives;
0116 blastStructure.toGenes(~indexes)=[];
0117 blastStructure.fromGenes(~indexes)=[];
0118 blastStructure.evalue(~indexes)=[];
0119 blastStructure.aligLen(~indexes)=[];
0120 blastStructure.identity(~indexes)=[];
0121 blastStructure.bitscore(~indexes)=[];
0122 blastStructure.ppos(~indexes)=[];
0123 fprintf('Completed searching against MetaCyc protein sequences.\n');
0124 
0125 
0126 model.genes=cell(100000,1);
0127 model.proteins=cell(100000,1);
0128 model.bitscore=zeros(100000,1);
0129 model.ppos=zeros(100000,1);
0130 num=1;
0131 
0132 
0133 for i=1:numel(blastStructure.fromGenes)
0134     if num==1
0135         model.genes(num)=blastStructure.fromGenes(i);
0136         model.proteins(num)=strrep(blastStructure.toGenes(i), 'gnl|META|', '');
0137         model.bitscore(num)=blastStructure.bitscore(i);
0138         model.ppos(num)=blastStructure.ppos(i);
0139         num=num+1;
0140         lastGene=blastStructure.fromGenes(i);
0141     else
0142         if ~isequal(lastGene,blastStructure.fromGenes(i))
0143             model.genes(num)=blastStructure.fromGenes(i);
0144             model.proteins(num)=strrep(blastStructure.toGenes(i), 'gnl|META|', '');
0145             model.bitscore(num)=blastStructure.bitscore(i);
0146             model.ppos(num)=blastStructure.ppos(i);
0147             num=num+1;
0148             lastGene=blastStructure.fromGenes(i);
0149         else
0150             if model.bitscore(num)<blastStructure.bitscore(i)
0151                 model.bitscore(num)=blastStructure.bitscore(i);
0152                 model.proteins(num)=strrep(blastStructure.toGenes(i), 'gnl|META|', '');
0153                 model.ppos(num)=blastStructure.ppos(i);
0154             end
0155         end
0156     end
0157 end
0158 model.genes=model.genes(1:num-1);
0159 model.proteins=model.proteins(1:num-1);
0160 model.bitscore=model.bitscore(1:num-1);
0161 model.ppos=model.ppos(1:num-1);
0162 
0163 
0164 
0165 [hits, indexes]=ismember(model.proteins,metaCycModel.genes);
0166 found = find(hits);
0167 model.genes=model.genes(found);
0168 
0169 
0170 model.rxnGeneMat=sparse(metaCycModel.rxnGeneMat(:,indexes(found)));
0171 
0172 
0173 hasGenes=any(model.rxnGeneMat,2);
0174 model=removeReactions(model,~hasGenes,true);
0175 
0176 
0177 
0178 rxnNum=numel(model.rxns);
0179 model.rxnConfidenceScores=NaN(rxnNum,1);
0180 model.rxnConfidenceScores(:)=2;
0181 model.grRules=cell(rxnNum,1);
0182 
0183 for j=1:rxnNum
0184     model.grRules{j}='';
0185     I=find(model.rxnGeneMat(j,:));
0186     for k=1:numel(I)
0187         if isempty(model.grRules{j})
0188             model.grRules(j)=model.genes(I(k));
0189             
0190         else
0191             model.grRules(j)=strcat(model.grRules(j),{' or '},model.genes(I(k)));
0192             
0193             
0194         end
0195     end
0196 end
0197 
0198 model.genes=model.genes(any(model.rxnGeneMat));
0199 
0200 
0201 [S, mets, badRxns]=constructS(model.equations);
0202 model.S=S;
0203 model.mets=mets;
0204 
0205 
0206 
0207 
0208 metaCycMets=getMetsFromMetaCyc([]);
0209 
0210 
0211 [a, b]=ismember(model.mets,metaCycMets.mets);
0212 a=find(a);
0213 b=b(a);
0214 
0215 if ~isfield(model,'metNames')
0216     model.metNames=cell(numel(model.mets),1);
0217     model.metNames(:)={''};
0218 end
0219 model.metNames(a)=metaCycMets.metNames(b);
0220 
0221 if ~isfield(model,'metFormulas')
0222     model.metFormulas=cell(numel(model.mets),1);
0223     model.metFormulas(:)={''};
0224 end
0225 model.metFormulas(a)=metaCycMets.metFormulas(b);
0226 
0227 if ~isfield(model,'metCharges')
0228     model.metCharges=zeros(numel(model.mets),1);
0229 end
0230 model.metCharges(a)=metaCycMets.metCharges(b);
0231 
0232 if ~isfield(model,'b')
0233     model.b=zeros(numel(model.mets),1);
0234 end
0235 
0236 
0237 if ~isfield(model,'inchis')
0238     model.inchis=cell(numel(model.mets),1);
0239     model.inchis(:)={''};
0240 end
0241 model.inchis(a)=metaCycMets.inchis(b);
0242 
0243 if ~isfield(model,'metMiriams')
0244     model.metMiriams=cell(numel(model.mets),1);
0245 end
0246 model.metMiriams(a)=metaCycMets.metMiriams(b);
0247 
0248 
0249 
0250 model.comps={'s'};
0251 model.compNames={'System'};
0252 model.metComps=ones(numel(model.mets),1);
0253 
0254 
0255 
0256 I=cellfun(@isempty,model.metNames);
0257 model.metNames(I)=model.mets(I);
0258 
0259 
0260 model=rmfield(model,{'proteins','bitscore','ppos'});
0261 
0262 
0263 [grRules,rxnGeneMat] = standardizeGrRules(model,false); 
0264 model.grRules = grRules;
0265 model.rxnGeneMat = rxnGeneMat;
0266 
0267 delete(fastaFile)
0268 end