0001 function model=combineMetaCycKEGGModels(metacycModel,keggModel)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 if nargin<2
0019 disp('Missing input model');
0020 return;
0021 end
0022
0023
0024 model=metacycModel;
0025 model.id='COMBINED';
0026 model.name='Combined model from MetaCyc and KEGG draft models';
0027
0028
0029 model.rxnFrom=cell(numel(model.rxns),1);
0030 model.rxnFrom(:)={'MetaCyc'};
0031 model.metFrom=cell(numel(model.mets),1);
0032 model.metFrom(:)={'MetaCyc'};
0033 model.geneFrom=cell(numel(model.genes),1);
0034 model.geneFrom(:)={'MetaCyc'};
0035 model.grRulesKEGG=cell(numel(model.rxns),1);
0036 model.grRulesKEGG(:)={''};
0037
0038
0039 if isfield(model,'S')
0040 model=rmfield(model,'S');
0041 end
0042 if isfield(model,'rxnGeneMat')
0043 model=rmfield(model,'rxnGeneMat');
0044 end
0045
0046
0047
0048
0049 sharedGenes=intersect(keggModel.genes,model.genes);
0050 [~, b]=ismember(sharedGenes,model.genes);
0051 model.geneFrom(b)={'Both'};
0052
0053
0054 C=setdiff(keggModel.genes,model.genes);
0055 model.genes=[model.genes;C];
0056 geneFrom=cell(numel(C),1);
0057 geneFrom(:)={'KEGG'};
0058 model.geneFrom=[model.geneFrom;geneFrom];
0059
0060
0061 if isfield(keggModel,'grRules')
0062 keggModel.grRules=strrep(keggModel.grRules,'(','');
0063 keggModel.grRules=strrep(keggModel.grRules,')','');
0064 end
0065
0066
0067
0068
0069
0070 mappedRxns=[];
0071 rxnsToMove=[];
0072 grRulesToMove={};
0073
0074
0075 load('metaCycRxns.mat');
0076 num=0;
0077 numToMove=0;
0078
0079
0080 for i=1:numel(keggModel.rxns)
0081 [~, b]=ismember(rxnLinks.kegg,keggModel.rxns{i});
0082 I=find(b);
0083
0084 if numel(I)==1
0085
0086 num=num+1;
0087
0088 mappedRxns=[mappedRxns,i];
0089
0090
0091 [c, d]=ismember(rxnLinks.metacyc{I(1)},model.rxns);
0092 if c
0093 model.rxnFrom{d}='Both';
0094
0095
0096
0097 k=strfind(model.grRules{d},keggModel.grRules{i});
0098 if isempty(k)
0099 if isempty(model.grRulesKEGG{d})
0100 model.grRulesKEGG{d}=keggModel.grRules{i};
0101 else
0102 model.grRulesKEGG{d}=strcat(model.grRulesKEGG{d},{' or '},keggModel.grRules{i});
0103 end
0104 end
0105 else
0106
0107
0108 [~, y]=ismember(rxnLinks.metacyc{I(1)},metaCycRxns.rxns);
0109
0110
0111 [Repeat, Index]=ismember(y,rxnsToMove);
0112 if ~Repeat
0113 rxnsToMove=[rxnsToMove;y];
0114 if isempty(keggModel.grRules{i})
0115
0116
0117 grRulesToMove=[grRulesToMove;{''}];
0118 else
0119 grRulesToMove=[grRulesToMove;keggModel.grRules{i}];
0120 end
0121 numToMove=numToMove+1;
0122 else
0123
0124
0125 k=strfind(grRulesToMove{Index},keggModel.grRules{i});
0126 if isempty(k)
0127 grRulesToMove{Index}=strcat(grRulesToMove{Index},{' or '},keggModel.grRules{i});
0128 end
0129 end
0130 end
0131 else
0132
0133
0134
0135
0136 end
0137 end
0138 fprintf('NOTE: A total of %d reactions in the KEGG model were mapped to MetaCyc.\n',num);
0139 fprintf('NOTE: %d reactions already in MetaCyc model, %d will be combined.\n',num-numToMove,numToMove);
0140
0141
0142
0143 rxnFrom=cell(numel(rxnsToMove),1);
0144 rxnFrom(:)={'KEGG'};
0145 model.rxnFrom=[model.rxnFrom;rxnFrom];
0146 model.rxns=[model.rxns;metaCycRxns.rxns(rxnsToMove)];
0147
0148 rxnNames=metaCycRxns.rxnNames(rxnsToMove);
0149
0150 load('metaCycEnzymes.mat');
0151 for i=1:numel(rxnsToMove)
0152 [a, b]=ismember(metaCycRxns.rxns{rxnsToMove(i)},metaCycEnzymes.rxns);
0153 if a
0154 rxnNames{i}=metaCycEnzymes.rxnNames{b};
0155 end
0156 end
0157 model.rxnNames=[model.rxnNames;rxnNames];
0158
0159 model.eccodes=[model.eccodes;metaCycRxns.eccodes(rxnsToMove)];
0160 model.subSystems=[model.subSystems;metaCycRxns.subSystems(rxnsToMove)];
0161 model.rxnMiriams=[model.rxnMiriams;metaCycRxns.rxnMiriams(rxnsToMove)];
0162 model.rxnReferences=[model.rxnReferences;metaCycRxns.rxnReferences(rxnsToMove)];
0163 model.rev=[model.rev;metaCycRxns.rev(rxnsToMove)];
0164 model.equations=[model.equations;metaCycRxns.equations(rxnsToMove)];
0165 model.lb=[model.lb;metaCycRxns.lb(rxnsToMove)];
0166 model.ub=[model.ub;metaCycRxns.ub(rxnsToMove)];
0167 model.c=[model.c;metaCycRxns.c(rxnsToMove)];
0168 model.grRules=[model.grRules;grRulesToMove];
0169
0170
0171 rxnFields=cell(numel(rxnsToMove),1);
0172 rxnFields(:)={''};
0173
0174 model.grRulesKEGG=[model.grRulesKEGG;rxnFields];
0175
0176
0177 pureKeggModel=removeReactions(keggModel,mappedRxns,true,true);
0178 fprintf('NOTE: A global KEGG model with %d reactions and %d metabolites was obtained.\n',numel(pureKeggModel.rxns),numel(pureKeggModel.mets));
0179
0180
0181
0182
0183 load('metaCycMets.mat');
0184
0185 if ~isfield(pureKeggModel,'metCharges')
0186 pureKeggModel.metCharges=NaN(numel(pureKeggModel.mets),1);
0187 end
0188
0189 num=0;
0190 for i=1:numel(pureKeggModel.mets)
0191 [a, b]=ismember(pureKeggModel.mets{i},metaCycMets.keggid);
0192 if a
0193
0194 num=num+1;
0195 pureKeggModel.mets{i}=metaCycMets.mets{b};
0196
0197 end
0198 end
0199 fprintf('NOTE: A total of %d metabolites from the global KEGG model will be re-mapped to MetaCyc.\n',num);
0200
0201
0202 equationStrings=constructEquations(pureKeggModel,'',false,false,false,true);
0203
0204
0205 rxnFrom=cell(numel(pureKeggModel.rxns),1);
0206 rxnFrom(:)={'KEGG'};
0207 model.rxnFrom=[model.rxnFrom;rxnFrom];
0208 model.rxns=[model.rxns;pureKeggModel.rxns];
0209 model.equations=[model.equations;equationStrings];
0210 model.rxnNames=[model.rxnNames;pureKeggModel.rxnNames];
0211 model.eccodes=[model.eccodes;pureKeggModel.eccodes];
0212 model.subSystems=[model.subSystems;pureKeggModel.subSystems];
0213 model.rxnMiriams=[model.rxnMiriams;pureKeggModel.rxnMiriams];
0214 model.rev=[model.rev;pureKeggModel.rev];
0215 model.lb=[model.lb;pureKeggModel.lb];
0216 model.ub=[model.ub;pureKeggModel.ub];
0217 model.c=[model.c;pureKeggModel.c];
0218 model.grRules=[model.grRules;pureKeggModel.grRules];
0219 rxnFields=cell(numel(pureKeggModel.rxns),1);
0220 rxnFields(:)={''};
0221 model.grRulesKEGG=[model.grRulesKEGG;rxnFields];
0222 model.rxnReferences=[model.rxnReferences;rxnFields];
0223
0224
0225
0226
0227
0228 I=cellfun(@isempty,model.rxnNames);
0229 model.rxnNames(I)=model.rxns(I);
0230
0231
0232 [S, mets, ~]=constructS(model.equations);
0233 model.S=S;
0234 model.mets=mets;
0235
0236
0237 if isfield(model,'metFrom')
0238 model=rmfield(model,'metFrom');
0239 end
0240 model.metFrom=cell(numel(model.mets),1);
0241 model.metFrom(:)={''};
0242
0243 if isfield(model,'metNames')
0244 model=rmfield(model,'metNames');
0245 end
0246 model.metNames=cell(numel(model.mets),1);
0247 model.metNames(:)={''};
0248
0249 if isfield(model,'metFormulas')
0250 model=rmfield(model,'metFormulas');
0251 end
0252 model.metFormulas=cell(numel(model.mets),1);
0253 model.metFormulas(:)={''};
0254
0255 if ~isfield(model,'metCharges')
0256 model=rmfield(model,'metCharges');
0257 end
0258 model.metCharges=NaN(numel(model.mets),1);
0259
0260 if ~isfield(model,'inchis')
0261 model=rmfield(model,'inchis');
0262 end
0263 model.inchis=cell(numel(model.mets),1);
0264 model.inchis(:)={''};
0265
0266 if ~isfield(model,'metMiriams')
0267 model=rmfield(model,'metMiriams');
0268 end
0269 model.metMiriams=cell(numel(model.mets),1);
0270 model.metMiriams(:)={''};
0271
0272 keggMets=getMetsFromKEGG();
0273 for i=1:numel(model.mets)
0274 [a, b]=ismember(model.mets{i},metaCycMets.mets);
0275 if a
0276
0277 model.mets{i}=metaCycMets.mets{b};
0278 model.metNames{i}=metaCycMets.metNames{b};
0279 model.metFormulas{i}=metaCycMets.metFormulas{b};
0280 model.metMiriams{i}=metaCycMets.metMiriams{b};
0281 model.inchis{i}=metaCycMets.inchis{b};
0282 model.metCharges(i,1)=metaCycMets.metCharges(b,1);
0283 num=num+1;
0284 else
0285 [c, d]=ismember(model.mets{i},keggMets.mets);
0286 if c
0287 model.mets{i}=keggMets.mets{d};
0288 model.metNames{i}=keggMets.metNames{d};
0289 model.metFormulas{i}=keggMets.metFormulas{d};
0290 model.metMiriams{i}=keggMets.metMiriams{d};
0291 model.inchis{i}=keggMets.inchis{d};
0292 num=num+1;
0293 end
0294 end
0295
0296 end
0297
0298
0299
0300 I=cellfun(@isempty,model.metNames);
0301 model.metNames(I)=model.mets(I);
0302
0303
0304 if isfield(model,'rxnConfidenceScores')
0305 model=rmfield(model,'rxnConfidenceScores');
0306 end
0307 model.rxnConfidenceScores=NaN(numel(model.rxns),1);
0308 model.rxnConfidenceScores(:)=2;
0309
0310
0311
0312 model.comps={'s'};
0313 model.compNames={'System'};
0314 model.metComps=ones(numel(model.mets),1);
0315 model.b=zeros(numel(model.mets),1);
0316
0317
0318 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes));
0319
0320 end