Home > external > combineMetaCycKEGGModels.m

combineMetaCycKEGGModels

PURPOSE ^

combineMetaCycKEGGModels

SYNOPSIS ^

function model=combineMetaCycKEGGModels(metacycModel,keggModel)

DESCRIPTION ^

 combineMetaCycKEGGModels
    Combine MetaCyc and KEGG draft models into one model structure.

    Input:
    metacycModel    the reconstructed model from MetaCyc
    keggModel       the reconstructed model from KEGG

    Output:
    model           a model structure generated by integrating information
                   from draft models reconstructed using MetaCyc and KEGG
                   databases. The 'rxnFrom/metFrom/geneFrom' fields are
                   included to indicate the source.

    Usage: model=combineMetaCycKEGGModels(metacycModel,keggModel)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function model=combineMetaCycKEGGModels(metacycModel,keggModel)
0002 % combineMetaCycKEGGModels
0003 %    Combine MetaCyc and KEGG draft models into one model structure.
0004 %
0005 %    Input:
0006 %    metacycModel    the reconstructed model from MetaCyc
0007 %    keggModel       the reconstructed model from KEGG
0008 %
0009 %    Output:
0010 %    model           a model structure generated by integrating information
0011 %                   from draft models reconstructed using MetaCyc and KEGG
0012 %                   databases. The 'rxnFrom/metFrom/geneFrom' fields are
0013 %                   included to indicate the source.
0014 %
0015 %    Usage: model=combineMetaCycKEGGModels(metacycModel,keggModel)
0016 
0017 %Just return the model
0018 if nargin<2
0019     disp('Missing input model');
0020     return;
0021 end
0022 
0023 %Add MetaCyc model as template
0024 model=metacycModel;
0025 model.id='COMBINED';
0026 model.name='Combined model from MetaCyc and KEGG draft models';
0027 
0028 %Use MetaCyc model as template
0029 model.rxnFrom=cell(numel(model.rxns),1);
0030 model.rxnFrom(:)={'MetaCyc'};
0031 model.metFrom=cell(numel(model.mets),1);
0032 model.metFrom(:)={'MetaCyc'};
0033 model.geneFrom=cell(numel(model.genes),1);
0034 model.geneFrom(:)={'MetaCyc'};
0035 model.grRulesKEGG=cell(numel(model.rxns),1);
0036 model.grRulesKEGG(:)={''};
0037 
0038 %Remove the S and rxnGeneMat matrix
0039 if isfield(model,'S')
0040     model=rmfield(model,'S');
0041 end
0042 if isfield(model,'rxnGeneMat')
0043     model=rmfield(model,'rxnGeneMat');
0044 end
0045 
0046 %Arrange the genes related fields
0047 
0048 %Identify the shared genes between KEGG and MetaCyc
0049 sharedGenes=intersect(keggModel.genes,model.genes);
0050 [~, b]=ismember(sharedGenes,model.genes);
0051 model.geneFrom(b)={'Both'};
0052 
0053 %Add unique genes from KEGG and update geneFrom
0054 C=setdiff(keggModel.genes,model.genes);
0055 model.genes=[model.genes;C];
0056 geneFrom=cell(numel(C),1);
0057 geneFrom(:)={'KEGG'};
0058 model.geneFrom=[model.geneFrom;geneFrom];
0059 
0060 %Prepare for matching grRules
0061 if isfield(keggModel,'grRules')
0062     keggModel.grRules=strrep(keggModel.grRules,'(','');
0063     keggModel.grRules=strrep(keggModel.grRules,')','');
0064 end
0065 
0066 %Replace rxns in KEGG model with the corresponding ones in MetaCyc by
0067 %updating values rxns field to MetaCyc version whenever possible
0068 
0069 %Collect the ones found in MetaCyc as mappedRxns
0070 mappedRxns=[];
0071 rxnsToMove=[];
0072 grRulesToMove={}; %Storing the grRules to be moved to MetaCyc model
0073 
0074 %Read in dbLinks, note that linkMetaCycKEGGRxns need to be run in advance
0075 load('metaCycRxns.mat');
0076 num=0;
0077 numToMove=0;
0078 %Loop through the KEGG model and replace the rxn id from KEGG to MetaCyc
0079 %version
0080 for i=1:numel(keggModel.rxns)
0081     [~, b]=ismember(rxnLinks.kegg,keggModel.rxns{i});
0082     I=find(b);
0083     
0084     if numel(I)==1
0085         %Find out the corresponding MetaCyc reactions
0086         num=num+1;
0087         %Record the mapped rxn index in KEGG model and remove them later
0088         mappedRxns=[mappedRxns,i];
0089         %keggModel.rxns{i}=rxnLinks.metacyc{I(1)};
0090         
0091         [c, d]=ismember(rxnLinks.metacyc{I(1)},model.rxns);
0092         if c
0093             model.rxnFrom{d}='Both';
0094             %Combine the grRule info Check if KEGG grRules equals or a
0095             %subset of MetaCyc grRules If not, save the KEGG grRules to a
0096             %new field grRulesKEGG for manual curation
0097             k=strfind(model.grRules{d},keggModel.grRules{i});
0098             if isempty(k)
0099                 if isempty(model.grRulesKEGG{d})
0100                     model.grRulesKEGG{d}=keggModel.grRules{i};
0101                 else
0102                     model.grRulesKEGG{d}=strcat(model.grRulesKEGG{d},{' or '},keggModel.grRules{i});
0103                 end
0104             end
0105         else
0106             %For the KEGG rxns have MetaCyc version but not in MetaCyc
0107             %draft model
0108             [~, y]=ismember(rxnLinks.metacyc{I(1)},metaCycRxns.rxns);
0109             
0110             %Check if this reaction is repitetive and save the grRules
0111             [Repeat, Index]=ismember(y,rxnsToMove);
0112             if ~Repeat
0113                 rxnsToMove=[rxnsToMove;y];  %Record rxns to be moved from KEGG to MetaCyc model
0114                 if isempty(keggModel.grRules{i})
0115                     %Rxns may have empty grRules (e.g. spontaneous) that
0116                     %are identified and added back here
0117                     grRulesToMove=[grRulesToMove;{''}];
0118                 else
0119                     grRulesToMove=[grRulesToMove;keggModel.grRules{i}];
0120                 end
0121                 numToMove=numToMove+1;
0122             else
0123                 %If this reaction has been recorded, append the grRules if
0124                 %it is different
0125                 k=strfind(grRulesToMove{Index},keggModel.grRules{i});
0126                 if isempty(k)
0127                     grRulesToMove{Index}=strcat(grRulesToMove{Index},{' or '},keggModel.grRules{i});
0128                 end
0129             end
0130         end
0131     else
0132         %Here are the case of one KEGG rxn id link to several MetaCyc ids
0133         %for j=2:numel(I) Ignore this issue for now, and resolve it later,
0134         %because it never happened This case can be solved by adding above
0135         %section into a loop below disp(I(j)); end
0136     end
0137 end
0138 fprintf('NOTE: A total of %d reactions in the KEGG model were mapped to MetaCyc.\n',num);
0139 fprintf('NOTE: %d reactions already in MetaCyc model, %d will be combined.\n',num-numToMove,numToMove);
0140 
0141 %Append mapped MetaCyc rxns in KEGG model that are absent from MetaCyc
0142 %model
0143 rxnFrom=cell(numel(rxnsToMove),1);
0144 rxnFrom(:)={'KEGG'};
0145 model.rxnFrom=[model.rxnFrom;rxnFrom];
0146 model.rxns=[model.rxns;metaCycRxns.rxns(rxnsToMove)];
0147 
0148 rxnNames=metaCycRxns.rxnNames(rxnsToMove);
0149 %Replace rxnNames with those from metaCycEnzymes
0150 load('metaCycEnzymes.mat');
0151 for i=1:numel(rxnsToMove)
0152     [a, b]=ismember(metaCycRxns.rxns{rxnsToMove(i)},metaCycEnzymes.rxns);
0153     if a
0154         rxnNames{i}=metaCycEnzymes.rxnNames{b};
0155     end
0156 end
0157 model.rxnNames=[model.rxnNames;rxnNames];
0158 
0159 model.eccodes=[model.eccodes;metaCycRxns.eccodes(rxnsToMove)];
0160 model.subSystems=[model.subSystems;metaCycRxns.subSystems(rxnsToMove)];
0161 model.rxnMiriams=[model.rxnMiriams;metaCycRxns.rxnMiriams(rxnsToMove)];
0162 model.rxnReferences=[model.rxnReferences;metaCycRxns.rxnReferences(rxnsToMove)];
0163 model.rev=[model.rev;metaCycRxns.rev(rxnsToMove)];
0164 model.equations=[model.equations;metaCycRxns.equations(rxnsToMove)];
0165 model.lb=[model.lb;metaCycRxns.lb(rxnsToMove)];
0166 model.ub=[model.ub;metaCycRxns.ub(rxnsToMove)];
0167 model.c=[model.c;metaCycRxns.c(rxnsToMove)];
0168 model.grRules=[model.grRules;grRulesToMove];
0169 %model.grRules=[model.grRules;keggModel.grRules(rxnsToMove(:,2))];
0170 %model.grRulesKEGG=[model.grRulesKEGG;keggModel.grRules(rxnsToMove(:,2))];
0171 rxnFields=cell(numel(rxnsToMove),1);
0172 rxnFields(:)={''};
0173 %model.rxnNotes=[model.rxnNotes;rxnFields];
0174 model.grRulesKEGG=[model.grRulesKEGG;rxnFields];
0175 
0176 %Remove all mapped reactions from KEGG model
0177 pureKeggModel=removeReactions(keggModel,mappedRxns,true,true);
0178 fprintf('NOTE: A global KEGG model with %d reactions and %d metabolites was obtained.\n',numel(pureKeggModel.rxns),numel(pureKeggModel.mets));
0179 
0180 %Replace mets information in KEGG model with the corresponding ones in
0181 %MetaCyc This includes updating all metabolite-related fields, except S
0182 %matrix.
0183 load('metaCycMets.mat');
0184 
0185 if ~isfield(pureKeggModel,'metCharges')
0186     pureKeggModel.metCharges=NaN(numel(pureKeggModel.mets),1);
0187 end
0188 
0189 num=0;
0190 for i=1:numel(pureKeggModel.mets)
0191     [a, b]=ismember(pureKeggModel.mets{i},metaCycMets.keggid);
0192     if a
0193         %For now only replace the met ids
0194         num=num+1;
0195         pureKeggModel.mets{i}=metaCycMets.mets{b};
0196         %Record the replaced mets into a vector for later
0197     end
0198 end
0199 fprintf('NOTE: A total of %d metabolites from the global KEGG model will be re-mapped to MetaCyc.\n',num);
0200 
0201 %Generate equations for pureKeggModel
0202 equationStrings=constructEquations(pureKeggModel,'',false,false,false,true);
0203 
0204 %Add the pure KEGG reactions to MetaCyc model
0205 rxnFrom=cell(numel(pureKeggModel.rxns),1);
0206 rxnFrom(:)={'KEGG'};
0207 model.rxnFrom=[model.rxnFrom;rxnFrom];
0208 model.rxns=[model.rxns;pureKeggModel.rxns];
0209 model.equations=[model.equations;equationStrings];
0210 model.rxnNames=[model.rxnNames;pureKeggModel.rxnNames];
0211 model.eccodes=[model.eccodes;pureKeggModel.eccodes];
0212 model.subSystems=[model.subSystems;pureKeggModel.subSystems];
0213 model.rxnMiriams=[model.rxnMiriams;pureKeggModel.rxnMiriams];
0214 model.rev=[model.rev;pureKeggModel.rev];
0215 model.lb=[model.lb;pureKeggModel.lb];
0216 model.ub=[model.ub;pureKeggModel.ub];
0217 model.c=[model.c;pureKeggModel.c];
0218 model.grRules=[model.grRules;pureKeggModel.grRules];
0219 rxnFields=cell(numel(pureKeggModel.rxns),1);
0220 rxnFields(:)={''};
0221 model.grRulesKEGG=[model.grRulesKEGG;rxnFields];
0222 model.rxnReferences=[model.rxnReferences;rxnFields];
0223 %model.rxnNotes=[model.rxnNotes;rxnFields];
0224 
0225 %It could also be that the metabolite and reaction names are empty for some
0226 %reasons. In that case, use the ID instead
0227 %I=cellfun(@isempty,model.metNames); model.metNames(I)=model.mets(I);
0228 I=cellfun(@isempty,model.rxnNames);
0229 model.rxnNames(I)=model.rxns(I);
0230 
0231 %Generate S matrix and mets
0232 [S, mets, ~]=constructS(model.equations);
0233 model.S=S;
0234 model.mets=mets;
0235 
0236 %Remap metabolite information
0237 if isfield(model,'metFrom')
0238     model=rmfield(model,'metFrom');
0239 end
0240 model.metFrom=cell(numel(model.mets),1);
0241 model.metFrom(:)={''};
0242 
0243 if isfield(model,'metNames')
0244     model=rmfield(model,'metNames');
0245 end
0246 model.metNames=cell(numel(model.mets),1);
0247 model.metNames(:)={''};
0248 
0249 if isfield(model,'metFormulas')
0250     model=rmfield(model,'metFormulas');
0251 end
0252 model.metFormulas=cell(numel(model.mets),1);
0253 model.metFormulas(:)={''};
0254 
0255 if ~isfield(model,'metCharges')
0256     model=rmfield(model,'metCharges');
0257 end
0258 model.metCharges=NaN(numel(model.mets),1);
0259 
0260 if ~isfield(model,'inchis')
0261     model=rmfield(model,'inchis');
0262 end
0263 model.inchis=cell(numel(model.mets),1);
0264 model.inchis(:)={''};
0265 
0266 if ~isfield(model,'metMiriams')
0267     model=rmfield(model,'metMiriams');
0268 end
0269 model.metMiriams=cell(numel(model.mets),1);
0270 model.metMiriams(:)={''};
0271 
0272 keggMets=getMetsFromKEGG();
0273 for i=1:numel(model.mets)
0274     [a, b]=ismember(model.mets{i},metaCycMets.mets);
0275     if a
0276         %Replace the met related info
0277         model.mets{i}=metaCycMets.mets{b};
0278         model.metNames{i}=metaCycMets.metNames{b};
0279         model.metFormulas{i}=metaCycMets.metFormulas{b};
0280         model.metMiriams{i}=metaCycMets.metMiriams{b};
0281         model.inchis{i}=metaCycMets.inchis{b};
0282         model.metCharges(i,1)=metaCycMets.metCharges(b,1);
0283         num=num+1;
0284     else
0285         [c, d]=ismember(model.mets{i},keggMets.mets);
0286         if c
0287             model.mets{i}=keggMets.mets{d};
0288             model.metNames{i}=keggMets.metNames{d};
0289             model.metFormulas{i}=keggMets.metFormulas{d};
0290             model.metMiriams{i}=keggMets.metMiriams{d};
0291             model.inchis{i}=keggMets.inchis{d};
0292             num=num+1;
0293         end
0294     end
0295     
0296 end
0297 
0298 %It could also be that the metabolite and reaction names are empty for some
0299 %reasons. In that case, use the ID instead
0300 I=cellfun(@isempty,model.metNames);
0301 model.metNames(I)=model.mets(I);
0302 
0303 %Add confidenceScores to model in a rough way
0304 if isfield(model,'rxnConfidenceScores')
0305     model=rmfield(model,'rxnConfidenceScores');
0306 end
0307 model.rxnConfidenceScores=NaN(numel(model.rxns),1);
0308 model.rxnConfidenceScores(:)=2;
0309 
0310 %Put all metabolites in one compartment called 's' (for system). This is
0311 %done just to be more compatible with the rest of the code
0312 model.comps={'s'};
0313 model.compNames={'System'};
0314 model.metComps=ones(numel(model.mets),1);
0315 model.b=zeros(numel(model.mets),1);
0316 
0317 %Create the new rxnGene matrix, get back to this later
0318 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes));
0319 
0320 end

Generated by m2html © 2005