Home > struct_conversion > ravenCobraWrapper.m

ravenCobraWrapper

PURPOSE ^

ravenCobraWrapper

SYNOPSIS ^

function newModel=ravenCobraWrapper(model)

DESCRIPTION ^

 ravenCobraWrapper
   Converts between RAVEN and COBRA structures

   Input: model          a RAVEN/COBRA-compatible model structure

   Ouput: newModel       a COBRA/RAVEN-compatible model structure
   
   This function is a bidirectional tool to convert between RAVEN and
   COBRA structures. It recognises COBRA structure by checking field
   'rules' existense, which is only found in COBRA Toolbox structure. If
   the COBRA model also has a grRules field, then this will be used
   instead of parsing the rules field.

   NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following
   fields are lost: annotation, compOutside, compMiriams, rxnComps,
   geneComps, unconstrained. Boundary metabolites are lost, because COBRA
   structure does not involve boundary metabolites, so they are removed
   using simplifyModel before RAVEN -> COBRA conversion. The field 'rev'
   is also partially lost, but during COBRA -> RAVEN conversion it's
   reconstructed based on lower bound reaction values

   NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following
   fields are lost: geneEntrezID, modelVersion, proteins

   NOTE: The information about mandatory RAVEN fields was taken from
   checkModelStruct function, whereas the corresponding information about
   COBRA fields was fetched from verifyModel function

 Usage: newModel=ravenCobraWrapper(model)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function newModel=ravenCobraWrapper(model)
0002 % ravenCobraWrapper
0003 %   Converts between RAVEN and COBRA structures
0004 %
0005 %   Input: model          a RAVEN/COBRA-compatible model structure
0006 %
0007 %   Ouput: newModel       a COBRA/RAVEN-compatible model structure
0008 %
0009 %   This function is a bidirectional tool to convert between RAVEN and
0010 %   COBRA structures. It recognises COBRA structure by checking field
0011 %   'rules' existense, which is only found in COBRA Toolbox structure. If
0012 %   the COBRA model also has a grRules field, then this will be used
0013 %   instead of parsing the rules field.
0014 %
0015 %   NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following
0016 %   fields are lost: annotation, compOutside, compMiriams, rxnComps,
0017 %   geneComps, unconstrained. Boundary metabolites are lost, because COBRA
0018 %   structure does not involve boundary metabolites, so they are removed
0019 %   using simplifyModel before RAVEN -> COBRA conversion. The field 'rev'
0020 %   is also partially lost, but during COBRA -> RAVEN conversion it's
0021 %   reconstructed based on lower bound reaction values
0022 %
0023 %   NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following
0024 %   fields are lost: geneEntrezID, modelVersion, proteins
0025 %
0026 %   NOTE: The information about mandatory RAVEN fields was taken from
0027 %   checkModelStruct function, whereas the corresponding information about
0028 %   COBRA fields was fetched from verifyModel function
0029 %
0030 % Usage: newModel=ravenCobraWrapper(model)
0031 
0032 if isfield(model,'rules')
0033     isRaven=false;
0034 else
0035     isRaven=true;
0036 end
0037 
0038 ravenPath=findRAVENroot();
0039 
0040 % Load COBRA field information
0041 fid             = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv
0042 fieldFile       = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1);
0043 dbFields        = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams
0044 dbFields        = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'});
0045 COBRAnamespace  = fieldFile{5}(dbFields);
0046 COBRAnamespace  = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace
0047 COBRAfields     = fieldFile{1}(dbFields);
0048 fclose(fid);
0049 
0050 % Load conversion between additional COBRA fields and namespaces:
0051 fid             = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv'));
0052 fieldFile       = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0);
0053 COBRAfields     = [COBRAfields; fieldFile{1}];
0054 COBRAnamespace  = [COBRAnamespace; fieldFile{2}];
0055 rxnCOBRAfields  = COBRAfields(startsWith(COBRAfields,'rxn'));
0056 rxnNamespaces   = COBRAnamespace(startsWith(COBRAfields,'rxn'));
0057 metCOBRAfields  = COBRAfields(startsWith(COBRAfields,'met'));
0058 metNamespaces   = COBRAnamespace(startsWith(COBRAfields,'met'));
0059 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene'));
0060 geneNamespaces  = COBRAnamespace(startsWith(COBRAfields,'gene'));
0061 fclose(fid);
0062 
0063 if isRaven
0064     %Firstly remove boundary metabolites
0065     model=simplifyModel(model);
0066 end
0067 
0068 % Keep fields that have identical names and content
0069 newModel.S=model.S;
0070 newModel.lb=model.lb;
0071 newModel.ub=model.ub;
0072 if isfield(model,'c')
0073     newModel.c=model.c;
0074 else
0075     newModel.c=zeros(numel(model.rxns),1);
0076 end
0077 newModel.rxns=model.rxns;
0078 optFields = {'rxnNames','rxnNotes','rxnConfidenceScores','rxnDeltaG',...
0079     'rxnGeneMat','rev','subSystems','comps','compNames','metCharges',...
0080     'metDeltaG','metFormulas','metNotes','metSmiles','genes','proteins'};
0081 for i=1:length(optFields)
0082     if isfield(model,optFields{i})
0083         newModel.(optFields{i})=model.(optFields{i});
0084     end
0085 end
0086     
0087 % Convert unique fields
0088 if isRaven
0089     fprintf('Converting RAVEN structure to COBRA..\n');
0090     %Convert from RAVEN to COBRA structure
0091     
0092     %Mandatory COBRA fields
0093     newModel.rxns=model.rxns;
0094     if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$')))
0095         newModel.mets=model.mets;
0096     else
0097         %Check if model has compartment info as "met_c" suffix in all metabolites:
0098         BiGGformat = false(size(model.mets));
0099         for i=1:numel(model.comps)
0100             compPos=model.metComps==i;
0101             BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$']));
0102         end
0103         if all(BiGGformat)
0104             newModel.mets=model.mets;
0105             for i=1:numel(model.comps)
0106                 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']);
0107             end
0108         else
0109             newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']');
0110         end
0111     end
0112 
0113     %b, csense, osenseStr, genes, rules are also mandatory, but defined
0114     %later to match the order of fields
0115     
0116     %Optional COBRA fields
0117     if isfield(model,'id')
0118         newModel.modelID=model.id;
0119     end
0120     if isfield(model,'name')
0121         newModel.modelName=model.name;
0122     end
0123     if isfield(model,'eccodes')
0124         newModel.rxnECNumbers=model.eccodes;
0125     end
0126     if isfield(model,'rxnMiriams')
0127         [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams);
0128         for i = 1:length(rxnCOBRAfields)
0129             j=ismember(extractedMiriamNames,rxnNamespaces{i});
0130             if any(j)
0131                 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);'])
0132             end
0133         end
0134     end
0135     if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams
0136         if isfield(newModel,'rxnReferences')
0137             newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences);
0138             newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $','');
0139         else
0140             newModel.rxnReferences = model.rxnReferences;
0141         end
0142     end
0143     if isfield(model,'metNames')
0144         newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']');
0145     end
0146     if isfield(model,'metMiriams')
0147         [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams);
0148         %Shorten miriam names for KEGG and PubChem. These shorter names
0149         %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem
0150         %Compound/Substance, into corresponding COBRA model fields
0151         extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg');
0152         extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem');
0153         i=ismember(extractedMiriamNames,'kegg');
0154         if any(i) % Combine KEGG compounds and glycans
0155             for j=1:length(i)
0156                 if i(j) && isfield(newModel,'metKEGGID')~=1
0157                     newModel.metKEGGID=miriams(:,j);
0158                 elseif i(j)
0159                     newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j));
0160                 end
0161             end
0162             newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$','');
0163         end
0164         i=ismember(extractedMiriamNames,'pubchem');
0165         if any(i) % Combine Pubchem compounds and substances
0166             for j=1:length(i)
0167                 if i(j) && isfield(newModel,'metPubChemID')~=1
0168                     newModel.metPubChemID=miriams(:,j);
0169                 elseif i(j)
0170                     newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j));
0171                 end
0172             end
0173             newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$','');
0174         end
0175         %All other Miriams can be directly parsed with no modifications:
0176         for i = 1:length(metCOBRAfields)
0177             j=ismember(extractedMiriamNames,metNamespaces{i});
0178             if any(j)
0179                 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);'])
0180             end
0181         end
0182     end
0183     if isfield(model,'inchis')
0184         newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$','');
0185     end
0186     newModel.b=zeros(numel(model.mets),1);
0187     newModel.csense=repmat('E',size(model.mets));
0188     if isfield(model,'geneMiriams')
0189         [~,extractedMiriamNames]=extractMiriam(model.geneMiriams);
0190         for i = 1:length(geneCOBRAfields)
0191             j=ismember(extractedMiriamNames,geneNamespaces{i});
0192             if any(j)
0193                 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);'])
0194             end
0195         end
0196     end
0197     if isfield(model,'geneShortNames')
0198         newModel.geneNames=model.geneShortNames;
0199     end
0200     if isfield(model,'genes')
0201         newModel.rules=grrulesToRules(model);
0202     else
0203         fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n');
0204     end
0205     % If needed, normalise subSystems to cell-of-cells format
0206     if isfield(newModel,'subSystems') && ~isempty(newModel.subSystems)
0207         if ~iscell(newModel.subSystems{1})
0208             newModel.subSystems = cellfun(@(x) {x}, newModel.subSystems, 'UniformOutput', false);
0209         end
0210     end
0211 else
0212     fprintf('Converting COBRA structure to RAVEN..\n');
0213     %Convert from COBRA to RAVEN structure
0214 
0215     %Mandatory RAVEN fields
0216     newModel.mets=model.mets;
0217     if ~isfield(model,'comps')
0218         %Since 'comps' field is not mandatory in COBRA, it may be required
0219         %to obtain the non-redundant list of comps from metabolite ids, if
0220         %'comps' field is not available
0221         newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1'));
0222         newModel.compNames = newModel.comps;
0223     end
0224     for i=1:numel(newModel.comps)
0225         newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],'');
0226         newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],'');
0227     end
0228     
0229     %In some cases (e.g. any model that uses BiGG ids as main ids), there
0230     %may be overlapping mets due to removal of compartment info. To avoid
0231     %this, we change compartments from e.g. [c] into _c
0232     if numel(unique(newModel.mets))~=numel(model.mets)
0233         newModel.mets=model.mets;
0234         for i=1:numel(newModel.comps)
0235             newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]);
0236         end
0237     end
0238     %Since COBRA no longer contains rev field it is assumed that rxn is
0239     %reversible if its lower bound is set below zero
0240     if ~isfield(model,'rev')
0241         for i=1:numel(model.rxns)
0242             if model.lb(i)<0
0243                 newModel.rev(i,1)=1;
0244             else
0245                 newModel.rev(i,1)=0;
0246             end
0247         end
0248     end
0249     newModel.b=zeros(numel(model.mets),1);
0250    
0251     %metComps is also mandatory, but defined later to match the order of
0252     %fields
0253     
0254     %Fields 'name' and 'id' are also considered as mandatory, but
0255     %these are added to the model during exportModel/exportToExcelFormat
0256     %anyway, so there is no point to add this information here
0257     
0258     %Optional RAVEN fields
0259     if isfield(model,'modelID')
0260         newModel.id=model.modelID;
0261     end
0262     if isfield(model,'modelName')
0263         newModel.name=model.modelName;
0264     end
0265     if isfield(model,'rules') && ~isfield(model,'grRules')
0266         model.grRules        = rulesTogrrules(model);
0267     end
0268     if isfield(model,'grRules')
0269         [grRules,rxnGeneMat] = standardizeGrRules(model,true);
0270         newModel.grRules     = grRules;
0271         newModel.rxnGeneMat  = rxnGeneMat;
0272     end
0273     if isfield(model,'rxnECNumbers')
0274         newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:','');
0275     end
0276     if any(isfield(model,rxnCOBRAfields))
0277         for i=1:numel(model.rxns)
0278             counter=1;
0279             newModel.rxnMiriams{i,1}=[];
0280             if isfield(model,'rxnReferences')
0281                 if ~isempty(model.rxnReferences{i})
0282                     pmids = model.rxnReferences{i};
0283                     pmids = strsplit(pmids,'; ');
0284                     nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once'));
0285                     if any(nonPmids) %Not a pubmed id, keep in rxnReferences instead
0286                         newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', ');
0287                         pmids(nonPmids)=[];
0288                     end
0289                     for j = 1:length(pmids)
0290                         newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed';
0291                         newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j};
0292                         counter=counter+1;
0293                     end
0294                 end
0295             end
0296             for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences
0297                 if isfield(model,rxnCOBRAfields{j})
0298                     rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']);
0299                     if ~isempty(rxnAnnotation)
0300                         rxnAnnotation = strtrim(strsplit(rxnAnnotation,';'));
0301                         for a=1:length(rxnAnnotation)
0302                             newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j};
0303                             newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a};
0304                             counter=counter+1;
0305                         end
0306                     end
0307                 end
0308             end
0309         end
0310     end
0311     if isfield(newModel,'rxnReferences')
0312         emptyEntry = cellfun(@isempty,newModel.rxnReferences);
0313         newModel.rxnReferences(emptyEntry)={''};
0314         diffNumel = numel(newModel.rxns) - numel(newModel.rxnReferences);
0315         if diffNumel > 0
0316             newModel.rxnReferences(end+1:end+diffNumel) = {''};
0317         end
0318     end
0319     if any(isfield(model,geneCOBRAfields))
0320         for i=1:numel(model.genes)
0321             counter=1;
0322             newModel.geneMiriams{i,1}=[];
0323             for j = 1:length(geneCOBRAfields)
0324                 if isfield(model,geneCOBRAfields{j})
0325                     geneAnnotation = eval(['model.' geneCOBRAfields{j} '{i}']);
0326                     if ~isempty(geneAnnotation)
0327                         geneAnnotation = strtrim(strsplit(geneAnnotation,';'));
0328                         for a=1:length(geneAnnotation)
0329                             newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j};
0330                             newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a};
0331                             counter=counter+1;
0332                         end
0333                     end
0334                 end
0335             end
0336         end
0337     end
0338     if isfield(model,'geneNames')
0339         newModel.geneShortNames=model.geneNames;
0340     end
0341     newModel.metNames=model.metNames;
0342     for i=1:numel(newModel.comps)
0343         newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],'');
0344         newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],'');
0345     end
0346     newModel.metNames=deblank(newModel.metNames);
0347     newModel.metComps=regexprep(model.mets,'^.+\[','');
0348     newModel.metComps=regexprep(newModel.metComps,'\]$','');
0349     [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps);
0350     if isfield(model,'metInChIString')
0351         newModel.inchis=regexprep(model.metInChIString,'^InChI=','');
0352     end
0353     printWarning=false;
0354     if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID']))
0355         for i=1:numel(model.mets)
0356             counter=1;
0357             newModel.metMiriams{i,1}=[];
0358             if isfield(model,'metKEGGID')
0359                 if ~isempty(model.metKEGGID{i})
0360                     if strcmp(model.metKEGGID{i}(1),'C')
0361                         newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound';
0362                         newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0363                         counter=counter+1;
0364                     elseif strcmp(model.metKEGGID{i}(1),'G')
0365                         newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan';
0366                         newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0367                         counter=counter+1;
0368                     end
0369                 end
0370             end
0371             if isfield(model,'metPubChemID')
0372                 if ~isempty(model.metPubChemID{i})
0373                     if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:')
0374                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0375                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0376                         counter=counter+1;
0377                     elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:')
0378                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance';
0379                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0380                         counter=counter+1;
0381                     else
0382                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0383                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0384                         counter=counter+1;
0385                         printWarning=true;
0386                     end
0387                 end
0388             end            
0389             for j = 1:length(metCOBRAfields)
0390                 if isfield(model,metCOBRAfields{j})
0391                     metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']);
0392                     if ~isempty(metAnnotation)
0393                         metAnnotation = strtrim(strsplit(metAnnotation,';'));
0394                         for a=1:length(metAnnotation)
0395                             newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j};
0396                             newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a};
0397                             counter=counter+1;
0398                         end
0399                     end
0400                 end
0401             end
0402         end
0403     end
0404     % Flatten subSystems if every reaction has exactly one subsystem.
0405     if isfield(newModel,'subSystems') && ~isempty(newModel.subSystems)
0406         if all(cellfun(@(x) iscell(x) && isscalar(x), newModel.subSystems))
0407             newModel.subSystems = transpose([newModel.subSystems{:}]);
0408         end
0409     end
0410     if printWarning
0411         fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n');
0412     end
0413 end
0414 
0415 % Order fields
0416 newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models
0417 end
0418 
0419 function rules=grrulesToRules(model)
0420 %This function just takes grRules, changes all gene names to
0421 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding
0422 %symbols
0423 replacingGenes=cell([size(model.genes,1) 1]);
0424 for i=1:numel(replacingGenes)
0425     replacingGenes{i}=strcat('x(',num2str(i),')');
0426 end
0427 rules = strcat({' '},model.grRules,{' '});
0428 for i=1:length(model.genes)
0429     rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']);
0430     rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']);
0431     rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']);
0432 end
0433 rules=regexprep(rules,' and ',' & ');
0434 rules=regexprep(rules,' or ',' | ');
0435 rules=strtrim(rules);
0436 end
0437 
0438 function grRules=rulesTogrrules(model)
0439 %This function takes rules, replaces &/| for and/or, replaces the x(i)
0440 %format with the actual gene ID, and takes out extra whitespace and
0441 %redundant parenthesis introduced by COBRA, to create grRules.
0442 grRules = strrep(model.rules,'&','and');
0443 grRules = strrep(grRules,'|','or');
0444 for i = 1:length(model.genes)
0445     grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i});
0446 end
0447 grRules = strrep(grRules,'( ','(');
0448 grRules = strrep(grRules,' )',')');
0449 grRules = regexprep(grRules,'^(',''); %rules that start with a "("
0450 grRules = regexprep(grRules,')$',''); %rules that end with a ")"
0451 end

Generated by m2html © 2005