Home > struct_conversion > ravenCobraWrapper.m

ravenCobraWrapper

PURPOSE ^

ravenCobraWrapper

SYNOPSIS ^

function newModel=ravenCobraWrapper(model)

DESCRIPTION ^

 ravenCobraWrapper
   Converts between RAVEN and COBRA structures

   Input: model          a RAVEN/COBRA-compatible model structure

   Ouput: newModel       a COBRA/RAVEN-compatible model structure
   
   This function is a bidirectional tool to convert between RAVEN and
   COBRA structures. It recognises COBRA structure by checking field
   'rules' existense, which is only found in COBRA Toolbox structure. If
   the COBRA model also has a grRules field, then this will be used
   instead of parsing the rules field.

   NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following
   fields are lost: annotation, compOutside, compMiriams, rxnComps,
   geneComps, unconstrained. Boundary metabolites are lost, because COBRA
   structure does not involve boundary metabolites, so they are removed
   using simplifyModel before RAVEN -> COBRA conversion. The field 'rev'
   is also partially lost, but during COBRA -> RAVEN conversion it's
   reconstructed based on lower bound reaction values

   NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following
   fields are lost: geneEntrezID, metSmiles, modelVersion,
   proteinNames, proteins

   NOTE: The information about mandatory RAVEN fields was taken from
   checkModelStruct function, whereas the corresponding information about
   COBRA fields was fetched from verifyModel function

   Usage: newModel=ravenCobraWrapper(model)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function newModel=ravenCobraWrapper(model)
0002 % ravenCobraWrapper
0003 %   Converts between RAVEN and COBRA structures
0004 %
0005 %   Input: model          a RAVEN/COBRA-compatible model structure
0006 %
0007 %   Ouput: newModel       a COBRA/RAVEN-compatible model structure
0008 %
0009 %   This function is a bidirectional tool to convert between RAVEN and
0010 %   COBRA structures. It recognises COBRA structure by checking field
0011 %   'rules' existense, which is only found in COBRA Toolbox structure. If
0012 %   the COBRA model also has a grRules field, then this will be used
0013 %   instead of parsing the rules field.
0014 %
0015 %   NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following
0016 %   fields are lost: annotation, compOutside, compMiriams, rxnComps,
0017 %   geneComps, unconstrained. Boundary metabolites are lost, because COBRA
0018 %   structure does not involve boundary metabolites, so they are removed
0019 %   using simplifyModel before RAVEN -> COBRA conversion. The field 'rev'
0020 %   is also partially lost, but during COBRA -> RAVEN conversion it's
0021 %   reconstructed based on lower bound reaction values
0022 %
0023 %   NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following
0024 %   fields are lost: geneEntrezID, metSmiles, modelVersion,
0025 %   proteinNames, proteins
0026 %
0027 %   NOTE: The information about mandatory RAVEN fields was taken from
0028 %   checkModelStruct function, whereas the corresponding information about
0029 %   COBRA fields was fetched from verifyModel function
0030 %
0031 %   Usage: newModel=ravenCobraWrapper(model)
0032 
0033 if isfield(model,'rules')
0034     isRaven=false;
0035 else
0036     isRaven=true;
0037 end
0038 
0039 ravenPath=findRAVENroot();
0040 
0041 % Load COBRA field information
0042 fid             = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv
0043 fieldFile       = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1);
0044 dbFields        = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams
0045 dbFields        = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'});
0046 COBRAnamespace  = fieldFile{5}(dbFields);
0047 COBRAnamespace  = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace
0048 COBRAfields     = fieldFile{1}(dbFields);
0049 fclose(fid);
0050 
0051 % Load conversion between additional COBRA fields and namespaces:
0052 fid             = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv'));
0053 fieldFile       = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0);
0054 COBRAfields     = [COBRAfields; fieldFile{1}];
0055 COBRAnamespace  = [COBRAnamespace; fieldFile{2}];
0056 rxnCOBRAfields  = COBRAfields(startsWith(COBRAfields,'rxn'));
0057 rxnNamespaces   = COBRAnamespace(startsWith(COBRAfields,'rxn'));
0058 metCOBRAfields  = COBRAfields(startsWith(COBRAfields,'met'));
0059 metNamespaces   = COBRAnamespace(startsWith(COBRAfields,'met'));
0060 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene'));
0061 geneNamespaces  = COBRAnamespace(startsWith(COBRAfields,'gene'));
0062 fclose(fid);
0063 
0064 if isRaven
0065     %Firstly remove boundary metabolites
0066     model=simplifyModel(model);
0067 end
0068 
0069 % Keep fields that have identical names and content
0070 newModel.S=model.S;
0071 newModel.lb=model.lb;
0072 newModel.ub=model.ub;
0073 if isfield(model,'c')
0074     newModel.c=model.c;
0075 else
0076     newModel.c=zeros(numel(model.rxns),1);
0077 end
0078 newModel.rxns=model.rxns;
0079 optFields = {'rxnNames','subSystems','rxnNotes','metDeltaG','rxnDeltaG',...
0080     'metFormulas','comps','compNames','metCharges','genes',...
0081     'rxnConfidenceScores','rxnGeneMat','metNotes','rev'};
0082 for i=1:length(optFields)
0083     if isfield(model,optFields{i})
0084         newModel.(optFields{i})=model.(optFields{i});
0085     end
0086 end
0087     
0088 % Convert unique fields
0089 if isRaven
0090     fprintf('Converting RAVEN structure to COBRA..\n');
0091     %Convert from RAVEN to COBRA structure
0092     
0093     %Mandatory COBRA fields
0094     newModel.rxns=model.rxns;
0095     if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$')))
0096         newModel.mets=model.mets;
0097     else
0098         %Check if model has compartment info as "met_c" suffix in all metabolites:
0099         BiGGformat = false(size(model.mets));
0100         for i=1:numel(model.comps)
0101             compPos=model.metComps==i;
0102             BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$']));
0103         end
0104         if all(BiGGformat)
0105             newModel.mets=model.mets;
0106             for i=1:numel(model.comps)
0107                 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']);
0108             end
0109         else
0110             newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']');
0111         end
0112     end
0113 
0114     %b, csense, osenseStr, genes, rules are also mandatory, but defined
0115     %later to match the order of fields
0116     
0117     %Optional COBRA fields
0118     if isfield(model,'id')
0119         newModel.modelID=model.id;
0120     end
0121     if isfield(model,'name')
0122         newModel.modelName=model.name;
0123     end
0124     if isfield(model,'eccodes')
0125         newModel.rxnECNumbers=model.eccodes;
0126     end
0127     if isfield(model,'rxnMiriams')
0128         [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams);
0129         for i = 1:length(rxnCOBRAfields)
0130             j=ismember(extractedMiriamNames,rxnNamespaces{i});
0131             if any(j)
0132                 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);'])
0133             end
0134         end
0135     end
0136     if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams
0137         if isfield(newModel,'rxnReferences')
0138             newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences);
0139             newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $','');
0140         else
0141             newModel.rxnReferences = model.rxnReferences;
0142         end
0143     end
0144     if isfield(model,'metNames')
0145         newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']');
0146     end
0147     if isfield(model,'metMiriams')
0148         [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams);
0149         %Shorten miriam names for KEGG and PubChem. These shorter names
0150         %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem
0151         %Compound/Substance, into corresponding COBRA model fields
0152         extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg');
0153         extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem');
0154         i=ismember(extractedMiriamNames,'kegg');
0155         if any(i) % Combine KEGG compounds and glycans
0156             for j=1:length(i)
0157                 if i(j) && isfield(newModel,'metKEGGID')~=1
0158                     newModel.metKEGGID=miriams(:,j);
0159                 elseif i(j)
0160                     newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j));
0161                 end
0162             end
0163             newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$','');
0164         end
0165         i=ismember(extractedMiriamNames,'pubchem');
0166         if any(i) % Combine Pubchem compounds and substances
0167             for j=1:length(i)
0168                 if i(j) && isfield(newModel,'metPubChemID')~=1
0169                     newModel.metPubChemID=miriams(:,j);
0170                 elseif i(j)
0171                     newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j));
0172                 end
0173             end
0174             newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$','');
0175         end
0176         %All other Miriams can be directly parsed with no modifications:
0177         for i = 1:length(metCOBRAfields)
0178             j=ismember(extractedMiriamNames,metNamespaces{i});
0179             if any(j)
0180                 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);'])
0181             end
0182         end
0183     end
0184     if isfield(model,'inchis')
0185         newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$','');
0186     end
0187     newModel.b=zeros(numel(model.mets),1);
0188     newModel.csense=repmat('E',size(model.mets));
0189     if isfield(model,'geneMiriams')
0190         [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams);
0191         for i = 1:length(geneCOBRAfields)
0192             j=ismember(extractedMiriamNames,geneNamespaces{i});
0193             if any(j)
0194                 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);'])
0195             end
0196         end
0197     end
0198     if isfield(model,'geneShortNames')
0199         newModel.geneNames=model.geneShortNames;
0200     end
0201     if isfield(model,'genes')
0202         newModel.rules=grrulesToRules(model);
0203     else
0204         fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n');
0205     end
0206     newModel.osenseStr='max';
0207 else
0208     fprintf('Converting COBRA structure to RAVEN..\n');
0209     %Convert from COBRA to RAVEN structure
0210     
0211     %Mandatory RAVEN fields
0212     newModel.mets=model.mets;
0213     if ~isfield(model,'comps')
0214         %Since 'comps' field is not mandatory in COBRA, it may be required
0215         %to obtain the non-redundant list of comps from metabolite ids, if
0216         %'comps' field is not available
0217         newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1'));
0218         newModel.compNames = newModel.comps;
0219     end
0220     for i=1:numel(newModel.comps)
0221         newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],'');
0222         newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],'');
0223     end
0224     
0225     %In some cases (e.g. any model that uses BiGG ids as main ids), there
0226     %may be overlapping mets due to removal of compartment info. To avoid
0227     %this, we change compartments from e.g. [c] into _c
0228     if numel(unique(newModel.mets))~=numel(model.mets)
0229         newModel.mets=model.mets;
0230         for i=1:numel(newModel.comps)
0231             newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]);
0232         end
0233     end
0234     %Since COBRA no longer contains rev field it is assumed that rxn is
0235     %reversible if its lower bound is set below zero
0236     if ~isfield(model,'rev')
0237         for i=1:numel(model.rxns)
0238             if model.lb(i)<0
0239                 newModel.rev(i,1)=1;
0240             else
0241                 newModel.rev(i,1)=0;
0242             end
0243         end
0244     end
0245     newModel.b=zeros(numel(model.mets),1);
0246    
0247     %metComps is also mandatory, but defined later to match the order of
0248     %fields
0249     
0250     %Fields 'name' and 'id' are also considered as mandatory, but
0251     %these are added to the model during exportModel/exportToExcelFormat
0252     %anyway, so there is no point to add this information here
0253     
0254     %Optional RAVEN fields
0255     if isfield(model,'modelID')
0256         newModel.id=model.modelID;
0257     end
0258     if isfield(model,'modelName')
0259         newModel.name=model.modelName;
0260     end
0261     if isfield(model,'rules') && ~isfield(model,'grRules')
0262         model.grRules        = rulesTogrrules(model);
0263     end
0264     if isfield(model,'grRules')
0265         [grRules,rxnGeneMat] = standardizeGrRules(model,true);
0266         newModel.grRules     = grRules;
0267         newModel.rxnGeneMat  = rxnGeneMat;
0268     end
0269     if isfield(model,'rxnECNumbers')
0270         newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:','');
0271     end
0272     if any(isfield(model,rxnCOBRAfields))
0273         for i=1:numel(model.rxns)
0274             counter=1;
0275             newModel.rxnMiriams{i,1}=[];
0276             if isfield(model,'rxnReferences')
0277                 if ~isempty(model.rxnReferences{i})
0278                     pmids = model.rxnReferences{i};
0279                     pmids = strsplit(pmids,'; ');
0280                     nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once'));
0281                     if any(nonPmids) %Not a pubmed id, keep in rxnReferences instead
0282                         newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', ');
0283                         pmids(nonPmids)=[];
0284                     end
0285                     for j = 1:length(pmids)
0286                         newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed';
0287                         newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j};
0288                         counter=counter+1;
0289                     end
0290                 end
0291             end
0292             for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences
0293                 if isfield(model,rxnCOBRAfields{j})
0294                     rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']);
0295                     if ~isempty(rxnAnnotation)
0296                         rxnAnnotation = strtrim(strsplit(rxnAnnotation,';'));
0297                         for a=1:length(rxnAnnotation)
0298                             newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j};
0299                             newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a};
0300                             counter=counter+1;
0301                         end
0302                     end
0303                 end
0304             end
0305         end
0306     end
0307     if isfield(newModel,'rxnReferences')
0308         emptyEntry = cellfun(@isempty,newModel.rxnReferences);
0309         newModel.rxnReferences(emptyEntry)={''};
0310     end
0311     if any(isfield(model,geneCOBRAfields))
0312         for i=1:numel(model.genes)
0313             counter=1;
0314             newModel.geneMiriams{i,1}=[];
0315             for j = 1:length(geneCOBRAfields)
0316                 if isfield(model,geneCOBRAfields{j})
0317                     geneAnnotation = eval(['model.' geneCOBRAfields{j} '{i}']);
0318                     if ~isempty(geneAnnotation)
0319                         geneAnnotation = strtrim(strsplit(geneAnnotation,';'));
0320                         for a=1:length(geneAnnotation)
0321                             newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j};
0322                             newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a};
0323                             counter=counter+1;
0324                         end
0325                     end
0326                 end
0327             end
0328         end
0329     end
0330     if isfield(model,'geneNames')
0331         newModel.geneShortNames=model.geneNames;
0332     end
0333     newModel.metNames=model.metNames;
0334     for i=1:numel(newModel.comps)
0335         newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],'');
0336         newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],'');
0337     end
0338     newModel.metNames=deblank(newModel.metNames);
0339     newModel.metComps=regexprep(model.mets,'^.+\[','');
0340     newModel.metComps=regexprep(newModel.metComps,'\]$','');
0341     [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps);
0342     if isfield(model,'metInChIString')
0343         newModel.inchis=regexprep(model.metInChIString,'^InChI=','');
0344     end
0345     printWarning=false;
0346     if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID']))
0347         for i=1:numel(model.mets)
0348             counter=1;
0349             newModel.metMiriams{i,1}=[];
0350             if isfield(model,'metKEGGID')
0351                 if ~isempty(model.metKEGGID{i})
0352                     if strcmp(model.metKEGGID{i}(1),'C')
0353                         newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound';
0354                         newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0355                         counter=counter+1;
0356                     elseif strcmp(model.metKEGGID{i}(1),'G')
0357                         newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan';
0358                         newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0359                         counter=counter+1;
0360                     end
0361                 end
0362             end
0363             if isfield(model,'metPubChemID')
0364                 if ~isempty(model.metPubChemID{i})
0365                     if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:')
0366                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0367                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0368                         counter=counter+1;
0369                     elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:')
0370                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance';
0371                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0372                         counter=counter+1;
0373                     else
0374                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0375                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0376                         counter=counter+1;
0377                         printWarning=true;
0378                     end
0379                 end
0380             end            
0381             for j = 1:length(metCOBRAfields)
0382                 if isfield(model,metCOBRAfields{j})
0383                     metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']);
0384                     if ~isempty(metAnnotation)
0385                         metAnnotation = strtrim(strsplit(metAnnotation,';'));
0386                         for a=1:length(metAnnotation)
0387                             newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j};
0388                             newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a};
0389                             counter=counter+1;
0390                         end
0391                     end
0392                 end
0393             end
0394         end
0395     end
0396     if printWarning
0397         fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n');
0398     end
0399 end
0400 
0401 % Order fields
0402 newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models
0403 end
0404 
0405 function rules=grrulesToRules(model)
0406 %This function just takes grRules, changes all gene names to
0407 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding
0408 %symbols
0409 replacingGenes=cell([size(model.genes,1) 1]);
0410 for i=1:numel(replacingGenes)
0411     replacingGenes{i}=strcat('x(',num2str(i),')');
0412 end
0413 rules = strcat({' '},model.grRules,{' '});
0414 for i=1:length(model.genes)
0415     rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']);
0416     rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']);
0417     rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']);
0418 end
0419 rules=regexprep(rules,' and ',' & ');
0420 rules=regexprep(rules,' or ',' | ');
0421 rules=strtrim(rules);
0422 end
0423 
0424 function grRules=rulesTogrrules(model)
0425 %This function takes rules, replaces &/| for and/or, replaces the x(i)
0426 %format with the actual gene ID, and takes out extra whitespace and
0427 %redundant parenthesis introduced by COBRA, to create grRules.
0428 grRules = strrep(model.rules,'&','and');
0429 grRules = strrep(grRules,'|','or');
0430 for i = 1:length(model.genes)
0431     grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i});
0432 end
0433 grRules = strrep(grRules,'( ','(');
0434 grRules = strrep(grRules,' )',')');
0435 grRules = regexprep(grRules,'^(',''); %rules that start with a "("
0436 grRules = regexprep(grRules,')$',''); %rules that end with a ")"
0437 end

Generated by m2html © 2005