Home > struct_conversion > ravenCobraWrapper.m

ravenCobraWrapper

PURPOSE ^

ravenCobraWrapper

SYNOPSIS ^

function newModel=ravenCobraWrapper(model)

DESCRIPTION ^

 ravenCobraWrapper
   Converts between RAVEN and COBRA structures

   Input: model          a RAVEN/COBRA-compatible model structure

   Ouput: newModel       a COBRA/RAVEN-compatible model structure
   
   This function is a bidirectional tool to convert between RAVEN and
   COBRA structures. It recognises COBRA structure by checking field
   'rules' existense, which is only found in COBRA Toolbox structure. If
   the COBRA model also has a grRules field, then this will be used
   instead of parsing the rules field.

   NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following
   fields are lost: annotation, compOutside, compMiriams, rxnComps,
   geneComps, unconstrained. Boundary metabolites are lost, because COBRA
   structure does not involve boundary metabolites, so they are removed
   using simplifyModel before RAVEN -> COBRA conversion. The field 'rev'
   is also partially lost, but during COBRA -> RAVEN conversion it's
   reconstructed based on lower bound reaction values

   NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following
   fields are lost: geneEntrezID, modelVersion, proteins

   NOTE: The information about mandatory RAVEN fields was taken from
   checkModelStruct function, whereas the corresponding information about
   COBRA fields was fetched from verifyModel function

 Usage: newModel=ravenCobraWrapper(model)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function newModel=ravenCobraWrapper(model)
0002 % ravenCobraWrapper
0003 %   Converts between RAVEN and COBRA structures
0004 %
0005 %   Input: model          a RAVEN/COBRA-compatible model structure
0006 %
0007 %   Ouput: newModel       a COBRA/RAVEN-compatible model structure
0008 %
0009 %   This function is a bidirectional tool to convert between RAVEN and
0010 %   COBRA structures. It recognises COBRA structure by checking field
0011 %   'rules' existense, which is only found in COBRA Toolbox structure. If
0012 %   the COBRA model also has a grRules field, then this will be used
0013 %   instead of parsing the rules field.
0014 %
0015 %   NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following
0016 %   fields are lost: annotation, compOutside, compMiriams, rxnComps,
0017 %   geneComps, unconstrained. Boundary metabolites are lost, because COBRA
0018 %   structure does not involve boundary metabolites, so they are removed
0019 %   using simplifyModel before RAVEN -> COBRA conversion. The field 'rev'
0020 %   is also partially lost, but during COBRA -> RAVEN conversion it's
0021 %   reconstructed based on lower bound reaction values
0022 %
0023 %   NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following
0024 %   fields are lost: geneEntrezID, modelVersion, proteins
0025 %
0026 %   NOTE: The information about mandatory RAVEN fields was taken from
0027 %   checkModelStruct function, whereas the corresponding information about
0028 %   COBRA fields was fetched from verifyModel function
0029 %
0030 % Usage: newModel=ravenCobraWrapper(model)
0031 
0032 if isfield(model,'rules')
0033     isRaven=false;
0034 else
0035     isRaven=true;
0036 end
0037 
0038 ravenPath=findRAVENroot();
0039 
0040 % Load COBRA field information
0041 fid             = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv
0042 fieldFile       = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1);
0043 dbFields        = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams
0044 dbFields        = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'});
0045 COBRAnamespace  = fieldFile{5}(dbFields);
0046 COBRAnamespace  = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace
0047 COBRAfields     = fieldFile{1}(dbFields);
0048 fclose(fid);
0049 
0050 % Load conversion between additional COBRA fields and namespaces:
0051 fid             = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv'));
0052 fieldFile       = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0);
0053 COBRAfields     = [COBRAfields; fieldFile{1}];
0054 COBRAnamespace  = [COBRAnamespace; fieldFile{2}];
0055 rxnCOBRAfields  = COBRAfields(startsWith(COBRAfields,'rxn'));
0056 rxnNamespaces   = COBRAnamespace(startsWith(COBRAfields,'rxn'));
0057 metCOBRAfields  = COBRAfields(startsWith(COBRAfields,'met'));
0058 metNamespaces   = COBRAnamespace(startsWith(COBRAfields,'met'));
0059 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene'));
0060 geneNamespaces  = COBRAnamespace(startsWith(COBRAfields,'gene'));
0061 fclose(fid);
0062 
0063 if isRaven
0064     %Firstly remove boundary metabolites
0065     model=simplifyModel(model);
0066 end
0067 
0068 % Keep fields that have identical names and content
0069 newModel.S=model.S;
0070 newModel.lb=model.lb;
0071 newModel.ub=model.ub;
0072 if isfield(model,'c')
0073     newModel.c=model.c;
0074 else
0075     newModel.c=zeros(numel(model.rxns),1);
0076 end
0077 newModel.rxns=model.rxns;
0078 optFields = {'rxnNames','rxnNotes','rxnConfidenceScores','rxnDeltaG',...
0079     'rxnGeneMat','rev','subSystems','comps','compNames','metCharges',...
0080     'metDeltaG','metFormulas','metNotes','metSmiles','genes','proteins'};
0081 for i=1:length(optFields)
0082     if isfield(model,optFields{i})
0083         newModel.(optFields{i})=model.(optFields{i});
0084     end
0085 end
0086     
0087 % Convert unique fields
0088 if isRaven
0089     fprintf('Converting RAVEN structure to COBRA..\n');
0090     %Convert from RAVEN to COBRA structure
0091     
0092     %Mandatory COBRA fields
0093     newModel.rxns=model.rxns;
0094     if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$')))
0095         newModel.mets=model.mets;
0096     else
0097         %Check if model has compartment info as "met_c" suffix in all metabolites:
0098         BiGGformat = false(size(model.mets));
0099         for i=1:numel(model.comps)
0100             compPos=model.metComps==i;
0101             BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$']));
0102         end
0103         if all(BiGGformat)
0104             newModel.mets=model.mets;
0105             for i=1:numel(model.comps)
0106                 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']);
0107             end
0108         else
0109             newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']');
0110         end
0111     end
0112 
0113     %b, csense, osenseStr, genes, rules are also mandatory, but defined
0114     %later to match the order of fields
0115     
0116     %Optional COBRA fields
0117     if isfield(model,'id')
0118         newModel.modelID=model.id;
0119     end
0120     if isfield(model,'name')
0121         newModel.modelName=model.name;
0122     end
0123     if isfield(model,'eccodes')
0124         newModel.rxnECNumbers=model.eccodes;
0125     end
0126     if isfield(model,'rxnMiriams')
0127         [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams);
0128         for i = 1:length(rxnCOBRAfields)
0129             j=ismember(extractedMiriamNames,rxnNamespaces{i});
0130             if any(j)
0131                 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);'])
0132             end
0133         end
0134     end
0135     if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams
0136         if isfield(newModel,'rxnReferences')
0137             newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences);
0138             newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $','');
0139         else
0140             newModel.rxnReferences = model.rxnReferences;
0141         end
0142     end
0143     if isfield(model,'metNames')
0144         newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']');
0145     end
0146     if isfield(model,'metMiriams')
0147         [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams);
0148         %Shorten miriam names for KEGG and PubChem. These shorter names
0149         %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem
0150         %Compound/Substance, into corresponding COBRA model fields
0151         extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg');
0152         extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem');
0153         i=ismember(extractedMiriamNames,'kegg');
0154         if any(i) % Combine KEGG compounds and glycans
0155             for j=1:length(i)
0156                 if i(j) && isfield(newModel,'metKEGGID')~=1
0157                     newModel.metKEGGID=miriams(:,j);
0158                 elseif i(j)
0159                     newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j));
0160                 end
0161             end
0162             newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$','');
0163         end
0164         i=ismember(extractedMiriamNames,'pubchem');
0165         if any(i) % Combine Pubchem compounds and substances
0166             for j=1:length(i)
0167                 if i(j) && isfield(newModel,'metPubChemID')~=1
0168                     newModel.metPubChemID=miriams(:,j);
0169                 elseif i(j)
0170                     newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j));
0171                 end
0172             end
0173             newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$','');
0174         end
0175         %All other Miriams can be directly parsed with no modifications:
0176         for i = 1:length(metCOBRAfields)
0177             j=ismember(extractedMiriamNames,metNamespaces{i});
0178             if any(j)
0179                 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);'])
0180             end
0181         end
0182     end
0183     if isfield(model,'inchis')
0184         newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$','');
0185     end
0186     newModel.b=zeros(numel(model.mets),1);
0187     newModel.csense=repmat('E',size(model.mets));
0188     if isfield(model,'geneMiriams')
0189         [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams);
0190         for i = 1:length(geneCOBRAfields)
0191             j=ismember(extractedMiriamNames,geneNamespaces{i});
0192             if any(j)
0193                 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);'])
0194             end
0195         end
0196     end
0197     if isfield(model,'geneShortNames')
0198         newModel.geneNames=model.geneShortNames;
0199     end
0200     if isfield(model,'genes')
0201         newModel.rules=grrulesToRules(model);
0202     else
0203         fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n');
0204     end
0205     newModel.osenseStr='max';
0206 else
0207     fprintf('Converting COBRA structure to RAVEN..\n');
0208     %Convert from COBRA to RAVEN structure
0209     
0210     %Mandatory RAVEN fields
0211     newModel.mets=model.mets;
0212     if ~isfield(model,'comps')
0213         %Since 'comps' field is not mandatory in COBRA, it may be required
0214         %to obtain the non-redundant list of comps from metabolite ids, if
0215         %'comps' field is not available
0216         newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1'));
0217         newModel.compNames = newModel.comps;
0218     end
0219     for i=1:numel(newModel.comps)
0220         newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],'');
0221         newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],'');
0222     end
0223     
0224     %In some cases (e.g. any model that uses BiGG ids as main ids), there
0225     %may be overlapping mets due to removal of compartment info. To avoid
0226     %this, we change compartments from e.g. [c] into _c
0227     if numel(unique(newModel.mets))~=numel(model.mets)
0228         newModel.mets=model.mets;
0229         for i=1:numel(newModel.comps)
0230             newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]);
0231         end
0232     end
0233     %Since COBRA no longer contains rev field it is assumed that rxn is
0234     %reversible if its lower bound is set below zero
0235     if ~isfield(model,'rev')
0236         for i=1:numel(model.rxns)
0237             if model.lb(i)<0
0238                 newModel.rev(i,1)=1;
0239             else
0240                 newModel.rev(i,1)=0;
0241             end
0242         end
0243     end
0244     newModel.b=zeros(numel(model.mets),1);
0245    
0246     %metComps is also mandatory, but defined later to match the order of
0247     %fields
0248     
0249     %Fields 'name' and 'id' are also considered as mandatory, but
0250     %these are added to the model during exportModel/exportToExcelFormat
0251     %anyway, so there is no point to add this information here
0252     
0253     %Optional RAVEN fields
0254     if isfield(model,'modelID')
0255         newModel.id=model.modelID;
0256     end
0257     if isfield(model,'modelName')
0258         newModel.name=model.modelName;
0259     end
0260     if isfield(model,'rules') && ~isfield(model,'grRules')
0261         model.grRules        = rulesTogrrules(model);
0262     end
0263     if isfield(model,'grRules')
0264         [grRules,rxnGeneMat] = standardizeGrRules(model,true);
0265         newModel.grRules     = grRules;
0266         newModel.rxnGeneMat  = rxnGeneMat;
0267     end
0268     if isfield(model,'rxnECNumbers')
0269         newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:','');
0270     end
0271     if any(isfield(model,rxnCOBRAfields))
0272         for i=1:numel(model.rxns)
0273             counter=1;
0274             newModel.rxnMiriams{i,1}=[];
0275             if isfield(model,'rxnReferences')
0276                 if ~isempty(model.rxnReferences{i})
0277                     pmids = model.rxnReferences{i};
0278                     pmids = strsplit(pmids,'; ');
0279                     nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once'));
0280                     if any(nonPmids) %Not a pubmed id, keep in rxnReferences instead
0281                         newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', ');
0282                         pmids(nonPmids)=[];
0283                     end
0284                     for j = 1:length(pmids)
0285                         newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed';
0286                         newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j};
0287                         counter=counter+1;
0288                     end
0289                 end
0290             end
0291             for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences
0292                 if isfield(model,rxnCOBRAfields{j})
0293                     rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']);
0294                     if ~isempty(rxnAnnotation)
0295                         rxnAnnotation = strtrim(strsplit(rxnAnnotation,';'));
0296                         for a=1:length(rxnAnnotation)
0297                             newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j};
0298                             newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a};
0299                             counter=counter+1;
0300                         end
0301                     end
0302                 end
0303             end
0304         end
0305     end
0306     if isfield(newModel,'rxnReferences')
0307         emptyEntry = cellfun(@isempty,newModel.rxnReferences);
0308         newModel.rxnReferences(emptyEntry)={''};
0309         diffNumel = numel(newModel.rxns) - numel(newModel.rxnReferences);
0310         if diffNumel > 0
0311             newModel.rxnReferences(end+1:end+diffNumel) = {''};
0312         end
0313     end
0314     if any(isfield(model,geneCOBRAfields))
0315         for i=1:numel(model.genes)
0316             counter=1;
0317             newModel.geneMiriams{i,1}=[];
0318             for j = 1:length(geneCOBRAfields)
0319                 if isfield(model,geneCOBRAfields{j})
0320                     geneAnnotation = eval(['model.' geneCOBRAfields{j} '{i}']);
0321                     if ~isempty(geneAnnotation)
0322                         geneAnnotation = strtrim(strsplit(geneAnnotation,';'));
0323                         for a=1:length(geneAnnotation)
0324                             newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j};
0325                             newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a};
0326                             counter=counter+1;
0327                         end
0328                     end
0329                 end
0330             end
0331         end
0332     end
0333     if isfield(model,'geneNames')
0334         newModel.geneShortNames=model.geneNames;
0335     end
0336     newModel.metNames=model.metNames;
0337     for i=1:numel(newModel.comps)
0338         newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],'');
0339         newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],'');
0340     end
0341     newModel.metNames=deblank(newModel.metNames);
0342     newModel.metComps=regexprep(model.mets,'^.+\[','');
0343     newModel.metComps=regexprep(newModel.metComps,'\]$','');
0344     [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps);
0345     if isfield(model,'metInChIString')
0346         newModel.inchis=regexprep(model.metInChIString,'^InChI=','');
0347     end
0348     printWarning=false;
0349     if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID']))
0350         for i=1:numel(model.mets)
0351             counter=1;
0352             newModel.metMiriams{i,1}=[];
0353             if isfield(model,'metKEGGID')
0354                 if ~isempty(model.metKEGGID{i})
0355                     if strcmp(model.metKEGGID{i}(1),'C')
0356                         newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound';
0357                         newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0358                         counter=counter+1;
0359                     elseif strcmp(model.metKEGGID{i}(1),'G')
0360                         newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan';
0361                         newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0362                         counter=counter+1;
0363                     end
0364                 end
0365             end
0366             if isfield(model,'metPubChemID')
0367                 if ~isempty(model.metPubChemID{i})
0368                     if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:')
0369                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0370                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0371                         counter=counter+1;
0372                     elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:')
0373                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance';
0374                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0375                         counter=counter+1;
0376                     else
0377                         newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0378                         newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0379                         counter=counter+1;
0380                         printWarning=true;
0381                     end
0382                 end
0383             end            
0384             for j = 1:length(metCOBRAfields)
0385                 if isfield(model,metCOBRAfields{j})
0386                     metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']);
0387                     if ~isempty(metAnnotation)
0388                         metAnnotation = strtrim(strsplit(metAnnotation,';'));
0389                         for a=1:length(metAnnotation)
0390                             newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j};
0391                             newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a};
0392                             counter=counter+1;
0393                         end
0394                     end
0395                 end
0396             end
0397         end
0398     end
0399     if printWarning
0400         fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n');
0401     end
0402 end
0403 
0404 % Order fields
0405 newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models
0406 end
0407 
0408 function rules=grrulesToRules(model)
0409 %This function just takes grRules, changes all gene names to
0410 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding
0411 %symbols
0412 replacingGenes=cell([size(model.genes,1) 1]);
0413 for i=1:numel(replacingGenes)
0414     replacingGenes{i}=strcat('x(',num2str(i),')');
0415 end
0416 rules = strcat({' '},model.grRules,{' '});
0417 for i=1:length(model.genes)
0418     rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']);
0419     rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']);
0420     rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']);
0421 end
0422 rules=regexprep(rules,' and ',' & ');
0423 rules=regexprep(rules,' or ',' | ');
0424 rules=strtrim(rules);
0425 end
0426 
0427 function grRules=rulesTogrrules(model)
0428 %This function takes rules, replaces &/| for and/or, replaces the x(i)
0429 %format with the actual gene ID, and takes out extra whitespace and
0430 %redundant parenthesis introduced by COBRA, to create grRules.
0431 grRules = strrep(model.rules,'&','and');
0432 grRules = strrep(grRules,'|','or');
0433 for i = 1:length(model.genes)
0434     grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i});
0435 end
0436 grRules = strrep(grRules,'( ','(');
0437 grRules = strrep(grRules,' )',')');
0438 grRules = regexprep(grRules,'^(',''); %rules that start with a "("
0439 grRules = regexprep(grRules,')$',''); %rules that end with a ")"
0440 end

Generated by m2html © 2005