0001 function newModel=ravenCobraWrapper(model)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033 if isfield(model,'rules')
0034 isRaven=false;
0035 else
0036 isRaven=true;
0037 end
0038
0039 ravenPath=findRAVENroot();
0040
0041
0042 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv'));
0043 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1);
0044 dbFields = ~cellfun(@isempty,fieldFile{5});
0045 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'});
0046 COBRAnamespace = fieldFile{5}(dbFields);
0047 COBRAnamespace = regexprep(COBRAnamespace,';.*','');
0048 COBRAfields = fieldFile{1}(dbFields);
0049 fclose(fid);
0050
0051
0052 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv'));
0053 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0);
0054 COBRAfields = [COBRAfields; fieldFile{1}];
0055 COBRAnamespace = [COBRAnamespace; fieldFile{2}];
0056 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn'));
0057 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn'));
0058 metCOBRAfields = COBRAfields(startsWith(COBRAfields,'met'));
0059 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met'));
0060 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene'));
0061 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene'));
0062 fclose(fid);
0063
0064 if isRaven
0065
0066 model=simplifyModel(model);
0067 end
0068
0069
0070 newModel.S=model.S;
0071 newModel.lb=model.lb;
0072 newModel.ub=model.ub;
0073 if isfield(model,'c')
0074 newModel.c=model.c;
0075 else
0076 newModel.c=zeros(numel(model.rxns),1);
0077 end
0078 newModel.rxns=model.rxns;
0079 optFields = {'rxnNames','subSystems','rxnNotes','metDeltaG','rxnDeltaG',...
0080 'metFormulas','comps','compNames','metCharges','genes',...
0081 'rxnConfidenceScores','rxnGeneMat','metNotes','rev'};
0082 for i=1:length(optFields)
0083 if isfield(model,optFields{i})
0084 newModel.(optFields{i})=model.(optFields{i});
0085 end
0086 end
0087
0088
0089 if isRaven
0090 fprintf('Converting RAVEN structure to COBRA..\n');
0091
0092
0093
0094 newModel.rxns=model.rxns;
0095 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$')))
0096 newModel.mets=model.mets;
0097 else
0098
0099 BiGGformat = false(size(model.mets));
0100 for i=1:numel(model.comps)
0101 compPos=model.metComps==i;
0102 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$']));
0103 end
0104 if all(BiGGformat)
0105 newModel.mets=model.mets;
0106 for i=1:numel(model.comps)
0107 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']);
0108 end
0109 else
0110 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']');
0111 end
0112 end
0113
0114
0115
0116
0117
0118 if isfield(model,'id')
0119 newModel.modelID=model.id;
0120 end
0121 if isfield(model,'name')
0122 newModel.modelName=model.name;
0123 end
0124 if isfield(model,'eccodes')
0125 newModel.rxnECNumbers=model.eccodes;
0126 end
0127 if isfield(model,'rxnMiriams')
0128 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams);
0129 for i = 1:length(rxnCOBRAfields)
0130 j=ismember(extractedMiriamNames,rxnNamespaces{i});
0131 if any(j)
0132 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);'])
0133 end
0134 end
0135 end
0136 if isfield(model,'rxnReferences')
0137 if isfield(newModel,'rxnReferences')
0138 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences);
0139 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $','');
0140 else
0141 newModel.rxnReferences = model.rxnReferences;
0142 end
0143 end
0144 if isfield(model,'metNames')
0145 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']');
0146 end
0147 if isfield(model,'metMiriams')
0148 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams);
0149
0150
0151
0152 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg');
0153 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem');
0154 i=ismember(extractedMiriamNames,'kegg');
0155 if any(i)
0156 for j=1:length(i)
0157 if i(j) && isfield(newModel,'metKEGGID')~=1
0158 newModel.metKEGGID=miriams(:,j);
0159 elseif i(j)
0160 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j));
0161 end
0162 end
0163 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$','');
0164 end
0165 i=ismember(extractedMiriamNames,'pubchem');
0166 if any(i)
0167 for j=1:length(i)
0168 if i(j) && isfield(newModel,'metPubChemID')~=1
0169 newModel.metPubChemID=miriams(:,j);
0170 elseif i(j)
0171 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j));
0172 end
0173 end
0174 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$','');
0175 end
0176
0177 for i = 1:length(metCOBRAfields)
0178 j=ismember(extractedMiriamNames,metNamespaces{i});
0179 if any(j)
0180 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);'])
0181 end
0182 end
0183 end
0184 if isfield(model,'inchis')
0185 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$','');
0186 end
0187 newModel.b=zeros(numel(model.mets),1);
0188 newModel.csense=repmat('E',size(model.mets));
0189 if isfield(model,'geneMiriams')
0190 [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams);
0191 for i = 1:length(geneCOBRAfields)
0192 j=ismember(extractedMiriamNames,geneNamespaces{i});
0193 if any(j)
0194 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);'])
0195 end
0196 end
0197 end
0198 if isfield(model,'geneShortNames')
0199 newModel.geneNames=model.geneShortNames;
0200 end
0201 if isfield(model,'genes')
0202 newModel.rules=grrulesToRules(model);
0203 else
0204 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n');
0205 end
0206 newModel.osenseStr='max';
0207 else
0208 fprintf('Converting COBRA structure to RAVEN..\n');
0209
0210
0211
0212 newModel.mets=model.mets;
0213 if ~isfield(model,'comps')
0214
0215
0216
0217 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1'));
0218 newModel.compNames = newModel.comps;
0219 end
0220 for i=1:numel(newModel.comps)
0221 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],'');
0222 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],'');
0223 end
0224
0225
0226
0227
0228 if numel(unique(newModel.mets))~=numel(model.mets)
0229 newModel.mets=model.mets;
0230 for i=1:numel(newModel.comps)
0231 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]);
0232 end
0233 end
0234
0235
0236 if ~isfield(model,'rev')
0237 for i=1:numel(model.rxns)
0238 if model.lb(i)<0
0239 newModel.rev(i,1)=1;
0240 else
0241 newModel.rev(i,1)=0;
0242 end
0243 end
0244 end
0245 newModel.b=zeros(numel(model.mets),1);
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255 if isfield(model,'modelID')
0256 newModel.id=model.modelID;
0257 end
0258 if isfield(model,'modelName')
0259 newModel.name=model.modelName;
0260 end
0261 if isfield(model,'rules') && ~isfield(model,'grRules')
0262 model.grRules = rulesTogrrules(model);
0263 end
0264 if isfield(model,'grRules')
0265 [grRules,rxnGeneMat] = standardizeGrRules(model,true);
0266 newModel.grRules = grRules;
0267 newModel.rxnGeneMat = rxnGeneMat;
0268 end
0269 if isfield(model,'rxnECNumbers')
0270 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:','');
0271 end
0272 if any(isfield(model,rxnCOBRAfields))
0273 for i=1:numel(model.rxns)
0274 counter=1;
0275 newModel.rxnMiriams{i,1}=[];
0276 if isfield(model,'rxnReferences')
0277 if ~isempty(model.rxnReferences{i})
0278 pmids = model.rxnReferences{i};
0279 pmids = strsplit(pmids,'; ');
0280 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once'));
0281 if any(nonPmids)
0282 newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', ');
0283 pmids(nonPmids)=[];
0284 end
0285 for j = 1:length(pmids)
0286 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed';
0287 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j};
0288 counter=counter+1;
0289 end
0290 end
0291 end
0292 for j = 2:length(rxnCOBRAfields)
0293 if isfield(model,rxnCOBRAfields{j})
0294 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']);
0295 if ~isempty(rxnAnnotation)
0296 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';'));
0297 for a=1:length(rxnAnnotation)
0298 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j};
0299 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a};
0300 counter=counter+1;
0301 end
0302 end
0303 end
0304 end
0305 end
0306 end
0307 if isfield(newModel,'rxnReferences')
0308 emptyEntry = cellfun(@isempty,newModel.rxnReferences);
0309 newModel.rxnReferences(emptyEntry)={''};
0310 end
0311 if any(isfield(model,geneCOBRAfields))
0312 for i=1:numel(model.genes)
0313 counter=1;
0314 newModel.geneMiriams{i,1}=[];
0315 for j = 1:length(geneCOBRAfields)
0316 if isfield(model,geneCOBRAfields{j})
0317 geneAnnotation = eval(['model.' geneCOBRAfields{j} '{i}']);
0318 if ~isempty(geneAnnotation)
0319 geneAnnotation = strtrim(strsplit(geneAnnotation,';'));
0320 for a=1:length(geneAnnotation)
0321 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j};
0322 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a};
0323 counter=counter+1;
0324 end
0325 end
0326 end
0327 end
0328 end
0329 end
0330 if isfield(model,'geneNames')
0331 newModel.geneShortNames=model.geneNames;
0332 end
0333 newModel.metNames=model.metNames;
0334 for i=1:numel(newModel.comps)
0335 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],'');
0336 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],'');
0337 end
0338 newModel.metNames=deblank(newModel.metNames);
0339 newModel.metComps=regexprep(model.mets,'^.+\[','');
0340 newModel.metComps=regexprep(newModel.metComps,'\]$','');
0341 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps);
0342 if isfield(model,'metInChIString')
0343 newModel.inchis=regexprep(model.metInChIString,'^InChI=','');
0344 end
0345 printWarning=false;
0346 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID']))
0347 for i=1:numel(model.mets)
0348 counter=1;
0349 newModel.metMiriams{i,1}=[];
0350 if isfield(model,'metKEGGID')
0351 if ~isempty(model.metKEGGID{i})
0352 if strcmp(model.metKEGGID{i}(1),'C')
0353 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound';
0354 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0355 counter=counter+1;
0356 elseif strcmp(model.metKEGGID{i}(1),'G')
0357 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan';
0358 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0359 counter=counter+1;
0360 end
0361 end
0362 end
0363 if isfield(model,'metPubChemID')
0364 if ~isempty(model.metPubChemID{i})
0365 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:')
0366 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0367 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0368 counter=counter+1;
0369 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:')
0370 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance';
0371 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0372 counter=counter+1;
0373 else
0374 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0375 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0376 counter=counter+1;
0377 printWarning=true;
0378 end
0379 end
0380 end
0381 for j = 1:length(metCOBRAfields)
0382 if isfield(model,metCOBRAfields{j})
0383 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']);
0384 if ~isempty(metAnnotation)
0385 metAnnotation = strtrim(strsplit(metAnnotation,';'));
0386 for a=1:length(metAnnotation)
0387 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j};
0388 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a};
0389 counter=counter+1;
0390 end
0391 end
0392 end
0393 end
0394 end
0395 end
0396 if printWarning
0397 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n');
0398 end
0399 end
0400
0401
0402 newModel=standardizeModelFieldOrder(newModel);
0403 end
0404
0405 function rules=grrulesToRules(model)
0406
0407
0408
0409 replacingGenes=cell([size(model.genes,1) 1]);
0410 for i=1:numel(replacingGenes)
0411 replacingGenes{i}=strcat('x(',num2str(i),')');
0412 end
0413 rules = strcat({' '},model.grRules,{' '});
0414 for i=1:length(model.genes)
0415 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']);
0416 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']);
0417 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']);
0418 end
0419 rules=regexprep(rules,' and ',' & ');
0420 rules=regexprep(rules,' or ',' | ');
0421 rules=strtrim(rules);
0422 end
0423
0424 function grRules=rulesTogrrules(model)
0425
0426
0427
0428 grRules = strrep(model.rules,'&','and');
0429 grRules = strrep(grRules,'|','or');
0430 for i = 1:length(model.genes)
0431 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i});
0432 end
0433 grRules = strrep(grRules,'( ','(');
0434 grRules = strrep(grRules,' )',')');
0435 grRules = regexprep(grRules,'^(','');
0436 grRules = regexprep(grRules,')$','');
0437 end