0001 function newModel=ravenCobraWrapper(model)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032 if isfield(model,'rules')
0033 isRaven=false;
0034 else
0035 isRaven=true;
0036 end
0037
0038 ravenPath=findRAVENroot();
0039
0040
0041 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv'));
0042 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1);
0043 dbFields = ~cellfun(@isempty,fieldFile{5});
0044 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'});
0045 COBRAnamespace = fieldFile{5}(dbFields);
0046 COBRAnamespace = regexprep(COBRAnamespace,';.*','');
0047 COBRAfields = fieldFile{1}(dbFields);
0048 fclose(fid);
0049
0050
0051 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv'));
0052 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0);
0053 COBRAfields = [COBRAfields; fieldFile{1}];
0054 COBRAnamespace = [COBRAnamespace; fieldFile{2}];
0055 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn'));
0056 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn'));
0057 metCOBRAfields = COBRAfields(startsWith(COBRAfields,'met'));
0058 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met'));
0059 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene'));
0060 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene'));
0061 fclose(fid);
0062
0063 if isRaven
0064
0065 model=simplifyModel(model);
0066 end
0067
0068
0069 newModel.S=model.S;
0070 newModel.lb=model.lb;
0071 newModel.ub=model.ub;
0072 if isfield(model,'c')
0073 newModel.c=model.c;
0074 else
0075 newModel.c=zeros(numel(model.rxns),1);
0076 end
0077 newModel.rxns=model.rxns;
0078 optFields = {'rxnNames','rxnNotes','rxnConfidenceScores','rxnDeltaG',...
0079 'rxnGeneMat','rev','subSystems','comps','compNames','metCharges',...
0080 'metDeltaG','metFormulas','metNotes','metSmiles','genes','proteins'};
0081 for i=1:length(optFields)
0082 if isfield(model,optFields{i})
0083 newModel.(optFields{i})=model.(optFields{i});
0084 end
0085 end
0086
0087
0088 if isRaven
0089 fprintf('Converting RAVEN structure to COBRA..\n');
0090
0091
0092
0093 newModel.rxns=model.rxns;
0094 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$')))
0095 newModel.mets=model.mets;
0096 else
0097
0098 BiGGformat = false(size(model.mets));
0099 for i=1:numel(model.comps)
0100 compPos=model.metComps==i;
0101 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$']));
0102 end
0103 if all(BiGGformat)
0104 newModel.mets=model.mets;
0105 for i=1:numel(model.comps)
0106 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']);
0107 end
0108 else
0109 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']');
0110 end
0111 end
0112
0113
0114
0115
0116
0117 if isfield(model,'id')
0118 newModel.modelID=model.id;
0119 end
0120 if isfield(model,'name')
0121 newModel.modelName=model.name;
0122 end
0123 if isfield(model,'eccodes')
0124 newModel.rxnECNumbers=model.eccodes;
0125 end
0126 if isfield(model,'rxnMiriams')
0127 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams);
0128 for i = 1:length(rxnCOBRAfields)
0129 j=ismember(extractedMiriamNames,rxnNamespaces{i});
0130 if any(j)
0131 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);'])
0132 end
0133 end
0134 end
0135 if isfield(model,'rxnReferences')
0136 if isfield(newModel,'rxnReferences')
0137 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences);
0138 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $','');
0139 else
0140 newModel.rxnReferences = model.rxnReferences;
0141 end
0142 end
0143 if isfield(model,'metNames')
0144 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']');
0145 end
0146 if isfield(model,'metMiriams')
0147 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams);
0148
0149
0150
0151 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg');
0152 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem');
0153 i=ismember(extractedMiriamNames,'kegg');
0154 if any(i)
0155 for j=1:length(i)
0156 if i(j) && isfield(newModel,'metKEGGID')~=1
0157 newModel.metKEGGID=miriams(:,j);
0158 elseif i(j)
0159 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j));
0160 end
0161 end
0162 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$','');
0163 end
0164 i=ismember(extractedMiriamNames,'pubchem');
0165 if any(i)
0166 for j=1:length(i)
0167 if i(j) && isfield(newModel,'metPubChemID')~=1
0168 newModel.metPubChemID=miriams(:,j);
0169 elseif i(j)
0170 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j));
0171 end
0172 end
0173 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$','');
0174 end
0175
0176 for i = 1:length(metCOBRAfields)
0177 j=ismember(extractedMiriamNames,metNamespaces{i});
0178 if any(j)
0179 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);'])
0180 end
0181 end
0182 end
0183 if isfield(model,'inchis')
0184 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$','');
0185 end
0186 newModel.b=zeros(numel(model.mets),1);
0187 newModel.csense=repmat('E',size(model.mets));
0188 if isfield(model,'geneMiriams')
0189 [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams);
0190 for i = 1:length(geneCOBRAfields)
0191 j=ismember(extractedMiriamNames,geneNamespaces{i});
0192 if any(j)
0193 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);'])
0194 end
0195 end
0196 end
0197 if isfield(model,'geneShortNames')
0198 newModel.geneNames=model.geneShortNames;
0199 end
0200 if isfield(model,'genes')
0201 newModel.rules=grrulesToRules(model);
0202 else
0203 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n');
0204 end
0205 newModel.osenseStr='max';
0206 else
0207 fprintf('Converting COBRA structure to RAVEN..\n');
0208
0209
0210
0211 newModel.mets=model.mets;
0212 if ~isfield(model,'comps')
0213
0214
0215
0216 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1'));
0217 newModel.compNames = newModel.comps;
0218 end
0219 for i=1:numel(newModel.comps)
0220 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],'');
0221 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],'');
0222 end
0223
0224
0225
0226
0227 if numel(unique(newModel.mets))~=numel(model.mets)
0228 newModel.mets=model.mets;
0229 for i=1:numel(newModel.comps)
0230 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]);
0231 end
0232 end
0233
0234
0235 if ~isfield(model,'rev')
0236 for i=1:numel(model.rxns)
0237 if model.lb(i)<0
0238 newModel.rev(i,1)=1;
0239 else
0240 newModel.rev(i,1)=0;
0241 end
0242 end
0243 end
0244 newModel.b=zeros(numel(model.mets),1);
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254 if isfield(model,'modelID')
0255 newModel.id=model.modelID;
0256 end
0257 if isfield(model,'modelName')
0258 newModel.name=model.modelName;
0259 end
0260 if isfield(model,'rules') && ~isfield(model,'grRules')
0261 model.grRules = rulesTogrrules(model);
0262 end
0263 if isfield(model,'grRules')
0264 [grRules,rxnGeneMat] = standardizeGrRules(model,true);
0265 newModel.grRules = grRules;
0266 newModel.rxnGeneMat = rxnGeneMat;
0267 end
0268 if isfield(model,'rxnECNumbers')
0269 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:','');
0270 end
0271 if any(isfield(model,rxnCOBRAfields))
0272 for i=1:numel(model.rxns)
0273 counter=1;
0274 newModel.rxnMiriams{i,1}=[];
0275 if isfield(model,'rxnReferences')
0276 if ~isempty(model.rxnReferences{i})
0277 pmids = model.rxnReferences{i};
0278 pmids = strsplit(pmids,'; ');
0279 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once'));
0280 if any(nonPmids)
0281 newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', ');
0282 pmids(nonPmids)=[];
0283 end
0284 for j = 1:length(pmids)
0285 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed';
0286 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j};
0287 counter=counter+1;
0288 end
0289 end
0290 end
0291 for j = 2:length(rxnCOBRAfields)
0292 if isfield(model,rxnCOBRAfields{j})
0293 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']);
0294 if ~isempty(rxnAnnotation)
0295 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';'));
0296 for a=1:length(rxnAnnotation)
0297 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j};
0298 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a};
0299 counter=counter+1;
0300 end
0301 end
0302 end
0303 end
0304 end
0305 end
0306 if isfield(newModel,'rxnReferences')
0307 emptyEntry = cellfun(@isempty,newModel.rxnReferences);
0308 newModel.rxnReferences(emptyEntry)={''};
0309 diffNumel = numel(newModel.rxns) - numel(newModel.rxnReferences);
0310 if diffNumel > 0
0311 newModel.rxnReferences(end+1:end+diffNumel) = {''};
0312 end
0313 end
0314 if any(isfield(model,geneCOBRAfields))
0315 for i=1:numel(model.genes)
0316 counter=1;
0317 newModel.geneMiriams{i,1}=[];
0318 for j = 1:length(geneCOBRAfields)
0319 if isfield(model,geneCOBRAfields{j})
0320 geneAnnotation = eval(['model.' geneCOBRAfields{j} '{i}']);
0321 if ~isempty(geneAnnotation)
0322 geneAnnotation = strtrim(strsplit(geneAnnotation,';'));
0323 for a=1:length(geneAnnotation)
0324 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j};
0325 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a};
0326 counter=counter+1;
0327 end
0328 end
0329 end
0330 end
0331 end
0332 end
0333 if isfield(model,'geneNames')
0334 newModel.geneShortNames=model.geneNames;
0335 end
0336 newModel.metNames=model.metNames;
0337 for i=1:numel(newModel.comps)
0338 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],'');
0339 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],'');
0340 end
0341 newModel.metNames=deblank(newModel.metNames);
0342 newModel.metComps=regexprep(model.mets,'^.+\[','');
0343 newModel.metComps=regexprep(newModel.metComps,'\]$','');
0344 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps);
0345 if isfield(model,'metInChIString')
0346 newModel.inchis=regexprep(model.metInChIString,'^InChI=','');
0347 end
0348 printWarning=false;
0349 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID']))
0350 for i=1:numel(model.mets)
0351 counter=1;
0352 newModel.metMiriams{i,1}=[];
0353 if isfield(model,'metKEGGID')
0354 if ~isempty(model.metKEGGID{i})
0355 if strcmp(model.metKEGGID{i}(1),'C')
0356 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound';
0357 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0358 counter=counter+1;
0359 elseif strcmp(model.metKEGGID{i}(1),'G')
0360 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan';
0361 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0362 counter=counter+1;
0363 end
0364 end
0365 end
0366 if isfield(model,'metPubChemID')
0367 if ~isempty(model.metPubChemID{i})
0368 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:')
0369 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0370 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0371 counter=counter+1;
0372 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:')
0373 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance';
0374 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0375 counter=counter+1;
0376 else
0377 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0378 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0379 counter=counter+1;
0380 printWarning=true;
0381 end
0382 end
0383 end
0384 for j = 1:length(metCOBRAfields)
0385 if isfield(model,metCOBRAfields{j})
0386 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']);
0387 if ~isempty(metAnnotation)
0388 metAnnotation = strtrim(strsplit(metAnnotation,';'));
0389 for a=1:length(metAnnotation)
0390 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j};
0391 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a};
0392 counter=counter+1;
0393 end
0394 end
0395 end
0396 end
0397 end
0398 end
0399 if printWarning
0400 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n');
0401 end
0402 end
0403
0404
0405 newModel=standardizeModelFieldOrder(newModel);
0406 end
0407
0408 function rules=grrulesToRules(model)
0409
0410
0411
0412 replacingGenes=cell([size(model.genes,1) 1]);
0413 for i=1:numel(replacingGenes)
0414 replacingGenes{i}=strcat('x(',num2str(i),')');
0415 end
0416 rules = strcat({' '},model.grRules,{' '});
0417 for i=1:length(model.genes)
0418 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']);
0419 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']);
0420 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']);
0421 end
0422 rules=regexprep(rules,' and ',' & ');
0423 rules=regexprep(rules,' or ',' | ');
0424 rules=strtrim(rules);
0425 end
0426
0427 function grRules=rulesTogrrules(model)
0428
0429
0430
0431 grRules = strrep(model.rules,'&','and');
0432 grRules = strrep(grRules,'|','or');
0433 for i = 1:length(model.genes)
0434 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i});
0435 end
0436 grRules = strrep(grRules,'( ','(');
0437 grRules = strrep(grRules,' )',')');
0438 grRules = regexprep(grRules,'^(','');
0439 grRules = regexprep(grRules,')$','');
0440 end