0001 function newModel=ravenCobraWrapper(model)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032 if isfield(model,'rules')
0033 isRaven=false;
0034 else
0035 isRaven=true;
0036 end
0037
0038 ravenPath=findRAVENroot();
0039
0040
0041 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv'));
0042 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1);
0043 dbFields = ~cellfun(@isempty,fieldFile{5});
0044 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'});
0045 COBRAnamespace = fieldFile{5}(dbFields);
0046 COBRAnamespace = regexprep(COBRAnamespace,';.*','');
0047 COBRAfields = fieldFile{1}(dbFields);
0048 fclose(fid);
0049
0050
0051 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv'));
0052 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0);
0053 COBRAfields = [COBRAfields; fieldFile{1}];
0054 COBRAnamespace = [COBRAnamespace; fieldFile{2}];
0055 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn'));
0056 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn'));
0057 metCOBRAfields = COBRAfields(startsWith(COBRAfields,'met'));
0058 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met'));
0059 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene'));
0060 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene'));
0061 fclose(fid);
0062
0063 if isRaven
0064
0065 model=simplifyModel(model);
0066 end
0067
0068
0069 newModel.S=model.S;
0070 newModel.lb=model.lb;
0071 newModel.ub=model.ub;
0072 if isfield(model,'c')
0073 newModel.c=model.c;
0074 else
0075 newModel.c=zeros(numel(model.rxns),1);
0076 end
0077 newModel.rxns=model.rxns;
0078 optFields = {'rxnNames','rxnNotes','rxnConfidenceScores','rxnDeltaG',...
0079 'rxnGeneMat','rev','subSystems','comps','compNames','metCharges',...
0080 'metDeltaG','metFormulas','metNotes','metSmiles','genes','proteins'};
0081 for i=1:length(optFields)
0082 if isfield(model,optFields{i})
0083 newModel.(optFields{i})=model.(optFields{i});
0084 end
0085 end
0086
0087
0088 if isRaven
0089 fprintf('Converting RAVEN structure to COBRA..\n');
0090
0091
0092
0093 newModel.rxns=model.rxns;
0094 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$')))
0095 newModel.mets=model.mets;
0096 else
0097
0098 BiGGformat = false(size(model.mets));
0099 for i=1:numel(model.comps)
0100 compPos=model.metComps==i;
0101 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$']));
0102 end
0103 if all(BiGGformat)
0104 newModel.mets=model.mets;
0105 for i=1:numel(model.comps)
0106 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']);
0107 end
0108 else
0109 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']');
0110 end
0111 end
0112
0113
0114
0115
0116
0117 if isfield(model,'id')
0118 newModel.modelID=model.id;
0119 end
0120 if isfield(model,'name')
0121 newModel.modelName=model.name;
0122 end
0123 if isfield(model,'eccodes')
0124 newModel.rxnECNumbers=model.eccodes;
0125 end
0126 if isfield(model,'rxnMiriams')
0127 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams);
0128 for i = 1:length(rxnCOBRAfields)
0129 j=ismember(extractedMiriamNames,rxnNamespaces{i});
0130 if any(j)
0131 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);'])
0132 end
0133 end
0134 end
0135 if isfield(model,'rxnReferences')
0136 if isfield(newModel,'rxnReferences')
0137 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences);
0138 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $','');
0139 else
0140 newModel.rxnReferences = model.rxnReferences;
0141 end
0142 end
0143 if isfield(model,'metNames')
0144 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']');
0145 end
0146 if isfield(model,'metMiriams')
0147 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams);
0148
0149
0150
0151 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg');
0152 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem');
0153 i=ismember(extractedMiriamNames,'kegg');
0154 if any(i)
0155 for j=1:length(i)
0156 if i(j) && isfield(newModel,'metKEGGID')~=1
0157 newModel.metKEGGID=miriams(:,j);
0158 elseif i(j)
0159 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j));
0160 end
0161 end
0162 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$','');
0163 end
0164 i=ismember(extractedMiriamNames,'pubchem');
0165 if any(i)
0166 for j=1:length(i)
0167 if i(j) && isfield(newModel,'metPubChemID')~=1
0168 newModel.metPubChemID=miriams(:,j);
0169 elseif i(j)
0170 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j));
0171 end
0172 end
0173 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$','');
0174 end
0175
0176 for i = 1:length(metCOBRAfields)
0177 j=ismember(extractedMiriamNames,metNamespaces{i});
0178 if any(j)
0179 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);'])
0180 end
0181 end
0182 end
0183 if isfield(model,'inchis')
0184 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$','');
0185 end
0186 newModel.b=zeros(numel(model.mets),1);
0187 newModel.csense=repmat('E',size(model.mets));
0188 if isfield(model,'geneMiriams')
0189 [~,extractedMiriamNames]=extractMiriam(model.geneMiriams);
0190 for i = 1:length(geneCOBRAfields)
0191 j=ismember(extractedMiriamNames,geneNamespaces{i});
0192 if any(j)
0193 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);'])
0194 end
0195 end
0196 end
0197 if isfield(model,'geneShortNames')
0198 newModel.geneNames=model.geneShortNames;
0199 end
0200 if isfield(model,'genes')
0201 newModel.rules=grrulesToRules(model);
0202 else
0203 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n');
0204 end
0205
0206 if isfield(newModel,'subSystems') && ~isempty(newModel.subSystems)
0207 if ~iscell(newModel.subSystems{1})
0208 newModel.subSystems = cellfun(@(x) {x}, newModel.subSystems, 'UniformOutput', false);
0209 end
0210 end
0211 else
0212 fprintf('Converting COBRA structure to RAVEN..\n');
0213
0214
0215
0216 newModel.mets=model.mets;
0217 if ~isfield(model,'comps')
0218
0219
0220
0221 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1'));
0222 newModel.compNames = newModel.comps;
0223 end
0224 for i=1:numel(newModel.comps)
0225 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],'');
0226 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],'');
0227 end
0228
0229
0230
0231
0232 if numel(unique(newModel.mets))~=numel(model.mets)
0233 newModel.mets=model.mets;
0234 for i=1:numel(newModel.comps)
0235 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]);
0236 end
0237 end
0238
0239
0240 if ~isfield(model,'rev')
0241 for i=1:numel(model.rxns)
0242 if model.lb(i)<0
0243 newModel.rev(i,1)=1;
0244 else
0245 newModel.rev(i,1)=0;
0246 end
0247 end
0248 end
0249 newModel.b=zeros(numel(model.mets),1);
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259 if isfield(model,'modelID')
0260 newModel.id=model.modelID;
0261 end
0262 if isfield(model,'modelName')
0263 newModel.name=model.modelName;
0264 end
0265 if isfield(model,'rules') && ~isfield(model,'grRules')
0266 model.grRules = rulesTogrrules(model);
0267 end
0268 if isfield(model,'grRules')
0269 [grRules,rxnGeneMat] = standardizeGrRules(model,true);
0270 newModel.grRules = grRules;
0271 newModel.rxnGeneMat = rxnGeneMat;
0272 end
0273 if isfield(model,'rxnECNumbers')
0274 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:','');
0275 end
0276 if any(isfield(model,rxnCOBRAfields))
0277 for i=1:numel(model.rxns)
0278 counter=1;
0279 newModel.rxnMiriams{i,1}=[];
0280 if isfield(model,'rxnReferences')
0281 if ~isempty(model.rxnReferences{i})
0282 pmids = model.rxnReferences{i};
0283 pmids = strsplit(pmids,'; ');
0284 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once'));
0285 if any(nonPmids)
0286 newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', ');
0287 pmids(nonPmids)=[];
0288 end
0289 for j = 1:length(pmids)
0290 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed';
0291 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j};
0292 counter=counter+1;
0293 end
0294 end
0295 end
0296 for j = 2:length(rxnCOBRAfields)
0297 if isfield(model,rxnCOBRAfields{j})
0298 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']);
0299 if ~isempty(rxnAnnotation)
0300 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';'));
0301 for a=1:length(rxnAnnotation)
0302 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j};
0303 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a};
0304 counter=counter+1;
0305 end
0306 end
0307 end
0308 end
0309 end
0310 end
0311 if isfield(newModel,'rxnReferences')
0312 emptyEntry = cellfun(@isempty,newModel.rxnReferences);
0313 newModel.rxnReferences(emptyEntry)={''};
0314 diffNumel = numel(newModel.rxns) - numel(newModel.rxnReferences);
0315 if diffNumel > 0
0316 newModel.rxnReferences(end+1:end+diffNumel) = {''};
0317 end
0318 end
0319 if any(isfield(model,geneCOBRAfields))
0320 for i=1:numel(model.genes)
0321 counter=1;
0322 newModel.geneMiriams{i,1}=[];
0323 for j = 1:length(geneCOBRAfields)
0324 if isfield(model,geneCOBRAfields{j})
0325 geneAnnotation = eval(['model.' geneCOBRAfields{j} '{i}']);
0326 if ~isempty(geneAnnotation)
0327 geneAnnotation = strtrim(strsplit(geneAnnotation,';'));
0328 for a=1:length(geneAnnotation)
0329 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j};
0330 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a};
0331 counter=counter+1;
0332 end
0333 end
0334 end
0335 end
0336 end
0337 end
0338 if isfield(model,'geneNames')
0339 newModel.geneShortNames=model.geneNames;
0340 end
0341 newModel.metNames=model.metNames;
0342 for i=1:numel(newModel.comps)
0343 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],'');
0344 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],'');
0345 end
0346 newModel.metNames=deblank(newModel.metNames);
0347 newModel.metComps=regexprep(model.mets,'^.+\[','');
0348 newModel.metComps=regexprep(newModel.metComps,'\]$','');
0349 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps);
0350 if isfield(model,'metInChIString')
0351 newModel.inchis=regexprep(model.metInChIString,'^InChI=','');
0352 end
0353 printWarning=false;
0354 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID']))
0355 for i=1:numel(model.mets)
0356 counter=1;
0357 newModel.metMiriams{i,1}=[];
0358 if isfield(model,'metKEGGID')
0359 if ~isempty(model.metKEGGID{i})
0360 if strcmp(model.metKEGGID{i}(1),'C')
0361 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound';
0362 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0363 counter=counter+1;
0364 elseif strcmp(model.metKEGGID{i}(1),'G')
0365 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan';
0366 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i};
0367 counter=counter+1;
0368 end
0369 end
0370 end
0371 if isfield(model,'metPubChemID')
0372 if ~isempty(model.metPubChemID{i})
0373 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:')
0374 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0375 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0376 counter=counter+1;
0377 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:')
0378 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance';
0379 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0380 counter=counter+1;
0381 else
0382 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound';
0383 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i};
0384 counter=counter+1;
0385 printWarning=true;
0386 end
0387 end
0388 end
0389 for j = 1:length(metCOBRAfields)
0390 if isfield(model,metCOBRAfields{j})
0391 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']);
0392 if ~isempty(metAnnotation)
0393 metAnnotation = strtrim(strsplit(metAnnotation,';'));
0394 for a=1:length(metAnnotation)
0395 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j};
0396 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a};
0397 counter=counter+1;
0398 end
0399 end
0400 end
0401 end
0402 end
0403 end
0404
0405 if isfield(newModel,'subSystems') && ~isempty(newModel.subSystems)
0406 if all(cellfun(@(x) iscell(x) && isscalar(x), newModel.subSystems))
0407 newModel.subSystems = transpose([newModel.subSystems{:}]);
0408 end
0409 end
0410 if printWarning
0411 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n');
0412 end
0413 end
0414
0415
0416 newModel=standardizeModelFieldOrder(newModel);
0417 end
0418
0419 function rules=grrulesToRules(model)
0420
0421
0422
0423 replacingGenes=cell([size(model.genes,1) 1]);
0424 for i=1:numel(replacingGenes)
0425 replacingGenes{i}=strcat('x(',num2str(i),')');
0426 end
0427 rules = strcat({' '},model.grRules,{' '});
0428 for i=1:length(model.genes)
0429 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']);
0430 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']);
0431 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']);
0432 end
0433 rules=regexprep(rules,' and ',' & ');
0434 rules=regexprep(rules,' or ',' | ');
0435 rules=strtrim(rules);
0436 end
0437
0438 function grRules=rulesTogrrules(model)
0439
0440
0441
0442 grRules = strrep(model.rules,'&','and');
0443 grRules = strrep(grRules,'|','or');
0444 for i = 1:length(model.genes)
0445 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i});
0446 end
0447 grRules = strrep(grRules,'( ','(');
0448 grRules = strrep(grRules,' )',')');
0449 grRules = regexprep(grRules,'^(','');
0450 grRules = regexprep(grRules,')$','');
0451 end