0001 function model=importModel(fileName,removeExcMets,isSBML2COBRA,supressWarnings)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076 if nargin<1 || isempty(fileName)
0077 [fileName, pathName] = uigetfile({'*.xml;*.sbml'}, 'Please select the model file');
0078 if fileName == 0
0079 error('You should select a model file')
0080 else
0081 fileName = fullfile(pathName,fileName);
0082 end
0083 end
0084 fileName=char(fileName);
0085 if nargin<2
0086 removeExcMets=true;
0087 end
0088
0089 if nargin<3
0090 isSBML2COBRA=false;
0091 end
0092
0093 if nargin<4
0094 supressWarnings=false;
0095 end
0096
0097 if ~isfile(fileName)
0098 error('SBML file %s cannot be found',string(fileName));
0099 end
0100
0101
0102
0103 model=[];
0104 model.id=[];
0105 model.name=[];
0106 model.annotation=[];
0107 model.rxns={};
0108 model.mets={};
0109 model.S=[];
0110 model.lb=[];
0111 model.ub=[];
0112 model.rev=[];
0113 model.c=[];
0114 model.b=[];
0115 model.comps={};
0116 model.compNames={};
0117 model.compOutside={};
0118 model.compMiriams={};
0119 model.rxnNames={};
0120 model.rxnComps=[];
0121 model.grRules={};
0122 model.rxnGeneMat=[];
0123 model.subSystems={};
0124 model.eccodes={};
0125 model.rxnMiriams={};
0126 model.rxnNotes={};
0127 model.rxnReferences={};
0128 model.rxnConfidenceScores=[];
0129 model.genes={};
0130 model.geneComps=[];
0131 model.geneMiriams={};
0132 model.geneShortNames={};
0133 model.metNames={};
0134 model.metComps=[];
0135 model.inchis={};
0136 model.metFormulas={};
0137 model.metMiriams={};
0138 model.metCharges=[];
0139 model.unconstrained=[];
0140
0141
0142 [ravenDir,prevDir]=findRAVENroot();
0143 fileName=checkFileExistence(fileName,1);
0144 cd(fullfile(ravenDir,'software','libSBML'));
0145 modelSBML = TranslateSBML(fileName,0,0,[1 1]);
0146 cd(prevDir);
0147
0148 if isempty(modelSBML)
0149 EM='There is a problem with the SBML file. Try using the SBML Validator at http://sbml.org/Facilities/Validator';
0150 dispEM(EM);
0151 end
0152
0153
0154
0155
0156
0157 for i=1:numel(modelSBML.reaction)
0158 modelSBML.reaction(i).name=regexprep(modelSBML.reaction(i).name,'^R_','');
0159 modelSBML.reaction(i).id=regexprep(modelSBML.reaction(i).id,'^R_','');
0160 if isfield(modelSBML.reaction(i),'compartment')
0161 modelSBML.reaction(i).compartment=regexprep(modelSBML.reaction(i).compartment,'^C_','');
0162 end
0163 for j=1:numel(modelSBML.reaction(i).reactant)
0164 modelSBML.reaction(i).reactant(j).species=regexprep(modelSBML.reaction(i).reactant(j).species,'^M_','');
0165 end
0166 for j=1:numel(modelSBML.reaction(i).product)
0167 modelSBML.reaction(i).product(j).species=regexprep(modelSBML.reaction(i).product(j).species,'^M_','');
0168 end
0169 end
0170
0171
0172 compartmentNames=cell(numel(modelSBML.compartment),1);
0173 compartmentIDs=cell(numel(modelSBML.compartment),1);
0174 compartmentOutside=cell(numel(modelSBML.compartment),1);
0175 compartmentMiriams=cell(numel(modelSBML.compartment),1);
0176
0177 if isfield(modelSBML.compartment,'sboTerm') && numel(unique([modelSBML.compartment.sboTerm])) == 1
0178
0179 modelSBML.compartment = rmfield(modelSBML.compartment,'sboTerm');
0180 end
0181
0182 for i=1:numel(modelSBML.compartment)
0183 compartmentNames{i}=modelSBML.compartment(i).name;
0184 compartmentIDs{i}=regexprep(modelSBML.compartment(i).id,'^C_','');
0185 if isfield(modelSBML.compartment(i),'outside')
0186 if ~isempty(modelSBML.compartment(i).outside)
0187 compartmentOutside{i}=regexprep(modelSBML.compartment(i).outside,'^C_','');
0188 else
0189 compartmentOutside{i}='';
0190 end
0191 else
0192 compartmentOutside{i}=[];
0193 end
0194
0195 if isfield(modelSBML.compartment(i),'annotation')
0196 compartmentMiriams{i}=parseMiriam(modelSBML.compartment(i).annotation);
0197 else
0198 compartmentMiriams{i}=[];
0199 end
0200
0201 if isfield(modelSBML.compartment(i),'sboTerm') && ~(modelSBML.compartment(i).sboTerm==-1)
0202 compartmentMiriams{i} = addSBOtoMiriam(compartmentMiriams{i},modelSBML.compartment(i).sboTerm);
0203 end
0204 end
0205
0206
0207 if all(cellfun(@isempty,compartmentNames))
0208 compartmentNames=compartmentIDs;
0209 end
0210
0211
0212 metaboliteNames={};
0213 metaboliteIDs={};
0214 metaboliteCompartments={};
0215 metaboliteUnconstrained=[];
0216 metaboliteFormula={};
0217 metaboliteInChI={};
0218 metaboliteMiriams={};
0219 metaboliteCharges=[];
0220
0221 geneNames={};
0222 geneIDs={};
0223 geneMiriams={};
0224 geneShortNames={};
0225 geneCompartments={};
0226 complexIDs={};
0227 complexNames={};
0228
0229
0230
0231
0232
0233 geneSBOs = [];
0234 metSBOs = [];
0235
0236
0237 regexCompNames = ['\s?\[((' strjoin({modelSBML.compartment.name},')|(') '))\]$'];
0238 for i=1:numel(modelSBML.species)
0239 if ~isSBML2COBRA
0240 if length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:2),'E_')
0241 geneNames{numel(geneNames)+1,1}=modelSBML.species(i).name;
0242
0243
0244
0245
0246 geneIDs{numel(geneIDs)+1,1}=modelSBML.species(i).id;
0247 geneCompartments{numel(geneCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_','');
0248
0249
0250 if isfield(modelSBML.species(i),'annotation')
0251
0252 geneMiriam=parseMiriam(modelSBML.species(i).annotation);
0253 geneMiriams{numel(geneMiriams)+1,1}=geneMiriam;
0254 else
0255 geneMiriams{numel(geneMiriams)+1,1}=[];
0256 end
0257
0258
0259
0260
0261
0262
0263 if isfield(modelSBML.species(i),'notes')
0264 geneShortNames{numel(geneShortNames)+1,1}=parseNote(modelSBML.species(i).notes,'SHORT NAME');
0265 else
0266 geneShortNames{numel(geneShortNames)+1,1}='';
0267 end
0268
0269
0270 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1)
0271 geneSBOs(end+1,1) = modelSBML.species(i).sboTerm;
0272 end
0273 elseif length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:3),'Cx_')
0274
0275 complexIDs=[complexIDs;modelSBML.species(i).id];
0276 complexNames=[complexNames;modelSBML.species(i).name];
0277 else
0278
0279 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name;
0280 metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_','');
0281 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_','');
0282 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition;
0283
0284
0285
0286
0287
0288 if ~isempty(modelSBML.species(i).annotation)
0289
0290 startString='>InChI=';
0291 endString='</in:inchi>';
0292 formStart=strfind(modelSBML.species(i).annotation,startString);
0293 if isempty(formStart)
0294 startString='InChI=';
0295 endString='"/>';
0296 end
0297 formStart=strfind(modelSBML.species(i).annotation,startString);
0298 if ~isempty(formStart)
0299 formEnd=strfind(modelSBML.species(i).annotation,endString);
0300 formEndIndex=find(formEnd>formStart, 1 );
0301 formula=modelSBML.species(i).annotation(formStart+numel(startString):formEnd(formEndIndex)-1);
0302 metaboliteInChI{numel(metaboliteInChI)+1,1}=formula;
0303
0304
0305
0306
0307
0308 compositionIndexes=strfind(formula,'/');
0309 if numel(compositionIndexes)>1
0310 metaboliteFormula{numel(metaboliteFormula)+1,1}=...
0311 formula(compositionIndexes(1)+1:compositionIndexes(2)-1);
0312 else
0313 if numel(compositionIndexes)==1
0314
0315
0316 metaboliteFormula{numel(metaboliteFormula)+1,1}=...
0317 formula(compositionIndexes(1)+1:numel(formula));
0318 else
0319 metaboliteFormula{numel(metaboliteFormula)+1,1}='';
0320 end
0321 end
0322 elseif isfield(modelSBML.species(i),'fbc_chemicalFormula')
0323 metaboliteInChI{numel(metaboliteInChI)+1,1}='';
0324 if ~isempty(modelSBML.species(i).fbc_chemicalFormula)
0325
0326
0327 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).fbc_chemicalFormula;
0328 else
0329 metaboliteFormula{numel(metaboliteFormula)+1,1}='';
0330 end
0331 else
0332 metaboliteInChI{numel(metaboliteInChI)+1,1}='';
0333 metaboliteFormula{numel(metaboliteFormula)+1,1}='';
0334 end
0335
0336
0337 metMiriam=parseMiriam(modelSBML.species(i).annotation);
0338 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam;
0339 else
0340 metaboliteInChI{numel(metaboliteInChI)+1,1}='';
0341 if isfield(modelSBML.species(i),'notes')
0342 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA');
0343 else
0344 metaboliteFormula{numel(metaboliteFormula)+1,1}='';
0345 end
0346 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=[];
0347 end
0348 if ~isempty(modelSBML.species(i).notes)
0349 if ~isfield(modelSBML.species(i),'annotation')
0350 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA');
0351 end
0352 elseif ~isfield(modelSBML.species(i),'annotation')
0353 metaboliteFormula{numel(metaboliteFormula)+1,1}='';
0354 end
0355
0356 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1)
0357 metSBOs(end+1,1) = modelSBML.species(i).sboTerm;
0358 end
0359 end
0360
0361 elseif isSBML2COBRA
0362
0363
0364
0365
0366 modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^M_','');
0367 modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^_','');
0368 underscoreIndex=strfind(modelSBML.species(i).name,'_');
0369
0370 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name;
0371
0372 metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_','');
0373 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_','');
0374
0375
0376
0377 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition;
0378 if strcmp(metaboliteIDs{end}(max(end-1,1):end),'_b')
0379 metaboliteUnconstrained(end)=1;
0380 end
0381
0382
0383 if max(underscoreIndex)<length(modelSBML.species(i).name)
0384 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).name(max(underscoreIndex)+1:length(modelSBML.species(i).name));
0385 else
0386 metaboliteFormula{numel(metaboliteFormula)+1,1}='';
0387 end
0388
0389
0390
0391 if isfield(modelSBML.species(i),'notes') && ~isempty(parseNote(modelSBML.species(i).notes,'FORMULA'))
0392 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA');
0393 end
0394
0395
0396 if ~isempty(modelSBML.species(i).annotation)
0397 metMiriam=parseMiriam(modelSBML.species(i).annotation);
0398 else
0399 metMiriam=[];
0400 end
0401 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam;
0402
0403
0404 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1)
0405 metSBOs(end+1,1) = modelSBML.species(i).sboTerm;
0406 end
0407 end
0408
0409 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:2),'E_')
0410 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:3),'Cx_')
0411
0412 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},regexCompNames,'');
0413 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},'^M_','');
0414 if isfield(modelSBML.species(i),'fbc_charge')
0415 if ~isempty(modelSBML.species(i).fbc_charge) && modelSBML.species(i).isSetfbc_charge
0416 metaboliteCharges(numel(metaboliteCharges)+1,1)=double(modelSBML.species(i).fbc_charge);
0417 else
0418 if isfield(modelSBML.species(i),'notes')
0419 if strfind(modelSBML.species(i).notes,'CHARGE')
0420 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE'));
0421 else
0422 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN;
0423 end
0424 else
0425 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN;
0426 end
0427 end
0428 elseif isfield(modelSBML.species(i),'notes')
0429 if strfind(modelSBML.species(i).notes,'CHARGE')
0430 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE'));
0431 else
0432 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN;
0433 end
0434 else
0435 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN;
0436 end
0437
0438 if isfield(modelSBML.species(i),'fbc_chemicalFormula')
0439 if ~isempty(modelSBML.species(i).fbc_chemicalFormula)
0440 metaboliteFormula{numel(metaboliteFormula),1}=modelSBML.species(i).fbc_chemicalFormula;
0441 end
0442 end
0443 end
0444 end
0445 end
0446
0447
0448 if numel(unique(geneSBOs)) > 1
0449 for i = 1:numel(geneNames)
0450 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},geneSBOs(i));
0451 end
0452 end
0453 if numel(unique(metSBOs)) > 1
0454 for i = 1:numel(metaboliteNames)
0455 metaboliteMiriams{i} = addSBOtoMiriam(metaboliteMiriams{i},metSBOs(i));
0456 end
0457 end
0458
0459
0460 reactionNames=cell(numel(modelSBML.reaction),1);
0461 reactionIDs=cell(numel(modelSBML.reaction),1);
0462 subsystems=cell(numel(modelSBML.reaction),1);
0463 eccodes=cell(numel(modelSBML.reaction),1);
0464 eccodes(:,:)=cellstr('');
0465 rxnconfidencescores=NaN(numel(modelSBML.reaction),1);
0466 rxnreferences=cell(numel(modelSBML.reaction),1);
0467 rxnreferences(:,:)=cellstr('');
0468 rxnnotes=cell(numel(modelSBML.reaction),1);
0469 rxnnotes(:,:)=cellstr('');
0470 grRules=cell(numel(modelSBML.reaction),1);
0471 grRules(:,:)=cellstr('');
0472 grRulesFromModifier=grRules;
0473 rxnComps=zeros(numel(modelSBML.reaction),1);
0474 rxnMiriams=cell(numel(modelSBML.reaction),1);
0475 reactionReversibility=zeros(numel(modelSBML.reaction),1);
0476 reactionUB=zeros(numel(modelSBML.reaction),1);
0477 reactionLB=zeros(numel(modelSBML.reaction),1);
0478 reactionObjective=zeros(numel(modelSBML.reaction),1);
0479
0480
0481 S=zeros(numel(metaboliteIDs),numel(modelSBML.reaction));
0482
0483 counter=0;
0484
0485 if isfield(modelSBML,'parameter')
0486 parameter.name=cell(numel(modelSBML.parameter),1);
0487 parameter.name={modelSBML.parameter(:).id}';
0488 parameter.value={modelSBML.parameter(:).value}';
0489 end
0490
0491 if isfield(modelSBML.reaction,'sboTerm') && numel(unique([modelSBML.reaction.sboTerm])) == 1
0492
0493 modelSBML.reaction = rmfield(modelSBML.reaction,'sboTerm');
0494 end
0495
0496 for i=1:numel(modelSBML.reaction)
0497
0498
0499
0500
0501
0502 if numel(modelSBML.reaction(i).product)==1
0503 if length(modelSBML.reaction(i).product(1).species)>=3
0504 if strcmp(modelSBML.reaction(i).product(1).species(1:3),'Cx_')==true
0505 continue;
0506 end
0507 end
0508 end
0509
0510
0511 counter=counter+1;
0512
0513 reactionNames{counter}=modelSBML.reaction(i).name;
0514
0515 reactionIDs{counter}=modelSBML.reaction(i).id;
0516 reactionReversibility(counter)=modelSBML.reaction(i).reversible;
0517
0518
0519
0520
0521 if isfield(modelSBML.reaction(i),'fbc_lowerFluxBound')
0522 lb=modelSBML.reaction(i).fbc_lowerFluxBound;
0523 ub=modelSBML.reaction(i).fbc_upperFluxBound;
0524 for n=1:numel(parameter.value)
0525 lb=regexprep(lb,parameter.name(n),num2str(parameter.value{n}));
0526 ub=regexprep(ub,parameter.name(n),num2str(parameter.value{n}));
0527 end
0528 if isempty(lb)
0529 lb='-Inf';
0530 end
0531 if isempty(ub)
0532 ub='Inf';
0533 end
0534 reactionLB(counter)=str2num(lb);
0535 reactionUB(counter)=str2num(ub);
0536
0537 elseif isfield(modelSBML.reaction(i).kineticLaw,'parameter')
0538 reactionLB(counter)=modelSBML.reaction(i).kineticLaw.parameter(1).value;
0539 reactionUB(counter)=modelSBML.reaction(i).kineticLaw.parameter(2).value;
0540 reactionObjective(counter)=modelSBML.reaction(i).kineticLaw.parameter(3).value;
0541 else
0542 if reactionReversibility(counter)==true
0543 reactionLB(counter)=-inf;
0544 else
0545 reactionLB(counter)=0;
0546 end
0547 reactionUB(counter)=inf;
0548 reactionObjective(counter)=0;
0549 end
0550
0551
0552
0553 if isfield(modelSBML.reaction(i),'fbc_geneProductAssociation')
0554 if ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation) && ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association)
0555 grRules{counter}=modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association;
0556 end
0557 elseif isfield(modelSBML.reaction(i),'notes')
0558
0559
0560
0561 if strfind(modelSBML.reaction(i).notes,'GENE_ASSOCIATION')
0562 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE_ASSOCIATION');
0563 elseif strfind(modelSBML.reaction(i).notes,'GENE ASSOCIATION')
0564 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE ASSOCIATION');
0565 else
0566 geneAssociation='';
0567 end
0568 if ~isempty(geneAssociation)
0569
0570
0571 grRules{counter}=geneAssociation;
0572 end
0573 end
0574 if isempty(grRules{counter}) && ~isempty(modelSBML.reaction(i).modifier)
0575 rules='';
0576 for j=1:numel(modelSBML.reaction(i).modifier)
0577 modifier=modelSBML.reaction(i).modifier(j).species;
0578 if ~isempty(modifier)
0579 if strcmpi(modifier(1:2),'E_')
0580 index=find(strcmp(modifier,geneIDs));
0581
0582
0583 if numel(index)~=1
0584 EM=['Could not get the gene association data from reaction ' reactionIDs{i}];
0585 dispEM(EM);
0586 end
0587 if ~isempty(rules)
0588 rules=[rules ' or (' geneNames{index} ')'];
0589 else
0590 rules=['(' geneNames{index} ')'];
0591 end
0592 elseif strcmp(modifier(1:2),'s_')
0593 index=find(strcmp(modifier,metaboliteIDs));
0594
0595
0596 if numel(index)~=1
0597 EM=['Could not get the gene association data from reaction ' reactionIDs{i}];
0598 dispEM(EM);
0599 end
0600 if ~isempty(rules)
0601 rules=[rules ' or (' metaboliteIDs{index} ')'];
0602 else
0603 rules=['(' metaboliteIDs{index} ')'];
0604 end
0605 else
0606
0607
0608
0609 index=find(strcmp(modifier,complexIDs));
0610 if numel(index)==1
0611 if ~isempty(rules)
0612 rules=[rules ' or (' strrep(complexNames{index},':',' and ') ')'];
0613 else
0614 rules=['(' strrep(complexNames{index},':',' and ') ')'];
0615 end
0616 else
0617
0618 EM=['Could not get the gene association data from reaction ' reactionIDs{i}];
0619 dispEM(EM);
0620 end
0621 end
0622 end
0623 end
0624 grRules{counter}=rules;
0625 grRulesFromModifier{counter}=rules;
0626 end
0627
0628
0629 if isfield(modelSBML.reaction(i),'compartment')
0630 if ~isempty(modelSBML.reaction(i).compartment)
0631 rxnComp=modelSBML.reaction(i).compartment;
0632 else
0633 rxnComp='';
0634 end
0635 elseif isfield(modelSBML.reaction(i),'notes')
0636 rxnComp=parseNote(modelSBML.reaction(i).notes,'COMPARTMENT');
0637 end
0638 if ~isempty(rxnComp)
0639
0640 [~, J]=ismember(rxnComp,compartmentIDs);
0641 rxnComps(counter)=J;
0642 end
0643
0644
0645
0646
0647 if isSBML2COBRA==false
0648 miriamStruct=parseMiriam(modelSBML.reaction(i).annotation);
0649 rxnMiriams{counter}=miriamStruct;
0650 if isfield(modelSBML.reaction(i),'notes')
0651 subsystems{counter,1}=cellstr(parseNote(modelSBML.reaction(i).notes,'SUBSYSTEM'));
0652 subsystems{counter,1}(cellfun('isempty',subsystems{counter,1})) = [];
0653 if strfind(modelSBML.reaction(i).notes,'Confidence Level')
0654 rxnconfidencescores(counter)=str2num(parseNote(modelSBML.reaction(i).notes,'Confidence Level'));
0655 end
0656 rxnreferences{counter,1}=parseNote(modelSBML.reaction(i).notes,'AUTHORS');
0657 rxnnotes{counter,1}=parseNote(modelSBML.reaction(i).notes,'NOTES');
0658 end
0659 end
0660
0661
0662 if isfield(modelSBML.reaction(i),'sboTerm') && ~(modelSBML.reaction(i).sboTerm==-1)
0663 rxnMiriams{counter} = addSBOtoMiriam(rxnMiriams{counter}, modelSBML.reaction(i).sboTerm);
0664 end
0665
0666
0667 eccode='';
0668 if ~isempty(modelSBML.reaction(i).annotation)
0669 if strfind(modelSBML.reaction(i).annotation,'urn:miriam:ec-code')
0670 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'urn:miriam:',':','ec-code');
0671 elseif strfind(modelSBML.reaction(i).annotation,'http://identifiers.org/ec-code')
0672 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'http://identifiers.org/','/','ec-code');
0673 elseif strfind(modelSBML.reaction(i).annotation,'https://identifiers.org/ec-code')
0674 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'https://identifiers.org/','/','ec-code');
0675 end
0676 elseif isfield(modelSBML.reaction(i),'notes')
0677 if strfind(modelSBML.reaction(i).notes,'EC Number')
0678 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'EC Number')];
0679 elseif strfind(modelSBML.reaction(i).notes,'PROTEIN_CLASS')
0680 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'PROTEIN_CLASS')];
0681 end
0682 end
0683 eccodes{counter}=eccode;
0684
0685
0686 for j=1:numel(modelSBML.reaction(i).reactant)
0687
0688
0689 metIndex=find(strcmp(modelSBML.reaction(i).reactant(j).species,metaboliteIDs),1);
0690 if isempty(metIndex)
0691 EM=['Could not find metabolite ' modelSBML.reaction(i).reactant(j).species ' in reaction ' reactionIDs{counter}];
0692 dispEM(EM);
0693 end
0694 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).reactant(j).stoichiometry*-1;
0695 end
0696
0697
0698 for j=1:numel(modelSBML.reaction(i).product)
0699
0700 metIndex=find(strcmp(modelSBML.reaction(i).product(j).species,metaboliteIDs),1);
0701 if isempty(metIndex)
0702 EM=['Could not find metabolite ' modelSBML.reaction(i).product(j).species ' in reaction ' reactionIDs{counter}];
0703 dispEM(EM);
0704 end
0705 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).product(j).stoichiometry;
0706 end
0707 end
0708
0709
0710
0711 if isfield(modelSBML, 'fbc_activeObjective')
0712 obj=modelSBML.fbc_activeObjective;
0713 for i=1:numel(modelSBML.fbc_objective)
0714 if strcmp(obj,modelSBML.fbc_objective(i).fbc_id)
0715 if ~isempty(modelSBML.fbc_objective(i).fbc_fluxObjective)
0716 rxn=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction;
0717 rxn=regexprep(rxn,'^R_','');
0718 idx=find(ismember(reactionIDs,rxn));
0719 reactionObjective(idx)=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient;
0720 end
0721 end
0722 end
0723 end
0724
0725
0726 if isfield(modelSBML,'groups_group')
0727 for i=1:numel(modelSBML.groups_group)
0728 groupreactions={modelSBML.groups_group(i).groups_member(:).groups_idRef};
0729 groupreactions=regexprep(groupreactions,'^R_','');
0730 [~, idx] = ismember(groupreactions, reactionIDs);
0731 if any(idx)
0732 for j=1:numel(idx)
0733 if isempty(subsystems{idx(j)})
0734 subsystems{idx(j)} = {modelSBML.groups_group(i).groups_name};
0735 else
0736 subsystems{idx(j)} = horzcat(subsystems{idx(j)}, modelSBML.groups_group(i).groups_name);
0737 end
0738 end
0739 end
0740 end
0741 end
0742
0743
0744 reactionNames=reactionNames(1:counter);
0745 reactionIDs=reactionIDs(1:counter);
0746 subsystems=subsystems(1:counter);
0747 eccodes=eccodes(1:counter);
0748 rxnconfidencescores=rxnconfidencescores(1:counter);
0749 rxnreferences=rxnreferences(1:counter);
0750 rxnnotes=rxnnotes(1:counter);
0751 grRules=grRules(1:counter);
0752 rxnMiriams=rxnMiriams(1:counter);
0753 reactionReversibility=reactionReversibility(1:counter);
0754 reactionUB=reactionUB(1:counter);
0755 reactionLB=reactionLB(1:counter);
0756 reactionObjective=reactionObjective(1:counter);
0757 S=S(:,1:counter);
0758
0759 model.name=modelSBML.name;
0760 model.id=regexprep(modelSBML.id,'^M_','');
0761 model.rxns=reactionIDs;
0762 model.mets=metaboliteIDs;
0763 model.S=sparse(S);
0764 model.lb=reactionLB;
0765 model.ub=reactionUB;
0766 model.rev=reactionReversibility;
0767 model.c=reactionObjective;
0768 model.b=zeros(numel(metaboliteIDs),1);
0769 model.comps=compartmentIDs;
0770 model.compNames=compartmentNames;
0771 model.rxnConfidenceScores=rxnconfidencescores;
0772 model.rxnReferences=rxnreferences;
0773 model.rxnNotes=rxnnotes;
0774
0775
0776
0777 if isfield(modelSBML,'annotation')
0778 endString='</';
0779 I=strfind(modelSBML.annotation,endString);
0780 J=strfind(modelSBML.annotation,'<vCard:Family>');
0781 if any(J)
0782 model.annotation.familyName=modelSBML.annotation(J(1)+14:I(find(I>J(1),1))-1);
0783 end
0784 J=strfind(modelSBML.annotation,'<vCard:Given>');
0785 if any(J)
0786 model.annotation.givenName=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1);
0787 end
0788 J=strfind(modelSBML.annotation,'<vCard:EMAIL>');
0789 if any(J)
0790 model.annotation.email=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1);
0791 end
0792 J=strfind(modelSBML.annotation,'<vCard:Orgname>');
0793 if any(J)
0794 model.annotation.organization=modelSBML.annotation(J(1)+15:I(find(I>J(1),1))-1);
0795 end
0796 endString='"/>';
0797 I=strfind(modelSBML.annotation,endString);
0798 if strfind(modelSBML.annotation,'"urn:miriam:')
0799 J=strfind(modelSBML.annotation,'"urn:miriam:');
0800 if any(J)
0801 model.annotation.taxonomy=modelSBML.annotation(J+12:I(find(I>J,1))-1);
0802 end
0803 else
0804 J=strfind(modelSBML.annotation,'"http://identifiers.org/');
0805 if any(J)
0806 model.annotation.taxonomy=modelSBML.annotation(J+24:I(find(I>J,1))-1);
0807 else
0808 J=strfind(modelSBML.annotation,'"https://identifiers.org/');
0809 if any(J)
0810 model.annotation.taxonomy=modelSBML.annotation(J+25:I(find(I>J,1))-1);
0811 end
0812 end
0813 end
0814 end
0815 if isfield(modelSBML,'notes')
0816 startString=strfind(modelSBML.notes,'xhtml">');
0817 endString=strfind(modelSBML.notes,'</body>');
0818 if any(startString) && any(endString)
0819 model.annotation.note=modelSBML.notes(startString+7:endString-1);
0820 model.annotation.note=regexprep(model.annotation.note,'<p>|</p>','');
0821 model.annotation.note=strtrim(model.annotation.note);
0822 end
0823 end
0824
0825 if any(~cellfun(@isempty,compartmentOutside))
0826 model.compOutside=compartmentOutside;
0827 end
0828
0829 model.rxnNames=reactionNames;
0830 model.metNames=metaboliteNames;
0831
0832
0833 [~, J]=ismember(metaboliteCompartments,model.comps);
0834 model.metComps=J;
0835
0836
0837 if ~isempty(geneNames)
0838
0839
0840
0841
0842
0843 if all(cellfun(@isempty,strfind(grRules,geneNames{1})))
0844 geneShortNames=geneNames;
0845
0846 geneShortNames=regexprep(geneShortNames,' \[.+$','');
0847
0848
0849
0850
0851 grRulesFromModifier=regexprep(regexprep(grRulesFromModifier,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs);
0852 grRules=regexprep(regexprep(grRules,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs);
0853
0854
0855
0856
0857
0858 geneShortNames=vertcat(geneShortNames,metaboliteNames);
0859 geneIDs=vertcat(geneIDs,metaboliteIDs);
0860 geneSystNames=extractMiriam(vertcat(geneMiriams,metaboliteMiriams),'kegg.genes');
0861 geneCompartments=vertcat(geneCompartments,metaboliteCompartments);
0862 geneMiriams=vertcat(geneMiriams,metaboliteMiriams);
0863
0864
0865
0866 geneShortNames=geneShortNames(~cellfun('isempty',geneSystNames));
0867 geneIDs=geneIDs(~cellfun('isempty',geneSystNames));
0868 geneSystNames=geneSystNames(~cellfun('isempty',geneSystNames));
0869 geneCompartments=geneCompartments(~cellfun('isempty',geneSystNames));
0870 geneMiriams=geneMiriams(~cellfun('isempty',geneSystNames));
0871
0872
0873 geneNames=geneIDs;
0874 [~, Indx] = sort(cellfun('size', geneSystNames, 2), 'descend');
0875 geneIDs = geneIDs(Indx);
0876 geneSystNames = geneSystNames(Indx);
0877 for i=1:numel(geneSystNames)
0878 for j=1:numel(grRules)
0879 if strfind(grRules{j},geneSystNames{i})
0880 if ~isempty(grRules{j})
0881 if sum(ismember(geneSystNames,geneSystNames{i}))==1
0882 grRules{j}=regexprep(grRules{j},geneSystNames{i},geneIDs{i});
0883 elseif sum(ismember(geneSystNames,geneSystNames{i}))>1
0884 counter=0;
0885 ovrlpIDs=geneIDs(ismember(geneSystNames,geneSystNames{i}));
0886 for k=1:numel(ovrlpIDs)
0887 if strfind(grRulesFromModifier{j},ovrlpIDs{k})
0888 counter=counter+1;
0889 grRules{j}=regexprep(grRules{j},geneSystNames{i},ovrlpIDs{k});
0890 end
0891 if counter>1
0892 EM=['Gene association is ambiguous for reaction ' modelSBML.reaction(j).id];
0893 dispEM(EM);
0894 end
0895 end
0896 end
0897 end
0898 end
0899 end
0900 end
0901 end
0902 model.genes=geneNames;
0903 model.grRules=grRules;
0904 [grRules,rxnGeneMat] = standardizeGrRules(model,true);
0905 model.grRules = grRules;
0906 model.rxnGeneMat = rxnGeneMat;
0907
0908
0909 [~, J]=ismember(geneCompartments,model.comps);
0910 model.geneComps=J;
0911 else
0912 if ~all(cellfun(@isempty,grRules))
0913
0914
0915 if isfield(modelSBML,'fbc_geneProduct')
0916 genes={modelSBML.fbc_geneProduct.fbc_id};
0917
0918
0919
0920 if isempty(geneMiriams)
0921 geneMiriams = cell(numel(genes),1);
0922 if isfield(modelSBML.fbc_geneProduct,'sboTerm') && numel(unique([modelSBML.fbc_geneProduct.sboTerm])) == 1
0923
0924 modelSBML.fbc_geneProduct = rmfield(modelSBML.fbc_geneProduct,'sboTerm');
0925 end
0926 for i = 1:numel(genes)
0927 geneMiriams{i}=parseMiriam(modelSBML.fbc_geneProduct(i).annotation);
0928 if isfield(modelSBML.fbc_geneProduct(i),'sboTerm') && ~(modelSBML.fbc_geneProduct(i).sboTerm==-1)
0929 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},modelSBML.fbc_geneProduct(i).sboTerm);
0930 end
0931 end
0932 end
0933 else
0934 genes=getGeneList(grRules);
0935 end
0936 if strcmpi(genes{1}(1:2),'G_')
0937 genes=regexprep(genes,'^G_','');
0938 grRules=regexprep(grRules,'^G_','');
0939 grRules=regexprep(grRules,'\(G_','(');
0940 grRules=regexprep(grRules,' G_',' ');
0941 end
0942 model.genes=genes;
0943 model.grRules=grRules;
0944 [grRules,rxnGeneMat] = standardizeGrRules(model,true);
0945 model.grRules = grRules;
0946 model.rxnGeneMat = rxnGeneMat;
0947 end
0948 end
0949
0950 if all(cellfun(@isempty,geneShortNames))
0951 if isfield(modelSBML,'fbc_geneProduct')
0952 for i=1:numel(genes)
0953 if ~isempty(modelSBML.fbc_geneProduct(i).fbc_label)
0954 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_label;
0955 elseif ~isempty(modelSBML.fbc_geneProduct(i).fbc_name)
0956 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_name;
0957 else
0958 geneShortNames{i,1}='';
0959 end
0960 end
0961 end
0962 end
0963
0964
0965 if any(~cellfun(@isempty,metaboliteInChI))
0966 model.inchis=metaboliteInChI;
0967 end
0968
0969
0970 if any(~cellfun(@isempty,metaboliteFormula))
0971 model.metFormulas=metaboliteFormula;
0972 end
0973
0974
0975 if ~isempty(metaboliteCharges)
0976 model.metCharges=metaboliteCharges;
0977 end
0978
0979
0980 if any(~cellfun(@isempty,geneShortNames))
0981 model.geneShortNames=geneShortNames;
0982 end
0983
0984
0985 if any(~cellfun(@isempty,compartmentMiriams))
0986 model.compMiriams=compartmentMiriams;
0987 end
0988
0989
0990 if any(~cellfun(@isempty,metaboliteMiriams))
0991 model.metMiriams=metaboliteMiriams;
0992 end
0993
0994
0995 if any(~cellfun(@isempty,subsystems))
0996 model.subSystems=subsystems;
0997 end
0998 if any(rxnComps)
0999 if all(rxnComps)
1000 model.rxnComps=rxnComps;
1001 else
1002 if supressWarnings==false
1003 EM='The compartments for the following reactions could not be matched. Ignoring reaction compartment information';
1004 dispEM(EM,false,model.rxns(rxnComps==0));
1005 end
1006 end
1007 end
1008
1009
1010 if any(~cellfun(@isempty,eccodes))
1011 model.eccodes=eccodes;
1012 end
1013
1014
1015 if any(~cellfun(@isempty,rxnMiriams))
1016 model.rxnMiriams=rxnMiriams;
1017 end
1018
1019
1020 if any(~cellfun(@isempty,geneMiriams))
1021 model.geneMiriams=geneMiriams;
1022 end
1023
1024 model.unconstrained=metaboliteUnconstrained;
1025
1026
1027
1028 model.rxns=regexprep(model.rxns,'__([0-9]+)__','${char(str2num($1))}');
1029 model.mets=regexprep(model.mets,'__([0-9]+)__','${char(str2num($1))}');
1030 model.comps=regexprep(model.comps,'__([0-9]+)__','${char(str2num($1))}');
1031 model.grRules=regexprep(model.grRules,'__([0-9]+)__','${char(str2num($1))}');
1032 model.genes=regexprep(model.genes,'__([0-9]+)__','${char(str2num($1))}');
1033 model.id=regexprep(model.id,'__([0-9]+)__','${char(str2num($1))}');
1034
1035
1036 if isempty(model.annotation)
1037 model=rmfield(model,'annotation');
1038 end
1039 if isempty(model.compOutside)
1040 model=rmfield(model,'compOutside');
1041 end
1042 if isempty(model.compMiriams)
1043 model=rmfield(model,'compMiriams');
1044 end
1045 if isempty(model.rxnComps)
1046 model=rmfield(model,'rxnComps');
1047 end
1048 if isempty(model.grRules)
1049 model=rmfield(model,'grRules');
1050 end
1051 if isempty(model.rxnGeneMat)
1052 model=rmfield(model,'rxnGeneMat');
1053 end
1054 if isempty(model.subSystems)
1055 model=rmfield(model,'subSystems');
1056 else
1057 model.subSystems(cellfun(@isempty,subsystems))={{''}};
1058 end
1059 if isempty(model.eccodes)
1060 model=rmfield(model,'eccodes');
1061 end
1062 if isempty(model.rxnMiriams)
1063 model=rmfield(model,'rxnMiriams');
1064 end
1065 if cellfun(@isempty,model.rxnNotes)
1066 model=rmfield(model,'rxnNotes');
1067 end
1068 if cellfun(@isempty,model.rxnReferences)
1069 model=rmfield(model,'rxnReferences');
1070 end
1071 if isempty(model.rxnConfidenceScores) || all(isnan(model.rxnConfidenceScores))
1072 model=rmfield(model,'rxnConfidenceScores');
1073 end
1074 if isempty(model.genes)
1075 model=rmfield(model,'genes');
1076 elseif isrow(model.genes)
1077 model.genes=transpose(model.genes);
1078 end
1079 if isempty(model.geneComps)
1080 model=rmfield(model,'geneComps');
1081 end
1082 if isempty(model.geneMiriams)
1083 model=rmfield(model,'geneMiriams');
1084 end
1085 if isempty(model.geneShortNames)
1086 model=rmfield(model,'geneShortNames');
1087 end
1088 if isempty(model.inchis)
1089 model=rmfield(model,'inchis');
1090 end
1091 if isempty(model.metFormulas)
1092 model=rmfield(model,'metFormulas');
1093 end
1094 if isempty(model.metMiriams)
1095 model=rmfield(model,'metMiriams');
1096 end
1097 if ~any(model.metCharges)
1098 model=rmfield(model,'metCharges');
1099 end
1100
1101
1102 if ~isfield(model,'genes') && isfield(model,'grRules')
1103 model=rmfield(model,'grRules');
1104 end
1105
1106
1107 if supressWarnings==false
1108 checkModelStruct(model,false);
1109 end
1110
1111 if removeExcMets==true
1112 model=simplifyModel(model);
1113 end
1114 end
1115
1116 function matchGenes=getGeneList(grRules)
1117
1118
1119
1120
1121 genes=strrep(grRules,'(','');
1122 genes=strrep(genes,')','');
1123 genes=strrep(genes,' or ',' ');
1124 genes=strrep(genes,' and ',' ');
1125 genes=strrep(genes,' OR ',' ');
1126 genes=strrep(genes,' AND ',' ');
1127 genes=regexp(genes,' ','split');
1128
1129 allNames={};
1130 for i=1:numel(genes)
1131 allNames=[allNames genes{i}];
1132 end
1133 matchGenes=unique(allNames)';
1134
1135
1136 if isempty(matchGenes{1})
1137 matchGenes(1)=[];
1138 end
1139 end
1140
1141 function fieldContent=parseNote(searchString,fieldName)
1142
1143
1144
1145 fieldContent='';
1146
1147 if strfind(searchString,fieldName)
1148 [~,targetString] = regexp(searchString,['<p>' fieldName '.*?</p>'],'tokens','match');
1149 targetString=regexprep(targetString,'<p>|</p>','');
1150 targetString=regexprep(targetString,[fieldName, ':'],'');
1151 for i=1:numel(targetString)
1152 fieldContent=[fieldContent ';' strtrim(targetString{1,i})];
1153 end
1154 fieldContent=regexprep(fieldContent,'^;|;$','');
1155 else
1156 fieldContent='';
1157 end
1158 end
1159
1160 function fieldContent=parseAnnotation(searchString,startString,midString,fieldName)
1161
1162 fieldContent='';
1163
1164
1165
1166 searchString=regexprep(searchString,'" />','"/>');
1167 [~,targetString] = regexp(searchString,['<rdf:li rdf:resource="' startString fieldName midString '.*?"/>'],'tokens','match');
1168 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>','');
1169 targetString=regexprep(targetString,startString,'');
1170 targetString=regexprep(targetString,[fieldName midString],'');
1171
1172 for i=1:numel(targetString)
1173 fieldContent=[fieldContent ';' strtrim(targetString{1,i})];
1174 end
1175
1176 fieldContent=regexprep(fieldContent,'^;|;$','');
1177 end
1178
1179 function miriamStruct=parseMiriam(searchString)
1180
1181
1182
1183 if strfind(searchString,'urn:miriam:')
1184 startString='urn:miriam:';
1185 midString=':';
1186 elseif strfind(searchString,'http://identifiers.org/')
1187 startString='http://identifiers.org/';
1188 midString='/';
1189 elseif strfind(searchString,'https://identifiers.org/')
1190 startString='https://identifiers.org/';
1191 midString='/';
1192 else
1193 miriamStruct=[];
1194 return;
1195 end
1196
1197 miriamStruct=[];
1198
1199 searchString=regexprep(searchString,'" />','"/>');
1200 [~,targetString] = regexp(searchString,'<rdf:li rdf:resource=".*?"/>','tokens','match');
1201 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>','');
1202 targetString=regexprep(targetString,startString,'');
1203 targetString=regexprep(targetString,midString,'/','once');
1204
1205 counter=0;
1206 for i=1:numel(targetString)
1207 if isempty(regexp(targetString{1,i},'inchi|ec-code', 'once'))
1208 counter=counter+1;
1209 miriamStruct.name{counter,1} = regexprep(targetString{1,i},'/.+','','once');
1210 miriamStruct.value{counter,1} = regexprep(targetString{1,i},[miriamStruct.name{counter,1} '/'],'','once');
1211 miriamStruct.name{counter,1} = regexprep(miriamStruct.name{counter,1},'^obo\.','');
1212 end
1213 end
1214 end
1215
1216 function miriam = addSBOtoMiriam(miriam,sboTerm)
1217
1218
1219 sboTerm = {['SBO:' sprintf('%07u',sboTerm)]};
1220 if isempty(miriam)
1221 miriam.name = {'sbo'};
1222 miriam.value = sboTerm;
1223 elseif any(strcmp('sbo',miriam.name))
1224 currSbo = strcmp('sbo',miriam.name);
1225 miriam.value(currSbo) = sboTerm;
1226 else
1227 miriam.name(end+1) = {'sbo'};
1228 miriam.value(end+1) = sboTerm;
1229 end
1230 end