function model=getRxnsFromMetaCyc(metacycPath,keepTransportRxns,keepUnbalanced,keepUndetermined)
% getRxnsFromMetaCyc
%   Returns the MetaCyc reactions as a model structure. If a cached
%   metaCycRxns.mat exists in metacycPath it is loaded; otherwise the
%   structure is generated from the MetaCyc flat files reactions.dat and
%   pathway-links.dat found in the same folder, and the result is saved to
%   metaCycRxns.mat for later calls.
%
%   Input:
%   metacycPath         folder holding metaCycRxns.mat and/or the MetaCyc
%                       data files (optional, default
%                       <RAVEN root>/external/metacyc, with the root taken
%                       from findRAVENroot)
%   keepTransportRxns   if false, reactions listed with
%                       'TYPES - Transport-Reactions' are removed
%                       (optional, default false)
%   keepUnbalanced      if false, reactions whose
%                       REACTION-BALANCE-STATUS starts with 'UNBALANC' are
%                       removed (optional, default false)
%   keepUndetermined    if false, reactions whose
%                       REACTION-BALANCE-STATUS starts with 'UNDETERM' are
%                       removed (optional, default false)
%
%   Output:
%   model               structure with the fields id, name, rxns,
%                       rxnNames, eccodes, subSystems, pwys, rxnMiriams,
%                       rxnReferences, rev, equations, S, mets, ub, lb, c,
%                       b and version, after the requested reaction
%                       categories have been passed to removeReactions
%
%   Usage: model=getRxnsFromMetaCyc(metacycPath,keepTransportRxns,...
%                                   keepUnbalanced,keepUndetermined)

if nargin<1
    ravenPath=findRAVENroot();
    metacycPath=fullfile(ravenPath,'external','metacyc');
else
    metacycPath=char(metacycPath);
end
if nargin<2
    keepTransportRxns=false;
end
if nargin<3
    keepUnbalanced=false;
end
if nargin<4
    keepUndetermined=false;
end

rxnsFile=fullfile(metacycPath,'metaCycRxns.mat');
metaCycRxnFile=fullfile(metacycPath,'reactions.dat');
metaCycPwyFile=fullfile(metacycPath,'pathway-links.dat');

if exist(rxnsFile,'file')
    % The path is passed as an argument rather than spliced into the format
    % string, so a '%' in the path cannot be misread as a conversion.
    fprintf('Importing MetaCyc reactions from %s... ',strrep(rxnsFile,'\','/'));
    % Only the variables used below are loaded; a bare load would let any
    % variable stored in the file overwrite the input arguments.
    load(rxnsFile,'metaCycRxns','TRANSPORT','UNBALANCED','UNDETERMINED');
    fprintf('done\n');
else
    fprintf('Cannot locate %s\nNow try to generate it from local MetaCyc data files...\n',strrep(rxnsFile,'\','/'));
    if ~isfile(metaCycRxnFile) || ~isfile(metaCycPwyFile)
        % dispEM must receive the message text. Previously the return value
        % of fprintf (a byte count) was handed over instead.
        EM='The files of reactions or pathways cannot be located, and should be downloaded from MetaCyc.';
        dispEM(EM);
    else
        % Pathway ids and their display names, used to fill subSystems
        [pwys,pwyNames]=readMetaCycPathwayLinks(metaCycPwyFile);

        % Parse the reaction records
        [metaCycRxns,rxnLinks,TRANSPORT,UNBALANCED,UNDETERMINED,isSpontaneous,dbVersion]=...
            parseMetaCycReactions(metaCycRxnFile,pwys,pwyNames);

        % Build the stoichiometric matrix from the equation strings
        [S,mets]=constructS(metaCycRxns.equations);
        metaCycRxns.S=S;
        metaCycRxns.mets=mets;

        % Default bounds, objective and right-hand side
        nRxns=numel(metaCycRxns.rxns);
        metaCycRxns.ub=ones(nRxns,1)*1000;
        metaCycRxns.lb=metaCycRxns.rev*-1000;
        metaCycRxns.c=zeros(nRxns,1);
        metaCycRxns.b=zeros(numel(metaCycRxns.mets),1);
        metaCycRxns.version=dbVersion;

        % Cache the result so that the next call can load it directly
        save(rxnsFile,'metaCycRxns','rxnLinks','TRANSPORT','UNBALANCED','UNDETERMINED','isSpontaneous');
        fprintf('New metaCycRxns.mat has been successfully updated!\n\n');
    end
end

% Drop the reaction categories the caller did not ask to keep. The two
% trailing true flags are forwarded to removeReactions unchanged
% (NOTE(review): presumably they also remove unused metabolites/genes -
% confirm against removeReactions).
model=metaCycRxns;
if keepTransportRxns==false
    model=removeReactions(model,intersect(TRANSPORT,model.rxns),true,true);
end
if keepUnbalanced==false
    model=removeReactions(model,intersect(UNBALANCED,model.rxns),true,true);
end
if keepUndetermined==false
    model=removeReactions(model,intersect(UNDETERMINED,model.rxns),true,true);
end
end

function [pwys,pwyNames]=readMetaCycPathwayLinks(pwyFile)
% Reads pathway ids (column 1) and names (column 2, HTML codes removed)
% from the tab-separated pathway-links file, skipping '#' comment lines.
fid=fopen(pwyFile,'r');
if fid==-1
    error('Cannot open %s for reading.',pwyFile);
end
closeFile=onCleanup(@() fclose(fid)); % closes the file even if parsing fails

pwyCounter=0;
pwys=cell(10000,1);
pwyNames=cell(10000,1);

while true
    tline=fgetl(fid);

    % fgetl returns a non-char value at end of file
    if ~ischar(tline)
        break;
    end

    % Blank lines carry no pathway and would break tline(1); '#' lines are
    % comments
    if isempty(tline) || tline(1)=='#'
        continue;
    end

    pwyCounter=pwyCounter+1;
    vars=regexp(tline,'\t','split');
    pwys{pwyCounter}=vars{1};
    pwyNames{pwyCounter}=removeHTMLcodes(vars{2});
end

pwys=pwys(1:pwyCounter);
pwyNames=pwyNames(1:pwyCounter);
end

function [metaCycRxns,rxnLinks,TRANSPORT,UNBALANCED,UNDETERMINED,isSpontaneous,dbVersion]=parseMetaCycReactions(rxnFile,pwys,pwyNames)
% Parses the attribute-value records of the MetaCyc reactions file into a
% partially filled model structure (everything up to the equations), the
% MetaCyc-to-KEGG reaction links, and the id lists of transport,
% unbalanced, undetermined and spontaneous reactions. dbVersion is the text
% after '# Version: ' ('' when the file has no such line).
fid=fopen(rxnFile,'r');
if fid==-1
    error('Cannot open %s for reading.',rxnFile);
end
closeFile=onCleanup(@() fclose(fid)); % closes the file even if parsing fails

metaCycRxns.id='MetaCyc';
metaCycRxns.name='Automatically generated from MetaCyc database';

% Preallocate; everything is trimmed to the number of kept reactions below
metaCycRxns.rxns=cell(50000,1);
metaCycRxns.rxnNames=cell(50000,1);
metaCycRxns.eccodes=cell(50000,1);
metaCycRxns.subSystems=cell(50000,1);
metaCycRxns.pwys=cell(50000,1);
metaCycRxns.rxnMiriams=cell(50000,1);
metaCycRxns.rxnReferences=cell(50000,1);
metaCycRxns.rev=ones(50000,1);

rxnLinks.metacyc=cell(10000,1);
rxnLinks.kegg=cell(10000,1);
rxnLinks.check=cell(10000,1);
isSpontaneous=false(10000,1);
UNBALANCED=false(10000,1);
UNDETERMINED=false(10000,1);
TRANSPORT={};

metaCycRxns.equations=cell(50000,1);
left=cell(50000,1);
right=cell(50000,1);

rxnCounter=0;
dbLinkCounter=0;
dbVersion='';

while true
    tline=fgetl(fid);

    % fgetl returns a non-char value at end of file
    if ~ischar(tline)
        break;
    end

    % Database version from the file header
    if numel(tline)>11 && startsWith(tline,'# Version: ')
        dbVersion=tline(12:end);
    end

    % A new reaction record starts here
    if numel(tline)>12 && startsWith(tline,'UNIQUE-ID - ')
        rxnCounter=rxnCounter+1;

        % Reset every per-reaction value. A discarded record (see the '//'
        % handling) hands its slot to the next reaction, so anything not
        % reset here would be inherited from the discarded record.
        metaCycRxns.rxnNames{rxnCounter}='';
        metaCycRxns.eccodes{rxnCounter}='';
        metaCycRxns.subSystems{rxnCounter}='';
        metaCycRxns.pwys{rxnCounter}='';
        metaCycRxns.equations{rxnCounter}='';
        metaCycRxns.rxnReferences{rxnCounter}='';
        metaCycRxns.rxnMiriams{rxnCounter}=[];
        metaCycRxns.rev(rxnCounter,1)=1;
        isSpontaneous(rxnCounter)=false;
        UNBALANCED(rxnCounter)=false;
        UNDETERMINED(rxnCounter)=false;
        reverse=0;

        % Working variables for assembling the equation
        left{rxnCounter}='';
        right{rxnCounter}='';
        coefficient='';
        templeft='';
        tempright='';

        metaCycRxns.rxns{rxnCounter}=tline(13:end);
    end

    % Reaction name, with HTML codes removed
    if numel(tline)>14 && startsWith(tline,'COMMON-NAME - ')
        metaCycRxns.rxnNames{rxnCounter}=removeHTMLcodes(tline(15:end));
    end

    % EC numbers, joined as 'ec-code/x;ec-code/y'
    if numel(tline)>15 && startsWith(tline,'EC-NUMBER - EC-')
        if isempty(metaCycRxns.eccodes{rxnCounter})
            metaCycRxns.eccodes{rxnCounter}=strcat('ec-code/',tline(16:end));
        else
            metaCycRxns.eccodes{rxnCounter}=strcat(metaCycRxns.eccodes{rxnCounter},';ec-code/',tline(16:end));
        end
    end
    % Same, for the '|EC-...|' notation (the closing '|' is dropped)
    if numel(tline)>16 && startsWith(tline,'EC-NUMBER - |EC-')
        if isempty(metaCycRxns.eccodes{rxnCounter})
            metaCycRxns.eccodes{rxnCounter}=strcat('ec-code/',tline(17:end-1));
        else
            metaCycRxns.eccodes{rxnCounter}=strcat(metaCycRxns.eccodes{rxnCounter},';ec-code/',tline(17:end-1));
        end
    end

    % Pathway ids (joined with ';') and, when the id is known from the
    % pathway-links file, the pathway name as a subsystem
    if numel(tline)>13 && startsWith(tline,'IN-PATHWAY - ')
        if isempty(metaCycRxns.pwys{rxnCounter})
            metaCycRxns.pwys{rxnCounter}=tline(14:end);
        else
            metaCycRxns.pwys{rxnCounter}=strcat(metaCycRxns.pwys{rxnCounter},';',tline(14:end));
        end

        [x,y]=ismember(tline(14:end),pwys);
        if x
            metaCycRxns.subSystems{rxnCounter,1}{1,numel(metaCycRxns.subSystems{rxnCounter,1})+1}=pwyNames{y};
        end
    end

    % Literature references, joined as 'pubmed/x;pubmed/y'
    if numel(tline)>12 && startsWith(tline,'CITATIONS - ')
        if isempty(metaCycRxns.rxnReferences{rxnCounter})
            metaCycRxns.rxnReferences{rxnCounter}=strcat('pubmed/',tline(13:end));
        else
            metaCycRxns.rxnReferences{rxnCounter}=strcat(metaCycRxns.rxnReferences{rxnCounter},';pubmed/',tline(13:end));
        end
    end

    % Cross-references to KEGG (LIGAND-RXN) and RHEA. startsWith is safe
    % for link texts shorter than the tested prefix, and the two database
    % tests are mutually exclusive.
    if numel(tline)>11 && startsWith(tline,'DBLINKS - (')
        dblink=tline(12:end);

        if startsWith(dblink,'LIGAND-RXN "')
            dblink=textBeforeQuote(dblink(13:end));
            metaCycRxns.rxnMiriams{rxnCounter}=appendMiriam(metaCycRxns.rxnMiriams{rxnCounter},'kegg.reaction',dblink);

            % Record the MetaCyc-KEGG pair; 'check' is the concatenated key
            % used for removing duplicate pairs afterwards
            dbLinkCounter=dbLinkCounter+1;
            rxnLinks.metacyc{dbLinkCounter}=metaCycRxns.rxns{rxnCounter};
            rxnLinks.kegg{dbLinkCounter}=dblink;
            rxnLinks.check{dbLinkCounter}=strcat(metaCycRxns.rxns{rxnCounter},dblink);
        elseif startsWith(dblink,'RHEA "')
            dblink=textBeforeQuote(dblink(7:end));
            metaCycRxns.rxnMiriams{rxnCounter}=appendMiriam(metaCycRxns.rxnMiriams{rxnCounter},'rhea',dblink);
        end
    end

    % Any of the six listed directions makes the reaction irreversible;
    % the right-to-left ones additionally swap the two sides of the
    % equation. Other values leave the reaction reversible.
    if numel(tline)>21 && startsWith(tline,'REACTION-DIRECTION - ')
        rxnDirection=tline(22:end);
        switch rxnDirection
            case {'IRREVERSIBLE-LEFT-TO-RIGHT','LEFT-TO-RIGHT','PHYSIOL-LEFT-TO-RIGHT'}
                metaCycRxns.rev(rxnCounter,1)=0;
            case {'IRREVERSIBLE-RIGHT-TO-LEFT','RIGHT-TO-LEFT','PHYSIOL-RIGHT-TO-LEFT'}
                metaCycRxns.rev(rxnCounter,1)=0;
                reverse=1;
        end
    end

    % Transport reactions
    if strcmp(tline,'TYPES - Transport-Reactions')
        TRANSPORT{end+1,1}=metaCycRxns.rxns{rxnCounter}; %#ok<AGROW>
    end

    % Spontaneous reactions
    if strcmp(tline,'SPONTANEOUS? - T')
        isSpontaneous(rxnCounter)=true;
    end

    % Balance status; only the first eight characters are compared
    if numel(tline)>27 && startsWith(tline,'REACTION-BALANCE-STATUS - :')
        balanceStatus=tline(28:end);
        if startsWith(balanceStatus,'UNBALANC')
            UNBALANCED(rxnCounter)=true;
        elseif startsWith(balanceStatus,'UNDETERM')
            UNDETERMINED(rxnCounter)=true;
        end
    end

    % Equation assembly. A metabolite is held in templeft/tempright until
    % the next LEFT/RIGHT line or the end of the record, because its
    % '^COEFFICIENT' line follows the metabolite line. The statements below
    % are kept in their original order on purpose. The separating space is
    % passed to strcat as character code 32.
    if numel(tline)>7 && startsWith(tline,'LEFT - ')
        if strcmp(left{rxnCounter},'')
            if strcmp(templeft,'')
                templeft=tline(8:end);
            else
                if strcmp(coefficient,'')
                    left{rxnCounter}=templeft;
                else
                    left{rxnCounter}=strcat(coefficient,32,templeft);
                    coefficient='';
                end
                templeft=tline(8:end);
            end
        else
            if strcmp(coefficient,'')
                left{rxnCounter}=strcat(left{rxnCounter},' +',32,templeft);
            else
                left{rxnCounter}=strcat(left{rxnCounter},' +',32,coefficient,32,templeft);
                coefficient='';
            end
            templeft=tline(8:end);
        end
    end

    if numel(tline)>8 && startsWith(tline,'RIGHT - ')
        if strcmp(right{rxnCounter},'')
            if strcmp(tempright,'')
                % First RIGHT line of the record: flush the last pending
                % left-hand metabolite before starting the right-hand side
                if strcmp(coefficient,'')
                    if strcmp(left{rxnCounter},'')
                        left{rxnCounter}=templeft;
                    else
                        left{rxnCounter}=strcat(left{rxnCounter},' +',32,templeft);
                    end
                else
                    if strcmp(left{rxnCounter},'')
                        left{rxnCounter}=strcat(coefficient,32,templeft);
                    else
                        left{rxnCounter}=strcat(left{rxnCounter},' +',32,coefficient,32,templeft);
                    end
                    coefficient='';
                end
                tempright=tline(9:end);
            else
                if strcmp(coefficient,'')
                    right{rxnCounter}=tempright;
                else
                    right{rxnCounter}=strcat(coefficient,32,tempright);
                    coefficient='';
                end
                tempright=tline(9:end);
            end
        else
            if strcmp(coefficient,'')
                right{rxnCounter}=strcat(right{rxnCounter},' +',32,tempright);
            else
                right{rxnCounter}=strcat(right{rxnCounter},' +',32,coefficient,32,tempright);
                coefficient='';
            end
            tempright=tline(9:end);
        end
    end

    % Coefficient of the metabolite read on the preceding LEFT/RIGHT line
    if numel(tline)>15 && startsWith(tline,'^COEFFICIENT - ')
        coefficient=tline(16:end);
    end

    % End of record
    if strcmp(tline,'//')
        % Flush the last pending right-hand metabolite
        if strcmp(coefficient,'')
            if strcmp(right{rxnCounter},'')
                right{rxnCounter}=tempright;
            else
                right{rxnCounter}=strcat(right{rxnCounter},' +',32,tempright);
            end
        else
            if strcmp(right{rxnCounter},'')
                right{rxnCounter}=strcat(coefficient,32,tempright);
            else
                right{rxnCounter}=strcat(right{rxnCounter},' +',32,coefficient,32,tempright);
            end
            coefficient='';
        end

        if metaCycRxns.rev(rxnCounter,1)
            symbol=' <=>';
        else
            symbol=' =>';
        end

        if reverse
            metaCycRxns.equations{rxnCounter}=strcat(right{rxnCounter},symbol,32,left{rxnCounter});
        else
            metaCycRxns.equations{rxnCounter}=strcat(left{rxnCounter},symbol,32,right{rxnCounter});
        end

        % A reaction without substrates or without products is discarded:
        % stepping the counter back lets the next record overwrite the slot
        if strcmp(left{rxnCounter},'') || strcmp(right{rxnCounter},'')
            rxnCounter=rxnCounter-1;
        end
    end
end

% A discarded final record may have left flags beyond the kept range
isSpontaneous(rxnCounter+1:end)=false;
UNBALANCED(rxnCounter+1:end)=false;
UNDETERMINED(rxnCounter+1:end)=false;

% Convert the logical flags into lists of reaction ids
UNBALANCED=metaCycRxns.rxns(UNBALANCED);
UNDETERMINED=metaCycRxns.rxns(UNDETERMINED);
isSpontaneous=metaCycRxns.rxns(isSpontaneous);

% Trim the preallocated fields to the number of kept reactions
metaCycRxns.rxns=metaCycRxns.rxns(1:rxnCounter);
metaCycRxns.rxnNames=metaCycRxns.rxnNames(1:rxnCounter);
metaCycRxns.eccodes=metaCycRxns.eccodes(1:rxnCounter);
metaCycRxns.equations=metaCycRxns.equations(1:rxnCounter);
metaCycRxns.rxnMiriams=metaCycRxns.rxnMiriams(1:rxnCounter);
metaCycRxns.rxnReferences=metaCycRxns.rxnReferences(1:rxnCounter);
metaCycRxns.subSystems=metaCycRxns.subSystems(1:rxnCounter);
metaCycRxns.pwys=metaCycRxns.pwys(1:rxnCounter);
metaCycRxns.rev=metaCycRxns.rev(1:rxnCounter,:);

% Trim the link lists and keep one entry per unique MetaCyc-KEGG pair
rxnLinks.kegg=rxnLinks.kegg(1:dbLinkCounter);
rxnLinks.metacyc=rxnLinks.metacyc(1:dbLinkCounter);
rxnLinks.check=rxnLinks.check(1:dbLinkCounter);
[~,index]=unique(rxnLinks.check);
rxnLinks.kegg=rxnLinks.kegg(index);
rxnLinks.metacyc=rxnLinks.metacyc(index);
rxnLinks=rmfield(rxnLinks,'check');
end

function id=textBeforeQuote(str)
% Returns the part of str in front of its first double quote, or str
% unchanged when it contains none.
quotePos=strfind(str,'"');
if isempty(quotePos)
    id=str;
else
    id=str(1:quotePos(1)-1);
end
end

function miriam=appendMiriam(miriam,name,value)
% Appends a name/value pair to a miriam structure; a non-struct input
% (the empty default of an unused cell) starts a new structure.
if isstruct(miriam)
    addToIndex=numel(miriam.name)+1;
else
    addToIndex=1;
end
miriam.name{addToIndex,1}=name;
miriam.value{addToIndex,1}=value;
end
0526
0527
0528
function newString=removeHTMLcodes(string)
% removeHTMLcodes
%   Strips simple HTML markup from a text: opening tags such as <i>,
%   closing tags such as </i>, and every '&' and ';' character (so an
%   entity like '&alpha;' is reduced to 'alpha').
%
%   string      text to clean
%   newString   the cleaned text

% The patterns are applied one after the other, in this order
patterns={'<(\w+)>','</(\w+)>','[&;]'};
newString=string;
for k=1:numel(patterns)
    newString=regexprep(newString,patterns{k},'');
end
end