0001 function [model,isSpontaneous,isUndefinedStoich,isIncomplete,...
0002 isGeneral]=getRxnsFromKEGG(keggPath)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071 ravenPath=findRAVENroot();
0072
0073 if nargin<1
0074 keggPath=fullfile(ravenPath,'external','kegg');
0075 else
0076 keggPath=char(keggPath);
0077 end
0078
0079
0080
0081 rxnsFile=fullfile(ravenPath,'external','kegg','keggRxns.mat');
0082 if exist(rxnsFile, 'file')
0083 fprintf(['Importing KEGG reactions from ' strrep(rxnsFile,'\','/') '... ']);
0084 load(rxnsFile);
0085 else
0086 fprintf(['NOTE: Cannot locate ' strrep(rxnsFile,'\','/') ', it will therefore be generated from the local KEGG database\n']);
0087 if ~isfile(fullfile(keggPath,'reaction')) || ~isfile(fullfile(keggPath,'reaction.lst')) || ~isfile(fullfile(keggPath,'reaction_mapformula.lst'))
0088 EM=fprintf(['The files ''reaction'', ''reaction.lst'' and ''reaction_mapformula.lst'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP\n']);
0089 dispEM(EM);
0090 else
0091 fprintf('Generating keggRxns.mat file... ');
0092
0093 model.id='KEGG';
0094 model.name='Automatically generated from KEGG database';
0095
0096
0097 model.rxns=cell(15000,1);
0098 model.rxnNames=cell(15000,1);
0099 model.eccodes=cell(15000,1);
0100 model.subSystems=cell(15000,1);
0101 model.rxnMiriams=cell(15000,1);
0102 model.rxnNotes=cell(15000,1);
0103 equations=cell(15000,1);
0104
0105
0106 isSpontaneous=false(15000,1);
0107 isIncomplete=false(15000,1);
0108 isGeneral=false(15000,1);
0109
0110
0111
0112 fid = fopen(fullfile(keggPath,'reaction'), 'r');
0113
0114
0115 rxnCounter=0;
0116
0117
0118 orthology=false;
0119 pathway=false;
0120 module=false;
0121 while 1
0122
0123 tline = fgetl(fid);
0124
0125
0126 if ~ischar(tline)
0127 break;
0128 end
0129
0130
0131 if numel(tline)<12
0132 continue;
0133 end
0134
0135 if strcmp(tline(1:5),'BRITE')
0136 pathway = false;
0137 continue;
0138 end
0139
0140
0141 if strcmp(tline(1:12),'ENTRY ')
0142 rxnCounter=rxnCounter+1;
0143
0144
0145 model.rxnNames{rxnCounter}='';
0146 model.eccodes{rxnCounter}='';
0147
0148 model.rxnNotes{rxnCounter}='';
0149 equations{rxnCounter}='';
0150
0151
0152 model.rxns{rxnCounter}=tline(13:18);
0153 orthology=false;
0154 pathway=false;
0155 module=false;
0156
0157
0158 tempStruct=model.rxnMiriams{rxnCounter};
0159 tempStruct.name{1,1}='kegg.reaction';
0160 tempStruct.value{1,1}=tline(13:18);
0161 model.rxnMiriams{rxnCounter}=tempStruct;
0162 end
0163
0164
0165 if strcmp(tline(1:12),'NAME ')
0166 model.rxnNames{rxnCounter}=tline(13:end);
0167 end
0168
0169
0170
0171 if strcmp(tline(1:12),'COMMENT ')
0172
0173 commentText=tline(13:end);
0174 while 1
0175 tline = fgetl(fid);
0176 if ~strcmp(tline(1:3),'///') && ~strcmp(tline(1:3),'RPA') && ~strcmp(tline(1:3),'ENZ') && ~strcmp(tline(1:3),'PAT') && ~strcmp(tline(1:3),'RCL')
0177 commentText=[commentText ' ' strtrim(tline)];
0178 else
0179 break;
0180 end
0181 end
0182 if any(regexpi(commentText,'SPONTANEOUS'))
0183
0184 isSpontaneous(rxnCounter)=true;
0185 end
0186 if any(regexpi(commentText,'INCOMPLETE')) || any(regexpi(commentText,'ERRONEOUS')) || any(regexpi(commentText,'UNCLEAR'))
0187 isIncomplete(rxnCounter)=true;
0188 end
0189 if any(regexpi(commentText,'GENERAL REACTION'))
0190
0191 isGeneral(rxnCounter)=true;
0192 end
0193
0194
0195 if numel(tline)<12
0196 continue;
0197 end
0198 end
0199
0200
0201 if strcmp(tline(1:12),'ENZYME ')
0202 model.eccodes{rxnCounter}=tline(13:end);
0203 model.eccodes{rxnCounter}=deblank(model.eccodes{rxnCounter});
0204 model.eccodes{rxnCounter}=regexprep(model.eccodes{rxnCounter},'\s+',';');
0205 end
0206 if numel(tline)>8
0207 if strcmp(tline(1:9),'REFERENCE')
0208 pathway=false;
0209 orthology=false;
0210 end
0211 end
0212
0213
0214 if numel(tline)>18
0215 if strcmp(tline(1:12),'MODULE ') || module==true
0216 pathway=false;
0217 orthology=false;
0218 if isstruct(model.rxnMiriams{rxnCounter})
0219 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1;
0220 else
0221 addToIndex=1;
0222 end
0223 tempStruct=model.rxnMiriams{rxnCounter};
0224 tempStruct.name{addToIndex,1}='kegg.module';
0225 tempStruct.value{addToIndex,1}=tline(13:18);
0226 model.rxnMiriams{rxnCounter}=tempStruct;
0227 end
0228 end
0229
0230
0231 if numel(tline)>18
0232 if strcmp(tline(1:18),'DBLINKS RHEA: ')
0233 pathway=false;
0234 orthology=false;
0235 module=false;
0236 if isstruct(model.rxnMiriams{rxnCounter})
0237 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1;
0238 else
0239 addToIndex=1;
0240 end
0241 tempStruct=model.rxnMiriams{rxnCounter};
0242 tempStruct.name{addToIndex,1}='rhea';
0243 tempStruct.value{addToIndex,1}=tline(19:end);
0244 model.rxnMiriams{rxnCounter}=tempStruct;
0245 end
0246 end
0247
0248
0249 if numel(tline)>16
0250 if strcmp(tline(1:16),'ORTHOLOGY KO: ') || strcmp(tline(1:16),' KO: ') || strcmp(tline(1:12),'ORTHOLOGY ') || orthology==true
0251 pathway=false;
0252 module=false;
0253
0254
0255 if isstruct(model.rxnMiriams{rxnCounter})
0256 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1;
0257 else
0258 addToIndex=1;
0259 end
0260
0261 tempStruct=model.rxnMiriams{rxnCounter};
0262 tempStruct.name{addToIndex,1}='kegg.orthology';
0263 if strcmp(tline(13:16),'KO:')
0264
0265 tempStruct.value{addToIndex,1}=tline(17:22);
0266 else
0267
0268
0269 orthology=true;
0270 tempStruct.value{addToIndex,1}=tline(13:18);
0271 end
0272 model.rxnMiriams{rxnCounter}=tempStruct;
0273 end
0274 end
0275
0276
0277 if numel(tline)>18
0278 if strcmp(tline(1:18),'PATHWAY PATH: ') || strcmp(tline(1:18),' PATH: ') || strcmp(tline(1:12),'PATHWAY ') || pathway==true
0279 orthology=false;
0280 module=false;
0281
0282 if isstruct(model.rxnMiriams{rxnCounter})
0283 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1;
0284 else
0285 addToIndex=1;
0286 end
0287
0288 tempStruct=model.rxnMiriams{rxnCounter};
0289 tempStruct.name{addToIndex,1}='kegg.pathway';
0290
0291 if strcmp(tline(14:17),'PATH:')
0292 tempStruct.value{addToIndex,1}=tline(19:25);
0293 else
0294
0295 tempStruct.value{addToIndex,1}=tline(13:19);
0296 pathway=true;
0297 end
0298
0299
0300
0301 if ~strcmp('rn011',tempStruct.value{addToIndex,1}(1:5)) && ~strcmp('rn012',tempStruct.value{addToIndex,1}(1:5))
0302 model.rxnMiriams{rxnCounter}=tempStruct;
0303
0304
0305
0306
0307
0308 if strcmp(tline(14:17),'PATH:')
0309
0310 model.subSystems{rxnCounter}=tline(28:end);
0311 else
0312
0313 model.subSystems{rxnCounter,1}{numel(model.subSystems{rxnCounter,1})+1,1}=tline(22:end);
0314 end
0315 end
0316 end
0317 end
0318 end
0319
0320
0321 fclose(fid);
0322
0323
0324
0325 isIncomplete=model.rxns(isIncomplete);
0326 isGeneral=model.rxns(isGeneral);
0327 isSpontaneous=model.rxns(isSpontaneous);
0328
0329
0330 model.rxns=model.rxns(1:rxnCounter);
0331 model.rxnNames=model.rxnNames(1:rxnCounter);
0332 model.eccodes=model.eccodes(1:rxnCounter);
0333 equations=equations(1:rxnCounter);
0334 model.rxnMiriams=model.rxnMiriams(1:rxnCounter);
0335 model.rxnNotes=model.rxnNotes(1:rxnCounter);
0336 model.subSystems=model.subSystems(1:rxnCounter);
0337
0338 emptySubSys = cellfun(@isempty,model.subSystems);
0339 model.subSystems(emptySubSys) = {{''}};
0340
0341
0342
0343
0344
0345
0346 fid = fopen(fullfile(keggPath,'reaction.lst'), 'r');
0347
0348
0349 for i=1:rxnCounter
0350
0351 tline = fgetl(fid);
0352
0353 equations{i}=tline(9:end);
0354 end
0355
0356
0357 fclose(fid);
0358
0359
0360
0361
0362 equations = regexprep(equations,' <=>', ' <=>');
0363
0364
0365 [S, mets, badRxns]=constructS(equations);
0366 model.S=S;
0367 model.mets=mets;
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382
0383 reversibility.rxns={};
0384 reversibility.product={};
0385 reversibility.rev=[];
0386
0387 fid = fopen(fullfile(keggPath,'reaction_mapformula.lst'), 'r');
0388 while 1
0389
0390 tline = fgetl(fid);
0391
0392
0393 if ~ischar(tline)
0394 break;
0395 end
0396
0397 rxn=tline(1:6);
0398 prod=tline(end-5:end);
0399 rev=any(strfind(tline,'<=>'));
0400 if isempty(reversibility.rxns)
0401 reversibility.rxns{1}=rxn;
0402 reversibility.product{1}=prod;
0403 reversibility.rev(1)=rev;
0404 elseif strcmp(reversibility.rxns(end),rxn)
0405
0406
0407
0408
0409 if rev==1 || reversibility.rev(end)==1
0410 reversibility.rev(end)=1;
0411 else
0412
0413
0414
0415
0416
0417
0418
0419 if ~strcmp(prod,reversibility.product(end))
0420 reversibility.rev(end)=1;
0421 end
0422 end
0423 else
0424 reversibility.rxns=[reversibility.rxns;rxn];
0425 reversibility.product=[reversibility.product;prod];
0426 reversibility.rev=[reversibility.rev;rev];
0427 end
0428 end
0429 fclose(fid);
0430
0431
0432 model.rev=ones(rxnCounter,1);
0433
0434 irrevIDs=find(reversibility.rev==0);
0435 [~, I]=ismember(reversibility.rxns(irrevIDs),model.rxns);
0436 [~, prodMetIDs]=ismember(reversibility.product(irrevIDs),model.mets);
0437 model.rev(I)=0;
0438
0439
0440 linearInd=sub2ind(size(model.S), prodMetIDs, I);
0441 changeOrder=I(model.S(linearInd)<0);
0442
0443 model.S(:,changeOrder)=model.S(:,changeOrder).*-1;
0444
0445
0446 model.ub=ones(rxnCounter,1)*1000;
0447 model.lb=model.rev*-1000;
0448 model.c=zeros(rxnCounter,1);
0449 model.b=zeros(numel(model.mets),1);
0450 model=removeReactions(model,badRxns,true,true);
0451
0452
0453
0454
0455 I=cellfun(@any,strfind(model.mets,'n')) | cellfun(@any,strfind(model.mets,'m'));
0456 [~, J]=find(model.S(I,:));
0457 isUndefinedStoich=model.rxns(unique(J));
0458
0459
0460 metList=[model.mets(~I);model.mets(I)];
0461 [~,metIndexes]=ismember(metList,model.mets);
0462 model=permuteModel(model,metIndexes,'mets');
0463
0464
0465
0466
0467 endRxnList=unique([model.rxns(ismember(model.rxns,isSpontaneous));model.rxns(ismember(model.rxns,isUndefinedStoich));model.rxns(ismember(model.rxns,isIncomplete));model.rxns(ismember(model.rxns,isGeneral))],'stable');
0468 rxnList=[model.rxns(~ismember(model.rxns,endRxnList));endRxnList];
0469 [~,rxnIndexes]=ismember(rxnList,model.rxns);
0470 model=permuteModel(model,rxnIndexes,'rxns');
0471
0472
0473
0474 for i=(numel(rxnList)-numel(endRxnList)+1):numel(model.rxns)
0475 if ismember(model.rxns(i),isSpontaneous)
0476 model.rxnNotes(i)=strcat(model.rxnNotes(i),'Spontaneous');
0477 end
0478 if ismember(model.rxns(i),isUndefinedStoich)
0479 if isempty(model.rxnNotes{i})
0480 model.rxnNotes(i)=strcat(model.rxnNotes(i),'With undefined stoichiometry');
0481 else
0482 model.rxnNotes(i)=strcat(model.rxnNotes(i),', with undefined stoichiometry');
0483 end
0484 end
0485 if ismember(model.rxns(i),isIncomplete)
0486 if isempty(model.rxnNotes{i})
0487 model.rxnNotes(i)=strcat(model.rxnNotes(i),'Incomplete');
0488 else
0489 model.rxnNotes(i)=strcat(model.rxnNotes(i),', incomplete');
0490 end
0491 end
0492 if ismember(model.rxns(i),isGeneral)
0493 if isempty(model.rxnNotes{i})
0494 model.rxnNotes(i)=strcat(model.rxnNotes(i),'General');
0495 else
0496 model.rxnNotes(i)=strcat(model.rxnNotes(i),', general');
0497 end
0498 end
0499 model.rxnNotes(i)=strcat(model.rxnNotes(i),' reaction');
0500 end
0501
0502
0503 save(rxnsFile,'model','isGeneral','isIncomplete','isUndefinedStoich','isSpontaneous');
0504 end
0505 end
0506 fprintf('COMPLETE\n');
0507
0508 end