0001 function [model,isSpontaneous,isUndefinedStoich,isIncomplete,...
0002 isGeneral]=getRxnsFromKEGG(keggPath)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071 if nargin<1
0072 keggPath='RAVEN/external/kegg';
0073 else
0074 keggPath=char(keggPath);
0075 end
0076
0077
0078
0079 ravenPath=findRAVENroot();
0080 rxnsFile=fullfile(ravenPath,'external','kegg','keggRxns.mat');
0081 if exist(rxnsFile, 'file')
0082 fprintf(['Importing KEGG reactions from ' strrep(rxnsFile,'\','/') '... ']);
0083 load(rxnsFile);
0084 else
0085 fprintf(['NOTE: Cannot locate ' strrep(rxnsFile,'\','/') ', it will therefore be generated from the local KEGG database\n']);
0086 if ~isfile(fullfile(keggPath,'reaction')) || ~isfile(fullfile(keggPath,'reaction.lst')) || ~isfile(fullfile(keggPath,'reaction_mapformula.lst'))
0087 EM=fprintf(['The files ''reaction'', ''reaction.lst'' and ''reaction_mapformula.lst'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP\n']);
0088 dispEM(EM);
0089 else
0090 fprintf('Generating keggRxns.mat file... ');
0091
0092 model.id='KEGG';
0093 model.name='Automatically generated from KEGG database';
0094
0095
0096 model.rxns=cell(15000,1);
0097 model.rxnNames=cell(15000,1);
0098 model.eccodes=cell(15000,1);
0099 model.subSystems=cell(15000,1);
0100 model.rxnMiriams=cell(15000,1);
0101 model.rxnNotes=cell(15000,1);
0102 equations=cell(15000,1);
0103
0104
0105 isSpontaneous=false(15000,1);
0106 isIncomplete=false(15000,1);
0107 isGeneral=false(15000,1);
0108
0109
0110
0111 fid = fopen(fullfile(keggPath,'reaction'), 'r');
0112
0113
0114 rxnCounter=0;
0115
0116
0117 orthology=false;
0118 pathway=false;
0119 module=false;
0120 while 1
0121
0122 tline = fgetl(fid);
0123
0124
0125 if ~ischar(tline)
0126 break;
0127 end
0128
0129
0130 if numel(tline)<12
0131 continue;
0132 end
0133
0134
0135 if strcmp(tline(1:12),'ENTRY ')
0136 rxnCounter=rxnCounter+1;
0137
0138
0139 model.rxnNames{rxnCounter}='';
0140 model.eccodes{rxnCounter}='';
0141
0142 model.rxnNotes{rxnCounter}='';
0143 equations{rxnCounter}='';
0144
0145
0146 model.rxns{rxnCounter}=tline(13:18);
0147 orthology=false;
0148 pathway=false;
0149 module=false;
0150
0151
0152 tempStruct=model.rxnMiriams{rxnCounter};
0153 tempStruct.name{1,1}='kegg.reaction';
0154 tempStruct.value{1,1}=tline(13:18);
0155 model.rxnMiriams{rxnCounter}=tempStruct;
0156 end
0157
0158
0159 if strcmp(tline(1:12),'NAME ')
0160 model.rxnNames{rxnCounter}=tline(13:end);
0161 end
0162
0163
0164
0165 if strcmp(tline(1:12),'COMMENT ')
0166
0167 commentText=tline(13:end);
0168 while 1
0169 tline = fgetl(fid);
0170 if ~strcmp(tline(1:3),'///') && ~strcmp(tline(1:3),'RPA') && ~strcmp(tline(1:3),'ENZ') && ~strcmp(tline(1:3),'PAT') && ~strcmp(tline(1:3),'RCL')
0171 commentText=[commentText ' ' strtrim(tline)];
0172 else
0173 break;
0174 end
0175 end
0176 if any(regexpi(commentText,'SPONTANEOUS'))
0177
0178 isSpontaneous(rxnCounter)=true;
0179 end
0180 if any(regexpi(commentText,'INCOMPLETE')) || any(regexpi(commentText,'ERRONEOUS')) || any(regexpi(commentText,'UNCLEAR'))
0181 isIncomplete(rxnCounter)=true;
0182 end
0183 if any(regexpi(commentText,'GENERAL REACTION'))
0184
0185 isGeneral(rxnCounter)=true;
0186 end
0187
0188
0189 if numel(tline)<12
0190 continue;
0191 end
0192 end
0193
0194
0195 if strcmp(tline(1:12),'ENZYME ')
0196 model.eccodes{rxnCounter}=tline(13:end);
0197 model.eccodes{rxnCounter}=deblank(model.eccodes{rxnCounter});
0198 model.eccodes{rxnCounter}=regexprep(model.eccodes{rxnCounter},'\s+',';');
0199 end
0200 if numel(tline)>8
0201 if strcmp(tline(1:9),'REFERENCE')
0202 pathway=false;
0203 orthology=false;
0204 end
0205 end
0206
0207
0208 if numel(tline)>18
0209 if strcmp(tline(1:12),'MODULE ') || module==true
0210 pathway=false;
0211 orthology=false;
0212 if isstruct(model.rxnMiriams{rxnCounter})
0213 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1;
0214 else
0215 addToIndex=1;
0216 end
0217 tempStruct=model.rxnMiriams{rxnCounter};
0218 tempStruct.name{addToIndex,1}='kegg.module';
0219 tempStruct.value{addToIndex,1}=tline(13:18);
0220 model.rxnMiriams{rxnCounter}=tempStruct;
0221 end
0222 end
0223
0224
0225 if numel(tline)>18
0226 if strcmp(tline(1:18),'DBLINKS RHEA: ')
0227 pathway=false;
0228 orthology=false;
0229 module=false;
0230 if isstruct(model.rxnMiriams{rxnCounter})
0231 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1;
0232 else
0233 addToIndex=1;
0234 end
0235 tempStruct=model.rxnMiriams{rxnCounter};
0236 tempStruct.name{addToIndex,1}='rhea';
0237 tempStruct.value{addToIndex,1}=tline(19:end);
0238 model.rxnMiriams{rxnCounter}=tempStruct;
0239 end
0240 end
0241
0242
0243 if numel(tline)>16
0244 if strcmp(tline(1:16),'ORTHOLOGY KO: ') || strcmp(tline(1:16),' KO: ') || strcmp(tline(1:12),'ORTHOLOGY ') || orthology==true
0245 pathway=false;
0246 module=false;
0247
0248
0249 if isstruct(model.rxnMiriams{rxnCounter})
0250 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1;
0251 else
0252 addToIndex=1;
0253 end
0254
0255 tempStruct=model.rxnMiriams{rxnCounter};
0256 tempStruct.name{addToIndex,1}='kegg.orthology';
0257 if strcmp(tline(13:16),'KO:')
0258
0259 tempStruct.value{addToIndex,1}=tline(17:22);
0260 else
0261
0262
0263 orthology=true;
0264 tempStruct.value{addToIndex,1}=tline(13:18);
0265 end
0266 model.rxnMiriams{rxnCounter}=tempStruct;
0267 end
0268 end
0269
0270
0271 if numel(tline)>18
0272 if strcmp(tline(1:18),'PATHWAY PATH: ') || strcmp(tline(1:18),' PATH: ') || strcmp(tline(1:12),'PATHWAY ') || pathway==true
0273 orthology=false;
0274 module=false;
0275
0276 if isstruct(model.rxnMiriams{rxnCounter})
0277 addToIndex=numel(model.rxnMiriams{rxnCounter}.name)+1;
0278 else
0279 addToIndex=1;
0280 end
0281
0282 tempStruct=model.rxnMiriams{rxnCounter};
0283 tempStruct.name{addToIndex,1}='kegg.pathway';
0284
0285 if strcmp(tline(14:17),'PATH:')
0286 tempStruct.value{addToIndex,1}=tline(19:25);
0287 else
0288
0289 tempStruct.value{addToIndex,1}=tline(13:19);
0290 pathway=true;
0291 end
0292
0293
0294
0295 if ~strcmp('rn011',tempStruct.value{addToIndex,1}(1:5)) && ~strcmp('rn012',tempStruct.value{addToIndex,1}(1:5))
0296 model.rxnMiriams{rxnCounter}=tempStruct;
0297
0298
0299
0300
0301
0302 if strcmp(tline(14:17),'PATH:')
0303
0304 model.subSystems{rxnCounter}=tline(28:end);
0305 else
0306
0307 model.subSystems{rxnCounter,1}{1,numel(model.subSystems{rxnCounter,1})+1}=tline(22:end);
0308 end
0309 end
0310 end
0311 end
0312 end
0313
0314
0315 fclose(fid);
0316
0317
0318
0319 isIncomplete=model.rxns(isIncomplete);
0320 isGeneral=model.rxns(isGeneral);
0321 isSpontaneous=model.rxns(isSpontaneous);
0322
0323
0324 model.rxns=model.rxns(1:rxnCounter);
0325 model.rxnNames=model.rxnNames(1:rxnCounter);
0326 model.eccodes=model.eccodes(1:rxnCounter);
0327 equations=equations(1:rxnCounter);
0328 model.rxnMiriams=model.rxnMiriams(1:rxnCounter);
0329 model.rxnNotes=model.rxnNotes(1:rxnCounter);
0330 model.subSystems=model.subSystems(1:rxnCounter);
0331
0332
0333
0334
0335
0336
0337 fid = fopen(fullfile(keggPath,'reaction.lst'), 'r');
0338
0339
0340 for i=1:rxnCounter
0341
0342 tline = fgetl(fid);
0343
0344 equations{i}=tline(9:end);
0345 end
0346
0347
0348 fclose(fid);
0349
0350
0351
0352
0353 equations = regexprep(equations,' <=>', ' <=>');
0354
0355
0356 [S, mets, badRxns]=constructS(equations);
0357 model.S=S;
0358 model.mets=mets;
0359
0360
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
0374 reversibility.rxns={};
0375 reversibility.product={};
0376 reversibility.rev=[];
0377
0378 fid = fopen(fullfile(keggPath,'reaction_mapformula.lst'), 'r');
0379 while 1
0380
0381 tline = fgetl(fid);
0382
0383
0384 if ~ischar(tline)
0385 break;
0386 end
0387
0388 rxn=tline(1:6);
0389 prod=tline(end-5:end);
0390 rev=any(strfind(tline,'<=>'));
0391 if isempty(reversibility.rxns)
0392 reversibility.rxns{1}=rxn;
0393 reversibility.product{1}=prod;
0394 reversibility.rev(1)=rev;
0395 elseif strcmp(reversibility.rxns(end),rxn)
0396
0397
0398
0399
0400 if rev==1 || reversibility.rev(end)==1
0401 reversibility.rev(end)=1;
0402 else
0403
0404
0405
0406
0407
0408
0409
0410 if ~strcmp(prod,reversibility.product(end))
0411 reversibility.rev(end)=1;
0412 end
0413 end
0414 else
0415 reversibility.rxns=[reversibility.rxns;rxn];
0416 reversibility.product=[reversibility.product;prod];
0417 reversibility.rev=[reversibility.rev;rev];
0418 end
0419 end
0420 fclose(fid);
0421
0422
0423 model.rev=ones(rxnCounter,1);
0424
0425 irrevIDs=find(reversibility.rev==0);
0426 [~, I]=ismember(reversibility.rxns(irrevIDs),model.rxns);
0427 [~, prodMetIDs]=ismember(reversibility.product(irrevIDs),model.mets);
0428 model.rev(I)=0;
0429
0430
0431 linearInd=sub2ind(size(model.S), prodMetIDs, I);
0432 changeOrder=I(model.S(linearInd)<0);
0433
0434 model.S(:,changeOrder)=model.S(:,changeOrder).*-1;
0435
0436
0437 model.ub=ones(rxnCounter,1)*1000;
0438 model.lb=model.rev*-1000;
0439 model.c=zeros(rxnCounter,1);
0440 model.b=zeros(numel(model.mets),1);
0441 model=removeReactions(model,badRxns,true,true);
0442
0443
0444
0445
0446 I=cellfun(@any,strfind(model.mets,'n')) | cellfun(@any,strfind(model.mets,'m'));
0447 [~, J]=find(model.S(I,:));
0448 isUndefinedStoich=model.rxns(unique(J));
0449
0450
0451 metList=[model.mets(~I);model.mets(I)];
0452 [~,metIndexes]=ismember(metList,model.mets);
0453 model=permuteModel(model,metIndexes,'mets');
0454
0455
0456
0457
0458 endRxnList=unique([model.rxns(ismember(model.rxns,isSpontaneous));model.rxns(ismember(model.rxns,isUndefinedStoich));model.rxns(ismember(model.rxns,isIncomplete));model.rxns(ismember(model.rxns,isGeneral))],'stable');
0459 rxnList=[model.rxns(~ismember(model.rxns,endRxnList));endRxnList];
0460 [~,rxnIndexes]=ismember(rxnList,model.rxns);
0461 model=permuteModel(model,rxnIndexes,'rxns');
0462
0463
0464
0465 for i=(numel(rxnList)-numel(endRxnList)+1):numel(model.rxns)
0466 if ismember(model.rxns(i),isSpontaneous)
0467 model.rxnNotes(i)=strcat(model.rxnNotes(i),'Spontaneous');
0468 end
0469 if ismember(model.rxns(i),isUndefinedStoich)
0470 if isempty(model.rxnNotes{i})
0471 model.rxnNotes(i)=strcat(model.rxnNotes(i),'With undefined stoichiometry');
0472 else
0473 model.rxnNotes(i)=strcat(model.rxnNotes(i),', with undefined stoichiometry');
0474 end
0475 end
0476 if ismember(model.rxns(i),isIncomplete)
0477 if isempty(model.rxnNotes{i})
0478 model.rxnNotes(i)=strcat(model.rxnNotes(i),'Incomplete');
0479 else
0480 model.rxnNotes(i)=strcat(model.rxnNotes(i),', incomplete');
0481 end
0482 end
0483 if ismember(model.rxns(i),isGeneral)
0484 if isempty(model.rxnNotes{i})
0485 model.rxnNotes(i)=strcat(model.rxnNotes(i),'General');
0486 else
0487 model.rxnNotes(i)=strcat(model.rxnNotes(i),', general');
0488 end
0489 end
0490 model.rxnNotes(i)=strcat(model.rxnNotes(i),' reaction');
0491 end
0492
0493
0494 save(rxnsFile,'model','isGeneral','isIncomplete','isUndefinedStoich','isSpontaneous');
0495 end
0496 end
0497 fprintf('COMPLETE\n');
0498
0499 end