function metaCycEnzymes=getEnzymesFromMetaCyc(metacycPath)
% getEnzymesFromMetaCyc
%   Retrieves MetaCyc enzyme, protein-complex and reaction-enzyme
%   association data, either from a cached metaCycEnzymes.mat file or, if
%   that cannot be loaded, by parsing the MetaCyc flat files proteins.dat
%   and enzrxns.dat found in the same directory.
%
%   Input:
%   metacycPath     (optional) directory holding metaCycEnzymes.mat and/or
%                   proteins.dat and enzrxns.dat. Defaults to
%                   <RAVEN root>/external/metacyc, where the RAVEN root is
%                   obtained from findRAVENroot().
%
%   Output:
%   metaCycEnzymes  structure with the fields
%       id              'MetaCyc'
%       name            description string
%       cplxs           IDs of entries typed as 'Protein-Complexes'
%       cplxComp        cell array (same order as cplxs) of structures
%                       with a field 'subunit': the component enzyme IDs,
%                       with nested complexes replaced by their own
%                       components
%       enzymes         all UNIQUE-IDs found in proteins.dat
%       enzrxns         all UNIQUE-IDs found in enzrxns.dat
%       rxns            unique reaction IDs found in enzrxns.dat
%       rxnNames        for each reaction, the ';'-joined common names of
%                       its enzymatic reactions
%       commoname       cleaned COMMON-NAME of each enzrxns entry
%       rxnEnzymeMat    sparse reactions-by-enzymes matrix; 1 marks an
%                       association
%       version         text following '# Version: ' in enzrxns.dat (only
%                       set when such a line exists)
%
%   When the structure is generated from the flat files it is also saved
%   to metaCycEnzymes.mat in metacycPath.
%
%   Usage: metaCycEnzymes=getEnzymesFromMetaCyc(metacycPath)

if nargin<1
    ravenPath=findRAVENroot();
    metacycPath=fullfile(ravenPath,'external','metacyc');
else
    metacycPath=char(metacycPath);
end

enzymesFile=fullfile(metacycPath,'metaCycEnzymes.mat');
proteinFile=fullfile(metacycPath,'proteins.dat');
enzrxnsFile=fullfile(metacycPath,'enzrxns.dat');

%Path as shown in messages (forward slashes only)
shownPath=strrep(enzymesFile,'\','/');

%First try the cached .mat file. The variable is loaded into a structure
%so that a file lacking 'metaCycEnzymes' triggers regeneration instead of
%leaving the output unassigned
try
    fprintf('Importing MetaCyc enzymes and reaction-enzyme association from %s... ',shownPath);
    loaded=load(enzymesFile,'metaCycEnzymes');
    metaCycEnzymes=loaded.metaCycEnzymes;
    fprintf('done\n');
    return
catch
    fprintf('Cannot locate %s\nNow try to generate it from local MetaCyc data files...\n',shownPath);
end

if ~isfile(proteinFile) || ~isfile(enzrxnsFile)
    %dispEM is defined elsewhere in the toolbox; it is presumably the
    %standard way of raising an error and is handed the message text
    EM='The files of enzymes or proteins cannot be located, and should be downloaded from MetaCyc.';
    dispEM(EM);
    return
end

metaCycEnzymes.id='MetaCyc';
metaCycEnzymes.name='Automatically generated from MetaCyc database';

%Preallocate generously; trimmed to the actual size after parsing
metaCycEnzymes.cplxs=cell(10000,1);
metaCycEnzymes.cplxComp=cell(10000,1);
metaCycEnzymes.enzymes=cell(50000,1);

%--- Parse proteins.dat: all enzymes, plus components of complexes -------
nCplx=0;
enzymeCounter=0;
enzymeID='';
inComplex=false; %true while inside a record typed as Protein-Complexes

proteinFid=fopen(proteinFile,'r');
if proteinFid==-1
    error('getEnzymesFromMetaCyc:cannotOpenFile','Cannot open %s',proteinFile);
end
closeProteinFile=onCleanup(@() fclose(proteinFid));

while true
    tline=fgetl(proteinFid);
    if ~ischar(tline)
        break; %end of file
    end

    [isField,value]=parseField(tline,'UNIQUE-ID - ');
    if isField
        enzymeCounter=enzymeCounter+1;
        inComplex=false;
        enzymeID=value;
        metaCycEnzymes.enzymes{enzymeCounter}=enzymeID;
    end

    if strcmp(tline,'TYPES - Protein-Complexes')
        nCplx=nCplx+1;
        metaCycEnzymes.cplxs{nCplx}=enzymeID;
        %Create the component list right away so that every complex has
        %one, even if its record has no components or no closing '//'
        metaCycEnzymes.cplxComp{nCplx}.subunit=cell(0,1);
        inComplex=true;
    end

    [isField,value]=parseField(tline,'COMPONENTS - ');
    if isField && inComplex
        metaCycEnzymes.cplxComp{nCplx}.subunit{end+1,1}=value;
    end

    if strcmp(tline,'//')
        inComplex=false; %end of record
    end
end
clear closeProteinFile; %closes proteins.dat

metaCycEnzymes.cplxs=metaCycEnzymes.cplxs(1:nCplx);
metaCycEnzymes.cplxComp=metaCycEnzymes.cplxComp(1:nCplx);
metaCycEnzymes.enzymes=metaCycEnzymes.enzymes(1:enzymeCounter);

%--- Replace nested complexes by their components ------------------------
for i=1:nCplx
    subunits=metaCycEnzymes.cplxComp{i}.subunit;

    nPasses=0;
    while true
        [isCplx,cplxIdx]=ismember(subunits,metaCycEnzymes.cplxs);
        if ~any(isCplx)
            break;
        end

        %Each pass resolves one nesting level, so more passes than there
        %are complexes can only mean a circular definition
        nPasses=nPasses+1;
        if nPasses>nCplx
            error('getEnzymesFromMetaCyc:circularComplex',...
                'Circular definition of protein complex %s',metaCycEnzymes.cplxs{i});
        end

        %Drop all nested complexes in one step (so indices cannot shift)
        %and then append the components of each of them
        expanded=subunits(~isCplx);
        for k=reshape(cplxIdx(isCplx),1,[])
            expanded=[expanded; metaCycEnzymes.cplxComp{k}.subunit]; %#ok<AGROW>
        end
        subunits=expanded;
    end

    %Keep only subunits that are listed among the enzymes
    isKnown=ismember(subunits,metaCycEnzymes.enzymes);
    if ~all(isKnown)
        subunits=subunits(isKnown);
    end
    metaCycEnzymes.cplxComp{i}.subunit=subunits;
end

%--- Parse enzrxns.dat: reaction-enzyme associations ---------------------
metaCycEnzymes.enzrxns=cell(50000,1);
metaCycEnzymes.rxns=cell(50000,1);
metaCycEnzymes.rxnNames=cell(50000,1);
metaCycEnzymes.commoname=cell(50000,1);
metaCycEnzymes.rxnEnzymeMat=sparse(50000,enzymeCounter);

enzrxnsFid=fopen(enzrxnsFile,'r');
if enzrxnsFid==-1
    error('getEnzymesFromMetaCyc:cannotOpenFile','Cannot open %s',enzrxnsFile);
end
closeEnzrxnsFile=onCleanup(@() fclose(enzrxnsFid));

enzrxnCounter=0;
nRxn=0;
nEnzyme=0; %column of the current record's enzyme, 0 if none/unknown
rxnRow=containers.Map('KeyType','char','ValueType','double'); %reaction ID -> row

while true
    tline=fgetl(enzrxnsFid);
    if ~ischar(tline)
        break; %end of file
    end

    [isField,value]=parseField(tline,'# Version: ');
    if isField
        metaCycEnzymes.version=value;
    end

    [isField,value]=parseField(tline,'UNIQUE-ID - ');
    if isField
        enzrxnCounter=enzrxnCounter+1;
        metaCycEnzymes.enzrxns{enzrxnCounter}=value;
        metaCycEnzymes.commoname{enzrxnCounter}='';
        nEnzyme=0; %do not carry an enzyme over from the previous record
    end

    [isField,value]=parseField(tline,'COMMON-NAME - ');
    if isField && enzrxnCounter>0
        %Strip opening/closing markup tags and the characters & and ;
        cleanName=regexprep(value,'<(\w+)>','');
        cleanName=regexprep(cleanName,'</(\w+)>','');
        cleanName=regexprep(cleanName,'[&;]','');
        metaCycEnzymes.commoname{enzrxnCounter}=cleanName;
    end

    [isField,value]=parseField(tline,'ENZYME - ');
    if isField
        %nEnzyme becomes 0 when the enzyme is not in proteins.dat
        [~,nEnzyme]=ismember(value,metaCycEnzymes.enzymes);
    end

    [isField,rxnID]=parseField(tline,'REACTION - ');
    if isField && enzrxnCounter>0
        currentName=metaCycEnzymes.commoname{enzrxnCounter};
        if isKey(rxnRow,rxnID)
            %Known reaction: add the name unless it is already contained
            row=rxnRow(rxnID);
            if isempty(strfind(metaCycEnzymes.rxnNames{row},currentName)) %#ok<STREMP>
                metaCycEnzymes.rxnNames{row}=strcat(metaCycEnzymes.rxnNames{row},';',currentName);
            end
        else
            nRxn=nRxn+1;
            row=nRxn;
            rxnRow(rxnID)=row;
            metaCycEnzymes.rxns{row}=rxnID;
            metaCycEnzymes.rxnNames{row}=currentName;
        end
        %Only associate enzymes that could be matched; column 0 is invalid
        if nEnzyme>0
            metaCycEnzymes.rxnEnzymeMat(row,nEnzyme)=1;
        end
    end
end
clear closeEnzrxnsFile; %closes enzrxns.dat

metaCycEnzymes.enzrxns=metaCycEnzymes.enzrxns(1:enzrxnCounter);
metaCycEnzymes.commoname=metaCycEnzymes.commoname(1:enzrxnCounter);
metaCycEnzymes.rxns=metaCycEnzymes.rxns(1:nRxn);
metaCycEnzymes.rxnNames=metaCycEnzymes.rxnNames(1:nRxn);
metaCycEnzymes.rxnEnzymeMat=metaCycEnzymes.rxnEnzymeMat(1:nRxn,:);

save(enzymesFile,'metaCycEnzymes');
fprintf('New metaCycEnzymes.mat has been successfully updated!\n\n');
end

function [isField,value]=parseField(tline,prefix)
% parseField
%   Tests whether a line starts with the given prefix followed by at least
%   one more character, and if so returns the text after the prefix.
nPrefix=numel(prefix);
isField=numel(tline)>nPrefix && strncmp(tline,prefix,nPrefix);
if isField
    value=tline(nPrefix+1:end);
else
    value='';
end
end