function metaCycMets=getMetsFromMetaCyc(metacycPath)
% getMetsFromMetaCyc
%   Retrieves MetaCyc metabolite information, either from a cached
%   metaCycMets.mat file or by parsing the MetaCyc compounds.dat flat file.
%
%   Input:
%   metacycPath     (optional) folder that holds metaCycMets.mat and/or
%                   compounds.dat. When omitted, the folder
%                   <findRAVENroot()>/external/metacyc is used.
%
%   Output:
%   metaCycMets     structure with the following fields
%       id              'MetaCyc'
%       name            'Automatically generated from MetaCyc database'
%       version         text following '# Version: ' in compounds.dat
%                       (only set when such a line is present)
%       mets            UNIQUE-ID of each compound
%       metNames        COMMON-NAME with <tag>, </tag>, '&' and ';' removed
%       metFormulas     concatenated CHEMICAL-FORMULA entries, whitespace
%                       removed; replaced by the formula layer of the
%                       non-standard InChI when no standard InChI exists
%       inchis          InChI string without the 'InChI=' prefix; falls
%                       back to NON-STANDARD-INCHI when INCHI is absent
%       metCharges      sum of the values on the ATOM-CHARGES lines
%       metMiriams      struct with name/value cell columns holding SMILES,
%                       chebi and pubchem.compound entries; compounds with
%                       none of these get one metacyc.compound entry
%       keggid          identifier from the LIGAND-CPD DBLINKS line
%
%   If metaCycMets.mat exists in metacycPath it is loaded and returned.
%   Otherwise compounds.dat is parsed and the result is saved as
%   metaCycMets.mat in the same folder.
%
%   Usage: metaCycMets=getMetsFromMetaCyc(metacycPath)

if nargin<1
    ravenPath=findRAVENroot();
    metacycPath=fullfile(ravenPath,'external','metacyc');
else
    metacycPath=char(metacycPath);
end

metsFile=fullfile(metacycPath,'metaCycMets.mat');
metaCycMetFile='compounds.dat';
% Backslashes are swapped for forward slashes in the printed path only
printablePath=strrep(metsFile,'\','/');

if exist(metsFile,'file')
    % The path is passed as an argument rather than spliced into the
    % format string, so a '%' in the path cannot corrupt the output
    fprintf('Importing MetaCyc metabolites from %s... ',printablePath);
    % Load into a struct instead of poofing variables into the workspace
    cached=load(metsFile,'metaCycMets');
    metaCycMets=cached.metaCycMets;
    fprintf('done\n');
else
    fprintf('Cannot locate %s\nNow try to generate it from local MetaCyc data files...\n',printablePath);
    compoundsFile=fullfile(metacycPath,metaCycMetFile);
    if ~isfile(compoundsFile)
        % EM must be the message text itself; fprintf returns a byte count
        EM='The file of metabolites cannot be located, and should be downloaded from MetaCyc.';
        dispEM(EM);
    else
        % Descriptive fields of the structure
        metaCycMets.id='MetaCyc';
        metaCycMets.name='Automatically generated from MetaCyc database';

        % Preallocate; the arrays are trimmed to the real size afterwards
        nPrealloc=50000;
        metaCycMets.mets=cell(nPrealloc,1);
        metaCycMets.metNames=cell(nPrealloc,1);
        metaCycMets.metFormulas=cell(nPrealloc,1);
        metaCycMets.inchis=cell(nPrealloc,1);
        metaCycMets.metCharges=zeros(nPrealloc,1);
        metaCycMets.metMiriams=cell(nPrealloc,1);
        metaCycMets.keggid=cell(nPrealloc,1);

        [fid,openMsg]=fopen(compoundsFile,'r');
        if fid<0
            error('Cannot open %s for reading: %s',strrep(compoundsFile,'\','/'),openMsg);
        end
        % Guarantees that the file is closed even if parsing fails
        closeFile=onCleanup(@() fclose(fid));

        metCounter=0;
        nonStandardInchis='';

        while true
            tline=fgetl(fid);

            % fgetl returns -1 (non-char) at the end of the file
            if ~ischar(tline)
                break;
            end
            len=numel(tline);

            if len>11 && strncmp(tline,'# Version: ',11)
                metaCycMets.version=tline(12:end);
                continue;
            end

            if len>12 && strncmp(tline,'UNIQUE-ID - ',12)
                % A new compound record starts here; reset all its fields
                metCounter=metCounter+1;
                metaCycMets.mets{metCounter}=tline(13:end);
                metaCycMets.metNames{metCounter}='';
                metaCycMets.metFormulas{metCounter}='';
                metaCycMets.inchis{metCounter}='';
                metaCycMets.keggid{metCounter}='';
                % Explicit initialisation keeps the reads further down
                % valid when there are more records than preallocated rows
                metaCycMets.metCharges(metCounter,1)=0;
                metaCycMets.metMiriams{metCounter}=[];
                nonStandardInchis='';
                continue;
            end

            % Attribute lines before the first record have nothing to
            % attach to
            if metCounter==0
                continue;
            end

            if len>14 && strncmp(tline,'COMMON-NAME - ',14)
                % Strip opening tags, closing tags and the characters & ;
                cleanName=regexprep(tline(15:end),'<(\w+)>','');
                cleanName=regexprep(cleanName,'</(\w+)>','');
                cleanName=regexprep(cleanName,'[&;]','');
                metaCycMets.metNames{metCounter}=cleanName;

            elseif len>16 && strncmp(tline,'ATOM-CHARGES - (',16)
                % Content between the parentheses; the charge is whatever
                % follows the first space
                atomCharge=tline(17:end-1);
                s=strfind(atomCharge,' ');
                if any(s)
                    atomCharge=atomCharge(s(1)+1:end);
                    % NOTE(review): str2num evaluates its input (eval);
                    % acceptable only while compounds.dat is trusted.
                    % Consider str2double if the file may be untrusted.
                    metaCycMets.metCharges(metCounter,1)=metaCycMets.metCharges(metCounter,1)+str2num(atomCharge);
                end

            elseif len>14 && strncmp(tline,'INCHI - InChI=',14)
                metaCycMets.inchis{metCounter}=tline(15:end);

            elseif len>27 && strncmp(tline,'NON-STANDARD-INCHI - InChI=',27)
                % Kept aside; only used if the record has no standard InChI
                nonStandardInchis=tline(28:end);

            elseif len>9 && strncmp(tline,'SMILES - ',9)
                metaCycMets.metMiriams{metCounter}=appendMiriamEntry(...
                    metaCycMets.metMiriams{metCounter},'SMILES',tline(10:end));

            elseif len>20 && strncmp(tline,'CHEMICAL-FORMULA - (',20)
                % One line per element; append it and drop the whitespace
                formula=strcat(metaCycMets.metFormulas{metCounter},tline(21:end-1));
                formula(isspace(formula))=[];
                metaCycMets.metFormulas{metCounter}=formula;

            elseif len>23 && strncmp(tline,'DBLINKS - (LIGAND-CPD "',23)
                metaCycMets.keggid{metCounter}=leadingQuotedValue(tline(24:end));

            elseif len>18 && strncmp(tline,'DBLINKS - (CHEBI "',18)
                % The prefix is 18 characters long, so the id starts at 19
                chebiID=leadingQuotedValue(tline(19:end));
                metaCycMets.metMiriams{metCounter}=appendMiriamEntry(...
                    metaCycMets.metMiriams{metCounter},'chebi',strcat('CHEBI:',chebiID));

            elseif len>20 && strncmp(tline,'DBLINKS - (PUBCHEM "',20)
                pubchemID=leadingQuotedValue(tline(21:end));
                metaCycMets.metMiriams{metCounter}=appendMiriamEntry(...
                    metaCycMets.metMiriams{metCounter},'pubchem.compound',pubchemID);

            elseif strcmp(tline,'//') && strcmp(metaCycMets.inchis{metCounter},'')
                % End of a record without a standard InChI: fall back to
                % the non-standard one
                metaCycMets.inchis{metCounter}=nonStandardInchis;
                nonStandardInchis='';

                % The formula is the layer between the first two slashes.
                % Strings with a single slash (e.g. '1S/p+1') have no
                % closed formula layer and are left untouched.
                s=strfind(metaCycMets.inchis{metCounter},'/');
                if numel(s)>=2
                    inchiFormula=metaCycMets.inchis{metCounter}(s(1)+1:s(2)-1);
                    inchiFormula(inchiFormula=='.')=[];
                    metaCycMets.metFormulas{metCounter}=inchiFormula;
                end
            end
        end

        % Clearing the guard runs fclose(fid) right away
        clear closeFile;

        % Trim the preallocated arrays to the number of parsed compounds
        metaCycMets.mets=metaCycMets.mets(1:metCounter);
        metaCycMets.metNames=metaCycMets.metNames(1:metCounter);
        metaCycMets.metFormulas=metaCycMets.metFormulas(1:metCounter);
        metaCycMets.metMiriams=metaCycMets.metMiriams(1:metCounter);
        metaCycMets.inchis=metaCycMets.inchis(1:metCounter);
        metaCycMets.metCharges=metaCycMets.metCharges(1:metCounter,:);
        metaCycMets.keggid=metaCycMets.keggid(1:metCounter);

        % Compounds without any annotation get their MetaCyc id as MIRIAM
        for i=1:numel(metaCycMets.mets)
            if ~isstruct(metaCycMets.metMiriams{i})
                miriamStruct=struct();
                miriamStruct.name={'metacyc.compound'};
                miriamStruct.value=metaCycMets.mets(i);
                metaCycMets.metMiriams{i}=miriamStruct;
            end
        end

        % Cache the parsed structure next to the source data
        save(metsFile,'metaCycMets');
        fprintf('New metaCycMets.mat has been successfully updated!\n\n');
    end
end
end

function miriam=appendMiriamEntry(miriam,entryName,entryValue)
% Appends one name/value pair to a MIRIAM struct, creating the struct when
% the input is not a struct yet (e.g. an empty preallocated cell content).
if isstruct(miriam)
    addToIndex=numel(miriam.name)+1;
else
    miriam=struct('name',{{}},'value',{{}});
    addToIndex=1;
end
miriam.name{addToIndex,1}=entryName;
miriam.value{addToIndex,1}=entryValue;
end

function value=leadingQuotedValue(text)
% Returns the part of text before the first double quote, or the whole
% text when it contains no double quote.
quotePos=strfind(text,'"');
if isempty(quotePos)
    value=text;
else
    value=text(1:quotePos(1)-1);
end
end