0001 function [model,noSMILES] = findMetSmiles(model, modelAdapter, verbose)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 if nargin < 3 || isempty(verbose)
0019 verbose = true;
0020 end
0021 if nargin < 2 || isempty(modelAdapter)
0022 modelAdapter = ModelAdapterManager.getDefault();
0023 if isempty(modelAdapter)
0024 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
0025 end
0026 end
0027 params = modelAdapter.params;
0028
0029 [uniqueNames, ~, uniqueIdx] = unique(regexprep(model.metNames,'^prot_.*',''));
0030 uniqueSmiles(1:numel(uniqueNames),1) = {''};
0031 metMatch = false(length(uniqueNames),1);
0032 metMatch(strcmp(uniqueNames,'')) = 1;
0033 if verbose; fprintf('Check for local SMILES database... '); end
0034 smilesDBfile = (fullfile(params.path,'data','smilesDB.tsv'));
0035 if exist(smilesDBfile,'file')==2
0036 fID = fopen(smilesDBfile,'r');
0037 raw = textscan(fID,'%s %s','Delimiter','\t','HeaderLines',0);
0038 fclose(fID);
0039 smilesDB.names = raw{1};
0040 smilesDB.smile = raw{2};
0041 [metMatch, metIdx] = ismember(uniqueNames,smilesDB.names);
0042 uniqueSmiles(metMatch) = smilesDB.smile(metIdx(metMatch));
0043 if verbose; fprintf('done.\n'); end
0044 else
0045 if verbose; fprintf('not found.\n'); end
0046 end
0047
0048 if any(~metMatch)
0049 progressbar('Querying PubChem for SMILES by metabolite names')
0050 webOptions = weboptions('Timeout', 30);
0051 for i = 1:numel(uniqueNames)
0052 if metMatch(i)
0053 continue;
0054 end
0055 retry = 0;
0056 while retry < 10
0057 try
0058 smileResult = webread(['https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/' uniqueNames{i} '/property/CanonicalSMILES/TXT'], webOptions);
0059
0060
0061 smileResult = regexp(smileResult,'(^\S*)\n','once','tokens');
0062 uniqueSmiles{i,1} = smileResult{1,1};
0063 retry = 15;
0064 catch exception
0065
0066
0067
0068
0069
0070 if (strcmp(exception.identifier, 'MATLAB:webservices:HTTP404StatusCodeError') || ...
0071 strcmp(exception.identifier, 'MATLAB:webservices:HTTP400StatusCodeError') || ...
0072 strcmp(exception.identifier, 'MATLAB:webservices:HTTP500StatusCodeError'))
0073 uniqueSmiles(i) = {''};
0074 retry = 15;
0075 else
0076 retry = retry + 1;
0077 end
0078 end
0079 if retry == 10
0080 error('Cannot reach PubChem. Check your internet connection and try again.')
0081 end
0082 end
0083
0084
0085
0086 out = [uniqueNames(i), uniqueSmiles(i)];
0087 fID = fopen(smilesDBfile,'a');
0088 fprintf(fID,'%s\t%s\n',out{:});
0089 fclose(fID);
0090 progressbar(i/numel(uniqueNames))
0091 end
0092 if verbose
0093 fprintf('Model-specific SMILES database stored at %s\n',smilesDBfile);
0094 end
0095 end
0096 newSmiles = uniqueSmiles(uniqueIdx);
0097 noSMILES = cellfun(@isempty,uniqueSmiles);
0098 successRatio = 1-(numel(find(noSMILES))/numel(uniqueSmiles));
0099 fprintf('SMILES could be found for %s%% of the unique metabolite names.\n',num2str(successRatio*100,'%.0f'))
0100 noSMILES = uniqueNames(noSMILES);
0101
0102 if ~isfield(model,'metSmiles') || all(cellfun(@isempty,model.metSmiles))
0103 model.metSmiles = newSmiles;
0104 else
0105 emptySmiles = cellfun(@isempty,model.metSmiles);
0106 model.metSmiles(emptySmiles) = newSmiles(emptySmiles);
0107 end
0108 progressbar(1)
0109 end