Home > src > geckomat > change_model > findMetSmiles.m

findMetSmiles

PURPOSE ^

findMetSMILES

SYNOPSIS ^

function [model,noSMILES] = findMetSmiles(model, modelAdapter, verbose)

DESCRIPTION ^

 findMetSMILES
   Queries PubChem by metabolite names to obtain SMILES. Matches will also
   be stored in tutorials/***/data/smilesDB.tsv, that will also be queried
   first next time the function is run. If the model already has a
   metSmiles field, then non-empty entries will not be overwritten.

 Input:
   model        a model whose metNames field is used to find the relevant SMILES
   modelAdapter a loaded model adapter (Optional, will otherwise use the
                default model adapter).
   verbose      logical whether progress should be reported (Optional,
                default true)
 Ouput:
   model        model with model.metSmiles specified.
   noSMILES     metabolite names for which no SMILES could be found.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [model,noSMILES] = findMetSmiles(model, modelAdapter, verbose)
0002 % findMetSMILES
0003 %   Queries PubChem by metabolite names to obtain SMILES. Matches will also
0004 %   be stored in tutorials/***/data/smilesDB.tsv, that will also be queried
0005 %   first next time the function is run. If the model already has a
0006 %   metSmiles field, then non-empty entries will not be overwritten.
0007 %
0008 % Input:
0009 %   model        a model whose metNames field is used to find the relevant SMILES
0010 %   modelAdapter a loaded model adapter (Optional, will otherwise use the
0011 %                default model adapter).
0012 %   verbose      logical whether progress should be reported (Optional,
0013 %                default true)
0014 % Ouput:
0015 %   model        model with model.metSmiles specified.
0016 %   noSMILES     metabolite names for which no SMILES could be found.
0017 %
0018 if nargin < 3 || isempty(verbose)
0019     verbose = true;
0020 end
0021 if nargin < 2 || isempty(modelAdapter)
0022     modelAdapter = ModelAdapterManager.getDefault();
0023     if isempty(modelAdapter)
0024         error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
0025     end
0026 end
0027 params = modelAdapter.params;
0028 
0029 [uniqueNames, ~, uniqueIdx] = unique(regexprep(model.metNames,'^prot_.*',''));
0030 uniqueSmiles(1:numel(uniqueNames),1) = {''};
0031 metMatch = false(length(uniqueNames),1);
0032 metMatch(strcmp(uniqueNames,'')) = 1; % No need trying to match empty fields
0033 if verbose; fprintf('Check for local SMILES database... '); end
0034 smilesDBfile = (fullfile(params.path,'data','smilesDB.tsv'));
0035 if exist(smilesDBfile,'file')==2
0036     fID = fopen(smilesDBfile,'r');
0037     raw = textscan(fID,'%s %s','Delimiter','\t','HeaderLines',0);
0038     fclose(fID);
0039     smilesDB.names = raw{1};
0040     smilesDB.smile = raw{2};
0041     [metMatch, metIdx] = ismember(uniqueNames,smilesDB.names);
0042     uniqueSmiles(metMatch) = smilesDB.smile(metIdx(metMatch));
0043     if verbose; fprintf('done.\n'); end
0044 else
0045     if verbose; fprintf('not found.\n'); end
0046 end
0047 
0048 if any(~metMatch)
0049     progressbar('Querying PubChem for SMILES by metabolite names')
0050     webOptions = weboptions('Timeout', 30);
0051     for i = 1:numel(uniqueNames)
0052         if metMatch(i)
0053             continue;
0054         end
0055         retry = 0;
0056         while retry < 10
0057             try
0058                 smileResult       = webread(['https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/' uniqueNames{i} '/property/CanonicalSMILES/TXT'], webOptions);
0059                 %Sometimes multiple lines are given, with alternative SMILES. Only
0060                 %keep the first suggestion.
0061                 smileResult       = regexp(smileResult,'(^\S*)\n','once','tokens');
0062                 uniqueSmiles{i,1} = smileResult{1,1};
0063                 retry = 15; % success: no retry
0064             catch exception
0065                 %Sometimes the call fails, for example since the server is busy. In those cases
0066                 %we will try 10 times. Some errors however are because the metabolite
0067                 %name does no exist in the database (404) or some other error (the metabolite contains
0068                 %a slash or similar, 400 or 500). In those cases we can
0069                 %immediately give up.
0070                 if (strcmp(exception.identifier, 'MATLAB:webservices:HTTP404StatusCodeError') || ...
0071                         strcmp(exception.identifier, 'MATLAB:webservices:HTTP400StatusCodeError') || ...
0072                         strcmp(exception.identifier, 'MATLAB:webservices:HTTP500StatusCodeError'))
0073                     uniqueSmiles(i) = {''};
0074                     retry = 15;
0075                 else
0076                     retry = retry + 1;
0077                 end
0078             end
0079         if retry == 10
0080             error('Cannot reach PubChem. Check your internet connection and try again.')
0081         end
0082         end
0083         % Append one line each time, in case internet connection is lost
0084         % halfway. Open & close file each time to avoid leaving the file
0085         % open when breaking the function.
0086         out = [uniqueNames(i), uniqueSmiles(i)];
0087         fID = fopen(smilesDBfile,'a');
0088         fprintf(fID,'%s\t%s\n',out{:});
0089         fclose(fID);
0090         progressbar(i/numel(uniqueNames))
0091     end
0092     if verbose
0093         fprintf('Model-specific SMILES database stored at %s\n',smilesDBfile);
0094     end
0095 end
0096 newSmiles = uniqueSmiles(uniqueIdx);
0097 noSMILES = cellfun(@isempty,uniqueSmiles);
0098 successRatio = 1-(numel(find(noSMILES))/numel(uniqueSmiles));
0099 fprintf('SMILES could be found for %s%% of the unique metabolite names.\n',num2str(successRatio*100,'%.0f'))
0100 noSMILES = uniqueNames(noSMILES);
0101 
0102 if ~isfield(model,'metSmiles') || all(cellfun(@isempty,model.metSmiles))
0103     model.metSmiles = newSmiles;
0104 else
0105     emptySmiles = cellfun(@isempty,model.metSmiles);
0106     model.metSmiles(emptySmiles) = newSmiles(emptySmiles);
0107 end
0108 progressbar(1) % Make sure it closes
0109 end

Generated by m2html © 2005