Home > src > geckomat > gather_kcats > writeDLKcatInput.m

writeDLKcatInput

PURPOSE ^

writeDLKcatInput

SYNOPSIS ^

function writtenTable = writeDLKcatInput(model, ecRxns, modelAdapter, onlyWithSmiles, filename, overwrite)

DESCRIPTION ^

 writeDLKcatInput
   Prepares the input for DLKcat, and writes it to data/DLKcat.tsv
   in the obj.params.path specified in the ModelAdapter.

 Input:
   model           an ecModel in GECKO 3 format (with ecModel.ec structure)
   ecRxns          for which reactions (from model.ec.rxns) DLKcat should
                   predict kcat values, provided as logical vector with
                   same length as model.ec.rxns. (Optional, default is all
                   reactions)
   modelAdapter    a loaded model adapter (Optional, will otherwise use the
                   default model adapter).
   onlyWithSmiles  logical whether to only include metabolites with SMILES
                   (optional, default true)
   filename        path to the input file, including the filename and .tsv
                   extension (Optional, default is data/DLKcat.tsv from
                   the obj.params.path folder specified in the modelAdapter)
   overwrite       logical whether existing file should be overwritten.
                   (Optional, default false, to prevent overwriting file
                   that already contains DLKcat-predicted kcat values).

 Output:
   writtenTable    The table written, mainly to be used for testing purposes.

 Usage:
   writtenTable = writeDLKcatInput(model, ecRxns, modelAdapter, onlyWithSmiles, filename, overwrite)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function writtenTable = writeDLKcatInput(model, ecRxns, modelAdapter, onlyWithSmiles, filename, overwrite)
% writeDLKcatInput
%   Prepares the input for DLKcat, and writes it to data/DLKcat.tsv
%   in the obj.params.path specified in the ModelAdapter.
%
%   For every selected reaction in model.ec.rxns, each remaining substrate
%   (negative stoichiometric coefficient after removing ignored and paired
%   currency metabolites) is combined with each enzyme assigned to that
%   reaction in model.ec.rxnEnzMat; every combination becomes one entry.
%
% Input:
%   model           an ecModel in GECKO 3 format (with ecModel.ec structure)
%   ecRxns          for which reactions (from model.ec.rxns) DLKcat should
%                   predict kcat values, provided as logical vector with
%                   same length as model.ec.rxns. A numeric vector that
%                   only contains 0 and 1 is accepted as well. (Optional,
%                   default is all reactions)
%   modelAdapter    a loaded model adapter (Optional, will otherwise use the
%                   default model adapter).
%   onlyWithSmiles  logical whether to only include metabolites with SMILES
%                   (optional, default true)
%   filename        path to the input file, including the filename and .tsv
%                   extension (Optional, default is data/DLKcat.tsv from
%                   the obj.params.path folder specified in the modelAdapter)
%   overwrite       logical whether existing file should be overwritten.
%                   (Optional, default false, to prevent overwriting file
%                   that already contains DLKcat-predicted kcat values).
%
% Output:
%   writtenTable    The table written, mainly to be used for testing purposes.
%                   6-by-N cell array, one column per entry, with rows:
%                   reaction id (model.ec.rxns), gene (model.ec.genes),
%                   metabolite name, metabolite SMILES, protein sequence
%                   (model.ec.sequence) and the placeholder 'NA'.
%
% Usage:
%   writtenTable = writeDLKcatInput(model, ecRxns, modelAdapter, onlyWithSmiles, filename, overwrite)

[geckoPath, ~] = findGECKOroot();

% --- Validate / default the reaction selection ---------------------------
nEcRxns = numel(model.ec.rxns);
if nargin<2 || isempty(ecRxns)
    ecRxns = true(nEcRxns,1);
elseif ~(islogical(ecRxns) || (isnumeric(ecRxns) && all(ecRxns(:)==0 | ecRxns(:)==1)))
    % A type check is needed here: negating logical(ecRxns) would reject a
    % valid all-false mask and accept a vector of reaction indices.
    error('ecRxns should be provided as logical vector')
elseif numel(ecRxns)~=nEcRxns
    error('Length of ecRxns is not the same as model.ec.rxns')
end
ecRxns = find(ecRxns(:)); % Change to (column) indices into model.ec.rxns

% --- Remaining optional arguments ----------------------------------------
if nargin < 3 || isempty(modelAdapter)
    modelAdapter = ModelAdapterManager.getDefault();
    if isempty(modelAdapter)
        error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
    end
end
params = modelAdapter.params;

if nargin<4 || isempty(onlyWithSmiles)
    onlyWithSmiles=true;
end

if nargin<5 || isempty(filename)
    filename = fullfile(params.path,'data','DLKcat.tsv');
elseif ~endsWith(filename,'.tsv')
    error('If filename is provided, it should include the .tsv extension.')
end

if nargin<6 || isempty(overwrite)
    overwrite = false;
end
% Refuse to replace an existing file unless overwriting was requested
if ~overwrite && exist(filename,'file')~=0
    error([filename ' already exists, either delete it first, or set the overwrite input argument as true'])
end

% --- Map selected ec-reactions back to model.rxns ------------------------
if ~model.ec.geckoLight
    origRxns = model.ec.rxns;
else
    % In light models the first four characters of each model.ec.rxns
    % entry are stripped to obtain the model.rxns identifier
    origRxns = extractAfter(model.ec.rxns,4);
end
origRxnsToInclude = origRxns(ecRxns);

% Map back to original reactions, to extract substrates
[sanityCheck,origRxnIdxs] = ismember(origRxnsToInclude,model.rxns);
if ~all(sanityCheck)
    error('Not all reactions in model.ec.rxns are found in model.rxns')
end

% --- Ignore selected metabolites (metal ions, proteins etc.) -------------
% First check by name (case insensitive, without white spaces and special
% characters), then also try to match with metSmiles (if available).
metsNoSpecialChars = lower(regexprep(model.metNames,'[^0-9a-zA-Z]+',''));
fileData = readDLKcatMetList(fullfile(params.path,'data','DLKcatIgnoreMets.tsv'), ...
                             fullfile(geckoPath,'databases','DLKcatIgnoreMets.tsv'));
[ignoreMets, ignoreSmiles] = deal(fileData{[1,2]});
ignoreMets = lower(regexprep(ignoreMets,'[^0-9a-zA-Z]+',''));
ignoreSmiles(cellfun(@isempty,ignoreSmiles)) = [];

ignoreMetsIdx = ismember(metsNoSpecialChars,ignoreMets);
if isfield(model,'metSmiles')
    ignoreMetsIdx = ignoreMetsIdx | ismember(model.metSmiles,ignoreSmiles);
end
% Also leave out protein-usage pseudometabolites
ignoreMetsIdx = ignoreMetsIdx | startsWith(model.mets,'prot_');
reducedS = model.S;
reducedS(ignoreMetsIdx,:) = 0;

% --- Ignore currency metabolites if they occur in pairs ------------------
% Matching is done by name (case insensitive, without white spaces and
% special characters).
fileData = readDLKcatMetList(fullfile(params.path,'data','DLKcatCurrencyMets.tsv'), ...
                             fullfile(geckoPath,'databases','DLKcatCurrencyMets.tsv'));
currencyMets = [fileData{1}, fileData{2}];
currencyMets = lower(regexprep(currencyMets,'[^0-9a-zA-Z]+',''));

for i=1:size(currencyMets,1)
    firstMet  = strcmp(currencyMets(i,1),metsNoSpecialChars);
    secondMet = strcmp(currencyMets(i,2),metsNoSpecialChars);
    [~,firstRxns]  = find(reducedS(firstMet,:));
    [~,secondRxns] = find(reducedS(secondMet,:));
    % Reactions in which both members of the pair participate
    pairRxns = intersect(firstRxns,secondRxns);
    pairMets = [find(firstMet(:)); find(secondMet(:))];
    % Do not remove currency mets if no substrate remains: test the removal
    % on a copy of only the affected columns
    pairCols = reducedS(:,pairRxns);
    pairCols(pairMets,:) = 0;
    rxnsWithRemainingSubstrates = any(pairCols < 0,1);
    reducedS(pairMets,pairRxns(rxnsWithRemainingSubstrates)) = 0;
end

% --- Enumerate substrates of the selected reactions ----------------------
% Columns of the sub-matrix follow the order of ecRxns, so the column
% indices returned by find are translated back to model.ec.rxns indices.
% This keeps all later look-ups in model.ec.rxns space, also when only a
% subset of the reactions was selected.
[substrates, selectedCols] = find(reducedS(:,origRxnIdxs) < 0);
substrates = substrates(:);
reactions  = ecRxns(selectedCols(:));

% Enumerate all proteins for each substrate-reaction combination; pairIdx
% points into the substrates/reactions lists
[proteins, pairIdx] = find(transpose(model.ec.rxnEnzMat(reactions,:)));
proteins = proteins(:);
pairIdx  = pairIdx(:);

% --- Prepare output -------------------------------------------------------
out = cell(6,numel(proteins));
out(1,:) = reshape(model.ec.rxns(reactions(pairIdx)),1,[]);
out(2,:) = reshape(model.ec.genes(proteins),1,[]);
out(3,:) = reshape(model.metNames(substrates(pairIdx)),1,[]);
if isfield(model,'metSmiles')
    out(4,:) = reshape(model.metSmiles(substrates(pairIdx)),1,[]);
end % otherwise row 4 stays empty and is handled below
out(5,:) = reshape(model.ec.sequence(proteins),1,[]);

noSmiles = cellfun(@isempty,out(4,:));
if onlyWithSmiles
    out(:,noSmiles) = [];
else
    out(4,noSmiles) = {'None'};
end
out(6,:) = {'NA'};

% --- Write file -----------------------------------------------------------
fID = fopen(filename,'w');
if fID == -1
    error('Could not open %s for writing.',filename)
end
% NOTE(review): fprintf skips empty arguments, so an empty gene, name or
% sequence entry would shift the columns of that row - confirm that these
% fields are never empty for the enzymes in model.ec.
if ~isempty(out) % without entries the format would be printed once, as bare tabs
    fprintf(fID,'%s\t%s\t%s\t%s\t%s\t%s\n',out{:});
end
fclose(fID);
fprintf('Model-specific DLKcat input stored at %s\n',filename);

writtenTable = out;
end

function fileData = readDLKcatMetList(modelFile, defaultFile)
% readDLKcatMetList
%   Reads a two-column, tab-delimited text file and returns the textscan
%   result (1-by-2 cell array, one cell array of strings per column).
%   modelFile is used when it exists, otherwise defaultFile is read.
if exist(modelFile,'file')
    listFile = modelFile;
else
    listFile = defaultFile;
end
fID = fopen(listFile,'r');
if fID == -1
    error('Could not open %s for reading.',listFile)
end
fileData = textscan(fID,'%s %s','delimiter','\t');
fclose(fID);
end

Generated by m2html © 2005