function writtenTable = writeDLKcatInput(model, ecRxns, modelAdapter, onlyWithSmiles, filename, overwrite)
% writeDLKcatInput
%   Writes a tab-separated file with one line per combination of
%   ec-reaction, enzyme and substrate, and returns the same table.
%
% Input:
%   model           model structure. Fields read here: rxns, mets, metNames,
%                   S, metSmiles (optional), and ec.rxns, ec.genes,
%                   ec.sequence, ec.rxnEnzMat, ec.geckoLight.
%   ecRxns          logical vector with one element per entry of
%                   model.ec.rxns, true for the reactions to include.
%                   Numeric vectors that only contain 0 and 1 are accepted
%                   as well. (Optional, default: all reactions)
%   modelAdapter    model adapter; its params.path is used to locate the
%                   'data' folder. (Optional, default: the adapter returned
%                   by ModelAdapterManager.getDefault())
%   onlyWithSmiles  if true, entries whose substrate has no SMILES are
%                   dropped; if false, their SMILES is written as 'None'.
%                   (Optional, default: true)
%   filename        output path, must end with '.tsv'. (Optional, default:
%                   <params.path>/data/DLKcat.tsv)
%   overwrite       if true, an existing file is overwritten; otherwise an
%                   existing file raises an error. (Optional, default: false)
%
% Output:
%   writtenTable    6-by-N cell array holding what was written; each column
%                   is one line of the file with, in order: reaction ID
%                   (model.ec.rxns), gene (model.ec.genes), substrate name
%                   (model.metNames), substrate SMILES, protein sequence
%                   (model.ec.sequence) and the placeholder 'NA'.
%
% Substrates are the metabolites with a negative coefficient in model.S,
% after removing (a) metabolites listed by name or SMILES in
% DLKcatIgnoreMets.tsv, (b) metabolites whose ID starts with 'prot_', and
% (c) currency-metabolite pairs from DLKcatCurrencyMets.tsv in reactions
% that still have another substrate. Both .tsv files are taken from
% <params.path>/data when present there, otherwise from the 'databases'
% folder under the GECKO root.
%
% Usage:
%   writtenTable = writeDLKcatInput(model, ecRxns, modelAdapter, onlyWithSmiles, filename, overwrite)

[geckoPath, ~] = findGECKOroot();

% --- ecRxns: validate the mask and convert it to indices -----------------
nEcRxns = numel(model.ec.rxns);
if nargin<2 || isempty(ecRxns)
    ecRxns = true(nEcRxns,1);
elseif ~islogical(ecRxns)
    % Numeric 0/1 masks are tolerated; anything else (e.g. a list of
    % reaction indices) is rejected instead of being silently misread.
    if isnumeric(ecRxns) && all(ecRxns(:)==0 | ecRxns(:)==1)
        ecRxns = logical(ecRxns);
    else
        error('ecRxns should be provided as logical vector')
    end
end
if numel(ecRxns)~=nEcRxns
    error('Length of ecRxns is not the same as model.ec.rxns')
end
ecRxns = find(ecRxns(:)); % column vector of indices into model.ec.rxns

% --- remaining optional arguments ----------------------------------------
if nargin < 3 || isempty(modelAdapter)
    modelAdapter = ModelAdapterManager.getDefault();
    if isempty(modelAdapter)
        error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
    end
end
params = modelAdapter.params;

if nargin<4 || isempty(onlyWithSmiles)
    onlyWithSmiles=true;
end

if nargin<5 || isempty(filename)
    filename = fullfile(params.path,'data','DLKcat.tsv');
elseif ~endsWith(filename,'.tsv')
    error('If filename is provided, it should include the .tsv extension.')
end

if nargin<6 || isempty(overwrite) || ~overwrite
    if exist(filename,'file')
        error([filename ' already exists, either delete it first, or set the overwrite input argument as true'])
    end
end

% --- map the selected ec-reactions to columns of model.S -----------------
if ~model.ec.geckoLight
    origRxns = model.ec.rxns;
else
    % In geckoLight models the first four characters of each ec-reaction
    % ID are stripped before matching against model.rxns.
    origRxns = extractAfter(model.ec.rxns,4);
end
origRxnsToInclude = origRxns(ecRxns);

% origRxnIdxs(k) is the column of model.S that belongs to ecRxns(k)
[sanityCheck,origRxnIdxs] = ismember(origRxnsToInclude,model.rxns);
if ~all(sanityCheck)
    error('Not all reactions in model.ec.rxns are found in model.rxns')
end

% --- remove metabolites that should never be reported as substrate -------
% Names are compared in lower case with all non-alphanumeric characters removed.
metsNoSpecialChars = lower(regexprep(model.metNames,'[^0-9a-zA-Z]+',''));
fileData = readDLKcatMetList(fullfile(params.path,'data','DLKcatIgnoreMets.tsv'), ...
                             fullfile(geckoPath,'databases','DLKcatIgnoreMets.tsv'));
ignoreMets = lower(regexprep(fileData{1},'[^0-9a-zA-Z]+',''));
ignoreSmiles = fileData{2};
ignoreSmiles(cellfun(@isempty,ignoreSmiles)) = [];

ignoreMetsIdx = logical(ismember(metsNoSpecialChars,ignoreMets));
if isfield(model,'metSmiles')
    ignoreMetsIdx = ignoreMetsIdx | logical(ismember(model.metSmiles,ignoreSmiles));
end
% Metabolites whose ID starts with 'prot_' are excluded as well
ignoreMetsIdx = ignoreMetsIdx | startsWith(model.mets,'prot_');
reducedS = model.S;
reducedS(ignoreMetsIdx,:) = 0;

% --- remove currency metabolite pairs ------------------------------------
fileData = readDLKcatMetList(fullfile(params.path,'data','DLKcatCurrencyMets.tsv'), ...
                             fullfile(geckoPath,'databases','DLKcatCurrencyMets.tsv'));
% Only complete rows form a pair; guard against an incomplete last row.
nPairs = min(numel(fileData{1}),numel(fileData{2}));
currencyMets = [reshape(fileData{1}(1:nPairs),[],1), reshape(fileData{2}(1:nPairs),[],1)];
currencyMets = lower(regexprep(currencyMets,'[^0-9a-zA-Z]+',''));

for i=1:size(currencyMets,1)
    firstMet  = strcmp(currencyMets(i,1),metsNoSpecialChars);
    secondMet = strcmp(currencyMets(i,2),metsNoSpecialChars);
    pairRows  = firstMet | secondMet;
    % Reactions in which both members of the pair participate
    [~,firstRxns]  = find(reducedS(firstMet,:));
    [~,secondRxns] = find(reducedS(secondMet,:));
    pairRxns = intersect(firstRxns,secondRxns);
    % Check, on those columns only, whether a substrate remains once the
    % pair itself is taken out.
    pairCols = reducedS(:,pairRxns);
    pairCols(pairRows,:) = 0;
    rxnsWithRemainingSubstrates = any(pairCols < 0,1);
    % The pair is only removed where another substrate is left, so that
    % reactions consisting solely of the pair keep their substrate.
    reducedS(pairRows,pairRxns(rxnsWithRemainingSubstrates)) = 0;
end

% --- enumerate substrate / reaction / enzyme combinations ----------------
% Columns of the sub-matrix follow the order of ecRxns, so the column
% position is translated back to an index into model.ec.rxns.
[substrates, selIdx] = find(reducedS(:,origRxnIdxs)<0);
substrates = substrates(:);
reactions  = ecRxns(selIdx(:));

% One output entry per enzyme of each substrate/reaction combination;
% pairIdx points back into substrates/reactions.
[proteins, pairIdx] = find(transpose(model.ec.rxnEnzMat(reactions,:)));
proteins = proteins(:);
pairIdx  = pairIdx(:);

out = cell(6,numel(pairIdx));
out(1,:) = reshape(model.ec.rxns(reactions(pairIdx)),1,[]);
out(2,:) = reshape(model.ec.genes(proteins),1,[]);
out(3,:) = reshape(model.metNames(substrates(pairIdx)),1,[]);
if isfield(model,'metSmiles')
    out(4,:) = reshape(model.metSmiles(substrates(pairIdx)),1,[]);
end % without metSmiles the SMILES row stays empty
out(5,:) = reshape(model.ec.sequence(proteins),1,[]);

noSmiles = cellfun(@isempty,out(4,:));
if onlyWithSmiles
    out(:,noSmiles) = [];
else
    out(4,noSmiles) = {'None'};
end
out(6,:) = {'NA'};

% --- write the table -----------------------------------------------------
fID = fopen(filename,'w');
if fID == -1
    error('Unable to open %s for writing.',filename)
end
if ~isempty(out)
    % out{:} expands column by column, i.e. one output line per entry
    fprintf(fID,'%s\t%s\t%s\t%s\t%s\t%s\n',out{:});
end
fclose(fID);
fprintf('Model-specific DLKcat input stored at %s\n',filename);

writtenTable = out;
end

function fileData = readDLKcatMetList(preferredFile, fallbackFile)
% Reads two tab-separated text columns from preferredFile if it exists,
% otherwise from fallbackFile; returns the 1x2 cell given by textscan.
if exist(preferredFile,'file')
    tsvFile = preferredFile;
else
    tsvFile = fallbackFile;
end
fID = fopen(tsvFile);
if fID == -1
    error('Unable to open %s for reading.',tsvFile)
end
closeFile = onCleanup(@() fclose(fID)); % close the file even if textscan errors
fileData = textscan(fID,'%s %s','delimiter','\t');
end