0001 function model = getECfromDatabase(model, ecRxns, action, modelAdapter)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032 if nargin < 2 || isempty(ecRxns)
0033 ecRnxs = true(numel(model.ec.rxns),1);
0034 end
0035
0036 if nargin < 3 || isempty(action)
0037 action = 'display';
0038 end
0039
0040 if nargin < 4 || isempty(modelAdapter)
0041 modelAdapter = ModelAdapterManager.getDefault();
0042 if isempty(modelAdapter)
0043 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
0044 end
0045 end
0046 params = modelAdapter.getParameters();
0047
0048 rxnEnzMat = model.ec.rxnEnzMat;
0049 genes = modelAdapter.getUniprotCompatibleGenes(model.ec.genes);
0050
0051 data = loadDatabases('both', modelAdapter);
0052 uniprot = data.uniprot;
0053 kegg = data.kegg;
0054
0055 modelGenes = modelAdapter.getUniprotIDsFromTable(model.genes);
0056 DBgenesUniprot = data.uniprot.genes;
0057 if ~isequal(modelGenes,model.genes)
0058 [Lia,Locb] = ismember(modelGenes,uniprot.ID);
0059 DBgenesUniprot(Locb(Lia)) = model.genes(Lia);
0060 keepEntry = unique(Locb(Lia));
0061 DBgenesUniprot = DBgenesUniprot(keepEntry);
0062 else
0063 keepEntry = true(numel(DBgenesUniprot),1);
0064 end
0065 DBecNumUniprot = data.uniprot.eccodes(keepEntry);
0066 DBMWUniprot = data.uniprot.MW(keepEntry);
0067
0068 [geneIndexUniprot,geneHashMapUniprot] = hashGeneToProt(DBgenesUniprot);
0069
0070 if ~isempty(kegg)
0071 DBgenesKEGG = data.kegg.genes;
0072 DBecNumKEGG = data.kegg.eccodes;
0073 DBMWKEGG = data.kegg.MW;
0074 [geneIndexKEGG,geneHashMapKEGG] = hashGeneToProt(DBgenesKEGG);
0075 end
0076 n = size(rxnEnzMat,1);
0077
0078 eccodes = cell(n,1);
0079 eccodes(:)= {''};
0080 conflicts = cell(1,4);
0081
0082 rxnEnzMat = logical(rxnEnzMat);
0083 progressbar('Assigning EC numbers from database')
0084 for i = 1:n
0085 gns = genes(rxnEnzMat(i,:).');
0086 if ~isempty(gns)
0087
0088 [new_EC,multGenes] = findECInDB(gns,DBecNumUniprot,DBMWUniprot,geneIndexUniprot,geneHashMapUniprot);
0089 if ~isempty(new_EC)
0090 DBase = 'uniprot';
0091 if ~isempty(multGenes{1})
0092 multGenes{3} = DBase;
0093 end
0094 end
0095 if ~isempty(kegg) && (isempty(new_EC) || endsWith(new_EC,'-'))
0096
0097 [new_EC_kegg,multGenes] = findECInDB(gns,DBecNumKEGG,DBMWKEGG,geneIndexKEGG,geneHashMapKEGG);
0098 if ~isempty(new_EC_kegg)
0099 DBase = 'kegg';
0100 if ~isempty(multGenes{1})
0101 multGenes{3} = DBase;
0102 end
0103 new_EC=new_EC_kegg;
0104 end
0105 end
0106 eccodes{i} = new_EC;
0107
0108 if ~isempty(multGenes{1})
0109
0110 conflicts{1} = [conflicts{1};i];
0111
0112 conflicts{2} = [conflicts{2};multGenes{1}];
0113
0114 conflicts{3} = [conflicts{3};multGenes{2}];
0115
0116 conflicts{4} = [conflicts{4};{multGenes{3}}];
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127 end
0128 end
0129 progressbar(i/n)
0130 end
0131
0132
0133 if strcmpi(action,'display') && ~isempty(conflicts{1})
0134 displayErrorMessage(conflicts,uniprot,kegg)
0135 end
0136
0137 if nargin < 2 || isempty(ecRxns) || all(ecRxns)
0138 model.ec.eccodes = eccodes;
0139 else
0140 if ~isfield(model.ec,'eccodes')
0141 model.ec.eccodes(1:numel(model.ec.rxns),1) = {''};
0142 end
0143
0144
0145 model.ec.eccodes(ecRxns) = eccodes(ecRxns);
0146 end
0147
0148 function displayErrorMessage(conflicts,uniprot,kegg)
0149 STR = ['\n ' num2str(length(conflicts{1})) ' genes with multiple associated proteins were found, please'];
0150 STR = [STR, ' revise case by case in the uniprot and kegg files:\n\n'];
0151 for i=1:length(conflicts{1})
0152 if strcmpi(conflicts{4}{i},'uniprot')
0153 DB = uniprot.ID;
0154 else
0155 DB = kegg.uniprot;
0156 end
0157 proteins = DB(conflicts{3}{i});
0158 STR = [STR, '- gene: ' conflicts{2}{i} ' Proteins: ' strjoin(proteins) '\n'];
0159 end
0160 STR = [STR, '\nIf a wrongly annotated case was found then call the '];
0161 STR = [STR, 'getECfromDatabase.m function again with the option action'];
0162 STR = [STR, '= ignore\n\n'];
0163 STR = [STR, 'If the conflicting proteins are desired to be kept as isozymes'];
0164 STR = [STR, ' then call the getECfromDatabase.m function'];
0165 STR = [STR, ' again with the option action = add\n'];
0166 error(sprintf(STR))
0167 end
0168
0169 function [geneIndex,geneHashMap]=hashGeneToProt(proteinDB)
0170
0171 [x,y] = size(proteinDB);
0172 genesForIndex = reshape(proteinDB, x*y, 1);
0173 genesForIndex = genesForIndex(~cellfun(@isempty, genesForIndex));
0174 genesForIndex = unique(genesForIndex);
0175 geneIndex = cell(length(genesForIndex),1);
0176 geneHashMap = containers.Map(genesForIndex,1:length(genesForIndex));
0177 protIndices = 1:length(proteinDB(:,1));
0178 for i = 1:y
0179 tmp1 = proteinDB(:,i);
0180 sel = ~cellfun(@isempty, tmp1);
0181 indices = cell2mat(values(geneHashMap,tmp1(sel)));
0182 protIndicesSel = protIndices(sel);
0183 for j = 1:length(indices)
0184 geneIndex{indices(j)} = [geneIndex{indices(j)};protIndicesSel(j)];
0185 end
0186 end
0187 end
0188 end