Home > src > geckomat > gather_kcats > mergeDLKcatAndFuzzyKcats.m

mergeDLKcatAndFuzzyKcats

PURPOSE ^

mergeDLKcatAndFuzzyKcats

SYNOPSIS ^

function mergedKcatList = mergeDLKcatAndFuzzyKcats(kcatListDLKcat, kcatListFuzzy, topOriginLimit, bottomOriginLimit, wildcardLimit)

DESCRIPTION ^

 mergeDLKcatAndFuzzyKcats
   Merges the results from DLKcat and fuzzy matching to BRENDA database.
   Order of preference:
   1: BRENDA match with correct E.C. number, with origin value (see below)
      not higher than the specified topOriginLimit
      (origin <= topOriginLimit)
   2: DLKcat match
   3: BRENDA match with correct E.C. number, with origin value higher than
      topOriginLimit but not higher than the bottomOriginLimit
      (topOriginLimit < origin <= bottomOriginLimit)
   4: BRENDA match with wildcards in the E.C. number, with not more
      wildcards than wildcardLimit, and origin value not higher than the
      bottomOriginLimit (origin <= bottomOriginLimit)

 Input:
   kcatListDLKcat      kcatList derived from readDLKcatOutput
   kcatListFuzzy       kcatList derived from fuzzyKcatMatching
   topOriginLimit      origin limit for prioritized BRENDA matches. Origin
                       is explained in more detail below. (Optional,
                       default 6)
   bottomOriginLimit   origin limit for low priority BRENDA matches.
                       Origin is explained in more detail below.
                       (Optional, default 6)
   wildcardLimit       maximum number of wildcards in E.C. number of
                       BRENDA matches (Optional, default 3)

 Output:
   mergedKcatList      merged list of kcats
   
 The origin parameter:
   1: correct organism, correct substrate, kcat
   2: any organism, correct substrate, kcat
   3: correct organism, any substrate, kcat
   4: any organism, any substrate, kcat
   5: correct organism, specific activity
   6: any organism, specific activity

 Example of wildcards in E.C. number:
   0: 1.1.1.3      glycerol-3-phosphate dehydrogenase (NAD+)
   1: 1.1.1.-      oxidoreductase, acting on the CH-OH group of donors,
                   with NAD+ or NADP+ as acceptor
   2: 1.1.-.-      oxidoreductase, acting on the CH-OH group of donors
   3: 1.-.-.-      oxidoreductase

 Usage:
   mergedKcatList = mergeDLKcatAndFuzzyKcats(kcatListDLKcat, kcatListFuzzy, topOriginLimit, bottomOriginLimit, wildcardLimit)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function mergedKcatList = mergeDLKcatAndFuzzyKcats(kcatListDLKcat, kcatListFuzzy, topOriginLimit, bottomOriginLimit, wildcardLimit)
% mergeDLKcatAndFuzzyKcats
%   Merges the results from DLKcat and fuzzy matching to BRENDA database.
%   Order of preference:
%   1: BRENDA match with correct E.C. number (no wildcards), with origin
%      value (see below) not higher than the specified topOriginLimit
%      (origin <= topOriginLimit)
%   2: DLKcat match
%   3: BRENDA match with correct E.C. number, with origin value higher than
%      topOriginLimit but not higher than bottomOriginLimit
%      (topOriginLimit < origin <= bottomOriginLimit)
%   4: BRENDA match with wildcards in the E.C. number, with not more
%      wildcards than wildcardLimit, and origin value not higher than
%      bottomOriginLimit (origin <= bottomOriginLimit)
%   Levels 3 and 4 are selected together; a fuzzy entry at these levels is
%   dropped when its reaction is covered by a retained DLKcat entry.
%
% Input:
%   kcatListDLKcat      kcatList derived from readDLKcatOutput. The fields
%                       source, rxns, genes, substrates and kcats are read.
%   kcatListFuzzy       kcatList derived from fuzzyKcatMatching. The fields
%                       source, rxns, substrates, kcats, eccodes,
%                       wildcardLvl and origin are read.
%   topOriginLimit      origin limit for prioritized BRENDA matches. Origin
%                       is explained in more detail below. (Optional,
%                       default 6; an empty value selects the default)
%   bottomOriginLimit   origin limit for low priority BRENDA matches.
%                       Origin is explained in more detail below.
%                       (Optional, default 6; an empty value selects the
%                       default)
%   wildcardLimit       maximum number of wildcards in E.C. number of
%                       BRENDA matches (Optional, default 3; an empty value
%                       selects the default)
%
% Output:
%   mergedKcatList      merged list of kcats. Selected fuzzy entries come
%                       first, followed by the retained DLKcat entries, so
%                       the original reaction order is not preserved.
%                       genes is empty for fuzzy entries; eccodes is empty
%                       and wildcardLvl/origin are NaN for DLKcat entries.
%
% The origin parameter:
%   1: correct organism, correct substrate, kcat
%   2: any organism, correct substrate, kcat
%   3: correct organism, any substrate, kcat
%   4: any organism, any substrate, kcat
%   5: correct organism, specific activity
%   6: any organism, specific activity
%
% Example of wildcards in E.C. number:
%   0: 1.1.1.3      glycerol-3-phosphate dehydrogenase (NAD+)
%   1: 1.1.1.-      oxidoreductase, acting on the CH-OH group of donors,
%                   with NAD+ or NADP+ as acceptor
%   2: 1.1.-.-      oxidoreductase, acting on the CH-OH group of donors
%   3: 1.-.-.-      oxidoreductase
%
% Usage:
%   mergedKcatList = mergeDLKcatAndFuzzyKcats(kcatListDLKcat, kcatListFuzzy, topOriginLimit, bottomOriginLimit, wildcardLimit)

%Fill in defaults; an empty argument is treated the same as an omitted one
if nargin < 5 || isempty(wildcardLimit)
    wildcardLimit = 3;
end

if nargin < 4 || isempty(bottomOriginLimit)
    bottomOriginLimit = 6;
end

if nargin < 3 || isempty(topOriginLimit)
    topOriginLimit = 6;
end

%Range checks; the messages name the actual input parameters
if (topOriginLimit < 1) || (topOriginLimit > 6)
    error('topOriginLimit should be between 1 and 6.');
end

if (bottomOriginLimit < 1) || (bottomOriginLimit > 6)
    error('bottomOriginLimit should be between 1 and 6.');
end

if (wildcardLimit < 0) || (wildcardLimit > 3)
    error('wildcardLimit should be between 0 and 3.');
end

%Entries without a fuzzy match carry NaN; replace with a value that is
%larger than any allowed limit so they never pass the comparisons below
wc = kcatListFuzzy.wildcardLvl;
wc(isnan(wc)) = 1000; %large wildcard

origin = kcatListFuzzy.origin;
origin(isnan(origin)) = 1000; %large origin

%Priority 1: exact E.C. number and origin within the top limit
prio1 = (wc == 0) & (origin <= topOriginLimit);

rxnsWithPrio1 = unique(kcatListFuzzy.rxns(prio1));

%Things get a bit complicated since not all reactions are in the kcatLists and
%some reactions may appear multiple times
%Priority 2: every DLKcat entry whose reaction has no priority 1 match
prio2 = true(length(kcatListDLKcat.rxns),1);
prio2(ismember(kcatListDLKcat.rxns, rxnsWithPrio1)) = false;
prio2Rxns = unique(kcatListDLKcat.rxns(prio2));

%The prioritization between wildcards and origin is already done in fuzzy matching,
%so we can join them here
prio3 = ((wc == 0) & (origin > topOriginLimit) & (origin <= bottomOriginLimit)) | ...
        ((wc > 0) & (wc <= wildcardLimit) & (origin <= bottomOriginLimit));
%DLKcat wins over these lower priority fuzzy matches
prio3(ismember(kcatListFuzzy.rxns, prio2Rxns)) = false;

fuzzyRxns = prio1 | prio3;
nFuzzy    = nnz(fuzzyRxns);
nDLKcat   = nnz(prio2);

%Now build the merged list, fuzzy followed by dlkcat
%The order of the reactions is therefore not preserved.
%repmat is used for the source labels since it is well defined also when
%no entries are selected from one of the lists (yields a 0x1 cell).
mergedKcatList               = struct();
mergedKcatList.source        = 'Merged DLKcat and fuzzy';
mergedKcatList.kcatSource    = [repmat({kcatListFuzzy.source}, nFuzzy, 1);repmat({kcatListDLKcat.source}, nDLKcat, 1)];
mergedKcatList.rxns          = [kcatListFuzzy.rxns(fuzzyRxns);kcatListDLKcat.rxns(prio2)];
mergedKcatList.genes         = [cell(nFuzzy,1);kcatListDLKcat.genes(prio2)];
mergedKcatList.substrates    = [kcatListFuzzy.substrates(fuzzyRxns);kcatListDLKcat.substrates(prio2)];
mergedKcatList.kcats         = [kcatListFuzzy.kcats(fuzzyRxns);kcatListDLKcat.kcats(prio2)];
mergedKcatList.eccodes       = [kcatListFuzzy.eccodes(fuzzyRxns);cell(nDLKcat,1)];
mergedKcatList.wildcardLvl   = [kcatListFuzzy.wildcardLvl(fuzzyRxns);nan(nDLKcat,1)];
mergedKcatList.origin        = [kcatListFuzzy.origin(fuzzyRxns);nan(nDLKcat,1)];
end

Generated by m2html © 2005