Description of fuzzyKcatMatching

0001 function kcatList = fuzzyKcatMatching(model, ecRxns, modelAdapter, forceWClvl)
0002 % fuzzyKcatMatching
0003 %   Matchs the model EC numbers and substrates to the BRENDA database, to
0004 %   return the corresponding kcats for each reaction. If no exact match is
0005 %   found, less specific kcat values are found from (a) evolutionary
0006 %   closely related organism; (b) different substrate; (c) calculated from
0007 %   specific activities; (d) wildcards in the EC number. The model organism
0008 %   is provided in the model adapter as obj.params.org_name, and
0009 %   evolutionary distance to other organisms is determined via KEGG
0010 %   phylogeny. If an organism name occurs multiple times in KEGG, the first
0011 %   instance will be used when determining evolutionary distance.
0012 %
0013 % Input:
0014 %   model        an ecModel in GECKO 3 format (with ecModel.ec structure)
0015 %   ecRxns       for which reactions (from model.ec.rxns) kcat values should
0016 %                be found, provided as logical vector with same length as
0017 %                model.ec.rxns. (Opt, default is all reactions)
0018 %   modelAdapter a loaded model adapter (Optional, will otherwise use the
0019 %                default model adapter).
0020 %   forceWClvl   force a minimum wildcard level (Optional, default 0).
0021 %
0022 % Output:
0023 %   kcatList    structure array with list of BRENDA derived kcat values,
0024 %               with separate entries for each kcat value
0025 %               source      'brenda'
0026 %               rxns        reaction identifiers
0027 %               substrate   substrate names
0028 %               kcat        proposed kcat value in /sec
0029 %               eccodes     as used to query BRENDA
0030 %               wildCardLvl which level of EC wild-card was necessary to
0031 %                           find a match
0032 %                           0: w.x.y.z
0033 %                           1: w.x.y.-
0034 %                           2: w.x.-.-
0035 %                           3: w.-.-.-
0036 %               origin      which level of specificity was necessary to
0037 %                           find a match
0038 %                           1: correct organism, correct substrate, kcat
0039 %                           2: any organism, correct substrate, kcat
0040 %                           3: correct organism, any substrate, kcat
0041 %                           4: any organism, any substrate, kcat
0042 %                           5: correct organism, specific activity
0043 %                           6: any organism, specific activity
0044 %
0045 %   Note: If a wildcard is used, origin levels 1 and 2 are ignored. The
0046 %   last digits in the E.C. number indicate the substrate specificity, so
0047 %   if this should be ignored, then correct substrate matches should not be
0048 %   prioritized.
0049 %
0050 % Usage:
0051 %   kcatList = fuzzyKcatMatching(model, ecRxns, modelAdapter, forceWClvl)
0052 
0053 if nargin<2 || isempty(ecRxns)
0054     ecRxns = true(numel(model.ec.rxns),1);
0055 elseif isnumeric(ecRxns)
0056     ecRxnsVec = false(numel(model.ec.rxns),1);
0057     ecRxnsVec(ecRxns) = true;
0058     ecRxns = ecRxnsVec;
0059 end
0060 ecRxns=find(ecRxns); % Get indices instead of logical
0061 
0062 if nargin < 3 || isempty(modelAdapter)
0063     modelAdapter = ModelAdapterManager.getDefault();
0064     if isempty(modelAdapter)
0065         error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
0066     end
0067 end
0068 params = modelAdapter.params;
0069 
0070 if nargin < 4 || isempty(forceWClvl)
0071     forceWClvl = 0;
0072 end
0073 
0074 if ~isfield(model.ec,'eccodes')
0075     error('No EC codes defined in model.ec.eccodes. First run getECfromGEM() and/or getECfromDatabase().')
0076 end
0077 eccodes      = model.ec.eccodes(ecRxns);
0078 substrates   = cell(numel(ecRxns),1);
0079 substrCoeffs = cell(numel(ecRxns),1);
0080 
0081 %Need to remove the prefix of GECKO light rxn names in the ec structure
0082 if ~model.ec.geckoLight
0083     rxnNames = model.ec.rxns;
0084 else
0085     rxnNames = extractAfter(model.ec.rxns, 4);
0086 end
0087 [~,originalRxns] = ismember(rxnNames(ecRxns),model.rxns);
0088 for i = 1:length(ecRxns)
0089     sel = find(model.S(:,originalRxns(i)) < 0);
0090     substrates{i}  = model.metNames(sel); 
0091     substrCoeffs{i} = -model.S(sel,originalRxns(i));
0092 end
0093 
0094 %Load BRENDA data:
0095 [KCATcell, SAcell] = loadBRENDAdata(modelAdapter);
0096 
0097 %Creates a Structure with KEGG codes for organisms, names and taxonomical
0098 %distance matrix and extract the organism index in the KEGG struct
0099 phylDistStruct =  KEGG_struct(modelAdapter.getPhylDistStructPath());
0100 %Get the KEGG code for the model's organism
0101 org_name       = params.org_name;
0102 org_index      = find_inKEGG(org_name,phylDistStruct.names);
0103 %build an index for genus in the phyl dist struct
0104 %first just extract the genus (i.e. the first part of the name)
0105 phylDistStruct.genus = lower(regexprep(phylDistStruct.names,'\s.*',''));
0106 %create a map for the genuses
0107 phylDistStruct.uniqueGenusList = unique(phylDistStruct.genus);
0108 phylDistStruct.genusHashMap = containers.Map(phylDistStruct.uniqueGenusList,1:length(phylDistStruct.uniqueGenusList));
0109 phylDistStruct.uniqueGenusIndices = cell(length(phylDistStruct.uniqueGenusList),1);
0110 
0111 %Then for each genus create a list with indices to the names
0112 for i = 1:length(phylDistStruct.genus)
0113     matchInd = cell2mat(values(phylDistStruct.genusHashMap,phylDistStruct.genus(i)));
0114     phylDistStruct.uniqueGenusIndices{matchInd} = [phylDistStruct.uniqueGenusIndices{matchInd};i];
0115 end
0116 
0117 %Allocate output
0118 kcats = zeros(length(eccodes),1);
0119 mM = length(eccodes);
0120 
0121 %Create empty kcatInfo
0122 %Legacy, no longer given as output, rather used to construct
0123 %kcatList.wildcardLvl and kcatList.origin.
0124 kcatInfo.info.org_s   = zeros(mM,1);
0125 kcatInfo.info.rest_s  = zeros(mM,1);
0126 kcatInfo.info.org_ns  = zeros(mM,1);
0127 kcatInfo.info.rest_ns = zeros(mM,1);
0128 kcatInfo.info.org_sa  = zeros(mM,1);
0129 kcatInfo.info.rest_sa = zeros(mM,1);
0130 kcatInfo.info.wcLevel = NaN(mM,1);
0131 kcatInfo.stats.queries  = 0;
0132 kcatInfo.stats.org_s    = 0;
0133 kcatInfo.stats.rest_s   = 0;
0134 kcatInfo.stats.org_ns   = 0;
0135 kcatInfo.stats.rest_ns  = 0;
0136 kcatInfo.stats.org_sa   = 0;
0137 kcatInfo.stats.rest_sa  = 0;
0138 kcatInfo.stats.wc0      = 0;
0139 kcatInfo.stats.wc1      = 0;
0140 kcatInfo.stats.wc2      = 0;
0141 kcatInfo.stats.wc3      = 0;
0142 kcatInfo.stats.wc4      = 0;
0143 kcatInfo.stats.matrix   = zeros(6,5);
0144 
0145 %build an EC index to speed things up a bit - many of the ECs appear
0146 %many times - unnecessary to compare them all
0147 %so, here, each EC string appears only once, and you get a vector with
0148 %indices to the rows in KCATcell
0149 [ECIndexIds,~,ic] = unique(KCATcell{1});
0150 EcIndexIndices = cell(length(ECIndexIds),1);
0151 for i = 1:length(EcIndexIndices)
0152     EcIndexIndices{i} = find(ic == i).';
0153 end
0154 
0155 %Apply force wildcard level
0156 while forceWClvl > 0
0157     eccodes=regexprep(eccodes,'(.)*(\.\d+)(\.-)*$','$1\.-$3');
0158     forceWClvl = forceWClvl - 1;
0159 end
0160 if forceWClvl == 1
0161     eccodes = regexprep(eccodes,'.*','-\.-\.-\.-');
0162 end
0163 
0164 progressbar('Gathering kcat values by fuzzy matching to BRENDA database')
0165 %Main loop:
0166 for i = 1:mM
0167     %Match:
0168     EC = eccodes{i};
0169     if ~isempty(EC)
0170         EC = strsplit(EC,';');
0171         %Try to match direct reaction:
0172         if ~isempty(substrates{i})
0173             [kcats(i), kcatInfo.info,kcatInfo.stats] = iterativeMatch(EC,substrates{i},substrCoeffs{i},i,KCATcell,...
0174                 kcatInfo.info,kcatInfo.stats,org_name,...
0175                 phylDistStruct,org_index,SAcell,ECIndexIds,EcIndexIndices);
0176         end
0177     end
0178     progressbar(i/mM)
0179 end
0180 
0181 kcatList.source      = 'brenda';
0182 kcatList.rxns        = model.ec.rxns(ecRxns);
0183 kcatList.substrates  = substrates;
0184 kcatList.kcats       = kcats;
0185 kcatList.eccodes     = eccodes;
0186 kcatList.wildcardLvl = kcatInfo.info.wcLevel;
0187 kcatList.origin      = NaN(numel(model.ec.rxns(ecRxns)),1);
0188 % This can be refactored, iterativeMatch and their nested functions can
0189 % just directly report the origin number.
0190 origin = [kcatInfo.info.org_s kcatInfo.info.rest_s kcatInfo.info.org_ns kcatInfo.info.rest_ns kcatInfo.info.org_sa kcatInfo.info.rest_sa];
0191 for i=1:6
0192     kcatList.origin(find(origin(:,i))) = i;
0193 end
0194 end
0195 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0196 function [kcat,dir,tot] =iterativeMatch(EC,subs,substrCoeff,i,KCATcell,dir,tot,...
0197     name,phylDist,org_index,SAcell,ECIndexIds,EcIndexIndices)
0198 %Will iteratively try to match the EC number to some registry in BRENDA,
0199 %using each time one additional wildcard.
0200 
0201 kcat    = zeros(size(EC));
0202 origin  = zeros(size(EC));
0203 matches = zeros(size(EC));
0204 wc_num  = ones(size(EC)).*1000;
0205 for k = 1:length(EC)
0206     success  = false;
0207     while ~success
0208         %Atempt match:
0209         [kcat(k),origin(k),matches(k)] = mainMatch(EC{k},subs,substrCoeff,KCATcell,...
0210             name,phylDist,...
0211             org_index,SAcell,ECIndexIds,EcIndexIndices);
0212         %If any match found, ends. If not, introduces one extra wild card and
0213         %tries again:
0214         if origin(k) > 0
0215             success   = true;
0216             wc_num(k) = sum(EC{k}=='-');
0217         else
0218             dot_pos  = [2 strfind(EC{k},'.')];
0219             wild_num = sum(EC{k}=='-');
0220             wc_text  = '-.-.-.-';
0221             EC{k}    = [EC{k}(1:dot_pos(4-wild_num)) wc_text(1:2*wild_num+1)];
0222         end
0223     end
0224 end
0225 
0226 if sum(origin) > 0
0227     %For more than one EC: Choose the maximum value among the ones with the
0228     %less amount of wildcards and the better origin:
0229     best_pos   = (wc_num == min(wc_num));
0230     new_origin = origin(best_pos);
0231     best_pos   = (origin == min(new_origin(new_origin~=0)));
0232     max_pos    = find(kcat == max(kcat(best_pos)));
0233     wc_num     = wc_num(max_pos(1));
0234     origin     = origin(max_pos(1));
0235     matches    = matches(max_pos(1));
0236     kcat       = kcat(max_pos(1));
0237 
0238     %Update dir and tot:
0239     dir.org_s(i)   = matches*(origin == 1);
0240     dir.rest_s(i)  = matches*(origin == 2);
0241     dir.org_ns(i)  = matches*(origin == 3);
0242     dir.org_sa(i)  = matches*(origin == 4);
0243     dir.rest_ns(i) = matches*(origin == 5);
0244     dir.rest_sa(i) = matches*(origin == 6);
0245     dir.wcLevel(i) = wc_num;
0246     tot.org_s        = tot.org_s   + (origin == 1);
0247     tot.rest_s       = tot.rest_s  + (origin == 2);
0248     tot.org_ns       = tot.org_ns  + (origin == 3);
0249     tot.org_sa       = tot.org_sa  + (origin == 4);
0250     tot.rest_ns      = tot.rest_ns + (origin == 5);
0251     tot.rest_sa      = tot.rest_sa + (origin == 6);
0252     tot.wc0          = tot.wc0     + (wc_num == 0);
0253     tot.wc1          = tot.wc1     + (wc_num == 1);
0254     tot.wc2          = tot.wc2     + (wc_num == 2);
0255     tot.wc3          = tot.wc3     + (wc_num == 3);
0256     tot.wc4          = tot.wc4     + (wc_num == 4);
0257     tot.queries      = tot.queries + 1;
0258     tot.matrix(origin,wc_num+1) = tot.matrix(origin,wc_num+1) + 1;
0259 end
0260 
0261 end
0262 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0263 
0264 function [kcat,origin,matches] = mainMatch(EC,subs,substrCoeff,KCATcell,...
0265     name,phylDist,org_index,SAcell,ECIndexIds,EcIndexIndices)
0266 
0267 %First make the string matching. This takes time, so we only want to do
0268 %this once:
0269 %Relaxes matching if wild cards are present:
0270 wild     = false;
0271 wild_pos = strfind(EC,'-');
0272 if ~isempty(wild_pos)
0273     EC   = EC(1:wild_pos(1)-1);
0274     wild = true;
0275 end
0276 stringMatchesEC_cell = extract_string_matches(EC,KCATcell{1},wild,ECIndexIds,EcIndexIndices);
0277 
0278 % Matching function prioritizing organism and substrate specificity when
0279 % available.
0280 
0281 origin = 0;
0282 %First try to match organism and substrate:
0283 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,true,false,...
0284     phylDist,org_index,SAcell,stringMatchesEC_cell,[]);
0285 if matches > 0 && ~wild % If wildcard, ignore substrate match
0286     origin = 1;
0287     %If no match, try the closest organism but match the substrate:
0288 else
0289     [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',true,false,...
0290         phylDist,org_index,SAcell,stringMatchesEC_cell,[]);
0291     if matches > 0 && ~wild % If wildcard, ignore substrate match
0292         origin = 2;
0293         %If no match, try to match organism but with any substrate:
0294     else
0295         [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,false,false,...
0296             phylDist,org_index,SAcell,stringMatchesEC_cell,[]);
0297         if matches > 0
0298             origin = 3;
0299             %If no match, try to match organism but for any substrate (SA*MW):
0300         else
0301             %create matching index for SA, has not been needed until now
0302             stringMatchesSA = extract_string_matches(EC,SAcell{1},wild,[],[]);
0303 
0304             [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,false,...
0305                 true,phylDist,org_index,...
0306                 SAcell,stringMatchesEC_cell,stringMatchesSA);
0307             if matches > 0
0308                 origin = 4;
0309                 %If no match, try any organism and any substrate:
0310             else
0311                 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',false,...
0312                     false,phylDist,...
0313                     org_index,SAcell,stringMatchesEC_cell,stringMatchesSA);
0314                 if matches > 0
0315                     origin = 5;
0316                     %Again if no match, look for any org and SA*MW
0317                 else
0318                     [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',...
0319                         false,true,phylDist,...
0320                         org_index,SAcell,stringMatchesEC_cell,stringMatchesSA);
0321                     if matches > 0
0322                         origin = 6;
0323                     end
0324                 end
0325 
0326             end
0327         end
0328     end
0329 end
0330 end
0331 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0332 function [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,organism,...
0333     substrate,SA,phylDist,...
0334     org_index,SAcell,KCATcellMatches,SAcellMatches)
0335 
0336 %Will go through BRENDA and will record any match. Afterwards, it will
0337 %return the average value and the number of matches attained.
0338 kcat    = [];
0339 matches = 0;
0340 
0341 if SA
0342     %SAcell{1},wild,[],[]
0343     EC_indexes = extract_indexes(SAcellMatches,[],SAcell{2},subs,substrate,...
0344         organism,org_index,phylDist);
0345 
0346     kcat       = SAcell{3}(EC_indexes);
0347     org_cell   = SAcell{2}(EC_indexes);
0348     MW_BRENDA  = SAcell{4}(EC_indexes);
0349 
0350 else
0351     %KCATcell{1},wild,ECIndexIds,EcIndexIndices
0352     EC_indexes = extract_indexes(KCATcellMatches,KCATcell{2},KCATcell{3},...
0353         subs,substrate,organism,org_index,...
0354         phylDist);
0355     if substrate
0356         for j = 1:length(EC_indexes)
0357             indx = EC_indexes(j);
0358             for k = 1:length(subs)
0359                 if (isempty(subs{k}))
0360                     break;
0361                 end
0362                 %l = logical(strcmpi(model.metNames,subs{k}).*(model.S(:,i)~=0)); %I don't understand the .* (model.S(:,i)~=0) part, it shouldn't be needed?/JG;
0363                 if ~isempty(subs{k}) && strcmpi(subs{k},KCATcell{2}(indx))
0364                     if KCATcell{4}(indx) > 0
0365                         coeff = min(substrCoeff);
0366                         kCatTmp = KCATcell{4}(indx);
0367                         kcat  = [kcat;kCatTmp/coeff];
0368                     end
0369                 end
0370             end
0371         end
0372     else
0373         kcat = KCATcell{4}(EC_indexes);
0374     end
0375 end
0376 %Return maximum value:
0377 if isempty(kcat)
0378     kcat = 0;
0379 else
0380     matches        = length(kcat);
0381     [kcat,MaxIndx] = max(kcat);
0382 end
0383 %Avoid SA*Mw values over the diffusion limit rate  [Bar-Even et al. 2011]
0384 if kcat>(1E7)
0385     kcat = 1E7;
0386 end
0387 end
0388 
0389 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0390 %Make the string matches of the ECs. This is heavy, so only do it once!
0391 %
0392 function EC_indexes = extract_string_matches(EC,EC_cell,wild,ECIndexIds,EcIndexIndices)
0393 EC_indexes = [];
0394 EC_indexesOld = [];
0395 if wild
0396     if (~isempty(ECIndexIds)) %In some cases the EC_cell is not from KCatCell
0397         X = find(contains(ECIndexIds, EC));
0398         for j = 1:length(X)
0399             EC_indexes = [EC_indexes,EcIndexIndices{X(j)}];
0400         end
0401     else %Not optimized
0402         for j=1:length(EC_cell)
0403             if strfind(EC_cell{j},EC)==1
0404                 EC_indexes = [EC_indexes,j];
0405             end
0406         end
0407     end
0408 else
0409     if (~isempty(ECIndexIds)) %In some cases the EC_cell is not from KCatCell
0410         mtch = find(strcmpi(EC,ECIndexIds));
0411         if (~isempty(mtch))
0412             EC_indexes = EcIndexIndices{mtch};
0413         end
0414     else %%Not optimized
0415         if ~isempty(EC_cell)
0416             EC_indexes = transpose(find(strcmpi(EC,EC_cell)));
0417         end
0418     end
0419 end
0420 
0421 end
0422 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0423 %Extract the indexes of the entries in the BRENDA data that meet the
0424 %conditions specified by the search criteria
0425 function EC_indexes = extract_indexes(EC_indCellStringMatches,subs_cell,orgs_cell,subs,...
0426     substrate,organism, org_index,...
0427     phylDist)
0428 
0429 EC_indexes = EC_indCellStringMatches;%reuse so the string comparisons are not run many times
0430 
0431 %If substrate=true then it will extract only the substrates appereances
0432 %indexes in the EC subset from the BRENDA cell array
0433 
0434 if substrate
0435     if (~isempty(EC_indexes)) %optimization
0436         Subs_indexes = [];
0437         for l = 1:length(subs)
0438             if (isempty(subs{l}))
0439                 break;
0440             end
0441             Subs_indexes = horzcat(Subs_indexes,EC_indexes(strcmpi(subs(l),...
0442                 subs_cell(EC_indexes))));
0443         end
0444         EC_indexes = Subs_indexes;
0445     end
0446 end
0447 
0448 EC_orgs = orgs_cell(EC_indexes);
0449 %If specific organism values are requested looks for all the organism
0450 %repetitions on the subset BRENDA cell array(EC_indexes)
0451 if string(organism) ~= ''
0452     EC_indexes = EC_indexes(strcmpi(string(organism),EC_orgs));
0453 
0454     %If KEGG code was assigned to the organism (model) then it will look for
0455     %the Kcat value for the closest organism
0456 elseif org_index~='*' %&& org_index~=''
0457     KEGG_indexes = [];temp = [];
0458 
0459     %For relating a phyl dist between the modelled organism and the organisms
0460     %on the BRENDA cell array it should search for a KEGG code for each of
0461     %these
0462     for j=1:length(EC_indexes)
0463         %Assigns a KEGG index for those found on the KEGG struct
0464         orgs_index = find(strcmpi(orgs_cell(EC_indexes(j)),phylDist.names),1);
0465         if ~isempty(orgs_index)
0466             KEGG_indexes = [KEGG_indexes; orgs_index];
0467             temp         = [temp;EC_indexes(j)];
0468             %For values related to organisms without KEGG code, then it will
0469             %look for KEGG code for the first organism with the same genus
0470         else
0471             org = orgs_cell{EC_indexes(j)};
0472             orgGenus = lower(regexprep(org,'\s.*',''));
0473             if isKey(phylDist.genusHashMap,orgGenus) %annoyingly, this seems to be needed
0474                 matchInd = cell2mat(values(phylDist.genusHashMap,{orgGenus}));
0475                 matches = phylDist.uniqueGenusIndices{matchInd};
0476                 k = matches(1);
0477                 KEGG_indexes = [KEGG_indexes;k];
0478                 temp         = [temp;EC_indexes(j)];
0479             end
0480         end
0481     end
0482     %Update the EC_indexes cell array
0483     EC_indexes = temp;
0484     %Looks for the taxonomically closest organism and saves the index of
0485     %its appearences in the BRENDA cell
0486     if ~isempty(EC_indexes)
0487         distances = phylDist.distMat(org_index,KEGG_indexes);
0488         EC_indexes = EC_indexes(distances == min(distances));
0489     end
0490 end
0491 end
0492 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0493 function org_index = find_inKEGG(org_name,names)
0494 org_index      = find(strcmpi(org_name,names));
0495 if numel(org_index)>1
0496     org_index = org_index(1);
0497 elseif isempty(org_index)
0498     org_name    = regexprep(org_name,'\s.*','');
0499     org_index   = find(strcmpi(org_name,names));
0500     if numel(org_index)>1
0501         org_index = org_index(1);
0502     elseif isempty(org_index)
0503         org_index = '*';
0504     end
0505 end
0506 end
0507 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0508 function phylDistStruct =  KEGG_struct(phylpath)
0509 load(phylpath)
0510 phylDistStruct.ids   = transpose(phylDistStruct.ids);
0511 phylDistStruct.names = transpose(phylDistStruct.names);
0512 phylDistStruct.names = regexprep(phylDistStruct.names,'\s*\(.*','');
0513 end
fuzzyKcatMatching

PURPOSE

SYNOPSIS

DESCRIPTION

CROSS-REFERENCE INFORMATION

SUBFUNCTIONS

SOURCE CODE