function phylDistStruct=getPhylDist(keggPath,onlyInKingdom)
% getPhylDist
%   Returns a phylogenetic distance structure for the organisms listed in
%   the KEGG 'taxonomy' file. A cached copy (keggPhylDist.mat, stored under
%   <RAVEN root>/external/kegg) is loaded if it exists; otherwise the
%   structure is generated from the taxonomy file and the cache is written.
%
%   Input:
%   keggPath        path to the directory containing the KEGG file
%                   'taxonomy'. Only read when the cached .mat file is
%                   missing (optional, default 'RAVEN/external/kegg')
%   onlyInKingdom   if true, return the variant of the structure in which
%                   the distance between organisms whose top-level
%                   categories differ is Inf and all other distances are 0
%                   (optional, default false)
%
%   Output:
%   phylDistStruct  structure with the fields
%       ids         cell array with organism ids
%       names       cell array with organism names (not present in the
%                   onlyInKingdom variant)
%       distMat     square matrix where element (i,j) is the distance
%                   from organism i to organism j
%
%   Usage: phylDistStruct=getPhylDist(keggPath,onlyInKingdom)

if nargin<1
    keggPath='RAVEN/external/kegg';
else
    keggPath=char(keggPath);
end
if nargin<2
    onlyInKingdom=false;
end

% The cache always lives inside the RAVEN installation, independent of
% keggPath
ravenPath=findRAVENroot();
distFile=fullfile(ravenPath,'external','kegg','keggPhylDist.mat');
if exist(distFile, 'file')
    % The path is passed as an argument rather than being part of the
    % format string, so that characters such as '%' in it are printed
    % verbatim
    fprintf('Importing the KEGG phylogenetic distance matrix from %s... ',strrep(distFile,'\','/'));
    % Load into a struct instead of creating variables implicitly in the
    % function workspace
    cached=load(distFile);
    phylDistStruct=cached.phylDistStruct;
    if isfield(cached,'phylDistStructOnlyInKingdom')
        phylDistStructOnlyInKingdom=cached.phylDistStructOnlyInKingdom;
    end
    fprintf('COMPLETE\n');
else
    fprintf('Cannot locate %s\n',strrep(distFile,'\','/'));
    taxonomyFile=fullfile(keggPath,'taxonomy');
    if ~isfile(taxonomyFile)
        % Build the message as text. (Assigning the output of fprintf
        % would hand the number of printed bytes to dispEM.)
        % NOTE(review): assumes dispEM appends its own line break; confirm
        EM=['The file ''taxonomy'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP.'];
        dispEM(EM);
    else
        fprintf('Generating keggPhylDist.mat file... ');

        fid = fopen(taxonomyFile, 'r');
        if fid==-1
            EM=['The file ''taxonomy'' at ' strrep(keggPath,'\','/') '/ could not be opened for reading.'];
            dispEM(EM);
        end
        % Make sure the file is closed also if parsing fails
        closeTaxonomy=onCleanup(@() fclose(fid));

        phylDistStruct.ids={};
        phylDistStruct.names={};

        % orgCat{n} holds the chain of categories (outermost first) for
        % organism n
        orgCat={};

        % The chain of categories that the parser is currently inside
        currentCat={};

        % Nesting level of the most recently read category line
        depth=0;

        orgCounter=0;
        while true
            tline = fgetl(fid);

            % fgetl returns -1 (non-char) at end of file
            if ~ischar(tline)
                break;
            end

            if any(tline)
                if tline(1)=='#'
                    % Category line. The number of characters before the
                    % first space is used as the nesting level
                    sPos=strfind(tline,' ')-1;
                    sPos=sPos(1);

                    % Moving to a shallower level; drop the deeper
                    % categories
                    if sPos<depth
                        currentCat=currentCat(1:sPos);
                    end
                    depth=sPos;

                    % The category name is everything after the first space
                    currentCat{depth}=tline(sPos+2:end);
                else
                    % Organism line
                    orgCounter=orgCounter+1;

                    % The id is the text between the first and second
                    % whitespace characters; the name is everything after
                    % the third one
                    sPos=find(isstrprop(tline, 'wspace'));

                    phylDistStruct.ids{orgCounter}=tline(sPos(1)+1:sPos(2)-1);
                    phylDistStruct.names{orgCounter}=tline(sPos(3)+1:end);
                    orgCat{orgCounter}=currentCat;
                end
            end
        end
        % Parsing is done; clearing the onCleanup object closes the file
        clear closeTaxonomy;

        nOrgs=numel(phylDistStruct.ids);
        phylDistStruct.distMat=zeros(nOrgs);
        phylDistStructOnlyInKingdom.distMat=zeros(nOrgs);
        phylDistStructOnlyInKingdom.ids=phylDistStruct.ids;
        for i=1:nOrgs
            for j=1:nOrgs
                % Different top-level category: unreachable in the
                % kingdom-restricted matrix
                if ~strcmp(orgCat{i}(1),orgCat{j}(1))
                    phylDistStructOnlyInKingdom.distMat(i,j)=Inf;
                end

                % Trim the deeper of the two category chains so that both
                % have the same length
                dist=numel(orgCat{i})-numel(orgCat{j});
                if dist>0
                    aCat=orgCat{i}(1:end-dist);
                else
                    aCat=orgCat{i};
                end
                if dist<0
                    bCat=orgCat{j}(1:end+dist);
                else
                    bCat=orgCat{j};
                end

                % Walk from the deepest shared level upwards until the
                % categories agree. If they never agree, k is left at 1.
                for k=numel(aCat):-1:1
                    if strcmp(aCat{k},bCat{k})
                        break;
                    end
                end
                % NOTE(review): dist is signed, so distMat(i,j) and
                % distMat(j,i) differ when the two organisms are at
                % different depths. Kept as is since the saved matrix is
                % used elsewhere; confirm that this is intended.
                phylDistStruct.distMat(i,j)=dist+numel(aCat)-k;
            end
        end

        % Cache both variants for subsequent calls
        save(distFile,'phylDistStruct','phylDistStructOnlyInKingdom');
        fprintf('COMPLETE\n');
    end
end
if onlyInKingdom==true
    phylDistStruct=phylDistStructOnlyInKingdom;
end
end