0001 function [model,KOModel]=getModelFromKEGG(keggPath,keepSpontaneous,...
0002 keepUndefinedStoich,keepIncomplete,keepGeneral)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047 ravenPath=findRAVENroot();
0048
0049 if nargin<1
0050 keggPath=fullfile(ravenPath,'external','kegg');
0051 else
0052 keggPath=char(keggPath);
0053 end
0054 if nargin<2
0055 keepSpontaneous=true;
0056 end
0057 if nargin<3
0058 keepUndefinedStoich=true;
0059 end
0060 if nargin<4
0061 keepIncomplete=true;
0062 end
0063 if nargin<5
0064 keepGeneral=false;
0065 end
0066
0067 modelFile=fullfile(ravenPath,'external','kegg','keggModel.mat');
0068 if exist(modelFile, 'file') && isNewestFile(ravenPath)
0069 fprintf(['Importing the global KEGG model from ' strrep(modelFile,'\','/') '... ']);
0070 load(modelFile);
0071 fprintf('COMPLETE\n');
0072 else
0073 fprintf(['NOTE: The file ' strrep(modelFile,'\','/') ' does not exist or is out-of-date and therefore will be (re)generated\n']);
0074
0075 [model,isSpontaneous,isUndefinedStoich,isIncomplete,isGeneral]=getRxnsFromKEGG(keggPath);
0076
0077
0078
0079 KOs=cell(numel(model.rxns)*2,1);
0080
0081
0082 addToIndex=1;
0083
0084 for i=1:numel(model.rxns)
0085 if isstruct(model.rxnMiriams{i})
0086 for j=1:numel(model.rxnMiriams{i}.name)
0087 if strcmpi('kegg.orthology',model.rxnMiriams{i}.name{j})
0088
0089 KOs(addToIndex)=model.rxnMiriams{i}.value(j);
0090 addToIndex=addToIndex+1;
0091 end
0092 end
0093 end
0094 end
0095
0096 KOs=KOs(1:addToIndex-1);
0097 KOs=unique(KOs);
0098
0099
0100
0101 KOModel=getGenesFromKEGG(keggPath,KOs);
0102
0103 fprintf('Pruning the global KEGG model from the partially annotated, lumped KEGG Orthology entries... ')
0104 model.genes=KOModel.genes;
0105
0106
0107
0108
0109 KOsToRemove=setdiff(KOs, KOModel.rxns);
0110
0111
0112 for i=1:numel(model.rxns)
0113 if isstruct(model.rxnMiriams{i})
0114 for j=1:numel(model.rxnMiriams{i}.name)
0115 toDel=[];
0116 if strcmp(model.rxnMiriams{i}.name{j},'kegg.orthology')
0117 if ismember(model.rxnMiriams{i}.value{j},KOsToRemove)
0118 toDel=[toDel;j];
0119 end
0120 end
0121 end
0122
0123 if any(toDel)
0124
0125 if numel(toDel)==j
0126 model.rxnMiriams{i}=[];
0127 else
0128 model.rxnMiriams{i}.name(toDel)=[];
0129 model.rxnMiriams{i}.value(toDel)=[];
0130 end
0131 end
0132 end
0133 end
0134 fprintf('COMPLETE\n');
0135
0136 fprintf('Constructing the rxnGeneMat for the global KEGG model... 0%% complete');
0137
0138
0139 r=zeros(10000000,1);
0140
0141
0142 c=zeros(10000000,1);
0143 counter=1;
0144 for i=1:numel(model.rxns)
0145 if isstruct(model.rxnMiriams{i})
0146 I=strncmp('kegg.orthology',model.rxnMiriams{i}.name,18);
0147 if any(I)
0148 [J, K]=ismember(model.rxnMiriams{i}.value(I),KOModel.rxns);
0149
0150 [~, L]=find(KOModel.rxnGeneMat(K(J),:));
0151 if any(L)
0152
0153 if counter+numel(L)-1>=numel(r)
0154 r=[r;zeros(numel(r),1)];
0155 c=[c;zeros(numel(c),1)];
0156 end
0157 r(counter:counter+numel(L)-1)=ones(numel(L),1)*i;
0158 c(counter:counter+numel(L)-1)=L(:);
0159 counter=counter+numel(L);
0160 end
0161 end
0162 end
0163 if rem(i-1,100) == 0
0164 progress=pad(num2str(floor(i/numel(model.rxns)*100)),3,'left');
0165 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress);
0166 end
0167 end
0168
0169 model.rxnGeneMat=sparse(r(1:counter-1),c(1:counter-1),ones(counter-1,1));
0170 if size(model.rxnGeneMat,1)~=numel(model.rxns) || size(model.rxnGeneMat,2)~=numel(KOModel.genes)
0171 model.rxnGeneMat(numel(model.rxns),numel(KOModel.genes))=0;
0172 end
0173 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
0174
0175
0176 metModel=getMetsFromKEGG(keggPath);
0177
0178 fprintf('Finalizing the global KEGG model... ');
0179
0180 [a, b]=ismember(model.mets,metModel.mets);
0181 a=find(a);
0182 b=b(a);
0183
0184 if ~isfield(model,'metNames')
0185 model.metNames=cell(numel(model.mets),1);
0186 model.metNames(:)={''};
0187 end
0188 model.metNames(a)=metModel.metNames(b);
0189
0190 if ~isfield(model,'metFormulas')
0191 model.metFormulas=cell(numel(model.mets),1);
0192 model.metFormulas(:)={''};
0193 end
0194 model.metFormulas(a)=metModel.metFormulas(b);
0195
0196 if ~isfield(model,'inchis')
0197 model.inchis=cell(numel(model.mets),1);
0198 model.inchis(:)={''};
0199 end
0200 model.inchis(a)=metModel.inchis(b);
0201
0202 if ~isfield(model,'metMiriams')
0203 model.metMiriams=cell(numel(model.mets),1);
0204 end
0205 model.metMiriams(a)=metModel.metMiriams(b);
0206
0207
0208 I=find(~cellfun(@isempty,model.inchis));
0209 for i=1:numel(I)
0210 S=regexp(model.inchis(I(i)),'/','split');
0211 S=S{1};
0212 if numel(S)>=2
0213
0214 model.metFormulas(I(i))=S(2);
0215 end
0216 end
0217
0218
0219
0220 model.comps={'s'};
0221 model.compNames={'System'};
0222 model.metComps=ones(numel(model.mets),1);
0223
0224
0225
0226
0227
0228
0229 I=find(cellfun(@any,strfind(model.mets,'n')) | cellfun(@any,strfind(model.mets,'m')));
0230 model.metNames(I)=model.mets(I);
0231 repNums=1:numel(I);
0232 repIDs=strcat('undefined_',cellfun(@num2str,num2cell(repNums(:)),'UniformOutput',false));
0233 model.mets(I)=repIDs;
0234
0235
0236
0237 I=cellfun(@isempty,model.metNames);
0238 model.metNames(I)=model.mets(I);
0239
0240
0241 model.annotation.defaultLB = -1000;
0242 model.annotation.defaultUB = 1000;
0243
0244
0245 save(modelFile,'model','KOModel','isGeneral','isIncomplete','isUndefinedStoich','isSpontaneous');
0246 fprintf('COMPLETE\n');
0247 end
0248
0249
0250
0251
0252 if keepSpontaneous==false
0253 model=removeReactions(model,intersect(isSpontaneous,model.rxns),true,true);
0254 end
0255 if keepUndefinedStoich==false
0256 model=removeReactions(model,intersect(isUndefinedStoich,model.rxns),true,true);
0257 end
0258 if keepIncomplete==false
0259 model=removeReactions(model,intersect(isIncomplete,model.rxns),true,true);
0260 end
0261 if keepGeneral==false
0262 model=removeReactions(model,intersect(isGeneral,model.rxns),true,true);
0263 end
0264
0265 end
0266
0267 function output = isNewestFile(ravenPath)
0268
0269
0270 modelFile=fullfile(ravenPath,'external','kegg','keggModel.mat');
0271 rxnsFile=fullfile(ravenPath,'external','kegg','keggRxns.mat');
0272 genesFile=fullfile(ravenPath,'external','kegg','keggGenes.mat');
0273 metsFile=fullfile(ravenPath,'external','kegg','keggMets.mat');
0274 if (getFileTime(modelFile)>getFileTime(rxnsFile))&&...
0275 (getFileTime(modelFile)>getFileTime(genesFile))&&...
0276 (getFileTime(modelFile)>getFileTime(metsFile))
0277 output=1;
0278 else
0279 output=0;
0280 end
0281 end
0282
0283 function modTime = getFileTime(fileName)
0284
0285
0286 listing = dir(fileName);
0287 assert(numel(listing) == 1, 'No such file: %s', fileName);
0288 modTime = listing.datenum;
0289 format long;
0290 end