function [model,KOModel]=getModelFromKEGG(keggPath,keepSpontaneous,...
    keepUndefinedStoich,keepIncomplete,keepGeneral)
% getModelFromKEGG
%   Returns the global KEGG model. The model is loaded from the cached file
%   <RAVEN root>/external/kegg/keggModel.mat when that file exists and is
%   newer than keggRxns.mat, keggGenes.mat and keggMets.mat in the same
%   directory; otherwise it is (re)generated via getRxnsFromKEGG,
%   getGenesFromKEGG and getMetsFromKEGG and the cache file is rewritten.
%
%   Input:
%   keggPath            path passed on to getRxnsFromKEGG, getGenesFromKEGG
%                       and getMetsFromKEGG; only used when the cached
%                       model has to be regenerated (opt, default
%                       'RAVEN/external/kegg')
%   keepSpontaneous     if false, the reactions listed in isSpontaneous
%                       (as returned by getRxnsFromKEGG) are removed
%                       (opt, default true)
%   keepUndefinedStoich if false, the reactions listed in
%                       isUndefinedStoich are removed (opt, default true)
%   keepIncomplete      if false, the reactions listed in isIncomplete are
%                       removed (opt, default true)
%   keepGeneral         if false, the reactions listed in isGeneral are
%                       removed (opt, default false)
%
%   Output:
%   model               the global KEGG model structure, after the
%                       requested reaction categories have been removed
%   KOModel             the structure returned by getGenesFromKEGG (or the
%                       copy stored in the cache file); its 'rxns' field
%                       holds KO ids and its 'rxnGeneMat' maps KOs to genes
%
%   Usage: [model,KOModel]=getModelFromKEGG(keggPath,keepSpontaneous,...
%               keepUndefinedStoich,keepIncomplete,keepGeneral)

if nargin<1
    keggPath='RAVEN/external/kegg';
else
    keggPath=char(keggPath);
end
if nargin<2
    keepSpontaneous=true;
end
if nargin<3
    keepUndefinedStoich=true;
end
if nargin<4
    keepIncomplete=true;
end
if nargin<5
    keepGeneral=false;
end

ravenPath=findRAVENroot();
modelFile=fullfile(ravenPath,'external','kegg','keggModel.mat');
%Backslashes are shown as forward slashes in the messages below. The path
%is passed as a %s argument (not as part of the format string) so that a
%'%' character in the path cannot corrupt the output
shownPath=strrep(modelFile,'\','/');
if exist(modelFile, 'file') && isNewestFile(ravenPath)
    fprintf('Importing the global KEGG model from %s... ',shownPath);
    %The cache file is expected to define model, KOModel, isGeneral,
    %isIncomplete, isUndefinedStoich and isSpontaneous (see the save call
    %at the end of the regeneration branch)
    load(modelFile);
    fprintf('COMPLETE\n');
else
    fprintf('NOTE: The file %s does not exist or is out-of-date and therefore will be (re)generated\n',shownPath);
    %First get all reactions
    [model,isSpontaneous,isUndefinedStoich,isIncomplete,isGeneral]=getRxnsFromKEGG(keggPath);

    %Collect every KO id that is referenced by any reaction. The list is
    %preallocated to twice the number of reactions and trimmed afterwards
    KOs=cell(numel(model.rxns)*2,1);
    addToIndex=1;
    for i=1:numel(model.rxns)
        if isstruct(model.rxnMiriams{i})
            for j=1:numel(model.rxnMiriams{i}.name)
                if strcmpi('kegg.orthology',model.rxnMiriams{i}.name{j})
                    KOs(addToIndex)=model.rxnMiriams{i}.value(j);
                    addToIndex=addToIndex+1;
                end
            end
        end
    end
    KOs=KOs(1:addToIndex-1);
    KOs=unique(KOs);

    %Get the KO-to-gene information for the referenced KOs
    KOModel=getGenesFromKEGG(keggPath,KOs);

    fprintf('Pruning the global KEGG model from the partially annotated, lumped KEGG Orthology entries... ')
    model.genes=KOModel.genes;

    %KOs that are referenced by reactions but that are absent from
    %KOModel.rxns are removed from the reaction annotation
    KOsToRemove=setdiff(KOs, KOModel.rxns);

    for i=1:numel(model.rxns)
        if isstruct(model.rxnMiriams{i})
            %Build the deletion mask over ALL entries of this reaction.
            %(It must not be reset per entry, otherwise only a match on
            %the last entry would ever be deleted.)
            isKO=strcmp(model.rxnMiriams{i}.name,'kegg.orthology');
            toDel=false(size(isKO));
            if any(isKO)
                toDel(isKO)=ismember(model.rxnMiriams{i}.value(isKO),KOsToRemove);
            end

            if any(toDel)
                if all(toDel)
                    %Nothing would be left, so drop the whole structure
                    model.rxnMiriams{i}=[];
                else
                    model.rxnMiriams{i}.name(toDel)=[];
                    model.rxnMiriams{i}.value(toDel)=[];
                end
            end
        end
    end
    fprintf('COMPLETE\n');

    %The progress text is always 13 characters wide ('NNN% complete'), to
    %match the 13 backspaces that are printed before each update
    fprintf('Constructing the rxnGeneMat for the global KEGG model...   0%% complete');

    %Row (reaction) and column (gene) indices of the non-zero elements are
    %collected in preallocated vectors and turned into a sparse matrix
    %afterwards
    r=zeros(10000000,1);
    c=zeros(10000000,1);
    counter=1;
    for i=1:numel(model.rxns)
        if isstruct(model.rxnMiriams{i})
            I=strcmp('kegg.orthology',model.rxnMiriams{i}.name);
            if any(I)
                %Map the KOs of this reaction to rows in KOModel and get
                %the genes (columns) associated to any of those KOs
                [J, K]=ismember(model.rxnMiriams{i}.value(I),KOModel.rxns);
                [~, L]=find(KOModel.rxnGeneMat(K(J),:));
                if any(L)
                    %Double the buffers if they are about to overflow
                    if counter+numel(L)-1>=numel(r)
                        r=[r;zeros(numel(r),1)];
                        c=[c;zeros(numel(c),1)];
                    end
                    r(counter:counter+numel(L)-1)=ones(numel(L),1)*i;
                    c(counter:counter+numel(L)-1)=L(:);
                    counter=counter+numel(L);
                end
            end
        end
        %Update the progress indicator for every 100th reaction
        if rem(i-1,100) == 0
            progress=pad(num2str(floor(i/numel(model.rxns)*100)),3,'left');
            fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress);
        end
    end

    model.rxnGeneMat=sparse(r(1:counter-1),c(1:counter-1),ones(counter-1,1));
    %sparse() sizes the matrix after the largest indices; extend it to
    %reactions x genes if trailing reactions/genes have no association
    if size(model.rxnGeneMat,1)~=numel(model.rxns) || size(model.rxnGeneMat,2)~=numel(KOModel.genes)
        model.rxnGeneMat(numel(model.rxns),numel(KOModel.genes))=0;
    end
    fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');

    %Then get all metabolites
    metModel=getMetsFromKEGG(keggPath);

    fprintf('Finalizing the global KEGG model... ');

    %Copy the metabolite information for the metabolites that are found in
    %metModel. a: positions in model.mets, b: matching positions in
    %metModel.mets
    [a, b]=ismember(model.mets,metModel.mets);
    a=find(a);
    b=b(a);

    if ~isfield(model,'metNames')
        model.metNames=cell(numel(model.mets),1);
        model.metNames(:)={''};
    end
    model.metNames(a)=metModel.metNames(b);

    if ~isfield(model,'metFormulas')
        model.metFormulas=cell(numel(model.mets),1);
        model.metFormulas(:)={''};
    end
    model.metFormulas(a)=metModel.metFormulas(b);

    if ~isfield(model,'inchis')
        model.inchis=cell(numel(model.mets),1);
        model.inchis(:)={''};
    end
    model.inchis(a)=metModel.inchis(b);

    if ~isfield(model,'metMiriams')
        model.metMiriams=cell(numel(model.mets),1);
    end
    model.metMiriams(a)=metModel.metMiriams(b);

    %For metabolites with an InChI, overwrite the formula with the second
    %'/'-delimited field of the InChI string
    I=find(~cellfun(@isempty,model.inchis));
    for i=1:numel(I)
        S=regexp(model.inchis(I(i)),'/','split');
        S=S{1};
        if numel(S)>=2
            model.metFormulas(I(i))=S(2);
        end
    end

    %Put all metabolites in one single compartment
    model.comps={'s'};
    model.compNames={'System'};
    model.metComps=ones(numel(model.mets),1);

    %Metabolite ids that contain 'n' or 'm' are replaced by 'undefined_k'
    %and the original id is kept as the metabolite name.
    %NOTE(review): presumably these are ids with undefined stoichiometry
    %such as polymers - confirm against getRxnsFromKEGG
    I=find(cellfun(@any,strfind(model.mets,'n')) | cellfun(@any,strfind(model.mets,'m')));
    model.metNames(I)=model.mets(I);
    repNums=1:numel(I);
    repIDs=strcat('undefined_',cellfun(@num2str,num2cell(repNums(:)),'UniformOutput',false));
    model.mets(I)=repIDs;

    %Metabolites that still lack a name get their id as name
    I=cellfun(@isempty,model.metNames);
    model.metNames(I)=model.mets(I);

    model.annotation.defaultLB = -1000;
    model.annotation.defaultUB = 1000;

    %Cache the result so that it can be loaded directly the next time
    save(modelFile,'model','KOModel','isGeneral','isIncomplete','isUndefinedStoich','isSpontaneous');
    fprintf('COMPLETE\n');
end

%Remove the reaction categories that the caller did not ask to keep. The
%intersect is needed since earlier removals may already have deleted some
%of the listed reactions
if keepSpontaneous==false
    model=removeReactions(model,intersect(isSpontaneous,model.rxns),true,true);
end
if keepUndefinedStoich==false
    model=removeReactions(model,intersect(isUndefinedStoich,model.rxns),true,true);
end
if keepIncomplete==false
    model=removeReactions(model,intersect(isIncomplete,model.rxns),true,true);
end
if keepGeneral==false
    model=removeReactions(model,intersect(isGeneral,model.rxns),true,true);
end

end
0265
function output = isNewestFile(ravenPath)
% isNewestFile
%   Returns 1 if keggModel.mat in <ravenPath>/external/kegg has a strictly
%   later modification time than each of keggRxns.mat, keggGenes.mat and
%   keggMets.mat in the same directory, and 0 otherwise. The files are
%   checked in that order and the check stops at the first file that is
%   not older than the model file.
keggDir=fullfile(ravenPath,'external','kegg');
cachedModel=fullfile(keggDir,'keggModel.mat');
sourceFiles={'keggRxns.mat','keggGenes.mat','keggMets.mat'};

output=1;
for k=1:numel(sourceFiles)
    if ~(getFileTime(cachedModel)>getFileTime(fullfile(keggDir,sourceFiles{k})))
        output=0;
        return
    end
end
end
0281
function modTime = getFileTime(fileName)
% getFileTime
%   Returns the modification time of a file as the serial date number
%   reported by dir().
%
%   Input:
%   fileName    path to the file; must match exactly one directory entry
%
%   Output:
%   modTime     the 'datenum' field of the dir() listing for the file
%
%   An error is thrown if dir() does not return exactly one entry for
%   fileName.
listing = dir(fileName);
assert(numel(listing) == 1, 'No such file: %s', fileName);
modTime = listing.datenum;
%The global display format is deliberately left untouched; a timestamp
%lookup should not alter the caller's Command Window settings
end