0001 function [outModel, geneLocalization, transportStruct, scores,...
0002 removedRxns] = predictLocalization(model, GSS,...
0003 defaultCompartment, transportCost, maxTime, plotResults)
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065 if nargin<4
0066 transportCost=ones(numel(model.mets),1)*0.5;
0067 end
0068 if numel(transportCost)==1
0069 transportCost=ones(numel(model.mets),1)*transportCost;
0070 end
0071 transportCost=transportCost(:);
0072
0073 if numel(transportCost)~=numel(model.mets)
0074 EM='The vector of transport costs must have the same dimension as model.mets';
0075 dispEM(EM,true);
0076 end
0077 if nargin<5
0078 maxTime=15;
0079 end
0080 if nargin<6
0081 plotResults=false;
0082 end
0083
0084 if isfield(model,'rxnComps')
0085 model=rmfield(model,'rxnComps');
0086 EM='The model structure contains information about reaction compartmentalization. This is not supported by this function. The rxnComps field has been deleted';
0087 dispEM(EM,false);
0088 end
0089 if isfield(model,'geneComps')
0090 model=rmfield(model,'geneComps');
0091 EM='The model structure contains information about gene compartmentalization. This is not supported by this function. The geneComps field has been deleted';
0092 dispEM(EM,false);
0093 end
0094
0095 defaultCompartment=char(defaultCompartment);
0096 I=ismember(defaultCompartment,GSS.compartments);
0097 if I==false
0098 EM='defaultCompartment not found in GSS';
0099 dispEM(EM);
0100 end
0101
0102 if numel(model.comps)>1
0103 EM='The model has several compartments. All compartments will be merged';
0104 dispEM(EM,false);
0105 model=mergeCompartments(model,true,true);
0106 end
0107
0108 noGenes = ismember(model.genes,GSS.genes);
0109 if ~all(noGenes)
0110 EM=['For ' num2str(numel(find(~noGenes))) ' of ' num2str(numel(model.genes)) ' model genes no data was found in GSS'];
0111 dispEM(EM,false);
0112 end
0113
0114
0115
0116
0117 model=expandModel(model);
0118
0119
0120
0121 removedRxns={};
0122
0123
0124 originalModelMets=model.mets;
0125 while 1
0126 irrevModel=convertToIrrev(model);
0127
0128 I=sum(irrevModel.S>0,2);
0129
0130
0131 if isfield(irrevModel,'unconstrained')
0132 I(irrevModel.unconstrained~=0)=2;
0133 end
0134 metsToDelete=false(numel(model.mets),1);
0135
0136
0137
0138
0139 for i=1:numel(irrevModel.mets)
0140
0141
0142
0143 if I(i)<2
0144 if I(i)==1
0145
0146 [~, J]=find(irrevModel.S(i,:)>0);
0147
0148
0149
0150
0151 K=irrevModel.S(:,J)<0;
0152 check=find(K & I<=1);
0153
0154 for j=1:numel(check)
0155
0156 [~, L]=find(irrevModel.S(check(j),:)>0);
0157
0158 if ~isempty(L)
0159 rxn=irrevModel.rxns(J);
0160 rxnRev=irrevModel.rxns(L);
0161 if strcmp(strrep(rxn,'_REV',''),strrep(rxnRev,'_REV',''))
0162 metsToDelete(i)=true;
0163 end
0164 else
0165
0166
0167 continue;
0168 end
0169 end
0170 else
0171
0172 metsToDelete(i)=true;
0173 end
0174 end
0175 end
0176
0177 if any(metsToDelete)
0178
0179 [~, I]=find(model.S(metsToDelete,:));
0180 removedRxns=[removedRxns;model.rxns(I)];
0181 model=removeReactions(model,I,true,true);
0182 else
0183
0184 break;
0185 end
0186 end
0187
0188
0189 transportCost=transportCost(ismember(originalModelMets,model.mets));
0190
0191
0192
0193 I=find(sum(model.rxnGeneMat,2)==0);
0194 for i=1:numel(I)
0195 model.genes=[model.genes;['&&FAKE&&' num2str(i)]];
0196 if isfield(model,'geneShortNames')
0197 model.geneShortNames=[model.geneShortNames;{''}];
0198 end
0199 if isfield(model,'geneMiriams')
0200 model.geneMiriams=[model.geneMiriams;{[]}];
0201 end
0202 if isfield(model,'proteins')
0203 model.proteins=[model.proteins;{[]}];
0204 end
0205 if isfield(model,'geneFrom')
0206 model.geneFrom=[model.geneFrom;{{'FAKE'}}];
0207 end
0208 model.rxnGeneMat(I(i),numel(model.genes))=1;
0209 model.grRules{I(i)}='';
0210 end
0211
0212
0213
0214
0215 I=setdiff(model.genes,GSS.genes);
0216 GSS.genes=[GSS.genes;I];
0217 GSS.scores=[GSS.scores;ones(numel(I),numel(GSS.compartments))*0.5];
0218
0219
0220
0221
0222
0223
0224
0225 genes=unique(model.grRules);
0226 nGenes=strrep(genes,'(','');
0227 nGenes=strrep(nGenes,')','');
0228
0229 complexes=setdiff(nGenes,model.genes);
0230 if ~isempty(complexes)
0231 if isempty(complexes{1})
0232 complexes(1)=[];
0233 end
0234 end
0235 cScores=zeros(numel(complexes),numel(GSS.compartments));
0236 for i=1:numel(complexes)
0237 genesInComplex=regexp(complexes{i},' and ','split');
0238
0239
0240 [I, J]=ismember(genesInComplex,GSS.genes);
0241
0242 if any(I)
0243
0244 mScores=mean(GSS.scores(J(I),:));
0245
0246
0247
0248 mScores=(mScores.*sum(I)+(numel(genesInComplex)-sum(I))*0.5)/numel(genesInComplex);
0249 else
0250 EM=['Could not parse grRule "' complexes{i} '". Assigning score 0.0 in all compartments'];
0251 dispEM(EM,false);
0252 mScores=ones(1,numel(genesInComplex))*0.5;
0253 end
0254 cScores(i,:)=mScores;
0255
0256
0257 model.genes=[model.genes;complexes{i}];
0258 if isfield(model,'geneMiriams')
0259 model.geneMiriams=[model.geneMiriams;{[]}];
0260 end
0261 if isfield(model,'geneShortNames')
0262 model.geneShortNames=[model.geneShortNames;{''}];
0263 end
0264 if isfield(model,'proteins')
0265 model.proteins=[model.proteins;{''}];
0266 end
0267 if isfield(model,'geneFrom')
0268 model.geneFrom=[model.geneFrom;{'COMPLEX'}];
0269 end
0270
0271
0272 I=ismember(model.grRules,['(' complexes{i} ')']);
0273
0274
0275 if ~isempty(I)
0276 model.rxnGeneMat(I,:)=0;
0277 model.rxnGeneMat(I,numel(model.genes))=1;
0278 end
0279 end
0280
0281
0282 GSS.genes=[GSS.genes;complexes];
0283 GSS.scores=[GSS.scores;cScores];
0284
0285
0286
0287 model=removeReactions(model,{},false,true);
0288
0289
0290
0291
0292
0293
0294 [~, I]=getExchangeRxns(model);
0295
0296
0297 J=1:numel(model.rxns);
0298 J(I)=[];
0299 model=permuteModel(model,[I;J'],'rxns');
0300
0301
0302 nER=numel(I);
0303
0304
0305 if isfield(model,'unconstrained')
0306 I=find(model.unconstrained);
0307 J=1:numel(model.mets);
0308 J(I)=[];
0309 model=permuteModel(model,[I;J'],'mets');
0310
0311 transportCost=transportCost([I;J']);
0312
0313 nEM=numel(I);
0314 else
0315 nEM=0;
0316 end
0317
0318
0319
0320 model.rxnGeneMat(1:nER,:)=0;
0321 model.grRules(1:nER)={''};
0322
0323
0324 model=removeReactions(model,{},false,true);
0325
0326
0327
0328
0329 [~, J]=ismember(model.genes,GSS.genes);
0330 GSS.genes=model.genes;
0331 GSS.scores=GSS.scores(J,:);
0332
0333
0334
0335 [~, J]=ismember(defaultCompartment,GSS.compartments);
0336 reorder=1:numel(GSS.compartments);
0337 reorder(J)=[];
0338 reorder=[J reorder];
0339 GSS.scores=GSS.scores(:,reorder);
0340 GSS.compartments=GSS.compartments(reorder);
0341
0342
0343
0344
0345 oldS=model.S;
0346 model.S(model.S>0)=1;
0347 model.S(model.S<0)=-1;
0348
0349
0350
0351
0352 model.S(:,model.rev==1)=model.S(:,model.rev==1).*10;
0353
0354
0355
0356
0357 nRxns=numel(model.rxns)-nER;
0358 nMets=numel(model.mets)-nEM;
0359 nGenes=numel(model.genes);
0360 nComps=numel(GSS.compartments);
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
0371
0372 s=repmat(eye(nMets)*0,1,nComps-1);
0373 s=[zeros(numel(model.mets)-nMets,size(s,2));s];
0374 S=[model.S sparse(numel(model.mets),nRxns*(nComps-1)) s];
0375 s=[sparse(nMets*(nComps-1),numel(model.rxns)+nRxns*(nComps-1)) eye(nMets*(nComps-1))*0];
0376 S=[S;s];
0377
0378
0379 transportCost=[transportCost(1:nEM);repmat(transportCost(nEM+1:end),nComps,1)];
0380
0381
0382
0383 g2c=false(nGenes,nComps);
0384
0385 g2c(:,1)=true;
0386
0387
0388 tic;
0389 bestScore=-inf;
0390 bestS=[];
0391 bestg2c=[];
0392
0393
0394 plotScore=[];
0395 nTrans=[];
0396 totScore=[];
0397 minScore=sum(min(GSS.scores,[],2));
0398 maxScore=sum(max(GSS.scores,[],2));
0399
0400 while toc<maxTime*60
0401
0402
0403
0404
0405
0406 [I, J]=find(g2c);
0407 geneToMove=randsample(nGenes,1,true,max(GSS.scores(I,:),[],2)-GSS.scores(sub2ind(size(g2c),I,J))+0.1);
0408
0409
0410
0411
0412 toComp=randsample(nComps,1,true,GSS.scores(geneToMove,:)+0.2);
0413
0414
0415 if toComp==find(g2c(geneToMove,:))
0416 continue;
0417 end
0418
0419
0420 [newS, newg2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets);
0421
0422
0423
0424
0425 wasConnected=false;
0426 for j=1:10
0427
0428 unconnected=findUnconnected(newS,nEM);
0429
0430
0431 if any(unconnected)
0432
0433
0434
0435 [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(newS,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS);
0436
0437
0438
0439
0440
0441 [score, I]=max(1.5*deltaScore+deltaConnected);
0442
0443
0444
0445 hasToAddTransport=true;
0446 if ~isempty(deltaConnected)
0447 if score>0
0448 hasToAddTransport=false;
0449 end
0450 end
0451
0452
0453
0454 if hasToAddTransport==false
0455 [newS, newg2c]=moveGene(newS,model,g2c,geneIndex(I),moveTo(I),nRxns,nMets);
0456 else
0457
0458
0459 transMet=unconnected(randsample(numel(unconnected),1));
0460
0461
0462 comps=ceil((transMet-nEM)/((size(S,1)-nEM)/nComps));
0463
0464
0465
0466 dcIndex=transMet-(comps-1)*nMets;
0467
0468
0469
0470 allIndexes=dcIndex;
0471 for k=1:nComps-1
0472 allIndexes=[allIndexes;dcIndex+nMets*k];
0473 end
0474
0475
0476
0477 I=sum(newS(allIndexes,:)~=0,2)>0;
0478
0479
0480
0481
0482 connectedUsed=setdiff(allIndexes(I),unconnected);
0483
0484
0485
0486 if isempty(connectedUsed)
0487 break;
0488 end
0489
0490
0491
0492 if transMet==dcIndex
0493 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,connectedUsed(randsample(numel(connectedUsed),1)));
0494 else
0495
0496
0497 I=connectedUsed(connectedUsed<(nMets+nEM));
0498 if any(I)
0499 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,I(randsample(numel(I),1)));
0500 else
0501
0502
0503
0504 break;
0505 end
0506 end
0507 end
0508 else
0509 wasConnected=true;
0510 break;
0511 end
0512 end
0513
0514
0515
0516 if wasConnected==true
0517
0518 activeTransport=find(sum(newS(:,nER+nRxns*nComps+1:end),2));
0519
0520
0521 unconnected=findUnconnected(newS(:,1:nER+nRxns*nComps),nEM);
0522
0523
0524 I=setdiff(activeTransport,unconnected);
0525
0526
0527
0528 newS(:,find(sum(newS(I,nER+nRxns*nComps+1:end))==4)+nER+nRxns*nComps)=0;
0529
0530
0531
0532 [score, geneScore, trCost]=scoreModel(newS,newg2c,GSS,transportCost);
0533
0534
0535 if score>bestScore
0536 bestScore=score;
0537 bestS=newS;
0538 bestg2c=newg2c;
0539 end
0540
0541
0542 if score>=bestScore
0543 plotScore=[plotScore;geneScore];
0544 nTrans=[nTrans;trCost];
0545 totScore=[totScore;score];
0546 S=newS;
0547 g2c=newg2c;
0548
0549 if plotResults==true
0550 subplot(3,2,1);
0551 spy(S);
0552 subplot(3,2,2);
0553 plot(plotScore,'r');
0554 xlabel('Gene score');
0555 subplot(3,2,3);
0556 plot((plotScore-minScore)/(maxScore-minScore),'r');
0557 xlabel('Gene score relative to predictions');
0558 subplot(3,2,4);
0559 plot(nTrans,'g');
0560 xlabel('Transport cost');
0561 subplot(3,2,5);
0562 plot(totScore,'b');
0563 xlabel('Total score');
0564 subplot(3,2,6);
0565 pause(0.2);
0566 end
0567 end
0568 end
0569 end
0570 scores.totScore=score;
0571 scores.geneScore=geneScore;
0572 scores.transCost=trCost;
0573
0574
0575 [I, J]=find(bestS(nEM+1:nEM+nMets,end-nMets*(nComps-1)+1:end));
0576 J=ceil(J/nMets+1);
0577 transportStruct.mets=model.metNames(I+nEM);
0578 transportStruct.toComp=GSS.compartments(J);
0579
0580 [I, J]=find(bestg2c);
0581 geneLocalization.genes=GSS.genes(I);
0582 geneLocalization.comps=GSS.compartments(J);
0583
0584
0585 [~, I]=sort(geneLocalization.genes);
0586 geneLocalization.genes=geneLocalization.genes(I);
0587 geneLocalization.comps=geneLocalization.comps(I);
0588
0589
0590 I=strncmp('&&FAKE&&',geneLocalization.genes,8);
0591 geneLocalization.genes(I)=[];
0592 geneLocalization.comps(I)=[];
0593
0594
0595
0596
0597
0598
0599
0600 outModel=model;
0601 outModel.S=oldS;
0602
0603
0604 copyPart=outModel.S(nEM+1:end,nER+1:end);
0605
0606
0607 copyRxnGeneMat=outModel.rxnGeneMat(nER+1:end,:);
0608 outModel.rxnGeneMat=[outModel.rxnGeneMat;repmat(copyRxnGeneMat,nComps-1,1)];
0609
0610
0611
0612
0613 nStartComps=numel(outModel.comps);
0614 if nStartComps==1
0615 outModel.comps={'1'};
0616 outModel.compNames=GSS.compartments(1);
0617 else
0618 if model.metComps(1)==1
0619 outModel.compNames(1)=GSS.compartments(1);
0620 else
0621 outModel.compNames(2)=GSS.compartments(1);
0622 end
0623 end
0624 outModel.compNames=[outModel.compNames;GSS.compartments(2:end)'];
0625
0626
0627 for i=1:numel(GSS.compartments)-1
0628 outModel.comps=[outModel.comps;num2str(numel(outModel.comps)+1)];
0629 end
0630
0631 outModel.compOutside=cell(numel(outModel.comps),1);
0632 outModel.compOutside(:)={''};
0633
0634 for i=1:nComps-1
0635 outModel.S=[outModel.S sparse(size(outModel.S,1),nRxns)];
0636 outModel.S=[outModel.S;[sparse(nMets,nRxns*i+nER) copyPart]];
0637 outModel.rxns=[outModel.rxns;strcat(outModel.rxns(nER+1:nER+nRxns),'_',GSS.compartments{i+1})];
0638 outModel.rxnNames=[outModel.rxnNames;strcat(outModel.rxnNames(nER+1:nER+nRxns),' (',GSS.compartments{i+1},')')];
0639 outModel.lb=[outModel.lb;outModel.lb(nER+1:nER+nRxns)];
0640 outModel.ub=[outModel.ub;outModel.ub(nER+1:nER+nRxns)];
0641 outModel.rev=[outModel.rev;outModel.rev(nER+1:nER+nRxns)];
0642 outModel.c=[outModel.c;outModel.c(nER+1:nER+nRxns)];
0643 if isfield(outModel,'grRules')
0644 outModel.grRules=[outModel.grRules;outModel.grRules(nER+1:nER+nRxns)];
0645 end
0646 if isfield(outModel,'subSystems')
0647 outModel.subSystems=[outModel.subSystems;outModel.subSystems(nER+1:nER+nRxns)];
0648 end
0649 if isfield(outModel,'eccodes')
0650 outModel.eccodes=[outModel.eccodes;outModel.eccodes(nER+1:nER+nRxns)];
0651 end
0652 if isfield(outModel,'rxnFrom')
0653 outModel.rxnFrom=[outModel.rxnFrom;outModel.rxnFrom(nER+1:nER+nRxns)];
0654 end
0655 if isfield(outModel,'rxnMiriams')
0656 outModel.rxnMiriams=[outModel.rxnMiriams;outModel.rxnMiriams(nER+1:nER+nRxns)];
0657 end
0658 if isfield(outModel,'rxnNotes')
0659 outModel.rxnNotes=[outModel.rxnNotes;outModel.rxnNotes(nER+1:nER+nRxns)];
0660 end
0661 if isfield(outModel,'rxnReferences')
0662 outModel.rxnReferences=[outModel.rxnReferences;outModel.rxnReferences(nER+1:nER+nRxns)];
0663 end
0664 if isfield(outModel,'rxnConfidenceScores')
0665 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;outModel.rxnConfidenceScores(nER+1:nER+nRxns)];
0666 end
0667 if isfield(outModel,'rxnDeltaG')
0668 outModel.rxnDeltaG=[outModel.rxnDeltaG;outModel.rxnDeltaG(nER+1:nER+nRxns)];
0669 end
0670 outModel.mets=[outModel.mets;strcat(outModel.mets(nEM+1:nEM+nMets),'_',GSS.compartments{i+1})];
0671 outModel.metNames=[outModel.metNames;outModel.metNames(nEM+1:nEM+nMets)];
0672 outModel.b=[outModel.b;outModel.b(nEM+1:nEM+nMets,:)];
0673 I=ones(nMets,1)*nStartComps+i;
0674 outModel.metComps=[outModel.metComps;I];
0675 if isfield(outModel,'inchis')
0676 outModel.inchis=[outModel.inchis;outModel.inchis(nEM+1:nEM+nMets)];
0677 end
0678 if isfield(outModel,'metSmiles')
0679 outModel.metSmiles=[outModel.metSmiles;outModel.metSmiles(nEM+1:nEM+nMets)];
0680 end
0681 if isfield(outModel,'unconstrained')
0682 outModel.unconstrained=[outModel.unconstrained;outModel.unconstrained(nEM+1:nEM+nMets)];
0683 end
0684 if isfield(outModel,'metMiriams')
0685 outModel.metMiriams=[outModel.metMiriams;outModel.metMiriams(nEM+1:nEM+nMets)];
0686 end
0687 if isfield(outModel,'metFormulas')
0688 outModel.metFormulas=[outModel.metFormulas;outModel.metFormulas(nEM+1:nEM+nMets)];
0689 end
0690 if isfield(outModel,'metFrom')
0691 outModel.metFrom=[outModel.metFrom;outModel.metFrom(nEM+1:nEM+nMets)];
0692 end
0693 if isfield(outModel,'metCharges')
0694 outModel.metCharges=[outModel.metCharges;outModel.metCharges(nEM+1:nEM+nMets)];
0695 end
0696 if isfield(outModel,'metDeltaG')
0697 outModel.metDeltaG=[outModel.metDeltaG;outModel.metDeltaG(nEM+1:nEM+nMets)];
0698 end
0699 end
0700
0701
0702 transS=bestS(:,numel(outModel.rxns)+1:end);
0703 J=sum(transS)>0;
0704
0705
0706
0707 transS(transS~=0)=1;
0708 transS(1:nEM+nMets,:)=transS(1:nEM+nMets,:)*-1;
0709 I=find(sum(transS>0,2));
0710 nTransRxns=numel(I);
0711 outModel.S=[outModel.S transS(:,J)];
0712 filler=ones(nTransRxns,1);
0713 outModel.lb=[outModel.lb;filler*-1000];
0714 outModel.ub=[outModel.ub;filler*1000];
0715 outModel.rev=[outModel.rev;filler];
0716 outModel.c=[outModel.c;filler*0];
0717 outModel.rxnGeneMat=[outModel.rxnGeneMat;sparse(nTransRxns,numel(outModel.genes))];
0718
0719 for i=1:numel(I)
0720 outModel.rxns=[outModel.rxns;strcat('transport',num2str(i))];
0721 outModel.rxnNames=[outModel.rxnNames;['Transport of ',outModel.metNames{I(i)}]];
0722 if isfield(outModel,'grRules')
0723 outModel.grRules=[outModel.grRules;{''}];
0724 end
0725 if isfield(outModel,'rxnMiriams')
0726 outModel.rxnMiriams=[outModel.rxnMiriams;{[]}];
0727 end
0728 if isfield(outModel,'subSystems')
0729 outModel.subSystems=[outModel.subSystems;{{'Inferred transport reactions'}}];
0730 end
0731 if isfield(outModel,'eccodes')
0732 outModel.eccodes=[outModel.eccodes;{''}];
0733 end
0734 if isfield(outModel,'rxnFrom')
0735 outModel.rxnFrom=[outModel.rxnFrom;{''}];
0736 end
0737 if isfield(outModel,'rxnNotes')
0738 outModel.rxnNotes=[outModel.rxnNotes;{''}];
0739 end
0740 if isfield(outModel,'rxnReferences')
0741 outModel.rxnReferences=[outModel.rxnReferences;{''}];
0742 end
0743 if isfield(outModel,'rxnConfidenceScores')
0744 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;NaN];
0745 end
0746 if isfield(outModel,'rxnDeltaG')
0747 outModel.rxnDeltaG=[outModel.rxnDeltaG;NaN];
0748 end
0749 end
0750
0751
0752
0753 [~, J]=find(bestS(:,1:nER+nComps*nRxns));
0754 K=true(numel(outModel.rxns),1);
0755 K(J)=false;
0756 K(end-nTransRxns+1:end)=false;
0757 outModel=removeReactions(outModel,K,true);
0758
0759
0760 I=strncmp('&&FAKE&&',outModel.genes,8);
0761 outModel.genes(I)=[];
0762 if isfield(outModel,'geneMiriams')
0763 outModel.geneMiriams(I)=[];
0764 end
0765 if isfield(outModel,'geneShortNames')
0766 outModel.geneShortNames(I)=[];
0767 end
0768 if isfield(outModel,'proteins')
0769 outModel.proteins(I)=[];
0770 end
0771 outModel.rxnGeneMat(:,I)=[];
0772
0773
0774 [grRules,rxnGeneMat] = standardizeGrRules(outModel,true);
0775 outModel.grRules = grRules;
0776 outModel.rxnGeneMat = rxnGeneMat;
0777 end
0778
0779
0780 function [S, g2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets)
0781
0782 currentComp=find(g2c(geneToMove,:));
0783 g2c(geneToMove,:)=false;
0784 g2c(geneToMove,toComp)=true;
0785
0786
0787 [I, ~]=find(model.rxnGeneMat(:,geneToMove));
0788
0789
0790 oldRxns=I+(currentComp-1)*nRxns;
0791
0792
0793 newRxns=I+(toComp-1)*nRxns;
0794
0795
0796
0797 metChange=nMets*(toComp-currentComp);
0798
0799
0800 [I, J, K]=find(S(:,oldRxns));
0801 I=I+metChange;
0802
0803
0804 S(:,oldRxns)=0;
0805 S(sub2ind(size(S),I,newRxns(J)))=K;
0806 end
0807
0808
0809
0810
0811
0812
0813
0814 function unconnected=findUnconnected(S,nEM,metsToCheck)
0815 if nargin>2
0816
0817
0818 I=false(size(S,1),1);
0819 I(1:nEM)=true;
0820 I(metsToCheck)=true;
0821 S=S(I,:);
0822 end
0823
0824 em=false(size(S,1),1);
0825 em(1:nEM)=true;
0826
0827
0828 I=sum(S>2,1) | sum(S>2,1);
0829 revS=S;
0830 revS(:,I)=revS(:,I)*-1;
0831
0832
0833
0834
0835 connected=sum(S>0,2)>1 | em | sum(S~=0,2)==0 | sum(S(:,~I)>0,2)>0 | sum(S(:,I)~=0,2)>1;
0836
0837
0838 unconnected=sum(S>0 | revS>0,2)==0 & connected==false;
0839
0840
0841 maybeUnconnected=~connected & ~unconnected;
0842
0843
0844
0845
0846
0847
0848
0849 deadRxns=any(S(maybeUnconnected,:)>0) & any(S(maybeUnconnected,:)<0);
0850
0851
0852 problematic=any(S(:,deadRxns)~=0,2);
0853
0854
0855
0856 unconnected(problematic & maybeUnconnected)=true;
0857
0858
0859 if nargin>2
0860 unconnected=metsToCheck(unconnected(nEM+1:end));
0861 else
0862 unconnected=find(unconnected);
0863 end
0864 end
0865
0866
0867
0868
0869
0870
0871
0872 function [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(S,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS)
0873
0874 moveTo=zeros(numel(model.genes),1);
0875 deltaConnected=zeros(numel(model.genes),1);
0876
0877
0878 nComps=size(g2c,2);
0879 comps=ceil((unconnected-nEM)/((size(S,1)-nEM)/nComps));
0880
0881
0882
0883 dcIndexes=unique(unconnected-(comps-1)*nMets);
0884
0885
0886 allIndexes=dcIndexes;
0887 for i=1:nComps-1
0888 allIndexes=[allIndexes;dcIndexes+nMets*i];
0889 end
0890
0891
0892 I=sum(S>2,1) | sum(S>2,1);
0893 revS=S;
0894 revS(:,I)=revS(:,I)*-1;
0895
0896
0897
0898 newMets=setdiff(allIndexes,unconnected);
0899 [~, potential]=find(S(newMets,:)>0 | revS(newMets,:)>0);
0900 potential(potential<=nER | potential>nER+nRxns*nComps)=[];
0901
0902
0903 rxnComps=ceil((potential-nER)/(nRxns));
0904
0905
0906
0907 dcRxnIndexes=potential-(rxnComps-1)*nRxns;
0908
0909
0910 genes=find(sum(model.rxnGeneMat(dcRxnIndexes,:)>0,1));
0911
0912
0913
0914
0915
0916
0917
0918
0919 [~, J]=find(g2c(genes,:));
0920
0921
0922
0923 geneScores=GSS.scores(sub2ind(size(g2c),genes(:),J));
0924 modGeneScores=1.1-geneScores;
0925 if numel(genes)>25
0926 rGenes=genes(randsample(numel(genes),min(numel(genes),25),true,modGeneScores));
0927
0928
0929 rGenes=unique(rGenes);
0930
0931
0932 [~, I]=ismember(rGenes,genes);
0933 geneScores=geneScores(I);
0934 genes=rGenes;
0935 end
0936 for i=1:numel(genes)
0937
0938
0939
0940
0941 rxns=find(model.rxnGeneMat(:,genes(i)));
0942
0943
0944 mets=find(sum(model.S(:,rxns)~=0,2)>0);
0945
0946
0947 allIndexes=mets;
0948 for j=1:nComps-1
0949 allIndexes=[allIndexes;mets+nMets*j];
0950 end
0951
0952
0953
0954
0955
0956 [I, ~]=find(model.S(:,rxns));
0957 moveToComps=unique(comps(ismember(dcIndexes,I)));
0958
0959
0960 bestMove=-inf;
0961 bestComp=[];
0962 for j=1:numel(moveToComps)
0963 newS=moveGene(S,model,g2c,genes(i),moveToComps(j),nRxns,nMets);
0964
0965
0966
0967 dConnected=numel(unconnected)-numel(findUnconnected(newS,nEM,[allIndexes;unconnected]));
0968 if dConnected>bestMove
0969 bestMove=dConnected;
0970 bestComp=moveToComps(j);
0971 end
0972 end
0973
0974
0975 moveTo(genes(i))=bestComp;
0976 deltaConnected(genes(i))=bestMove;
0977 end
0978
0979
0980 geneIndex=genes(:);
0981 moveTo=moveTo(geneIndex);
0982 deltaConnected=deltaConnected(geneIndex);
0983 deltaScore=GSS.scores(sub2ind(size(g2c),geneIndex(:),moveTo))-geneScores;
0984 end
0985
0986
0987
0988
0989
0990
0991 function S=addTransport(S,nRxns,nER,nMets,nEM,nComps,metA,metB)
0992 mets=[metA;metB];
0993
0994 comps=ceil((mets-nEM)/((size(S,1)-nEM)/nComps));
0995
0996 if sum(comps==1)~=1
0997 EM='Tried to create a transport reaction from a non-default compartment';
0998 dispEM(EM);
0999 end
1000
1001
1002 rIndex=(nER+nRxns*nComps)+mets(comps~=1)-nEM-nMets;
1003
1004 S(mets,rIndex)=2;
1005 end
1006
1007
1008
1009 function [score, geneScore, transportCost]=scoreModel(S,g2c,GSS,transportCost)
1010 [I, J]=find(g2c);
1011 geneScore=sum(GSS.scores(sub2ind(size(g2c),I,J)));
1012 [I, ~]=find(S==2);
1013 I=unique(I);
1014 transportCost=sum(transportCost(I));
1015 score=geneScore-transportCost;
1016 end
1017
1018
1019
1020
1021 function y = randsample(n, k, replace, w)
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046 if nargin < 2
1047 error('stats:randsample:TooFewInputs','Requires two input arguments.');
1048 elseif numel(n) == 1
1049 population = [];
1050 else
1051 population = n;
1052 n = numel(population);
1053 if length(population)~=n
1054 error('stats:randsample:BadPopulation','POPULATION must be a vector.');
1055 end
1056 end
1057
1058 if nargin < 3
1059 replace = false;
1060 end
1061
1062 if nargin < 4
1063 w = [];
1064 elseif ~isempty(w)
1065 if length(w) ~= n
1066 if isempty(population)
1067 error('stats:randsample:InputSizeMismatch',...
1068 'W must have length equal to N.');
1069 else
1070 error('stats:randsample:InputSizeMismatch',...
1071 'W must have the same length as the population.');
1072 end
1073 else
1074 p = w(:)' / sum(w);
1075 end
1076 end
1077
1078 switch replace
1079
1080
1081 case {true, 'true', 1}
1082 if isempty(w)
1083 y = ceil(n .* rand(k,1));
1084 else
1085 [dum, y] = histc(rand(k,1),[0 cumsum(p)]);
1086 end
1087
1088
1089 case {false, 'false', 0}
1090 if k > n
1091 if isempty(population)
1092 error('stats:randsample:SampleTooLarge',...
1093 'K must be less than or equal to N for sampling without replacement.');
1094 else
1095 error('stats:randsample:SampleTooLarge',...
1096 'K must be less than or equal to the population size.');
1097 end
1098 end
1099
1100 if isempty(w)
1101
1102
1103
1104 if 4*k > n
1105 rp = randperm(n);
1106 y = rp(1:k);
1107
1108
1109
1110
1111 else
1112 x = zeros(1,n);
1113 sumx = 0;
1114 while sumx < k
1115 x(ceil(n * rand(1,k-sumx))) = 1;
1116 sumx = sum(x);
1117 end
1118 y = find(x > 0);
1119 y = y(randperm(k));
1120 end
1121 else
1122 error('stats:randsample:NoWeighting',...
1123 'Weighted sampling without replacement is not supported.');
1124 end
1125 otherwise
1126 error('stats:randsample:BadReplaceValue',...
1127 'REPLACE must be either true or false.');
1128 end
1129
1130 if ~isempty(population)
1131 y = population(y);
1132 else
1133 y = y(:);
1134 end
1135 end