Description of prepINITModel

0001 function prepData = prepINITModel(origRefModel, taskStruct, spontRxnNames, convertGenes, customRxnsToIgnore, extComp, skipScaling)
0002 % prepINITModel
0003 %
0004 % The purpose of this function is to run time-consuming calculation steps that are not
0005 % dependent on the RNA-Seq data.
0006 %
0007 % origRefModel      The model to use. Expected to be something such as Human-GEM,
0008 %                   Mouse-GEM, etc.
0009 % taskStruct        The essential tasks. Can be loaded with for example
0010 %                   taskStruct = parseTaskList('../data/metabolicTasks_Essential.txt');
0011 % spontRxnNames     The spontaneous rxns. (optional, default {})
0012 % convertGenes      If true the genes are converted to gene names (from
0013 %                   ENSEMBL) (optional, default false)
0014 % customRxnsToIgnore These reactions can be ignored in the ignore mask
0015 %                   (specifying b7=1) (optional, default = {})
0016 % extComp           Name of the external compartment, typically 's' or 'e'. This
0017 %                   is used for identifying exch and import rxns (optional, default = 'e')
0018 % prepData          The resulting prepData structure which is used as input to ftINIT
0019 % skipScaling       If true the scaling step is not run on the minimal model. The
0020 %                   scaling is there to remove large differences between the
0021 %                   stoichiometric coefficients within a reaction, since such
0022 %                   differences creates numerical issues in the ftINIT algorithm
0023 %                   due to limitations in solver resolution. However,
0024 %                   the current scaling step is also risky and may lead to that
0025 %                   some reactions cannot carry flux, since it changes the stoichiometry
0026 %                   of the reactions with large differences. If you experience problems
0027 %                   where the solution is infeasible, it may be worth trying to turn off
0028 %                   the scaling. Note that it is only the minModel that is scaled,
0029 %                   the scaling will not be present in the final model.
0030 %                   Default: (optional, default = false)
0031 %
0032 % Usage: prepData = prepINITModel(origRefModel, taskStruct, spontRxnNames, convertGenes, customRxnsToIgnore, extComp)
0033 
0034 
0035 if nargin < 3
0036     spontRxnNames = {}; 
0037 end
0038 
0039 if nargin < 4
0040     convertGenes = false; 
0041 end
0042 
0043 if nargin < 5
0044     customRxnsToIgnore = {}; 
0045 end
0046 
0047 if nargin < 6
0048     extComp = 'e';
0049 end
0050 
0051 if nargin < 7
0052     skipScaling = false;
0053 end
0054 disp('Step 1: Gene rules')
0055 [origRefModel.grRules, origRefModel.rxnGeneMat] = standardizeGrRules(origRefModel, true);
0056 
0057 if convertGenes %For mouse we might want to translate in the opposite direction - this has to be done before calling this function in that case.
0058     [origRefModel.grRules, origRefModel.genes, origRefModel.rxnGeneMat] = translateGrRules(origRefModel.grRules, 'Name');
0059 end
0060 
0061 
0062 % Get list of all dead-end/inacessible/constrained reactions in the model.
0063 % We don't merge linear dependent reactions here, that will happen later.
0064 % We also don't remove reversibility here, we don't want that in the final
0065 % models produced. Therefore, this is done as a separate step.
0066 % This takes quite some time to run
0067 disp('Step 2: First simplification')
0068 [~,deletedDeadEndRxns] = simplifyModel(origRefModel,true,false,true,true,true);
0069 
0070 % get reduced model
0071 cModel = removeReactions(origRefModel,deletedDeadEndRxns,false,true);
0072 
0073 
0074 %Then, run the tasks to find out which reactions are essential. These rxns
0075 %will be forced to carry flux in the optimization later
0076 
0077 % determine reactions essential for tasks - takes ~10 min
0078 % need to add boundary mets first - but only to a temporary model, we don't
0079 % want those for further processing
0080 disp('Step 3: Check tasks (~10 min)')
0081 if ~isempty(taskStruct)
0082     bModel = closeModel(cModel);
0083     [taskReport, essentialRxnMat, ~, essentialFluxes] = checkTasks(bModel,[],true,false,true,taskStruct);
0084 
0085     %extract the essential rxns:
0086     sel = sum(essentialRxnMat,2) > 0;
0087     selInd = find(sel);
0088     essentialRxns = bModel.rxns(selInd);%382 rxns for human-GEM
0089 
0090     %Find metabolites present in taskStruct. We want to avoid removing
0091     %these metabolites from the final model (even though they may no longer
0092     %participate in any reacitons) so that the final model is still able to
0093     %complete all of the tasks without any errors.
0094     taskMets = union(vertcat(taskStruct.inputs),vertcat(taskStruct.outputs));
0095     taskMets = union(taskMets, parseRxnEqu(vertcat(taskStruct.equations)));
0096     modelMets = strcat(cModel.metNames,'[',cModel.comps(cModel.metComps),']');
0097     [inModel,metInd] = ismember(taskMets,modelMets);
0098     essentialMetsForTasks = cModel.mets(metInd(inModel));%118 mets
0099 
0100     %Remove tasks that cannot be performed
0101     taskStruct(taskReport.ok == false) = [];
0102 
0103 else
0104     essentialRxns = {};
0105     essentialMetsForTasks = {};
0106     taskReport = {};
0107 end
0108 
0109 
0110 % remove metabolites separately to avoid removing those needed for tasks
0111 unusedMets = cModel.mets(all(cModel.S == 0,2));%1248 in Human-GEM
0112 cModel = removeMets(cModel, setdiff(unusedMets,essentialMetsForTasks));
0113 
0114 if ~isempty(taskStruct)
0115 
0116     %Here, we decide on a direction in which the essential reactions should be forced.
0117     essentialRevDir = false(length(essentialRxns),1);
0118     pp = zeros(length(essentialRxns),1);
0119     nn = zeros(length(essentialRxns),1);
0120     for i = 1:length(selInd)
0121         pos = sum(essentialFluxes(selInd(i),essentialRxnMat(selInd(i),:)) > 0);
0122         neg = sum(essentialFluxes(selInd(i),essentialRxnMat(selInd(i),:)) < 0);
0123         essentialRevDir(i) = pos < neg;
0124         pp(i) = pos;
0125         nn(i) = neg;
0126     end
0127 
0128     %sum(pp>0 & nn>0) %0, so it is not a big problem that different tasks share essential
0129     %reactions but in different directions, at least no such cases exist now, so we ignore this for now.
0130 
0131     %Now create a minimal model for the MILP - we want to make this as small as we can, but we
0132     %don't want to use this later when we create the final models - just for the first MILP.
0133     %We don't want boundary metabolites in here either.
0134 
0135     %We do a trick with the essential rxns here:
0136     %What we want is to get rid of the reversibility of the reactions, because reversible reactions require
0137     %an integer each in the MILP, and we can also get the case that it may be cheaper to force flux in the wrong direction, which
0138     %will probably not give the gap-filling of reactions that we want.
0139     %So, we simply change the reversible reactions to irreversible, and flip their direction if the direction
0140     %to force flux in is negative. We don't need to do anything else, runINIT will then handle all essential rxns
0141     %as irreversible (and will not add any integers etc. for them).
0142 
0143     minModel1 = cModel;
0144     %for the essential rxns where we should force flux in the positive direction, just make them irreversible
0145     %for the negative direction, we do the same, but also flip the direction of the reaction
0146 
0147     %first flip the reactions with negative direction
0148     %constructEquations(minModel1,minModel1.rxns(selInd(essentialRevDir))) %looks good
0149     minModel1 = reverseRxns(minModel1, minModel1.rxns(selInd(essentialRevDir)));
0150     %constructEquations(minModel2,minModel2.rxns(selInd(essentialRevDir))) %looks good
0151 
0152     %Then make them irreversible
0153     minModel1.rev(selInd) = 0;
0154     minModel1.lb(selInd) = 0;
0155 else
0156     minModel1 = cModel;
0157 end
0158 %Now, remove reversibility on any reactions that can only be run in one direction - that simplifies the problem
0159 %This takes a long time to run (at least an hour or so).
0160 %This is an important step, we go from 5533 to 3734 reversible rxns, which reduces the size of the MILP,
0161 %since irreversible rxns are easier to handle.
0162 disp('Step 4: Second simplification (~1 hour)')
0163 minModel2 = simplifyModel(minModel1,false,false,false,false,false,false,true);
0164 
0165 disp('Step 5: Final work')
0166 
0167 %Now, merge all linear dependent rxns - this reduces the size of the model a lot.
0168 %Size goes from 11888 rxns to 7922 rxns for human-GEM, so this is an important step
0169 [minModel3,origRxnIds,groupIds]=mergeLinear(minModel2, {});
0170 
0171 %we now need to figure out what happened to the essential rxns in the merge step above.
0172 %Note that it would be ideal to run checkTasks on the result from mergeLinear, but that doesn't work.
0173 %The exchange rxns have in many cases been merged with other reactions, making it difficult to run
0174 %the tasks. So, instead we run checkTasks before, and figure out which merged reactions the
0175 %essential reactions belong to now
0176 
0177 %It may be possible to create a minModel2b, run mergeLinear on that (which is quick), and
0178 %then run checkTasks on the minimized model. Would be good to check if this gives the
0179 %same result, I'm not sure. The code would get less complicated.
0180 
0181 
0182 if ~isempty(taskStruct)
0183 
0184     %find all potential rxns
0185     rxnIndOrig = ismember(origRxnIds, essentialRxns);
0186     ids = unique(groupIds(rxnIndOrig));
0187     ids = ids(ids > 0);%0 means it has not been merged
0188     rxnCandidates = unique([essentialRxns;origRxnIds(ismember(groupIds, ids))]);%388, essentialRxns is 382
0189     newEssentialRxns = minModel3.rxns(ismember(minModel3.rxns,rxnCandidates));%196 in Human-GEM
0190     %setdiff(newEssentialRxns, essentialRxns)%only 2, that is very few
0191     %setdiff(essentialRxns, newEssentialRxns)%188, that is very many
0192     %It is probably the case that many essential reactions are linearly dependent on one another, which makes sense.
0193 else
0194     newEssentialRxns = {};
0195 end
0196 %What is left now is to remove some rxns from the problem, such as exchange rxns and spontaneous rxns.
0197 %We set the rxn scores of these to exactly 0, and make sure no other scores are zero.
0198 %runINIT will then handle this, we do >0 and <0 to get positive and negative scores, the rest
0199 %are not included in the problem.
0200 %But, this has to happen in the scoring - we identify the reactions in the origrxns here, and then fix this
0201 %in the scoring for each sample (the function groupRxnScores handles this and is run for each sample).
0202 
0203 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0204 %identify the reactions to ignore in the minModel2
0205 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0206 
0207 %Identify reactions that should be ignored by tINIT, i.e. tINIT will not remove these regardless of score
0208 
0209 
0210 [~,exchRxnInd] = getExchangeRxns(minModel2);
0211 toIgnoreExch = false(numel(minModel2.rxns),1);
0212 toIgnoreExch(exchRxnInd) = true;
0213 toIgnoreImportRxns = false(numel(minModel2.rxns),1);
0214 toIgnoreSimpleTransp = false(numel(minModel2.rxns),1);
0215 toIgnoreAdvTransp = false(numel(minModel2.rxns),1);
0216 toIgnoreS = false(numel(minModel2.rxns),1);
0217 
0218 
0219 
0220 %sum(toIgnore) %1500 for Human-GEM
0221 %find simple transport reactions with no GPRs:
0222 %We really only skip the reactions with two metabolites
0223 %where they are the same but different compartments - only skip the transport into the cell for now
0224 numMets = sum(minModel2.S ~= 0, 1);
0225 scomp = find(strcmp(minModel2.comps,extComp));
0226 for i = 1:length(minModel2.rxns)
0227     %check that it has no GPR and that the rxn has two metabolites
0228     if strcmp(minModel2.grRules(i),'') && (numMets(i) == 2)
0229       metsComps = minModel2.metComps(minModel2.S(:,i) ~= 0);
0230       metNames = minModel2.metNames(minModel2.S(:,i) ~= 0);
0231       if metsComps(1) ~= metsComps(2) && (metsComps(1) == scomp || metsComps(2) == scomp) %different compartments, one is the extComp
0232           if strcmp(metNames{1}, metNames{2}) %the same metabolite, it is a transport reaction
0233               toIgnoreImportRxns(i) = true;
0234           end
0235       elseif metsComps(1) ~= metsComps(2) %different compartments, no check for extComp
0236           if strcmp(metNames{1}, metNames{2}) %the same metabolite, it is a transport reaction
0237               toIgnoreSimpleTransp(i) = true;
0238           end
0239       end
0240     else %check for advanced
0241         metsInd = find(minModel2.S(:,i) ~= 0);
0242         %check that we have an even number of mets that is larger than 2 and that we don't have a GPR
0243         if rem(length(metsInd),2) == 0 && length(metsInd) > 2 && isempty(minModel2.grRules{i})
0244             %Now check that we have pairs of metabolites that are transported
0245             SVals = full(minModel2.S(metsInd,i));
0246             metNames = minModel2.metNames(metsInd);
0247             comps = minModel2.metComps(metsInd);
0248             success = true;
0249             while ~isempty(metNames)
0250                 if length(metNames) < 2 %this should not happen
0251                    error('We should never arrive here!');
0252                 end
0253                 metMatch = find(strcmp(metNames(2:length(metNames)),metNames{1})) + 1;
0254                 if length(metMatch) ~= 1 %we want a match, and only one (if there is some other complex transport rxn, we skip that)
0255                    success = false;
0256                    break;
0257                 end
0258                 if (SVals(1) + SVals(metMatch)) ~= 0 %the stoichiometry is not right
0259                    success = false;
0260                    break;
0261                 end
0262                 if comps(1) == comps(metMatch) %they must be in different compartments (maybe this need not to be checked)
0263                    success = false;
0264                    break;
0265                 end
0266                 %now remove the pair and continue with the next
0267                 SVals([1;metMatch]) = [];
0268                 metNames([1;metMatch]) = [];
0269                 comps([1;metMatch]) = [];
0270             end
0271             toIgnoreAdvTransp(i) = success;
0272         end          
0273     end
0274 end
0275 
0276 %Spontaneous reactions:
0277 toIgnoreSpont = ismember(minModel2.rxns, spontRxnNames);%only 10 rxns in human-GEM
0278 
0279 %rxns without GPRs in the s compartment (outside the cell)
0280 sCompInd = find(strcmp(minModel2.comps, extComp));
0281 for i = 1:length(minModel2.rxns)
0282     metsInd = find(minModel2.S(:,i) ~= 0);
0283     %check that it has more than 1 metabolite (not exch rxn) and no GPR
0284     if length(metsInd) > 1 && isempty(minModel2.grRules{i})
0285         if  all(minModel2.metComps(metsInd) == sCompInd)
0286             toIgnoreS(i) = true;
0287         end
0288     end
0289 end
0290 
0291 toIgnoreAllWithoutGPRs = cellfun(@isempty,minModel2.grRules);
0292 
0293 
0294 %sum(toIgnore)%in total 2375 rxns in human-GEM
0295 %sum(toIgnoreAllTransp)%in total 3337 rxns in human-GEM, so 1,000 more
0296 
0297 %Now, try to scale the model to become more favorable for the solver.
0298 %In general, we try to make all fluxes as similar as possible.
0299 %      There is room for improvement here, this function is pretty simple.
0300 %      It only rescales reactions, while it would be possible to rescale
0301 %      metabolites as well (ROS => kROS, albumin => millialbumin, etc., but
0302 %      scaled freely with a mathematical method). It could potentially open up
0303 %      for using less strict margins in the MILP, and make it run faster.
0304 if (skipScaling)
0305     scaledMinModel = minModel3;
0306 else
0307     scaledMinModel = rescaleModelForINIT(minModel3);
0308     scaledMinModel.ub(scaledMinModel.ub > 0) = 1000;
0309     scaledMinModel.lb(scaledMinModel.lb < 0) = -1000;
0310 end
0311 % create data structure with pre-processing results
0312 prepData.taskReport = taskReport;
0313 prepData.essentialRxns = newEssentialRxns; %essential rxns in the minModel
0314 prepData.taskStruct = taskStruct;
0315 prepData.refModel = cModel;
0316 prepData.minModel = scaledMinModel;
0317 prepData.refModelWithBM = closeModel(cModel); %do this here so we don't have to do this for each sample
0318 prepData.groupIds = groupIds;
0319 prepData.essentialMetsForTasks = essentialMetsForTasks;
0320 
0321 prepData.toIgnoreExch = toIgnoreExch;
0322 prepData.toIgnoreImportRxns = toIgnoreImportRxns;
0323 prepData.toIgnoreSimpleTransp = toIgnoreSimpleTransp;
0324 prepData.toIgnoreAdvTransp = toIgnoreAdvTransp;
0325 prepData.toIgnoreSpont = toIgnoreSpont;
0326 prepData.toIgnoreS = toIgnoreS;
0327 prepData.toIgnoreCustomRxns = ismember(minModel2.rxns, customRxnsToIgnore);
0328 prepData.toIgnoreAllWithoutGPRs = toIgnoreAllWithoutGPRs;
0329 
0330 end
prepINITModel

PURPOSE

SYNOPSIS

DESCRIPTION

CROSS-REFERENCE INFORMATION

SOURCE CODE