Home > core > standardizeGrRules.m

standardizeGrRules

PURPOSE ^

standardizeGrRules

SYNOPSIS ^

function [grRules,rxnGeneMat,indexes2check] = standardizeGrRules(model,embedded)

DESCRIPTION ^

 standardizeGrRules
   Standardizes gene-rxn rules in a model according to the following
   conventions:
       - No overall containing brackets
       - Only enzyme complexes are enclosed in brackets
       - ' and ' & ' or ' strings are strictly set to lowercase

   A rxnGeneMat matrix consistent with the standardized grRules is created.

   model        a model structure
   embedded     true if this function is called inside of another 
                RAVEN function (optional, default false)

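   grRules      [nRxns x 1] cell array with the standardized grRules
   rxnGeneMat   [nRxns x nGenes] sparse matrix consistent with the
                standardized grRules
   indexes2check  indexes of the grRules containing ") and" or "and (";
                these rules are reported for manual checking and are
                returned without bracket standardization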
   
   If this function is going to be used in a model reconstruction or
   modification pipeline it is recommended to run this function just
   at the beginning of the process.

 Usage: [grRules,rxnGeneMat,indexes2check]=standardizeGrRules(model,embedded)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

function [grRules,rxnGeneMat,indexes2check] = standardizeGrRules(model,embedded)
% standardizeGrRules
%   Standardizes gene-rxn rules in a model according to the following
%   conventions:
%       - No overall containing brackets
%       - Only enzyme complexes are enclosed in brackets
%       - ' and ' & ' or ' strings are strictly set to lowercase
%
%   A rxnGeneMat matrix consistent with the standardized grRules is created.
%
%   model          a model structure (the fields rxns, genes and grRules
%                  are read)
%   embedded       true if this function is called inside of another
%                  RAVEN function (optional, default false). When true,
%                  no warning is printed for potentially problematic rules
%
%   grRules        [nRxns x 1] cell array with the standardized grRules
%   rxnGeneMat     [nRxns x nGenes] sparse matrix consistent with the
%                  standardized grRules
%   indexes2check  indexes of the grRules that contain ") and" or "and (".
%                  These rules are only cleaned of superfluous blanks and
%                  uppercase operators; their brackets are left untouched
%
%   If this function is going to be used in a model reconstruction or
%   modification pipeline it is recommended to run this function just
%   at the beginning of the process.
%
% Usage: [grRules,rxnGeneMat,indexes2check]=standardizeGrRules(model,embedded)

if nargin<2
    embedded = false;
end

%Preallocate fields. numel is used for the gene count so that the matrix
%gets one column per gene regardless of whether model.genes is stored as a
%row or as a column cell array.
n          = length(model.rxns);
g          = numel(model.genes);
rxnGeneMat = sparse(n,g);
grRules    = cell(n,1);
genes      = model.genes;

if ~isfield(model,'grRules')
    error('The model does not have a grRules field.')
end

originalGrRules = grRulesPreparation(model.grRules);
%Search for potential logical errors in the grRules field
indexes2check   = findPotentialErrors(originalGrRules,embedded,model);

for i=1:length(originalGrRules)
    originalSTR = originalGrRules{i};
    %Default: keep the prepared rule as it is
    grRules{i}  = originalSTR;
    %Non-empty grRules are split in all their different isoenzymes
    genesSets   = getSimpleGeneSets(originalSTR);
    rxnGeneMat  = modifyRxnGeneMat(genesSets,genes,rxnGeneMat,i);
    %Only non-empty and non-conflicting grRules are standardized
    if isempty(genesSets) || ismember(i,indexes2check)
        continue
    end
    %Enzyme complexes are enclosed in brackets only when the rule has
    %more than one gene set; a lone complex needs no overall brackets
    if length(genesSets)>1
        isComplex = ~cellfun(@isempty,strfind(genesSets,' and '));
        genesSets(isComplex) = cellfun(@(s) ['(' s ')'],...
            genesSets(isComplex),'UniformOutput',false);
    end
    %Isoenzymes are separated by ' or '
    grRules{i} = char(strjoin(reshape(genesSets,1,[]),' or '));
end

end
0078 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0079 %Function that gets a cell array with all the simple gene sets in a given
0080 %grRule string
function genesSets = getSimpleGeneSets(originalSTR)
%Splits a grRule string into its ' or '-separated parts after trimming it
%and deleting every bracket. Returns a column cell array with one part per
%cell, or [] when the rule is empty.
if isempty(originalSTR)
    genesSets = [];
    return
end
ruleText = strtrim(originalSTR);
%Delete opening and closing brackets alike
for bracket = '()'
    ruleText = strrep(ruleText,bracket,'');
end
%One cell per isoenzyme, arranged as a column
genesSets = strsplit(ruleText,' or ').';
end
0093 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0094 %Function that gets a cell array of simple genes sets (single genes or
0095 %enzyme complexes) associated with the i-th reaction and modifies the
0096 %correspondent row in the rxnGeneMat accordingly.
function rxnGeneMat = modifyRxnGeneMat(genesSets,modelGenes,rxnGeneMat,i)
%Sets rxnGeneMat(i,g)=1 for every gene g of modelGenes that is mentioned in
%any of the simple gene sets of the i-th reaction.
%
%   genesSets    cell array of simple gene sets (single genes or complexes
%                written as 'G1 and G2'); may be empty
%   modelGenes   cell array with the gene names of the model
%   rxnGeneMat   matrix to update
%   i            row (reaction index) to update
%
%Gene names are compared case-insensitively (strcmpi). Names that are not
%found in modelGenes are ignored.

for j=1:length(genesSets)
    %Brackets are not part of gene names; drop any that are still present
    simpleSet = strrep(strrep(genesSets{j},'(',''),')','');
    tokens    = strsplit(simpleSet,' ');
    for k=1:length(tokens)
        token = tokens{k};
        %Skip blanks and the logical operator: splitting on ' ' yields
        %'and' as a token of its own, which must not be looked up as if
        %it were a gene
        if isempty(token) || strcmpi(token,'and')
            continue
        end
        %Get gene index
        genePos = find(strcmpi(modelGenes,token));
        if ~isempty(genePos)
            rxnGeneMat(i,genePos) = 1;
        end
    end
end
end
0121 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0122 %Function that gets the model field grRules and returns the indexes of the
0123 %rules in which any of the patterns ") and (", ") and" or "and (" is present.
function indexes2check = findPotentialErrors(grRules,embedded,model)
%Returns, as a sorted column vector, the indexes of the grRules that
%contain ") and" or "and (" (the pattern ") and (" is covered by both).
%Unless embedded is true, a warning listing the affected rules together
%with model.rxns of the same index is issued.
%
%   grRules      cell array of grRule strings
%   embedded     true to suppress the warning
%   model        model structure; only model.rxns is read, and only when
%                the warning is built

closesBeforeAnd = ~cellfun(@isempty,strfind(grRules,') and'));
opensAfterAnd   = ~cellfun(@isempty,strfind(grRules,'and ('));
flagged         = closesBeforeAnd | opensAfterAnd;
%(:) makes the result independent of the orientation of grRules
indexes2check   = find(flagged(:));

if isempty(indexes2check) || embedded
    return
end

%The fixed parts of the message hold \n escapes and are expanded with
%sprintf. Reaction IDs and rules are inserted through %s so that any '%' or
%'\' they contain is printed literally instead of being read as a format.
header = 'Potentially problematic ") AND (", ") AND" or "AND (" relationships found in\n\n';
listing = '';
for i=1:length(indexes2check)
    index   = indexes2check(i);
    listing = [listing sprintf('  - grRule #%s: %s\n',model.rxns{index},grRules{index})]; %#ok<AGROW>
end
footer = ['\n This kind of relationships should only be present in ',...
    'reactions catalysed by complexes of isoenzymes e.g.\n\n',...
    '  - (G1 or G2) and (G3 or G4)\n\n',...
    ' For these cases modify the grRules manually, writing all the ',...
    'possible combinations e.g.\n\n',...
    '  - (G1 and G3) or (G1 and G4) or (G2 and G3) or (G2 and G4)\n\n',...
    ' For other cases modify the correspondent grRules avoiding:\n\n',...
    '  1) Overall container brackets, e.g.\n',...
    '        "(G1 and G2)" should be "G1 and G2"\n\n',...
    '  2) Single unit enzymes enclosed into brackets, e.g.\n',...
    '        "(G1)" should be "G1"\n\n',...
    '  3) The use of uppercases for logical operators, e.g.\n',...
    '        "G1 OR G2" should be "G1 or G2"\n\n',...
    '  4) Unbalanced brackets, e.g.\n',...
    '        "((G1 and G2) or G3" should be "(G1 and G2) or G3"\n'];
warning('%s',[sprintf(header) listing sprintf(footer)])
end
0158 
function grRules = grRulesPreparation(grRules)
%Cleans grRules (a string or a cell array of strings) before they are
%parsed: runs of blanks are collapsed to a single blank, blanks next to the
%inner side of brackets are removed and the uppercase operators ' AND ' and
%' OR ' are written in lowercase.

%Remove unnecessary blanks. A single strrep pass only halves a run of
%blanks, so the replacement is repeated until nothing changes anymore.
while true
    collapsed = strrep(grRules,'  ',' ');
    if isequal(collapsed,grRules)
        break
    end
    grRules = collapsed;
end
grRules = strrep(grRules,'( ','(');
grRules = strrep(grRules,' )',')');
% Make sure that AND and OR strings are in lowercase
grRules = strrep(grRules,' AND ',' and ');
grRules = strrep(grRules,' OR ',' or ');
end

Generated by m2html © 2005