0001 function [rxnScores, geneScores, hpaScores, arrayScores] = scoreComplexModel(model,hpaData,arrayData,tissue,celltype,noGeneScore,isozymeScoring,complexScoring,multipleCellScoring,hpaLevelScores)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067 if nargin < 5
0068 celltype = [];
0069 end
0070 if nargin < 6 || isempty(noGeneScore)
0071 noGeneScore = -2;
0072 end
0073 if nargin < 7 || isempty(isozymeScoring)
0074 isozymeScoring = 'max';
0075 end
0076 if nargin < 8 || isempty(complexScoring)
0077 complexScoring = 'min';
0078 end
0079 if nargin < 9 || isempty(multipleCellScoring)
0080 multipleCellScoring = 'max';
0081 end
0082 if nargin < 10
0083
0084 hpaLevelScores.names = {'High' 'Medium' 'Low' 'None' 'Strong' 'Moderate' 'Weak' 'Negative' 'Not detected'};
0085 hpaLevelScores.scores = [20 15 10 -8 20 15 10 -8 -8];
0086 end
0087
0088 if isempty(hpaData) && isempty(arrayData)
0089 EM = 'Must supply hpaData, arrayData or both';
0090 dispEM(EM);
0091 end
0092 if ~ismember(lower(isozymeScoring),{'min','max','median','average'})
0093 EM = 'Valid options for isozymeScoring are "min", "max", "median", or "average"';
0094 dispEM(EM);
0095 end
0096 if ~ismember(lower(complexScoring),{'min','max','median','average'})
0097 EM = 'Valid options for complexScoring are "min", "max", "median", or "average"';
0098 dispEM(EM);
0099 end
0100 if ~ismember(lower(multipleCellScoring),{'max','average'})
0101 EM = 'Valid options for multipleCellScoring are "max" or "average"';
0102 dispEM(EM);
0103 end
0104
0105
0106
0107
0108 if ~isempty(arrayData)
0109 if numel(unique(arrayData.tissues)) < 2
0110 if ~isfield(arrayData,'threshold') || isempty(arrayData.threshold)
0111 EM = 'arrayData must contain measurements for at least two celltypes/tissues since the score is calculated based on the expression level compared to the overall average';
0112 dispEM(EM);
0113 end
0114 end
0115 end
0116
0117
0118 if isfield(arrayData,'threshold') && (numel(arrayData.threshold) == 1)
0119
0120
0121 arrayData.threshold = arrayData.threshold*ones(size(arrayData.genes));
0122 end
0123
0124
0125 if isempty(arrayData)
0126 arrayData.genes={};
0127 arrayData.tissues={};
0128 arrayData.celltypes={};
0129 arrayData.levels=[];
0130 arrayData.threshold=[];
0131 end
0132 if isempty(hpaData)
0133 hpaData.genes={};
0134 hpaData.tissues={};
0135 hpaData.celltypes={};
0136 hpaData.levels={};
0137 hpaData.types={};
0138 hpaData.reliabilities={};
0139 hpaData.gene2Level=[];
0140 hpaData.gene2Type=[];
0141 hpaData.gene2Reliability=[];
0142 end
0143
0144
0145 if ~ismember(upper(tissue),upper(hpaData.tissues)) && ~ismember(upper(tissue),upper(arrayData.tissues))
0146 EM = 'The tissue name does not match';
0147 dispEM(EM);
0148 end
0149 if any(celltype)
0150
0151 if ~isfield(hpaData,'celltypes') || ~isfield(arrayData,'celltypes')
0152 EM = 'Both hpaData and arrayData must contain cell type information if cell type is to be used';
0153 dispEM(EM);
0154 end
0155 if ~ismember(upper(celltype),upper(hpaData.celltypes)) && ~ismember(upper(celltype),upper(arrayData.celltypes))
0156 EM = 'The cell type name does not match';
0157 dispEM(EM);
0158 end
0159 end
0160
0161
0162
0163 J = ~strcmpi(hpaData.tissues,tissue);
0164
0165
0166 if any(celltype)
0167 J = J | ~strcmpi(hpaData.celltypes,celltype);
0168 end
0169
0170 hpaData.tissues(J)=[];
0171 if isfield(hpaData,'celltypes')
0172 hpaData.celltypes(J)=[];
0173 end
0174 if isfield(hpaData,'gene2Level')
0175 hpaData.gene2Level(:,J)=[];
0176 end
0177 if isfield(hpaData,'gene2Type')
0178 hpaData.gene2Type(:,J)=[];
0179 end
0180 if isfield(hpaData,'gene2Reliability')
0181 hpaData.gene2Reliability(:,J)=[];
0182 end
0183
0184
0185
0186 if ~isempty(hpaData.genes)
0187 I = ~ismember(hpaData.genes,model.genes) | sum(hpaData.gene2Level,2) == 0;
0188 else
0189 I = [];
0190 end
0191 hpaData.genes(I) = [];
0192 if isfield(hpaData,'gene2Level')
0193 hpaData.gene2Level(I,:) = [];
0194 end
0195 if isfield(hpaData,'gene2Type')
0196 hpaData.gene2Type(I,:) = [];
0197 end
0198 if isfield(hpaData,'gene2Reliability')
0199 hpaData.gene2Reliability(I,:) = [];
0200 end
0201
0202 I = strcmpi(arrayData.tissues,tissue);
0203
0204 if any(celltype)
0205 I = I & strcmpi(arrayData.celltypes,celltype);
0206 end
0207
0208
0209
0210 J = ~ismember(arrayData.genes,model.genes) | all(isnan(arrayData.levels(:,I)),2);
0211 arrayData.genes(J) = [];
0212 arrayData.levels(J,:) = [];
0213 if isfield(arrayData,'threshold')
0214 arrayData.threshold(J) = [];
0215 end
0216
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226 tempArrayLevels = arrayData.levels;
0227 tempArrayLevels(isnan(tempArrayLevels)) = 0;
0228 if isfield(arrayData,'threshold') && ~isempty(arrayData.threshold)
0229
0230
0231 average = arrayData.threshold;
0232 else
0233 average = sum(tempArrayLevels,2)./sum(~isnan(arrayData.levels),2);
0234 end
0235 if strcmpi(multipleCellScoring,'max')
0236 current = max(tempArrayLevels(:,I),[],2);
0237 else
0238 current = sum(tempArrayLevels(:,I),2)./sum(~isnan(arrayData.levels(:,I)),2);
0239 end
0240 if ~isempty(current)
0241 aScores = 5*log(current./average);
0242 else
0243 aScores = [];
0244 end
0245 aScores(aScores > 0) = min(aScores(aScores > 0),10);
0246 aScores(aScores < 0) = max(aScores(aScores < 0),-5);
0247 aScores(isnan(aScores)) = -5;
0248
0249
0250 [I, J] = ismember(upper(hpaData.levels),upper(hpaLevelScores.names));
0251 if ~all(I)
0252 EM = 'There are expression level categories that do not match to hpaLevelScores';
0253 dispEM(EM);
0254 end
0255 [K, L, M] = find(hpaData.gene2Level);
0256 scores = hpaLevelScores.scores(J);
0257 if strcmpi(multipleCellScoring,'max')
0258 hScores = max(sparse(K,L,scores(M),numel(hpaData.genes),numel(hpaData.tissues)),[],2);
0259 else
0260 hScores = mean(sparse(K,L,scores(M),numel(hpaData.genes),numel(hpaData.tissues)),2);
0261 end
0262
0263
0264 geneScores = nan(numel(model.genes),1);
0265 hpaScores = -Inf(numel(model.genes),1);
0266 arrayScores = -Inf(numel(model.genes),1);
0267
0268 [I, J] = ismember(model.genes,arrayData.genes);
0269 geneScores(I) = aScores(J(I));
0270 arrayScores(I) = aScores(J(I));
0271
0272 [I, J] = ismember(model.genes,hpaData.genes);
0273 geneScores(I) = hScores(J(I));
0274 hpaScores(I) = hScores(J(I));
0275
0276
0277
0278 [uRules,~,rule_ind] = unique(model.grRules);
0279 uScores = nan(size(uRules));
0280
0281
0282 uRules = regexprep(uRules,' and ',' & ');
0283 uRules = regexprep(uRules,' or ',' | ');
0284
0285
0286 for i = 1:numel(uRules)
0287 if isempty(uRules{i})
0288 uScores(i) = noGeneScore;
0289 elseif contains(uRules{i},'&') && contains(uRules{i},'|')
0290 uScores(i) = scoreComplexRule(uRules{i},model.genes,geneScores,isozymeScoring,complexScoring);
0291 else
0292 uScores(i) = scoreSimpleRule(uRules{i},model.genes,geneScores,isozymeScoring,complexScoring);
0293 end
0294 end
0295
0296
0297 uScores(isnan(uScores)) = noGeneScore;
0298
0299
0300 rxnScores = uScores(rule_ind);
0301
0302 end
0303
0304
0305
0306 function rScore = scoreSimpleRule(rule,genes,gScores,isozymeScoring,complexScoring)
0307
0308
0309 if ~contains(rule,'&')
0310 scoreMethod = isozymeScoring;
0311 elseif ~contains(rule,'|')
0312 scoreMethod = complexScoring;
0313 else
0314 error('This function cannot handle complex gene rules.');
0315 end
0316 ruleGenes = unique(regexp(rule,'[^&|\(\) ]+','match'));
0317 [~,geneInd] = ismember(ruleGenes,genes);
0318 switch lower(scoreMethod)
0319 case 'min'
0320 rScore = min(gScores(geneInd),[],'omitnan');
0321 case 'max'
0322 rScore = max(gScores(geneInd),[],'omitnan');
0323 case 'median'
0324 rScore = median(gScores(geneInd),'omitnan');
0325 case 'average'
0326 rScore = mean(gScores(geneInd),'omitnan');
0327 end
0328
0329 end
0330
0331
0332
0333 function rScore = scoreComplexRule(rule,genes,gScores,isozymeScoring,complexScoring)
0334
0335
0336
0337
0338 search_phrases = {'\([^&|\(\) ]+( & [^&|\(\) ]+)+\)', '\([^&|\(\) ]+( \| [^&|\(\) ]+)+\)'};
0339
0340
0341 subsets = {};
0342 c = 1;
0343 r_orig = rule;
0344 for k = 1:100
0345 for j = 1:length(search_phrases)
0346 new_subset = regexp(rule,search_phrases{j},'match')';
0347 if ~isempty(new_subset)
0348 subsets = [subsets; new_subset];
0349 subset_nums = arrayfun(@num2str,(c:length(subsets))','UniformOutput',false);
0350 rule = regexprep(rule,search_phrases{j},strcat('#',subset_nums,'#'),'once');
0351 c = c + length(new_subset);
0352 end
0353 end
0354 if isequal(rule,r_orig)
0355 break;
0356 else
0357 r_orig = rule;
0358 end
0359 end
0360 subsets{end+1} = rule;
0361
0362
0363 for i = 1:numel(subsets)
0364 gScores = [gScores; scoreSimpleRule(subsets{i},genes,gScores,isozymeScoring,complexScoring)];
0365 genes = [genes; {strcat('#',num2str(i),'#')}];
0366 end
0367
0368
0369 rScore = gScores(end);
0370
0371 end
0372