0001 function [rxnScores, geneScores, hpaScores, arrayScores]=scoreModel(model,hpaData,arrayData,tissue,celltype,noGeneScore,multipleGeneScoring,multipleCellScoring,hpaLevelScores)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059 if nargin<3
0060 arrayData=[];
0061 end
0062 if nargin<5
0063 celltype=[];
0064 else
0065 celltype=char(celltype);
0066 end
0067 if nargin<6
0068 noGeneScore=-2;
0069 end
0070 if nargin<7
0071 multipleGeneScoring='best';
0072 else
0073 multipleGeneScoring=char(multipleGeneScoring);
0074 end
0075 if nargin<8
0076 multipleCellScoring='best';
0077 else
0078 multipleCellScoring=char(multipleCellScoring);
0079 end
0080 if nargin<9
0081
0082 hpaLevelScores.names={'High' 'Medium' 'Low' 'None' 'Strong' 'Moderate' 'Weak' 'Negative' 'Not detected'};
0083 hpaLevelScores.scores=[20 15 10 -8 20 15 10 -8 -8];
0084 end
0085
0086 if isempty(hpaData) && isempty(arrayData)
0087 EM='Must supply hpaData, arrayData or both';
0088 dispEM(EM);
0089 end
0090 if ~strcmpi(multipleGeneScoring,'best') && ~strcmpi(multipleGeneScoring,'average')
0091 EM='Valid options for multipleGeneScoring are "best" or "average"';
0092 dispEM(EM);
0093 end
0094 if ~strcmpi(multipleCellScoring,'best') && ~strcmpi(multipleCellScoring,'average')
0095 EM='Valid options for multipleCellScoring are "best" or "average"';
0096 dispEM(EM);
0097 end
0098
0099
0100
0101
0102 if ~isempty(arrayData)
0103 if numel(unique(arrayData.tissues))<2
0104 if ~isfield(arrayData,'threshold') || isempty(arrayData.threshold)
0105 EM='arrayData must contain measurements for at least two celltypes/tissues since the score is calculated based on the expression level compared to the overall average';
0106 dispEM(EM);
0107 end
0108 end
0109 end
0110
0111
0112 if isfield(arrayData,'threshold') && (numel(arrayData.threshold) == 1)
0113
0114
0115 arrayData.threshold = arrayData.threshold*ones(size(arrayData.genes));
0116 end
0117
0118
0119
0120 if isempty(arrayData)
0121 arrayData.genes={};
0122 arrayData.tissues={};
0123 arrayData.celltypes={};
0124 arrayData.levels=[];
0125 arrayData.threshold=[];
0126 end
0127 if isempty(hpaData)
0128 hpaData.genes={};
0129 hpaData.tissues={};
0130 hpaData.celltypes={};
0131 hpaData.levels={};
0132 hpaData.types={};
0133 hpaData.reliabilities={};
0134 hpaData.gene2Level=[];
0135 hpaData.gene2Type=[];
0136 hpaData.gene2Reliability=[];
0137 end
0138
0139
0140 if ~ismember(upper(tissue),upper(hpaData.tissues)) && ~ismember(upper(tissue),upper(arrayData.tissues))
0141 EM='The tissue name does not match';
0142 dispEM(EM);
0143 end
0144 if any(celltype)
0145
0146 if ~isfield(hpaData,'celltypes') || ~isfield(arrayData,'celltypes')
0147 EM='Both hpaData and arrayData must contain cell type information if cell type is to be used';
0148 dispEM(EM);
0149 end
0150 if ~ismember(upper(celltype),upper(hpaData.celltypes)) && ~ismember(upper(celltype),upper(arrayData.celltypes))
0151 EM='The cell type name does not match';
0152 dispEM(EM);
0153 end
0154 end
0155
0156
0157
0158 J=~strcmpi(hpaData.tissues,tissue);
0159
0160
0161 if any(celltype)
0162 J=J | ~strcmpi(hpaData.celltypes,celltype);
0163 end
0164
0165 hpaData.tissues(J)=[];
0166 if isfield(hpaData,'celltypes')
0167 hpaData.celltypes(J)=[];
0168 end
0169 if isfield(hpaData,'gene2Level')
0170 hpaData.gene2Level(:,J)=[];
0171 end
0172 if isfield(hpaData,'gene2Type')
0173 hpaData.gene2Type(:,J)=[];
0174 end
0175 if isfield(hpaData,'gene2Reliability')
0176 hpaData.gene2Reliability(:,J)=[];
0177 end
0178
0179
0180
0181 if ~isempty(hpaData.genes)
0182 I=~ismember(hpaData.genes,model.genes) | sum(hpaData.gene2Level,2)==0;
0183 else
0184 I=[];
0185 end
0186 hpaData.genes(I)=[];
0187 if isfield(hpaData,'gene2Level')
0188 hpaData.gene2Level(I,:)=[];
0189 end
0190 if isfield(hpaData,'gene2Type')
0191 hpaData.gene2Type(I,:)=[];
0192 end
0193 if isfield(hpaData,'gene2Reliability')
0194 hpaData.gene2Reliability(I,:)=[];
0195 end
0196
0197 I=strcmpi(arrayData.tissues,tissue);
0198
0199 if any(celltype)
0200 I=I & strcmpi(arrayData.celltypes,celltype);
0201 end
0202
0203
0204
0205 J=~ismember(arrayData.genes,model.genes) | myAll(isnan(arrayData.levels(:,I)),2);
0206 arrayData.genes(J)=[];
0207 arrayData.levels(J,:)=[];
0208 if isfield(arrayData,'threshold')
0209 arrayData.threshold(J) = [];
0210 end
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221 tempArrayLevels=arrayData.levels;
0222 tempArrayLevels(isnan(tempArrayLevels))=0;
0223 if isfield(arrayData,'threshold') && ~isempty(arrayData.threshold)
0224
0225
0226
0227 average=arrayData.threshold;
0228 else
0229 average=sum(tempArrayLevels,2)./sum(~isnan(arrayData.levels),2);
0230 end
0231 if strcmpi(multipleCellScoring,'best')
0232 current=max(tempArrayLevels(:,I),[],2);
0233 else
0234 current=sum(tempArrayLevels(:,I),2)./sum(~isnan(arrayData.levels(:,I)),2);
0235 end
0236 if ~isempty(current)
0237 aScores=5*log(current./average);
0238 else
0239 aScores=[];
0240 end
0241 aScores(aScores>0)=min(aScores(aScores>0),10);
0242 aScores(aScores<0)=max(aScores(aScores<0),-5);
0243 aScores(isnan(aScores)) = -5;
0244
0245
0246 [I, J]=ismember(upper(hpaData.levels),upper(hpaLevelScores.names));
0247 if ~all(I)
0248 EM='There are expression level categories that do not match to hpaLevelScores';
0249 dispEM(EM);
0250 end
0251 [K, L, M]=find(hpaData.gene2Level);
0252 scores=hpaLevelScores.scores(J);
0253 if strcmpi(multipleCellScoring,'best')
0254 hScores=max(sparse(K,L,scores(M),numel(hpaData.genes),numel(hpaData.tissues)),[],2);
0255 else
0256 hScores=mean(sparse(K,L,scores(M),numel(hpaData.genes),numel(hpaData.tissues)),2);
0257 end
0258
0259
0260 geneScores=inf(numel(model.genes),1)*-1;
0261 hpaScores=geneScores;
0262 arrayScores=geneScores;
0263
0264 [I, J]=ismember(model.genes,hpaData.genes);
0265 hpaScores(I)=hScores(J(I));
0266 geneScores(I)=hScores(J(I));
0267 [I, J]=ismember(model.genes,arrayData.genes);
0268 arrayScores(I)=aScores(J(I));
0269 geneScores(I & myIsInf(geneScores))=aScores(J(I & myIsInf(geneScores)));
0270
0271
0272 I=ismember(model.genes,hpaData.genes) | ismember(model.genes,arrayData.genes);
0273 model.genes(~I)=[];
0274 model.rxnGeneMat(:,~I)=[];
0275
0276
0277 [hpaExist, hpaMap]=ismember(model.genes,hpaData.genes);
0278 [arrayExist, arrayMap]=ismember(model.genes,arrayData.genes);
0279
0280
0281 rxnScores=ones(numel(model.rxns),1)*noGeneScore;
0282
0283
0284 for i=1:numel(model.rxns)
0285
0286 I=find(model.rxnGeneMat(i,:));
0287 if any(I)
0288
0289 if any(hpaExist(I))
0290
0291 if strcmpi(multipleGeneScoring,'best')
0292 rxnScores(i)=max(hScores(hpaMap(I(hpaExist(I)))));
0293 else
0294 rxnScores(i)=mean(hScores(hpaMap(I(hpaExist(I)))));
0295 end
0296 else
0297
0298 if any(arrayExist(I))
0299
0300 if strcmpi(multipleGeneScoring,'best')
0301 rxnScores(i)=max(aScores(arrayMap(I(arrayExist(I)))));
0302 else
0303 rxnScores(i)=mean(aScores(arrayMap(I(arrayExist(I)))));
0304 end
0305 end
0306 end
0307 end
0308 end
0309 end
0310
0311
0312
0313 function y=myIsInf(x)
0314 y=isinf(x);
0315 if isempty(y)
0316 y=[];
0317 end
0318 end
0319 function y=myAll(x,dim)
0320 y=all(x,dim);
0321 if isempty(y)
0322 y=[];
0323 end
0324 end