0001 function compStruct = compareMultipleModels(models,printResults,plotResults,groupVector,funcCompare,taskFile)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045 if ~(exist('mdscale.m','file') && exist('pdist.m','file') && exist('squareform.m','file') && exist('tsne.m','file'))
0046 error('The MATLAB Statistics and Machine Learning Toolbox is required for this function')
0047 end
0048
0049
0050 if nargin < 2 || isempty(printResults)
0051 printResults=false;
0052 end
0053 if nargin < 3 || isempty(plotResults)
0054 plotResults=false;
0055 end
0056 if nargin < 4
0057 groupVector = [];
0058 elseif ~isnumeric(groupVector)
0059
0060 [groupNames,~,groupVector] = unique(groupVector);
0061 else
0062
0063 groupNames = arrayfun(@num2str,unique(groupVector),'UniformOutput',false);
0064 end
0065 if nargin < 5 || isempty(funcCompare)
0066 funcCompare = false;
0067 end
0068 if nargin < 6
0069 taskFile = [];
0070 else
0071 taskFile=char(taskFile);
0072 end
0073 if numel(models) <= 1
0074 EM = 'Cannot compare only one model. Use printModelStats if you want a summary of a model';
0075 dispEM(EM);
0076 end
0077 if isempty(taskFile) && funcCompare
0078 EM = 'Cannot perform the functional comparison without a task file. Specify taskFile or set funcCompare to FALSE.';
0079 dispEM(EM);
0080 end
0081
0082
0083 compStruct.modelIDs = {};
0084 fprintf('\n Getting model IDs \n')
0085 for i = 1:numel(models)
0086 if ~ischar(models{i}.id)
0087 compStruct.modelIDs{i,1} = models{i}.id{1};
0088 else
0089 compStruct.modelIDs{i,1} = models{i}.id;
0090 end
0091 end
0092 fprintf('*** Done \n\n')
0093
0094
0095
0096
0097
0098
0099 for i = 1:numel(models)
0100 cells = cellfun(@iscell,models{i}.subSystems);
0101 models{i}.subSystems(cells) = cellfun(@(s) s{1}, models{i}.subSystems(cells), 'UniformOutput', false);
0102 end
0103
0104
0105
0106
0107 field = 'subSystems';
0108 fprintf('\n Comparing subsystem utilization \n')
0109 if any(~cellfun(@(m) isfield(m,field),models))
0110 fprintf('\nWARNING: At least one model does not contain the field "subSystems". \n')
0111 fprintf(' Skipping subsystem comparison. \n\n')
0112 else
0113 [id,compMat] = compareModelField(models,field);
0114 compStruct.subsystems.ID = id;
0115 compStruct.subsystems.matrix = compMat;
0116 fprintf('*** Done \n\n')
0117
0118 if printResults
0119
0120 fprintf('*** Comparison of reaction subsystem populations:\n\n');
0121
0122 nrow = min([15,numel(compStruct.subsystems.ID)]);
0123 ncol = min([10,numel(compStruct.modelIDs)]);
0124 summaryArray = [{field}, compStruct.modelIDs(1:ncol)'];
0125 summaryArray = [summaryArray; [compStruct.subsystems.ID(1:nrow), ...
0126 arrayfun(@num2str,compStruct.subsystems.matrix(1:nrow,1:ncol),'UniformOutput',false)]];
0127
0128 charArray = [];
0129 for i = 1:size(summaryArray,2)
0130 charArray = [charArray, char(strcat(summaryArray(:,i),{' '}))];
0131 end
0132 disp(charArray);
0133 if numel(compStruct.subsystems.ID) > 15
0134 fprintf('...\n');
0135 end
0136 fprintf('\n\n');
0137 end
0138
0139 if plotResults==true
0140
0141 figure;
0142 plottingData = (compStruct.subsystems.matrix - mean(compStruct.subsystems.matrix,2))./mean(compStruct.subsystems.matrix,2);
0143 color_map = redblue(length(0:.01:2));
0144 h = genHeatMap(plottingData',compStruct.subsystems.ID,compStruct.modelIDs,'both','euclidean',color_map,[-1,1]);
0145 title('Subsystem Coverage - all subsystems','FontSize',18,'FontWeight','bold')
0146
0147
0148 keepSubs = (sum(plottingData~=0,2) ~= 0);
0149 if sum(keepSubs) > 1
0150 figure;
0151 h_small = genHeatMap(plottingData(keepSubs,:)',compStruct.subsystems.ID(keepSubs),...
0152 compStruct.modelIDs,'both','euclidean',color_map,[-1,1]);
0153 title('Subsystem Coverage','FontSize',18,'FontWeight','bold')
0154
0155
0156 figure;
0157 color_map_bw = [1 1 1;0 0 0];
0158 h_enriched = genHeatMap(plottingData(keepSubs,:)',compStruct.subsystems.ID(keepSubs),...
0159 compStruct.modelIDs,'both','euclidean',color_map_bw,[-10^-4,10^-4]);
0160 title('Subsystem Enrichment','FontSize',18,'FontWeight','bold')
0161 end
0162 end
0163
0164 end
0165
0166
0167 field = 'rxns';
0168 fprintf('\n Comparing model reaction correlations \n')
0169
0170
0171 [id,binary_matrix] = compareModelField(models,field);
0172 compStruct.reactions.IDs = id;
0173 compStruct.reactions.matrix = binary_matrix;
0174
0175
0176 compStruct.structComp = 1-squareform(pdist(binary_matrix','hamming'));
0177 fprintf('*** Done \n\n')
0178 if plotResults == true
0179 color_map = [ones(100,1) linspace(1,0,100)' linspace(1,0,100)'];
0180 figure;
0181 h = genHeatMap(compStruct.structComp,compStruct.modelIDs,compStruct.modelIDs,'both','euclidean',color_map);
0182 title('Structural Similarity','FontSize',18,'FontWeight','bold')
0183 end
0184
0185
0186 rng(42)
0187 fprintf('\n Comparing model reaction structures \n')
0188 if exist('tsne') > 0
0189 proj_coords = tsne(double(binary_matrix'),'Distance','hamming','NumDimensions',3);
0190 compStruct.structCompMap = proj_coords;
0191 axis_labels = {'tSNE 1';'tSNE 2';'tSNE 3'};
0192 else
0193
0194
0195 [proj_coords,stress,disparities] = mdscale(pdist(double(binary_matrix'),'hamming'),3);
0196 compStruct.structCompMap = proj_coords;
0197 axis_labels = {'MDS 1';'MDS 2';'MDS 3'};
0198 end
0199 fprintf('*** Done \n\n')
0200
0201
0202 if plotResults == true
0203 figure; hold on;
0204 if ~isempty(groupVector)
0205 color_data = groupVector;
0206 if length(groupNames) <= 7
0207
0208 color_palette = lines(length(groupNames));
0209 else
0210 color_palette = parula(length(groupNames));
0211 end
0212 colormap(color_palette);
0213 else
0214 color_data = 'k';
0215 end
0216 scatter3(proj_coords(:,1),proj_coords(:,2),proj_coords(:,3),35,color_data,'filled');
0217 view(135,25);
0218 xlabel(axis_labels{1}); ylabel(axis_labels{2}); zlabel(axis_labels{3});
0219 set(gca,'FontSize',14,'LineWidth',1.25);
0220 title('Structural Comparison','FontSize',18,'FontWeight','bold')
0221
0222
0223 if ~isempty(groupVector)
0224 for i = 1:length(groupNames)
0225 h(i) = scatter3([],[],[],35,color_palette(i,:),'filled');
0226 end
0227 legend(h,groupNames);
0228 end
0229 end
0230
0231
0232 if funcCompare == true && ~isempty(taskFile)
0233 fprintf('\n Checking model performance on specified tasks. \n')
0234 taskStructure=parseTaskList(taskFile);
0235 for i = 1:numel(models)
0236 fprintf('\n Checking model # %.0f \n',i)
0237 taskReport{i} = checkTasks(models{i},[],false,false,false,taskStructure);
0238 end
0239
0240
0241 taskMatrix = zeros(length(taskReport{1}.ok),numel(taskReport));
0242 for i = 1:numel(taskReport)
0243 taskMatrix(:,i) = taskReport{i}.ok;
0244 end
0245 compStruct.funcComp.matrix = taskMatrix;
0246 compStruct.funcComp.tasks = taskReport{1}.description;
0247 fprintf('*** Done \n\n')
0248
0249
0250 if plotResults == true
0251 figure;
0252 color_map_bw = [1 1 1;0 0 0];
0253 h_enriched = genHeatMap(taskMatrix,compStruct.modelIDs,...
0254 taskReport{1}.description,'both','euclidean',color_map_bw,[0,1]);
0255 title('Functional Comparison - All Tasks','FontSize',18,'FontWeight','bold')
0256
0257 figure;
0258 color_map_bw = [1 1 1;0 0 0];
0259 h_enriched = genHeatMap(taskMatrix(intersect(find(sum(taskMatrix,2)~=numel(models)),find(sum(taskMatrix,2)~=0)),:),...
0260 compStruct.modelIDs,taskReport{1}.description(intersect(find(sum(taskMatrix,2)~=numel(models)),find(sum(taskMatrix,2)~=0))),...
0261 'both','euclidean',color_map_bw,[0,1]);
0262 title('Functional Similarity','FontSize',18,'FontWeight','bold')
0263 end
0264 end
0265
0266 end
0267
0268
0269
0270 function [id,compMat] = compareModelField(models,field)
0271
0272
0273
0274
0275 hasfield = cellfun(@(m) isfield(m,field),models);
0276 id = cellfun(@(m) m.(field),models(hasfield),'UniformOutput',false);
0277 id = unique(vertcat(id{:}));
0278
0279
0280 compMat = zeros(numel(id),numel(models));
0281 for i = 1:numel(models)
0282 [~,entryIndex] = ismember(models{i}.(field),id);
0283 compMat(:,i) = histcounts(entryIndex, 0.5:1:(numel(id)+0.5));
0284 end
0285 end
0286
0287
0288 function h = genHeatMap(data,colnames,rownames,clust_dim,clust_dist,col_map,col_bounds,grid_color)
0289
0290
0291
0292
0293
0294
0295
0296
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307
0308
0309
0310
0311
0312
0313
0314
0315
0316
0317
0318
0319
0320
0321
0322
0323
0324
0325
0326
0327 if nargin < 4 || isempty(clust_dim)
0328 clust_dim = 'none';
0329 elseif ~ismember(clust_dim,{'none','rows','cols','both'})
0330 error('%s is not a valid CLUST_DIM option. Choose "none", "rows", "cols", or "both".',clust_dim);
0331 end
0332 if nargin < 5 || isempty(clust_dist)
0333 clust_dist = 'euclidean';
0334 end
0335 if nargin < 6 || isempty(col_map)
0336 col_map = 'hot';
0337 end
0338 if nargin < 7 || isempty(col_bounds)
0339 col_bounds = [min(data(:)),max(data(:))];
0340 end
0341 if nargin < 8
0342 grid_color = 'none';
0343 end
0344
0345
0346 linkage_method = 'average';
0347 if ismember(clust_dim,{'rows','both'})
0348 L = linkage(data,linkage_method,clust_dist);
0349 row_ind = optimalleaforder(L,pdist(data,clust_dist));
0350 else
0351 row_ind = 1:size(data,1);
0352 end
0353
0354 if ismember(clust_dim,{'cols','both'})
0355 L = linkage(data',linkage_method,clust_dist);
0356 col_ind = optimalleaforder(L,pdist(data',clust_dist));
0357 else
0358 col_ind = 1:size(data,2);
0359 end
0360
0361
0362 sortdata = data(row_ind,col_ind);
0363 sortrows = rownames(row_ind);
0364 sortcols = colnames(col_ind);
0365
0366
0367 if (length(colnames) == length(rownames)) && all(strcmp(colnames,rownames))
0368
0369 sortdata = fliplr(sortdata);
0370 sortcols = flipud(sortcols);
0371 end
0372
0373
0374 sortdata(end+1,end+1) = 0;
0375
0376
0377 a = axes;
0378 set(a,'YAxisLocation','Right','XTick',[],'YTick', (1:size(sortdata,1))+0.5,'YTickLabels',sortrows);
0379 set(a,'TickLength',[0 0],'XLim',[1 size(sortdata,2)],'YLim',[1 size(sortdata,1)]);
0380 hold on
0381
0382 h = pcolor(sortdata);
0383 set(h,'EdgeColor',grid_color);
0384 set(gca,'XTick', (1:size(sortdata,2))+0.5);
0385 set(gca,'YTick', (1:size(sortdata,1))+0.5);
0386 set(gca,'XTickLabels',sortcols,'YTickLabels',sortrows);
0387 set(gca,'XTickLabelRotation',90);
0388 colormap(col_map);
0389
0390 if ~isempty(col_bounds)
0391 caxis(col_bounds);
0392 end
0393
0394 end
0395
0396
0397 function c = redblue(m)
0398
0399
0400
0401
0402
0403
0404
0405
0406
0407
0408
0409
0410
0411 if nargin < 1, m = size(get(gcf,'colormap'),1); end
0412
0413 if (mod(m,2) == 0)
0414
0415 m1 = m*0.5;
0416 r = (0:m1-1)'/max(m1-1,1);
0417 g = r;
0418 r = [r; ones(m1,1)];
0419 g = [g; flipud(g)];
0420 b = flipud(r);
0421 else
0422
0423 m1 = floor(m*0.5);
0424 r = (0:m1-1)'/max(m1,1);
0425 g = r;
0426 r = [r; ones(m1+1,1)];
0427 g = [g; 1; flipud(g)];
0428 b = flipud(r);
0429 end
0430
0431 c = [r g b];
0432 end