0001 function compStruct = compareMultipleModels(models,printResults,plotResults,groupVector,funcCompare,taskFile)
0002 
0003 
0004 
0005 
0006 
0007 
0008 
0009 
0010 
0011 
0012 
0013 
0014 
0015 
0016 
0017 
0018 
0019 
0020 
0021 
0022 
0023 
0024 
0025 
0026 
0027 
0028 
0029 
0030 
0031 
0032 
0033 
0034 
0035 
0036 
0037 
0038 
0039 
0040 
0041 
0042 
0043 
0044 
0045 if ~(exist('mdscale.m','file') && exist('pdist.m','file') && exist('squareform.m','file') && exist('tsne.m','file'))
0046     error('The MATLAB Statistics and Machine Learning Toolbox is required for this function')
0047 end
0048 
0049 
0050 if nargin < 2 || isempty(printResults)
0051     printResults=false;
0052 end
0053 if nargin < 3 || isempty(plotResults)
0054     plotResults=false;
0055 end
0056 if nargin < 4
0057     groupVector = [];
0058 elseif ~isnumeric(groupVector)
0059     
0060     [groupNames,~,groupVector] = unique(groupVector);
0061 else
0062     
0063     groupNames = arrayfun(@num2str,unique(groupVector),'UniformOutput',false);
0064 end
0065 if nargin < 5 || isempty(funcCompare)
0066     funcCompare = false;
0067 end
0068 if nargin < 6
0069     taskFile = [];
0070 else
0071     taskFile=char(taskFile);
0072 end
0073 if numel(models) <= 1
0074     EM = 'Cannot compare only one model. Use printModelStats if you want a summary of a model';
0075     dispEM(EM);
0076 end
0077 if isempty(taskFile) && funcCompare
0078     EM = 'Cannot perform the functional comparison without a task file. Specify taskFile or set funcCompare to FALSE.';
0079     dispEM(EM);
0080 end
0081 
0082 
0083 compStruct.modelIDs = {};
0084 fprintf('\n Getting model IDs \n')    
0085 for i = 1:numel(models)
0086     if ~ischar(models{i}.id)  
0087         compStruct.modelIDs{i,1} = models{i}.id{1};
0088     else
0089         compStruct.modelIDs{i,1} = models{i}.id;
0090     end
0091 end
0092 fprintf('*** Done \n\n')
0093 
0094 
0095 
0096 
0097 
0098 
0099 for i = 1:numel(models)
0100     cells = cellfun(@iscell,models{i}.subSystems);
0101     models{i}.subSystems(cells) = cellfun(@(s) s{1}, models{i}.subSystems(cells), 'UniformOutput', false);
0102 end
0103 
0104 
0105 
0106 
0107 field = 'subSystems';
0108 fprintf('\n Comparing subsystem utilization \n')
0109 if any(~cellfun(@(m) isfield(m,field),models))
0110     fprintf('\nWARNING: At least one model does not contain the field "subSystems". \n')
0111     fprintf('         Skipping subsystem comparison. \n\n')
0112 else
0113     [id,compMat] = compareModelField(models,field);
0114     compStruct.subsystems.ID = id;
0115     compStruct.subsystems.matrix = compMat;
0116     fprintf('*** Done \n\n')
0117     
0118     if printResults
0119         
0120         fprintf('*** Comparison of reaction subsystem populations:\n\n');
0121         
0122         nrow = min([15,numel(compStruct.subsystems.ID)]);
0123         ncol = min([10,numel(compStruct.modelIDs)]);
0124         summaryArray = [{field}, compStruct.modelIDs(1:ncol)'];
0125         summaryArray = [summaryArray; [compStruct.subsystems.ID(1:nrow), ...
0126             arrayfun(@num2str,compStruct.subsystems.matrix(1:nrow,1:ncol),'UniformOutput',false)]];
0127         
0128         charArray = [];
0129         for i = 1:size(summaryArray,2)
0130             charArray = [charArray, char(strcat(summaryArray(:,i),{'   '}))];
0131         end
0132         disp(charArray);
0133         if numel(compStruct.subsystems.ID) > 15
0134             fprintf('...\n');
0135         end
0136         fprintf('\n\n');
0137     end
0138     
0139     if plotResults==true
0140         
0141         figure;
0142         plottingData = (compStruct.subsystems.matrix - mean(compStruct.subsystems.matrix,2))./mean(compStruct.subsystems.matrix,2);
0143         color_map = redblue(length(0:.01:2));
0144         h = genHeatMap(plottingData',compStruct.subsystems.ID,compStruct.modelIDs,'both','euclidean',color_map,[-1,1]);
0145         title('Subsystem Coverage - all subsystems','FontSize',18,'FontWeight','bold')
0146         
0147         
0148         keepSubs = (sum(plottingData~=0,2) ~= 0);
0149         if sum(keepSubs) > 1
0150             figure;
0151             h_small = genHeatMap(plottingData(keepSubs,:)',compStruct.subsystems.ID(keepSubs),...
0152                 compStruct.modelIDs,'both','euclidean',color_map,[-1,1]);
0153             title('Subsystem Coverage','FontSize',18,'FontWeight','bold')
0154             
0155             
0156             figure;
0157             color_map_bw = [1 1 1;0 0 0];
0158             h_enriched = genHeatMap(plottingData(keepSubs,:)',compStruct.subsystems.ID(keepSubs),...
0159                 compStruct.modelIDs,'both','euclidean',color_map_bw,[-10^-4,10^-4]);
0160             title('Subsystem Enrichment','FontSize',18,'FontWeight','bold')
0161         end
0162     end
0163     
0164 end
0165 
0166 
0167 field = 'rxns';
0168 fprintf('\n Comparing model reaction correlations \n')
0169 
0170 
0171 [id,binary_matrix] = compareModelField(models,field);
0172 compStruct.reactions.IDs = id;
0173 compStruct.reactions.matrix = binary_matrix;
0174 
0175 
0176 compStruct.structComp = 1-squareform(pdist(binary_matrix','hamming'));
0177 fprintf('*** Done \n\n')
0178 if plotResults == true
0179     color_map = [ones(100,1) linspace(1,0,100)' linspace(1,0,100)'];
0180     figure;
0181     h = genHeatMap(compStruct.structComp,compStruct.modelIDs,compStruct.modelIDs,'both','euclidean',color_map);
0182     title('Structural Similarity','FontSize',18,'FontWeight','bold')
0183 end
0184 
0185 
0186 rng(42) 
0187 fprintf('\n Comparing model reaction structures \n')
0188 if exist('tsne') > 0
0189     proj_coords = tsne(double(binary_matrix'),'Distance','hamming','NumDimensions',3); 
0190     compStruct.structCompMap = proj_coords;
0191     axis_labels = {'tSNE 1';'tSNE 2';'tSNE 3'};
0192 else 
0193      
0194      
0195     [proj_coords,stress,disparities] = mdscale(pdist(double(binary_matrix'),'hamming'),3);
0196     compStruct.structCompMap = proj_coords;
0197     axis_labels = {'MDS 1';'MDS 2';'MDS 3'};
0198 end
0199 fprintf('*** Done \n\n')
0200 
0201 
0202 if plotResults == true
0203     figure; hold on;
0204     if ~isempty(groupVector)
0205         color_data = groupVector;
0206         if length(groupNames) <= 7
0207             
0208             color_palette = lines(length(groupNames));
0209         else
0210             color_palette = parula(length(groupNames));
0211         end
0212         colormap(color_palette);
0213     else
0214         color_data = 'k';
0215     end
0216     scatter3(proj_coords(:,1),proj_coords(:,2),proj_coords(:,3),35,color_data,'filled');
0217     view(135,25);  
0218     xlabel(axis_labels{1}); ylabel(axis_labels{2}); zlabel(axis_labels{3});
0219     set(gca,'FontSize',14,'LineWidth',1.25);
0220     title('Structural Comparison','FontSize',18,'FontWeight','bold')
0221     
0222     
0223     if ~isempty(groupVector)
0224         for i = 1:length(groupNames)
0225             h(i) = scatter3([],[],[],35,color_palette(i,:),'filled');
0226         end
0227         legend(h,groupNames);
0228     end
0229 end
0230 
0231 
0232 if funcCompare == true && ~isempty(taskFile)
0233     fprintf('\n Checking model performance on specified tasks. \n')
0234     taskStructure=parseTaskList(taskFile);
0235     for i = 1:numel(models)
0236         fprintf('\n Checking model # %.0f \n',i)
0237         taskReport{i} = checkTasks(models{i},[],false,false,false,taskStructure);
0238     end    
0239     
0240     
0241     taskMatrix = zeros(length(taskReport{1}.ok),numel(taskReport));
0242         for i = 1:numel(taskReport)
0243             taskMatrix(:,i) = taskReport{i}.ok;
0244         end
0245     compStruct.funcComp.matrix = taskMatrix;
0246     compStruct.funcComp.tasks = taskReport{1}.description;
0247     fprintf('*** Done \n\n')
0248    
0249     
0250     if plotResults == true
0251         figure;
0252         color_map_bw = [1 1 1;0 0 0];
0253         h_enriched = genHeatMap(taskMatrix,compStruct.modelIDs,...
0254             taskReport{1}.description,'both','euclidean',color_map_bw,[0,1]);
0255         title('Functional Comparison - All Tasks','FontSize',18,'FontWeight','bold')
0256         
0257         figure;
0258         color_map_bw = [1 1 1;0 0 0];
0259         h_enriched = genHeatMap(taskMatrix(intersect(find(sum(taskMatrix,2)~=numel(models)),find(sum(taskMatrix,2)~=0)),:),...
0260             compStruct.modelIDs,taskReport{1}.description(intersect(find(sum(taskMatrix,2)~=numel(models)),find(sum(taskMatrix,2)~=0))),...
0261             'both','euclidean',color_map_bw,[0,1]);
0262         title('Functional Similarity','FontSize',18,'FontWeight','bold')
0263     end
0264 end
0265 
0266 end
0267 
0268 
0269 
0270 function [id,compMat] = compareModelField(models,field)
0271     
0272     
0273     
0274     
0275     hasfield = cellfun(@(m) isfield(m,field),models);
0276     id = cellfun(@(m) m.(field),models(hasfield),'UniformOutput',false);
0277     id = unique(vertcat(id{:}));
0278     
0279     
0280     compMat = zeros(numel(id),numel(models));
0281     for i = 1:numel(models)
0282         [~,entryIndex] = ismember(models{i}.(field),id);  
0283         compMat(:,i) = histcounts(entryIndex, 0.5:1:(numel(id)+0.5));  
0284     end
0285 end
0286 
0287 
0288 function h = genHeatMap(data,colnames,rownames,clust_dim,clust_dist,col_map,col_bounds,grid_color)
0289 
0290 
0291 
0292 
0293 
0294 
0295 
0296 
0297 
0298 
0299 
0300 
0301 
0302 
0303 
0304 
0305 
0306 
0307 
0308 
0309 
0310 
0311 
0312 
0313 
0314 
0315 
0316 
0317 
0318 
0319 
0320 
0321 
0322 
0323 
0324 
0325 
0326 
0327 if nargin < 4 || isempty(clust_dim)
0328     clust_dim = 'none';
0329 elseif ~ismember(clust_dim,{'none','rows','cols','both'})
0330     error('%s is not a valid CLUST_DIM option. Choose "none", "rows", "cols", or "both".',clust_dim);
0331 end
0332 if nargin < 5 || isempty(clust_dist)
0333     clust_dist = 'euclidean';
0334 end
0335 if nargin < 6 || isempty(col_map)
0336     col_map = 'hot';
0337 end
0338 if nargin < 7 || isempty(col_bounds)
0339     col_bounds = [min(data(:)),max(data(:))];
0340 end
0341 if nargin < 8
0342     grid_color = 'none';
0343 end
0344 
0345 
0346 linkage_method = 'average';
0347 if ismember(clust_dim,{'rows','both'})
0348     L = linkage(data,linkage_method,clust_dist);
0349     row_ind = optimalleaforder(L,pdist(data,clust_dist));
0350 else
0351     row_ind = 1:size(data,1);
0352 end
0353 
0354 if ismember(clust_dim,{'cols','both'})
0355     L = linkage(data',linkage_method,clust_dist);
0356     col_ind = optimalleaforder(L,pdist(data',clust_dist));
0357 else
0358     col_ind = 1:size(data,2);
0359 end
0360 
0361 
0362 sortdata = data(row_ind,col_ind);
0363 sortrows = rownames(row_ind);
0364 sortcols = colnames(col_ind);
0365 
0366 
0367 if (length(colnames) == length(rownames)) && all(strcmp(colnames,rownames))
0368     
0369     sortdata = fliplr(sortdata);
0370     sortcols = flipud(sortcols);
0371 end
0372 
0373 
0374 sortdata(end+1,end+1) = 0;
0375 
0376 
0377 a = axes;
0378 set(a,'YAxisLocation','Right','XTick',[],'YTick', (1:size(sortdata,1))+0.5,'YTickLabels',sortrows);
0379 set(a,'TickLength',[0 0],'XLim',[1 size(sortdata,2)],'YLim',[1 size(sortdata,1)]);
0380 hold on
0381 
0382 h = pcolor(sortdata);
0383 set(h,'EdgeColor',grid_color);
0384 set(gca,'XTick', (1:size(sortdata,2))+0.5);
0385 set(gca,'YTick', (1:size(sortdata,1))+0.5);
0386 set(gca,'XTickLabels',sortcols,'YTickLabels',sortrows);
0387 set(gca,'XTickLabelRotation',90);
0388 colormap(col_map);
0389 
0390 if ~isempty(col_bounds)
0391     caxis(col_bounds);
0392 end
0393 
0394 end
0395 
0396 
0397 function c = redblue(m)
0398 
0399 
0400 
0401 
0402 
0403 
0404 
0405 
0406 
0407 
0408 
0409 
0410 
0411 if nargin < 1, m = size(get(gcf,'colormap'),1); end
0412 
0413 if (mod(m,2) == 0)
0414     
0415     m1 = m*0.5;
0416     r = (0:m1-1)'/max(m1-1,1);
0417     g = r;
0418     r = [r; ones(m1,1)];
0419     g = [g; flipud(g)];
0420     b = flipud(r);
0421 else
0422     
0423     m1 = floor(m*0.5);
0424     r = (0:m1-1)'/max(m1,1);
0425     g = r;
0426     r = [r; ones(m1+1,1)];
0427     g = [g; 1; flipud(g)];
0428     b = flipud(r);
0429 end
0430 
0431 c = [r g b];
0432 end