Home > core > compareMultipleModels.m

compareMultipleModels

PURPOSE ^

compareMultipleModels

SYNOPSIS ^

function compStruct = compareMultipleModels(models,printResults,plotResults,groupVector,funcCompare,taskFile)

DESCRIPTION ^

 compareMultipleModels
   Compares two or more condition-specific models generated from the same
   base model using high-dimensional comparisons in the reaction-space.

   models              cell array of two or more models
   printResults        true if the results should be printed on the screen
                       (optional, default false)
   plotResults         true if the results should be plotted
                       (optional, default false)
   groupVector         numeric vector or cell array for grouping similar 
                       models, i.e. by tissue (optional, default, all models
                       ungrouped)
   funcCompare         logical, should a functional comparison be run
                       (optional,default, false)
   taskFile            string containing the name of the task file to use
                       for the functional comparison (should be an .xls or 
                       .xlsx file, required for functional comparison)

   compStruct          structure that contains the comparison results
       modelIDs        cell array of model ids
       reactions       substructure containing reaction information
           matrix          binary matrix composed of reactions (rows) in
                           each model (column). This matrix is used as the
                           input for the model comparisons.
           IDs             list of the reactions contained in the reaction
                           matrix.
       subsystems      substructure containing subsystem information
           matrix          matrix with comparison of number of rxns per
                           subsystem
           ID              vector consisting of names of all subsystems
       structComp      matrix with pairwise comparisons of model structure
                       based on (1-Hamming distance) between models
       structCompMap   matrix with 3D tSNE (or MDS) mapping of model
                       structures based on Hamming distances
       funcComp        substructure containing function comparison results
           matrix          matrix with PASS / FAIL (1 / 0) values for each
                           task
           tasks           vector containing names of all tasks

 Usage: compStruct=compareMultipleModels(models,printResults,...
                       plotResults,groupVector,funcCompare,taskFile);

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function compStruct = compareMultipleModels(models,printResults,plotResults,groupVector,funcCompare,taskFile)
0002 % compareMultipleModels
0003 %   Compares two or more condition-specific models generated from the same
0004 %   base model using high-dimensional comparisons in the reaction-space.
0005 %
0006 %   models              cell array of two or more models
0007 %   printResults        true if the results should be printed on the screen
0008 %                       (optional, default false)
0009 %   plotResults         true if the results should be plotted
0010 %                       (optional, default false)
0011 %   groupVector         numeric vector or cell array for grouping similar
0012 %                       models, i.e. by tissue (optional, default, all models
0013 %                       ungrouped)
0014 %   funcCompare         logical, should a functional comparison be run
0015 %                       (optional,default, false)
0016 %   taskFile            string containing the name of the task file to use
0017 %                       for the functional comparison (should be an .xls or
0018 %                       .xlsx file, required for functional comparison)
0019 %
0020 %   compStruct          structure that contains the comparison results
0021 %       modelIDs        cell array of model ids
0022 %       reactions       substructure containing reaction information
0023 %           matrix          binary matrix composed of reactions (rows) in
0024 %                           each model (column). This matrix is used as the
0025 %                           input for the model comparisons.
0026 %           IDs             list of the reactions contained in the reaction
0027 %                           matrix.
0028 %       subsystems      substructure containing subsystem information
0029 %           matrix          matrix with comparison of number of rxns per
0030 %                           subsystem
0031 %           ID              vector consisting of names of all subsystems
0032 %       structComp      matrix with pairwise comparisons of model structure
0033 %                       based on (1-Hamming distance) between models
0034 %       structCompMap   matrix with 3D tSNE (or MDS) mapping of model
0035 %                       structures based on Hamming distances
0036 %       funcComp        substructure containing function comparison results
0037 %           matrix          matrix with PASS / FAIL (1 / 0) values for each
0038 %                           task
0039 %           tasks           vector containing names of all tasks
0040 %
0041 % Usage: compStruct=compareMultipleModels(models,printResults,...
0042 %                       plotResults,groupVector,funcCompare,taskFile);
0043 
0044 %% Stats toolbox required
0045 if ~(exist('mdscale.m','file') && exist('pdist.m','file') && exist('squareform.m','file') && exist('tsne.m','file'))
0046     error('The MATLAB Statistics and Machine Learning Toolbox is required for this function')
0047 end
0048 
0049 %% Set up input defaults
0050 if nargin < 2 || isempty(printResults)
0051     printResults=false;
0052 end
0053 if nargin < 3 || isempty(plotResults)
0054     plotResults=false;
0055 end
0056 if nargin < 4
0057     groupVector = [];
0058 elseif ~isnumeric(groupVector)
0059     % convert strings to numeric groups
0060     [groupNames,~,groupVector] = unique(groupVector);
0061 else
0062     % generate group names for vector of numbers
0063     groupNames = arrayfun(@num2str,unique(groupVector),'UniformOutput',false);
0064 end
0065 if nargin < 5 || isempty(funcCompare)
0066     funcCompare = false;
0067 end
0068 if nargin < 6
0069     taskFile = [];
0070 else
0071     taskFile=char(taskFile);
0072 end
0073 if numel(models) <= 1
0074     EM = 'Cannot compare only one model. Use printModelStats if you want a summary of a model';
0075     dispEM(EM);
0076 end
0077 if isempty(taskFile) && funcCompare
0078     EM = 'Cannot perform the functional comparison without a task file. Specify taskFile or set funcCompare to FALSE.';
0079     dispEM(EM);
0080 end
0081 
0082 %% Set up model ID structure
0083 compStruct.modelIDs = {};
0084 fprintf('\n Getting model IDs \n')    
0085 for i = 1:numel(models)
0086     if ~ischar(models{i}.id)  % to deal with non-character IDs (cells, strings, etc)
0087         compStruct.modelIDs{i,1} = models{i}.id{1};
0088     else
0089         compStruct.modelIDs{i,1} = models{i}.id;
0090     end
0091 end
0092 fprintf('*** Done \n\n')
0093 
0094 
0095 %% Flatten models' subSystems field
0096 % Convert from cell array of cells to cell array of strings
0097 % NOTE: this function currently only recognizes one subSystem per reaction;
0098 %       additional subSystems will be ignored!
0099 for i = 1:numel(models)
0100     cells = cellfun(@iscell,models{i}.subSystems);
0101     models{i}.subSystems(cells) = cellfun(@(s) s{1}, models{i}.subSystems(cells), 'UniformOutput', false);
0102 end
0103 
0104 
0105 %% Compare models structure & function based on high-dimensional methods
0106 % Compare number of reactions in each subsystem in each model using a heatmap
0107 field = 'subSystems';
0108 fprintf('\n Comparing subsystem utilization \n')
0109 if any(~cellfun(@(m) isfield(m,field),models))
0110     fprintf('\nWARNING: At least one model does not contain the field "subSystems". \n')
0111     fprintf('         Skipping subsystem comparison. \n\n')
0112 else
0113     [id,compMat] = compareModelField(models,field);
0114     compStruct.subsystems.ID = id;
0115     compStruct.subsystems.matrix = compMat;
0116     fprintf('*** Done \n\n')
0117     
0118     if printResults
0119         % This could use some cleaning up
0120         fprintf('*** Comparison of reaction subsystem populations:\n\n');
0121         
0122         nrow = min([15,numel(compStruct.subsystems.ID)]);
0123         ncol = min([10,numel(compStruct.modelIDs)]);
0124         summaryArray = [{field}, compStruct.modelIDs(1:ncol)'];
0125         summaryArray = [summaryArray; [compStruct.subsystems.ID(1:nrow), ...
0126             arrayfun(@num2str,compStruct.subsystems.matrix(1:nrow,1:ncol),'UniformOutput',false)]];
0127         
0128         charArray = [];
0129         for i = 1:size(summaryArray,2)
0130             charArray = [charArray, char(strcat(summaryArray(:,i),{'   '}))];
0131         end
0132         disp(charArray);
0133         if numel(compStruct.subsystems.ID) > 15
0134             fprintf('...\n');
0135         end
0136         fprintf('\n\n');
0137     end
0138     
0139     if plotResults==true
0140         % Plot all subsystems
0141         figure;
0142         plottingData = (compStruct.subsystems.matrix - mean(compStruct.subsystems.matrix,2))./mean(compStruct.subsystems.matrix,2);
0143         color_map = redblue(length(0:.01:2));
0144         h = genHeatMap(plottingData',compStruct.subsystems.ID,compStruct.modelIDs,'both','euclidean',color_map,[-1,1]);
0145         title('Subsystem Coverage - all subsystems','FontSize',18,'FontWeight','bold')
0146         
0147         % Plot only subsystems with deviation from mean
0148         keepSubs = (sum(plottingData~=0,2) ~= 0);
0149         if sum(keepSubs) > 1
0150             figure;
0151             h_small = genHeatMap(plottingData(keepSubs,:)',compStruct.subsystems.ID(keepSubs),...
0152                 compStruct.modelIDs,'both','euclidean',color_map,[-1,1]);
0153             title('Subsystem Coverage','FontSize',18,'FontWeight','bold')
0154             
0155             % Plot enrichment in subsystems with deviation from mean
0156             figure;
0157             color_map_bw = [1 1 1;0 0 0];
0158             h_enriched = genHeatMap(plottingData(keepSubs,:)',compStruct.subsystems.ID(keepSubs),...
0159                 compStruct.modelIDs,'both','euclidean',color_map_bw,[-10^-4,10^-4]);
0160             title('Subsystem Enrichment','FontSize',18,'FontWeight','bold')
0161         end
0162     end
0163     
0164 end
0165 
0166 % Compare overall reaction structure across all models using a heatmap
0167 field = 'rxns';
0168 fprintf('\n Comparing model reaction correlations \n')
0169 
0170 % Create binary matrix of reactions
0171 [id,binary_matrix] = compareModelField(models,field);
0172 compStruct.reactions.IDs = id;
0173 compStruct.reactions.matrix = binary_matrix;
0174 
0175 % calculate hamming similarity
0176 compStruct.structComp = 1-squareform(pdist(binary_matrix','hamming'));
0177 fprintf('*** Done \n\n')
0178 if plotResults == true
0179     color_map = [ones(100,1) linspace(1,0,100)' linspace(1,0,100)'];
0180     figure;
0181     h = genHeatMap(compStruct.structComp,compStruct.modelIDs,compStruct.modelIDs,'both','euclidean',color_map);
0182     title('Structural Similarity','FontSize',18,'FontWeight','bold')
0183 end
0184 
0185 % Compare overall reaction structure across all models using tSNE projection
0186 rng(42) % For consistency
0187 fprintf('\n Comparing model reaction structures \n')
0188 if exist('tsne') > 0
0189     proj_coords = tsne(double(binary_matrix'),'Distance','hamming','NumDimensions',3); % 3D
0190     compStruct.structCompMap = proj_coords;
0191     axis_labels = {'tSNE 1';'tSNE 2';'tSNE 3'};
0192 else % Seems odd to use mdscale if tsne is not found, as both are
0193      % distributed with stats toolbox. If tsne is not present, then also
0194      % mdscale is missing. Anyway, will leave this for now.
0195     [proj_coords,stress,disparities] = mdscale(pdist(double(binary_matrix'),'hamming'),3);
0196     compStruct.structCompMap = proj_coords;
0197     axis_labels = {'MDS 1';'MDS 2';'MDS 3'};
0198 end
0199 fprintf('*** Done \n\n')
0200 
0201 % plot structure comparison results
0202 if plotResults == true
0203     figure; hold on;
0204     if ~isempty(groupVector)
0205         color_data = groupVector;
0206         if length(groupNames) <= 7
0207             % "lines" colormap only has 7 unique colors
0208             color_palette = lines(length(groupNames));
0209         else
0210             color_palette = parula(length(groupNames));
0211         end
0212         colormap(color_palette);
0213     else
0214         color_data = 'k';
0215     end
0216     scatter3(proj_coords(:,1),proj_coords(:,2),proj_coords(:,3),35,color_data,'filled');
0217     view(135,25);  % to make it obvious that it is a 3D plot
0218     xlabel(axis_labels{1}); ylabel(axis_labels{2}); zlabel(axis_labels{3});
0219     set(gca,'FontSize',14,'LineWidth',1.25);
0220     title('Structural Comparison','FontSize',18,'FontWeight','bold')
0221     
0222     % add legend
0223     if ~isempty(groupVector)
0224         for i = 1:length(groupNames)
0225             h(i) = scatter3([],[],[],35,color_palette(i,:),'filled');
0226         end
0227         legend(h,groupNames);
0228     end
0229 end
0230 
0231 % Compare model functions by assessing their capacity to perform tasks
0232 if funcCompare == true && ~isempty(taskFile)
0233     fprintf('\n Checking model performance on specified tasks. \n')
0234     taskStructure=parseTaskList(taskFile);
0235     for i = 1:numel(models)
0236         fprintf('\n Checking model # %.0f \n',i)
0237         taskReport{i} = checkTasks(models{i},[],false,false,false,taskStructure);
0238     end    
0239     
0240     % Save results
0241     taskMatrix = zeros(length(taskReport{1}.ok),numel(taskReport));
0242         for i = 1:numel(taskReport)
0243             taskMatrix(:,i) = taskReport{i}.ok;
0244         end
0245     compStruct.funcComp.matrix = taskMatrix;
0246     compStruct.funcComp.tasks = taskReport{1}.description;
0247     fprintf('*** Done \n\n')
0248    
0249     % Plot results
0250     if plotResults == true
0251         figure;
0252         color_map_bw = [1 1 1;0 0 0];
0253         h_enriched = genHeatMap(taskMatrix,compStruct.modelIDs,...
0254             taskReport{1}.description,'both','euclidean',color_map_bw,[0,1]);
0255         title('Functional Comparison - All Tasks','FontSize',18,'FontWeight','bold')
0256         
0257         figure;
0258         color_map_bw = [1 1 1;0 0 0];
0259         h_enriched = genHeatMap(taskMatrix(intersect(find(sum(taskMatrix,2)~=numel(models)),find(sum(taskMatrix,2)~=0)),:),...
0260             compStruct.modelIDs,taskReport{1}.description(intersect(find(sum(taskMatrix,2)~=numel(models)),find(sum(taskMatrix,2)~=0))),...
0261             'both','euclidean',color_map_bw,[0,1]);
0262         title('Functional Similarity','FontSize',18,'FontWeight','bold')
0263     end
0264 end
0265 
0266 end
0267 
0268 %% Additional Functions
0269 
0270 function [id,compMat] = compareModelField(models,field)
0271     % Generates a list of unique field entries and a matrix quantifying the
0272     % number of appearances of each field entry in each model
0273     
0274     % get unique list of field entries
0275     hasfield = cellfun(@(m) isfield(m,field),models);
0276     id = cellfun(@(m) m.(field),models(hasfield),'UniformOutput',false);
0277     id = unique(vertcat(id{:}));
0278     
0279     % assemble matrix comparing frequency of each entry in each model
0280     compMat = zeros(numel(id),numel(models));
0281     for i = 1:numel(models)
0282         [~,entryIndex] = ismember(models{i}.(field),id);  % get index of each field entry in the unique id list
0283         compMat(:,i) = histcounts(entryIndex, 0.5:1:(numel(id)+0.5));  % determine the frequency at which each index appears
0284     end
0285 end
0286 
0287 
0288 function h = genHeatMap(data,colnames,rownames,clust_dim,clust_dist,col_map,col_bounds,grid_color)
0289 %genHeatMap  Generate a heatmap for a given matrix of data.
0290 %
0291 % Usage:
0292 %
0293 %   genHeatMap(data,colnames,rownames,clust_dim,clust_dist,col_map,col_bounds,grid_color);
0294 %
0295 % Inputs:
0296 %
0297 % data        Numerical matrix.
0298 %
0299 % colnames    Cell array of data column names.
0300 %
0301 % rownames    Cell array of data row names.
0302 %
0303 % clust_dim   'none' - the data will be plotted as provided (DEFAULT)
0304 %             'rows' - cluster/rearrange the rows based on distance
0305 %             'cols' - cluster/rearrange the columns based on distance
0306 %             'both' - cluster/rearrange rows and columns based on distance
0307 %
0308 % clust_dist  Distance metric to be used for clustering, ignored if
0309 %             clust_dim is 'none'. Options are the same as those for
0310 %             distance in, e.g., PDIST ('euclidean', 'hamming', etc.).
0311 %             (DEFAULT = 'euclidean')
0312 %
0313 % col_map     Colormap, provided as string (e.g., 'parula', 'hot', 'etc.')
0314 %             or an Nx3 RGB matrix of N colors.
0315 %             (DEFAULT = 'hot')
0316 %
0317 % col_bounds  A 2-element vector with min and max values, to manually set
0318 %             the bounds of the colormap.
0319 %             (DEFAULT = min/max of data).
0320 %
0321 % grid_color  Color of the grid surrounding the heatmap cells.
0322 %             (DEFAULT = 'none')
0323 %
0324 %
0325 
0326 % handle input arguments
0327 if nargin < 4 || isempty(clust_dim)
0328     clust_dim = 'none';
0329 elseif ~ismember(clust_dim,{'none','rows','cols','both'})
0330     error('%s is not a valid CLUST_DIM option. Choose "none", "rows", "cols", or "both".',clust_dim);
0331 end
0332 if nargin < 5 || isempty(clust_dist)
0333     clust_dist = 'euclidean';
0334 end
0335 if nargin < 6 || isempty(col_map)
0336     col_map = 'hot';
0337 end
0338 if nargin < 7 || isempty(col_bounds)
0339     col_bounds = [min(data(:)),max(data(:))];
0340 end
0341 if nargin < 8
0342     grid_color = 'none';
0343 end
0344 
0345 % perform hierarchical clustering to sort rows (if specified)
0346 linkage_method = 'average';
0347 if ismember(clust_dim,{'rows','both'})
0348     L = linkage(data,linkage_method,clust_dist);
0349     row_ind = optimalleaforder(L,pdist(data,clust_dist));
0350 else
0351     row_ind = 1:size(data,1);
0352 end
0353 % perform hierarchical clustering to sort columns (if specified)
0354 if ismember(clust_dim,{'cols','both'})
0355     L = linkage(data',linkage_method,clust_dist);
0356     col_ind = optimalleaforder(L,pdist(data',clust_dist));
0357 else
0358     col_ind = 1:size(data,2);
0359 end
0360 
0361 % reorder data matrix according to clustering results
0362 sortdata = data(row_ind,col_ind);
0363 sortrows = rownames(row_ind);
0364 sortcols = colnames(col_ind);
0365 
0366 % check if data is square matrix with identical row and column names
0367 if (length(colnames) == length(rownames)) && all(strcmp(colnames,rownames))
0368     % flip data so the diagonal is from upper left to lower right
0369     sortdata = fliplr(sortdata);
0370     sortcols = flipud(sortcols);
0371 end
0372 
0373 % pad data matrix with zeros (pcolor cuts off last row and column)
0374 sortdata(end+1,end+1) = 0;
0375 
0376 % generate pcolor plot
0377 a = axes;
0378 set(a,'YAxisLocation','Right','XTick',[],'YTick', (1:size(sortdata,1))+0.5,'YTickLabels',sortrows);
0379 set(a,'TickLength',[0 0],'XLim',[1 size(sortdata,2)],'YLim',[1 size(sortdata,1)]);
0380 hold on
0381 
0382 h = pcolor(sortdata);
0383 set(h,'EdgeColor',grid_color);
0384 set(gca,'XTick', (1:size(sortdata,2))+0.5);
0385 set(gca,'YTick', (1:size(sortdata,1))+0.5);
0386 set(gca,'XTickLabels',sortcols,'YTickLabels',sortrows);
0387 set(gca,'XTickLabelRotation',90);
0388 colormap(col_map);
0389 
0390 if ~isempty(col_bounds)
0391     caxis(col_bounds);
0392 end
0393 
0394 end
0395 
0396 
0397 function c = redblue(m)
0398 %REDBLUE    Shades of red and blue color map
0399 %   REDBLUE(M), is an M-by-3 matrix that defines a colormap.
0400 %   The colors begin with bright blue, range through shades of
0401 %   blue to white, and then through shades of red to bright red.
0402 %   REDBLUE, by itself, is the same length as the current figure's
0403 %   colormap. If no figure exists, MATLAB creates one.
0404 %
0405 %   For example, to reset the colormap of the current figure:
0406 %
0407 %             colormap(redblue)
0408 %
0409 %   See also HSV, GRAY, HOT, BONE, COPPER, PINK, FLAG,
0410 %   COLORMAP, RGBPLOT.
0411 if nargin < 1, m = size(get(gcf,'colormap'),1); end
0412 
0413 if (mod(m,2) == 0)
0414     % From [0 0 1] to [1 1 1], then [1 1 1] to [1 0 0];
0415     m1 = m*0.5;
0416     r = (0:m1-1)'/max(m1-1,1);
0417     g = r;
0418     r = [r; ones(m1,1)];
0419     g = [g; flipud(g)];
0420     b = flipud(r);
0421 else
0422     % From [0 0 1] to [1 1 1] to [1 0 0];
0423     m1 = floor(m*0.5);
0424     r = (0:m1-1)'/max(m1,1);
0425     g = r;
0426     r = [r; ones(m1+1,1)];
0427     g = [g; 1; flipud(g)];
0428     b = flipud(r);
0429 end
0430 
0431 c = [r g b];
0432 end

Generated by m2html © 2005