function blastStructure=getBlastFromExcel(models,blastFile,organismId)
% getBlastFromExcel
%   Reads BLAST results from an Excel workbook into a structure array, with
%   one element per accepted worksheet.
%
%   Input:
%   models          cell array of model structures. Only the "id" field of
%                   each element is read here
%   blastFile       path to the Excel file holding the BLAST results. Each
%                   sheet is expected to have a header row where column 1
%                   is the id of the "from" organism, column 2 the id of
%                   the "to" organism, and columns 3-7 are labelled
%                   'E-value', 'Alignment length', 'Identity', 'Bitscore'
%                   and 'PPOS' (case-insensitive). Rows 2 and onwards hold
%                   the gene names (columns 1-2) and the scores
%   organismId      id of the organism of interest. A sheet is only used
%                   if this id is its "from" or its "to" organism
%
%   Output:
%   blastStructure  structure array (or [] if no sheet was accepted) with
%                   the fields
%       toId        id of the "to" organism
%       fromId      id of the "from" organism
%       fromGenes   cell array with gene names from column 1
%       toGenes     cell array with gene names from column 2
%       evalue      column vector taken from the 'E-value' column
%       aligLen     column vector taken from 'Alignment length'
%       identity    column vector taken from 'Identity'
%       bitscore    column vector taken from 'Bitscore'
%       ppos        column vector taken from 'PPOS'
%
%   Rows where either gene name is empty, or where evalue, aligLen or
%   identity is NaN, are left out of the output.
%
%   NOTE: messages are reported through dispEM, which is defined outside
%   this file. dispEM(EM) is presumably fatal and dispEM(EM,false) a
%   warning - confirm against that helper.
%
%   Usage: blastStructure=getBlastFromExcel(models,blastFile,organismId)

if ~isfile(blastFile)
    error('BLAST result file %s cannot be found',string(blastFile));
end
organismId=char(organismId);

blastStructure=[];

% Position 1 is always the organism of interest; the models follow in the
% order they were supplied
organisms=cell(numel(models)+1,1);
organisms{1}=organismId;
for i=1:numel(models)
    organisms{i+1}=models{i}.id;
end

% Get the file type and the sheet names
[fileType, sheets]=xlsfinfo(blastFile);

if ~any(regexp(fileType,'Excel Spreadsheet'))
    EM='The file is not a Microsoft Excel Spreadsheet';
    dispEM(EM);
end

% Required labels of header columns 3 to 7
scoreLabels={'E-value','Alignment length','Identity','Bitscore','PPOS'};
nScores=numel(scoreLabels);

for i=1:numel(sheets)
    % values holds the numeric cells and dataSheet the text cells
    [values,dataSheet]=xlsread(blastFile,i);

    % Check the width before indexing, so that empty or too narrow sheets
    % are reported as badly formatted instead of raising an index error
    if isempty(dataSheet) || size(dataSheet,2)<nScores+2 || ...
            ~all(strcmpi(dataSheet(1,3:nScores+2),scoreLabels))
        EM=['The data in sheet ' sheets{i} ' is not correctly formatted. Ignoring sheet'];
        dispEM(EM,false);
        continue;
    end

    % Match the organism ids in the header to the known organisms
    fromID=find(strcmpi(dataSheet{1,1},organisms));
    toID=find(strcmpi(dataSheet{1,2},organisms));

    if isempty(toID) || isempty(fromID)
        EM=['The data in sheet ' sheets{i} ' has no corresponding model. Ignoring sheet'];
        dispEM(EM,false);
        continue;
    end

    % Only keep sheets that go to or from the organism of interest
    if ~(toID==1 || fromID==1)
        EM=['The data in sheet ' sheets{i} ' does not involve the organism of interest. Ignoring sheet'];
        dispEM(EM,false);
        continue;
    end

    fromGenes=dataSheet(2:end,1);
    toGenes=dataSheet(2:end,2);
    nRows=numel(fromGenes);

    % The numeric and text outputs of xlsread need not have the same
    % number of rows/columns (outer non-numeric rows and columns are
    % trimmed from the numeric output). Copy the numbers into a NaN-padded
    % matrix with one row per gene pair so that all fields stay aligned.
    % NOTE(review): this assumes that the first numeric row belongs to the
    % first data row, as the original code did - confirm for sheets whose
    % first data rows contain no numbers at all.
    scores=NaN(nRows,nScores);
    r=min(nRows,size(values,1));
    c=min(nScores,size(values,2));
    scores(1:r,1:c)=values(1:r,1:c);

    % Report gene pairs where one or both names are missing
    emptyNames=cellfun(@isempty,fromGenes) | cellfun(@isempty,toGenes);
    if any(emptyNames)
        if all(emptyNames)
            EM=['Only empty gene names in sheet from ' organisms{fromID} ' to ' organisms{toID}];
            dispEM(EM);
        else
            EM=['Empty gene names in sheet from ' organisms{fromID} ' to ' organisms{toID} '. Ignoring genes with empty names'];
            dispEM(EM,false);
        end
    end

    % Drop the rows with empty names (as the warning above promises) and
    % the rows that lack an E-value, alignment length or identity
    keep=~(emptyNames | isnan(scores(:,1)) | isnan(scores(:,2)) | isnan(scores(:,3)));

    k=numel(blastStructure)+1;
    blastStructure(k).toId=organisms{toID};
    blastStructure(k).fromId=organisms{fromID};
    blastStructure(k).fromGenes=fromGenes(keep);
    blastStructure(k).toGenes=toGenes(keep);
    blastStructure(k).evalue=scores(keep,1);
    blastStructure(k).aligLen=scores(keep,2);
    blastStructure(k).identity=scores(keep,3);
    blastStructure(k).bitscore=scores(keep,4);
    blastStructure(k).ppos=scores(keep,5);
end

end