function GSS = parseScores(inputFile, predictor)
% parseScores
%   Parses a text file with per-gene compartment scores into a structure.
%
%   inputFile   path to the file to parse
%   predictor   format of the file; one of 'wolf', 'cello' or 'deeploc'
%               (case-insensitive; opt, default 'wolf')
%
%   GSS         structure with the fields
%       compartments  cell array of compartment names (column for 'wolf'
%                     and 'cello', row for 'deeploc')
%       scores        genes-by-compartments matrix where every row has
%                     been divided by its own maximum
%       genes         column cell array of gene identifiers
%
%   The file is opened exactly once and is closed when the function exits,
%   also when parsing fails with an error.
%
%   Usage: GSS = parseScores(inputFile, predictor)

if nargin<2
    predictor='wolf';
else
    predictor=char(predictor);
end

fid=fopen(inputFile,'r');

if fid<1
    EM='Could not open file';
    % dispEM is defined elsewhere; presumably it raises an error when
    % called with a single argument - TODO confirm
    dispEM(EM);
end

% Close the file on every exit path, including errors raised while parsing
closeFile=onCleanup(@() fclose(fid)); %#ok<NASGU>

if strcmpi(predictor,'wolf')
    % One entry per line; lines starting with '#' are treated as comments
    A=textscan(fid,'%s','Delimiter','\n','CommentStyle','#');

    % Discard lines that contain the word 'treating'
    I=~cellfun(@any,strfind(A{1},'treating'));

    % Split the remaining lines into space-separated tokens
    B=regexp(A{1}(I),' ','split');

    GSS.compartments={};
    GSS.scores=[];
    GSS.genes=cell(numel(B),1);

    for i=1:numel(B)
        % Remove the commas that separate the compartment/score pairs
        b=strrep(B{i},',','');
        GSS.genes{i}=b{1};

        % The tokens after the gene come as (compartment, score) pairs
        for j=2:2:numel(b)-1
            [~, J]=ismember(b(j),GSS.compartments);

            % First occurrence of this compartment: add a zero-filled
            % column for it
            if J==0
                GSS.compartments=[GSS.compartments;b(j)];
                J=numel(GSS.compartments);
                GSS.scores=[GSS.scores zeros(numel(B),1)];
            end

            GSS.scores(i,J)=str2double(b(j+1));
        end
    end
elseif strcmpi(predictor,'cello')
    % The compartments are the tab-separated names on the first line,
    % between '#Combined:' and '#Most-likely-Location'
    tline=fgetl(fid);
    tline=regexprep(tline,'^.+#Combined:\t','');
    tline=regexprep(tline,'\t#Most-likely-Location.+','');
    GSS.compartments=transpose(regexp(tline,'\t','split'));

    % Initialise so that a file without data lines gives empty fields
    % rather than missing ones
    GSS.scores=[];
    GSS.genes={};

    row=0;
    while 1
        row=row+1;
        tline=fgetl(fid);
        if ~ischar(tline)
            break;
        end
        % Keep what follows the last ':<tab>' and drop everything from the
        % first space onwards
        tline=regexprep(tline,'^.+:\t','');
        tline=regexprep(tline,' .+','');
        tline=regexp(tline,'\t','split');
        % The leading tokens are the scores; the last token is the gene
        GSS.scores(row,:)=str2double(tline(1:numel(GSS.compartments)));
        GSS.genes{row,1}=tline{1,end};
    end
elseif strcmpi(predictor,'deeploc')
    % Comma-separated file; the compartments are the header names from
    % the fourth column onwards
    tline=fgetl(fid);
    GSS.compartments=regexp(tline,',','split');
    GSS.compartments=GSS.compartments(4:end);

    % Initialise so that a file without data lines gives empty fields
    % rather than missing ones
    GSS.scores=[];
    GSS.genes={};

    row=0;
    while 1
        row=row+1;
        tline=fgetl(fid);
        if ~ischar(tline)
            break;
        end
        tline=regexp(tline,',','split');
        % First column is the gene, columns 4 and onwards are the scores
        GSS.scores(row,:)=str2double(tline(4:end));
        GSS.genes{row,1}=tline{1,1};
    end
else
    % Without this branch GSS would be left undefined and the code below
    % would fail with an unrelated error message
    EM=['Unsupported predictor "' predictor '". Use ''wolf'', ''cello'' or ''deeploc'''];
    dispEM(EM);
end

% Check for duplicate genes. When there are any, only the occurrence
% selected by unique is kept for each gene, and the genes end up in the
% sorted order returned by unique
[~, J, K]=unique(GSS.genes);

if numel(J)~=numel(K)
    EM='There are duplicate genes in the input file';
    % presumably the second argument makes dispEM warn instead of raising
    % an error - TODO confirm
    dispEM(EM,false);
    GSS.genes=GSS.genes(J);
    GSS.scores=GSS.scores(J,:);
end

% Normalize each row by its largest score.
% NOTE(review): a row whose maximum is 0 becomes NaN (0 times 1/0)
I=max(GSS.scores,[],2);
GSS.scores=bsxfun(@times, GSS.scores, 1./I);
end