Home > io > cleanSheet.m

cleanSheet

PURPOSE ^

cleanSheet

SYNOPSIS ^

function [raw,keptRows,keptCols]=cleanSheet(raw,removeComments,removeOnlyCap,removeNoCap,removeEmptyRows)

DESCRIPTION ^

 cleanSheet
   Cleans up an Excel sheet by removing empty rows/colums (and some other
   checks)

   raw             cell array with the data in the sheet
   removeComments  true if commented lines (non-empty first cell in each
                   row) should be removed (opt, default true)
   removeOnlyCap   remove columns with captions but no other values (opt,
                   default false)
   removeNoCap     remove columns without captions (opt, default true)
   removeEmptyRows remove rows with no non-empty cells (opt, default true)
   
   raw             cleaned version
   keptRows        indexes of the kept rows in the original structure
   keptCols        indexes of the kept columns in the original structure

   Usage: [raw,keptRows,keptCols]=cleanSheet(raw,removeComments,removeOnlyCap,...
               removeNoCap,removeEmptyRows)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 % cleanSheet
0002 %   Cleans up an Excel sheet by removing empty rows/colums (and some other
0003 %   checks)
0004 %
0005 %   raw             cell array with the data in the sheet
0006 %   removeComments  true if commented lines (non-empty first cell in each
0007 %                   row) should be removed (opt, default true)
0008 %   removeOnlyCap   remove columns with captions but no other values (opt,
0009 %                   default false)
0010 %   removeNoCap     remove columns without captions (opt, default true)
0011 %   removeEmptyRows remove rows with no non-empty cells (opt, default true)
0012 %
0013 %   raw             cleaned version
0014 %   keptRows        indexes of the kept rows in the original structure
0015 %   keptCols        indexes of the kept columns in the original structure
0016 %
0017 %   Usage: [raw,keptRows,keptCols]=cleanSheet(raw,removeComments,removeOnlyCap,...
0018 %               removeNoCap,removeEmptyRows)
0019 
0020 function [raw,keptRows,keptCols]=cleanSheet(raw,removeComments,removeOnlyCap,removeNoCap,removeEmptyRows)
0021 if nargin<2
0022     removeComments=true;
0023 end
0024 if nargin<3
0025     removeOnlyCap=false;
0026 end
0027 if nargin<4
0028     removeNoCap=true;
0029 end
0030 if nargin<5
0031     removeEmptyRows=true;
0032 end
0033 
0034 keptRows=1:size(raw,1);
0035 keptRows=keptRows(:);
0036 keptCols=1:size(raw,2);
0037 keptCols=keptCols(:);
0038 
0039 %First check that it's a cell array. If a sheet is completely empty, then
0040 %raw=NaN
0041 if iscell(raw)
0042     %Clear cells which contain only white spaces or NaN. This could happen
0043     %if you accidentally inserted a space for example. I don't know how NaN
0044     %could occur after switching to Apache POI, but I clear it to be sure
0045     whites=cellfun(@wrapperWS,raw);
0046     raw(whites)={[]};
0047     
0048     %Find the rows that are not commented. This corresponds to the first
0049     %row and the ones which are empty in the first column
0050     if removeComments==true
0051         keepers=cellfun(@isempty,raw(:,1));
0052         keepers(1)=true;
0053         raw=raw(keepers,:);
0054         keptRows=keptRows(keepers);
0055     end
0056     
0057     %Remove columns that don't have string headers. If you cut and paste a
0058     %lot in the sheet there tends to be columns that are empty
0059     if removeNoCap==true
0060         I=cellfun(@isstr,raw(1,:));
0061         raw=raw(:,I);
0062         keptCols=keptCols(I);
0063     end
0064     
0065     %Remove columns which are empty except for header
0066     if removeOnlyCap==true
0067         I=~all(cellfun(@isempty,raw(2:end,:)),1);
0068         raw=raw(:,I);
0069         keptCols=keptCols(I);
0070     end
0071     
0072     %Check if there are any rows that are all empty. This could happen if
0073     %it reads too far or if the user has inserted them for layout reasons.
0074     %Remove any such rows
0075     if removeEmptyRows==true
0076         I=~all(cellfun(@isempty,raw),2);
0077         raw=raw(I,:);
0078         keptRows=keptRows(I);
0079     end
0080 else
0081     raw={[]};
0082 end
0083 
0084 %Checks if something is all white spaces or NaN
0085     function I=wrapperWS(A)
0086         if isnan(A)
0087             I=true;
0088         else
0089             %isstrprop gives an error if boolean
0090             if islogical(A)
0091                 I=false;
0092             else
0093                 %I have to make this check, since otherwise it will pick up
0094                 %on doubles for which the ASCII representation is a white
0095                 %space character
0096                 if isnumeric(A)
0097                     I=false;
0098                 else
0099                     I=all(isstrprop(A,'wspace'));
0100                 end
0101             end
0102         end
0103     end
0104 end

Generated by m2html © 2005