Home

Perl Scripts to Extract Rows and Columns from Many Excel Files

I had to aggregate rows and columns from about 100 MS Excel spreadsheet files from different sheets in the spreadsheets, they were reports filed by a hundred different offices, all the same format. Each extract was in a tab delimited format to throw into another spreadsheet. Why? Well it is a cheap report and analysis tool, everyone knows Excel.

The code is quick, dirty and ugly to boot. But it worked, in a couple hours of scripting I saved some people many many hours of screwing with the 100 spreadsheets.

# Get rows from many Excel spreadsheets in a directory
###################################
#! /usr/local/bin/perl -w
    use strict;
    use Spreadsheet::ParseExcel::Simple;


my $excel_directory = 'Budget';
my $out_directory = 'xxxout';
opendir(EXCELDIR, $excel_directory) || die ("no excel directory");
my @excelfiles = readdir(EXCELDIR);
closedir(EXCELDIR);

chdir($excel_directory);
       my $LPHname;    # String to hold Local Public Health Name.
       my @sheetarray; # Array to hold the row.
       my $sheetcount; # Array element in the row.
       my $sheetname; # Name of the Excel spreadsheet.
       my $sheettemp;  # Temporary string to hold row for join.
       my $cellnumber;  # Cell number in the row.
       my $cellwanted;  # Cell number in the row.
       my $rowwanted;  # Row number wanted.
       my $county_namecell;  # Cell for county name.
       my $county_namerow;  # Row for county name.
foreach my $exxfilename (@excelfiles){
    if ($exxfilename =~ /^\.+.*/) { next; }
    my $xls = Spreadsheet::ParseExcel::Simple->read($exxfilename);
    foreach my $sheet ($xls->sheets) {
       $sheetname= $sheet->{sheet}->{Name}; # Sheet Name


       if ($sheetname !~ '2007 Budget') { next; }
       $sheetcount=0;
       $county_namecell=11;
       $county_namerow=1;
#       $cellwanted=4;
       $rowwanted=11;

       while ($sheet->has_data) {
            my @data = $sheet->next_row;
            $sheetcount++;
         if ($sheetcount==$county_namerow){
            $cellnumber=0;
            foreach my $ttcell (@data) {
                $cellnumber++;
                if ($cellnumber != $county_namecell ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
#                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
                 $LPHname=$ttcell;
            }
          } # Get the name of the Local Health

#     if (($sheetcount < ($rowwanted-1)) || ($sheetcount > ($rowwanted+7))){next
;}
            if ($sheetcount != $rowwanted){next;};
            $cellnumber=0;
     $sheetarray[$sheetcount]=join("\t",$sheettemp,$LPHname);
            foreach my $ttcell (@data) {
                $cellnumber++;
#                if ($cellnumber != $cellwanted ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
            }
       }
    }
    foreach my $sheetline (@sheetarray){
        print $sheetline,"\n";
    }
}
#    foreach my $sheetline (@sheetarray){
#        print $sheetline,"\n";
#    }
exit


###############################################################
# Column extract.
# Get columns from many Excel spreadsheets in a directory
###############################################################

#! /usr/local/bin/perl -w
    use strict;
    use Spreadsheet::ParseExcel::Simple;


my $excel_directory = 'TEST';
opendir(EXCELDIR, $excel_directory) || die ("no excel directory");
my @excelfiles = readdir(EXCELDIR);
closedir(EXCELDIR);

chdir($excel_directory);
       my @sheetarray; # Array to hold the row.
       my $sheetcount; # Array element in the row.
       my $sheetname; # Name of the Excel spreadsheet.
       my $sheettemp;  # Temporary string to hold row for join.
       my $cellnumber;  # cell number in the row.
       my $cellwanted;  # cell number in the row.
       my $rowwanted;  # row number wanted.
       my $county_namecell;  # cell for county name.
       my $county_namerow;  # row for county name.
foreach my $exxfilename (@excelfiles){
    if ($exxfilename =~ /^\.+.*/) { next; }
    my $xls = Spreadsheet::ParseExcel::Simple->read($exxfilename);
    foreach my $sheet ($xls->sheets) {
       $sheetname= $sheet->{sheet}->{Name};


# name the sheet to take stuff out of.
       if ($sheetname !~ '2007 Budget') { next; }
       $sheetcount=0;
$county_namecell=11;
$county_namerow=1;
       $cellwanted=2;
       $rowwanted=5;

       while ($sheet->has_data) {
            my @data = $sheet->next_row;
            $sheetcount++;
if ($sheetcount==$county_namerow){
            $cellnumber=0;
            foreach my $ttcell (@data) {
                $cellnumber++;
                if ($cellnumber != $county_namecell ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
            }
}
     if (($sheetcount < ($rowwanted)) || ($sheetcount > ($rowwanted+5))){next;}
#column boundary starting from rowwanted and getting cellwanted column.
#            if ($sheetcount != $rowwanted){next;};
            $cellnumber=0;
            foreach my $ttcell (@data) {
                $cellnumber++;
                if ($cellnumber != $cellwanted ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
            }
       }
    }
}
    foreach my $sheetline (@sheetarray){
        print $sheetline,"\n";
    }
exit


The views and opinions expressed in this page are strictly those of the page author.
The contents of this page have not been reviewed or approved by the University of Minnesota.