expoweb/scripts/old_or_unused/tablizebyname-csv.pl

#!/usr/bin/perl -w
use strict;
use Getopt::Long;

# sort algorithm on png entries should be improved. but then, shouldn't they always.

((my $progname = $0) =~ s/^.*(\/|\\)//ig); # basename $0

# Parse options
my $no_verbose_progress = 0;
my $usage = 0;
GetOptions('quiet' => \$no_verbose_progress, # be quiet
           'help|?' => \$usage # help!
          );

# Print usage
if ($usage) {
	usage();
}

unless ($ARGV[0]) {
	print "Specify a CSV file name as the program's argument (e.g. Surveys.csv)\n";
	usage();
}

# Read in list of names - this seems not to be used
#open (NAMES, "< $ARGV[0]");
#my @names;
#while (<CSV>) {
#	chomp;
#	push(@names,$_);
#}

open (CSV, "< $ARGV[0]");

# Start writing table file, and write table header
open TABLE, "> surtabnam.html" or die $!;
print TABLE << "END";
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN">
<!-- *** This file is auto-generated by $progname - edit surveys.csv instead -->
<html lang="en">

<head>
<title>Unfinished surveys by person</title>
</head>

<body>

<h1>Unfinished surveys by person</h1>
<FONT SIZE="-3">
<table border=1>
END

<CSV>; # starting to read in csv file, eat header line

my ($year, $surveyno, $date, $cave_surf, $caveno, $cave_name, $location, $svx_file, $instruments, $tape, $notes, $photos, $comments, $finished, $sketch_wo_CL);
my ($newsurveyno, @dir, @plans, @notesimg, @elev, @misc);
print "Progress: *";

# While loop which reads in each line of csv file

#@lines stores the lines for sorting later
my @lines;

while (<CSV>) {
	chomp;
	# Split single line into all the fields
	($year, $surveyno, $date, $cave_surf, $caveno, $cave_name, $location, $svx_file, $instruments, $tape, $notes, $photos, $comments, $finished, $sketch_wo_CL) = &parse_csv($_);

	if ($year ne "2007") { next }

	# Last field of CSV file can have weird form-feeds etc. Kill them
	$sketch_wo_CL =~ s/\r//;

	# Make survey number pure numeric. Pad numbers less than 10 with a zero
	# $surveyno =~ s/[a-zA-Z]//g;
	$newsurveyno = $surveyno;
	if ($surveyno < 10) {
		$newsurveyno = join("", "0", $surveyno);
	}

	# Get the directory contents, do a numeric sort on the files
	if (opendir(DIR, "${year}/${year}#$newsurveyno")) {
		@dir = sort numericsort grep(!/CVS/ && !/^\./, readdir(DIR));
		# HTMLize the files
		foreach ($_, @dir) {
			my $href = "<a href=\"${year}\/${year}\#$newsurveyno\/$_\">";
			$href =~ s/#/%23/g;
			$_ = join('', $href, "$_<\/a><br>");
		}
		# Split the files into their respective categories
		@plans = grep(/plan/i, @dir);
		@notesimg = grep(/notes/i, @dir);
		@elev = grep(/elev/i || /extend/i, @dir);
		@misc = grep(!/plan/i && !/notes/i && !/extend/i && !/elev/i, @dir);
		closedir(DIR);
        } else {
		# Directory for this survey does not exist
		unless ($no_verbose_progress) {
			print "\nWarning: ${year}/${year}#$newsurveyno does not exist ";
		}
		(@plans, @notesimg, @elev, @misc) = undef;
		($plans[0], $notesimg[0], $elev[0], $misc[0]) = ("<b>Survey</b>", "<b>directory</b>", "<b>does not</b>", "<b>exist</b>");
	}


	# Write line out to table
	unless ($finished =~ m\[yY]es\){
               push(@lines,"$notes<tr align=\"center\"><td>$year</td><td>$newsurveyno</td><td>$date</td><td>$cave_surf</td><td>$caveno</td><td>$cave_name</td><td>$location</td><td>$svx_file</td><td>$instruments</td><td>$tape</td><td>$notes</td><td>$photos</td><td>$comments</td><td>$finished</td><td>$sketch_wo_CL</td><td>@plans</td><td>@notesimg</td><td>@elev</td><td>@misc</td></tr>\n");
        }
	print "*";
}
my @sortedlines = sort(@lines);

#$n allows the title of the table to be displayed every 10 lines
my $n=0;
#$name allows each person to have their own section
my $name = "Initialisation";
foreach my $line (@sortedlines)
{
	if ($line =~ /(.*)(\<tr align.*)/){
	   if ($1 eq $name){
              print TABLE "$2\n";
           }
           else{
               	$name = $1;
               	$n=0;
               	print TABLE "</table>\n";
               	if ($1 eq ""){
               	print TABLE "</FONT><h1>Notes person not known</h1>\n";
                }
                else{
               	print TABLE "</FONT><h1>$1</h1>\n";
                }
                print TABLE "<FONT SIZE=\"-3\"><table border=1>\n";
                print TABLE "<tr align=\"center\"><th>Year</th><th>Survey Number</th><th>Date</th><th>Cave/Surface</th><th>Cave Number</th><th>Cave Name</th><th>Location</th><th>Survex file</th><th>Instruments</th><th>Tape</th><th>Notes</th><th>Photos</th><th>Comments</th><th>Finished</th><th>Sketches without centre lines</th><th>Plans</th><th>Notes</th><th>*elev* and *extend</th><th>Anything else</th></tr>\n";
                print TABLE "$2\n";
           }
        }
}


print TABLE << "END";
</table>
</FONT>
</body>
</html>
END

close TABLE;
print "\nInformation: Done\n";

# Parse the CSV file
sub parse_csv {
	my $line = $_[0];
	my @parsedline = ();
	my $field = '';

	while ($line =~ m{ \G(?:^|[,\t])
			(?: "((?> [^"]*) (?> "" [^"]*)*)" | ([^"\t,]*)) }gx) {
		if ($2) {
			$field = $2;
		} elsif ($1) {
			$field = $1;
			$field =~ s/""/"/g;
		} else {
			$field = '';
		}

		push(@parsedline, $field);
	}

	return(@parsedline);
}

# A singularly crap numeric sort
sub numericsort {
	my ($moda, $modb);
	$moda = $a;
	$modb = $b;
	# remove alpha characters, ".", "#" "_" and "-" to enable filenames to be purely numeric (hopefully)
	$moda =~ s/[a-zA-Z\.\\_#-]//g;
	$modb =~ s/[a-zA-Z\.\\_#-]//g;
	if ($moda and $modb) {
		$moda <=> $modb;
	} else {
		# No characters left (no numerics chars in filename. Who cares where it ends up in search order.
		0;
	}
}

# Usage
sub usage {
	print << "EOF";
USAGE: $progname [-options] <CSV file>
	-q, --quiet         Be quiet about progress
	-h, --help          Show this message
EOF
	exit(0);
}