expoweb/1623/204/qmreader.pl

126 lines
2.7 KiB
Perl

# Convert the QM lists held in two HTML files into a single CSV file.
#
# Input 1 (qm.html): each entry is a <dt> line followed by a <dd> line.
# Input 2 (legacy table file): each entry is a row-start line followed by a
# line carrying the description cell.
#
# One CSV row is written to qm.csv per recognised entry.  Any input line that
# matches none of the expected patterns is echoed to STDOUT so it can be
# checked by hand.
use strict;
use warnings;

my $qm_file = 'qm.html';
# NOTE(review): the original diagnostic for this file said "qmold.html" while
# the script opened "qmoldd.html".  The opened name is kept here; confirm
# which spelling matches the file on disk.
my $qm_old_file = 'qmoldd.html';
my $csv_file    = 'qm.csv';

my @qm    = read_lines($qm_file);
my @qmold = read_lines($qm_old_file);

open(my $out, '>', $csv_file)
    or die("Can not open output csv file $csv_file: $!");
convert_dt_dd_list($out, \@qm);
convert_old_table($out, \@qmold);
# Buffered write errors only surface at close, so check it.
close($out) or die("Can not close output csv file $csv_file: $!");

# Read a whole file and return its lines (line endings kept).
# Dies with the file name and the OS error if the file cannot be opened.
sub read_lines {
    my ($path) = @_;
    open(my $fh, '<', $path) or die("Can not open file $path: $!");
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

# Build one CSV line from the given fields.  Every field is wrapped in double
# quotes and embedded double quotes are doubled, so a field containing HTML
# attributes does not break the row.
sub csv_row {
    my @fields = @_;
    my @quoted;
    for my $field (@fields) {
        (my $escaped = $field) =~ s/"/""/g;
        push @quoted, qq{"$escaped"};
    }
    return join(',', @quoted) . "\n";
}

# Parse a "<dd>area: description [station]</dd>" line.
# Returns (area, description, station); station is "" when the line has no
# trailing [..] part.  Returns an empty list when the line does not match.
sub parse_dd {
    my ($line) = @_;
    if ($line =~ m/\s*\<dd\>(\S*)\:\s*(.*)\[(\S*)\]\<\/dd\>/) {
        return ($1, $2, $3);
    }
    if ($line =~ m/\s*\<dd\>(\S*)\:\s*(.*)\<\/dd\>/) {
        return ($1, $2, "");
    }
    return;
}

# Process the <dt>/<dd> list.
#   $out   - filehandle the CSV rows are written to
#   $lines - array ref of input lines
# Row layout: link text, grade, area, description, description page, station.
sub convert_dt_dd_list {
    my ($out, $lines) = @_;
    for (my $n = 0; $n < scalar(@$lines); $n++) {
        my ($descpage, $qmnum1, $qmnum3, $qmgrade);
        if ($lines->[$n] =~ m/\s*\<dt\>\<a href\=\"(\S*)#q(\S*)\" id\=\"(\S*)\"\>(\S*)\<\/a\>\s(\S*)\<\/dt\>/) {
            # <dt> with text after the link; that text is stored as the grade.
            # The id attribute ($3) is matched but not used.
            ($descpage, $qmnum1, $qmnum3, $qmgrade) = ($1, $2, $4, $5);
        }
        elsif ($lines->[$n] =~ m/\s*\<dt\>\<a href\=\"(\S*)#q(\S*)\" id\=\"(\S*)\"\>C(\S*)\<\/a\>\<\/dt\>/) {
            # <dt> with nothing after the link: no grade.
            # NOTE(review): this pattern drops the leading "C" from the link
            # text whereas the pattern above keeps the full text -- confirm
            # that the difference is intended.
            ($descpage, $qmnum1, $qmnum3, $qmgrade) = ($1, $2, $4, "");
        }
        else {
            print $lines->[$n];
            next;
        }

        # The line after the <dt> is consumed as this entry's <dd>.  If the
        # <dt> was the last line there is nothing to read; use "" so the
        # parse simply fails instead of operating on undef.
        $n++;
        my $dd_line = defined $lines->[$n] ? $lines->[$n] : "";
        my ($area, $desc, $neareststation) = ("", "", "");
        if (my @parsed = parse_dd($dd_line)) {
            ($area, $desc, $neareststation) = @parsed;
        }
        else {
            print $dd_line;
        }

        # Entries whose href has nothing after "#q" are not written.
        if ($qmnum1 ne "") {
            print {$out} csv_row($qmnum3, $qmgrade, $area, $desc, $descpage, $neareststation);
        }
    }
    return;
}

# Process the legacy table.
#   $out   - filehandle the CSV rows are written to
#   $lines - array ref of input lines
# Row layout: number, grade, area, description, "" (no description page),
# station, completion.
sub convert_old_table {
    my ($out, $lines) = @_;
    for (my $n = 0; $n < scalar(@$lines); $n++) {
        # List-context match: yields the two captures, or nothing on failure.
        my ($qmnum, $qmgrade) =
            $lines->[$n] =~ m/\<\/td\>\<\/tr\>\<tr\>\<td\>(\S*)(?: |\&nbsp;)+(\S+)/;
        if (!defined $qmnum) {
            # Echo the unmatched line from THIS file (the original echoed the
            # same index of the other file's array).
            print $lines->[$n];
            next;
        }

        # The following line carries the description cell; "" if there is none.
        $n++;
        my $cell = defined $lines->[$n] ? $lines->[$n] : "";
        my ($area, $desc, $neareststation, $completion) = ("", "", "", "");
        # NOTE(review): the description runs up to the first "(", "^" or "{"
        # and may therefore include a trailing newline or "[...]" text --
        # confirm against the real input.
        if ($cell =~ m/\<\/td\>\<td\>(\S*):([^\(^\{]*)/) {
            ($area, $desc) = ($1, $2);
        }
        else {
            print $cell;
        }
        if ($cell =~ m/\((.*)\)/) {
            $completion = $1;
        }
        if ($cell =~ m/\[(.*)\]/) {
            $neareststation = $1;
        }
        print {$out} csv_row($qmnum, $qmgrade, $area, $desc, "", $neareststation, $completion);
    }
    return;
}