# Convert the QM lists in qm.html and qmoldd.html into one CSV file, qm.csv.
#
# Input, as far as the patterns below show:
#   * qm.html     - a definition list.  Each entry is a <dt> header line
#                   (link, QM number, optional grade) followed by a <dd> line
#                   of the form "area: description [nearest station]".
#   * qmoldd.html - a table.  Each entry is a header line holding number and
#                   grade, followed by a line holding "area: description",
#                   optionally with "(completion)" and "[nearest station]".
#
# Output rows:
#   qm.html     -> number, grade, area, description, link target, station
#   qmoldd.html -> number, grade, area, description, "", station, completion
# (the second kind of row has one more column than the first).
#
# Lines that match no pattern are echoed to STDOUT so they can be inspected.
#
# NOTE(review): the previous revision of this script did not compile.  The
# file reads were written "@qm=;" with no read operator, several patterns
# contained a stray backslash where a tag pattern would be expected, and the
# header branches read $1..$5 from patterns with only two visible groups.
# That is consistent with "<...>" spans having been stripped from the script
# text.  The tag parts of the patterns below are therefore best-effort
# reconstructions - TODO confirm each one against the real HTML.

use strict;
use warnings;

my $QM_PATH  = 'qm.html';
# NOTE(review): the old error message for this file said "qmold.html" while
# the open() call used "qmoldd.html".  The opened path is kept; confirm which
# name is intended.
my $QM_OLD_PATH = 'qmoldd.html';
my $CSV_PATH    = 'qm.csv';

# <dt> header with a grade after the link.  Captures: link target, QM number,
# grade.  NOTE(review): the anchor-tag part is an assumption (href first,
# double-quoted).  The lost pattern captured two more fields from inside the
# tag; they never reached the CSV, so they are not reconstructed here.
my $DT_GRADED_RE = qr/\s*<dt[^>]*><a\s+href="([^"]*)"[^>]*>(\S*)<\/a>\s(\S*)<\/dt>/;

# <dt> header without a grade.  Captures: link target, QM number.  The link
# text must start with "C", which is not part of the captured number (the
# graded pattern above has no such prefix handling; that asymmetry was
# already present).
my $DT_UNGRADED_RE = qr/\s*<dt[^>]*><a\s+href="([^"]*)"[^>]*>C(\S*)<\/a><\/dt>/;

# <dd> detail line, with and without a trailing "[station]".
my $DD_STATION_RE = qr/\s*<dd[^>]*>(\S*)\:\s*(.*)\[(\S*)\]<\/dd>/;
my $DD_PLAIN_RE   = qr/\s*<dd[^>]*>(\S*)\:\s*(.*)<\/dd>/;

# Old-table header line.  Captures: QM number, grade.
# NOTE(review): two tags were lost after "</tr>"; <tr><td> is assumed.  The
# separator used to read "( |\ )+", two alternatives that both match a plain
# space; the second may once have been a non-breaking-space entity - confirm.
my $OLD_HEAD_RE = qr/<\/td><\/tr><tr[^>]*><td[^>]*>(\S*) +(\S+)/;

# Old-table detail line.  Captures: area, description (up to the first "(",
# "^" or "{").  NOTE(review): one tag was lost after "</td>"; <td> is assumed.
my $OLD_BODY_RE = qr/<\/td><td[^>]*>(\S*):([^\(^\{]*)/;

# Read every line of $path into a list; dies with the system error on failure.
sub read_lines {
    my ($path) = @_;
    open(my $fh, '<', $path) or die("Can not open file $path: $!");
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

# Format @fields as one CSV record: every field is quoted and embedded double
# quotes are doubled, so a quote inside a description cannot break the row.
sub csv_row {
    my (@fields) = @_;
    my @quoted = map { my $field = $_; $field =~ s/"/""/g; qq{"$field"} } @fields;
    return join(',', @quoted) . "\n";
}

my @qm    = read_lines($QM_PATH);
my @qmold = read_lines($QM_OLD_PATH);

open(my $out, '>', $CSV_PATH)
    or die("Can not open output csv file $CSV_PATH: $!");

# Pass 1: qm.html.  A C-style loop is used because a header consumes the
# following line as well.
for (my $n = 0; $n < @qm; $n++) {
    my ($descpage, $qmnum, $qmgrade);

    if ($qm[$n] =~ $DT_GRADED_RE) {
        ($descpage, $qmnum, $qmgrade) = ($1, $2, $3);
    } elsif ($qm[$n] =~ $DT_UNGRADED_RE) {
        ($descpage, $qmnum, $qmgrade) = ($1, $2, '');
    } else {
        print $qm[$n];
        next;
    }

    # The detail line follows the header.  Guard the look-ahead so a header
    # on the last line does not read past the end of the array.
    $n++;
    my $detail = $n < @qm ? $qm[$n] : '';

    my ($area, $desc, $neareststation) = ('', '', '');
    if ($detail =~ $DD_STATION_RE) {
        ($area, $desc, $neareststation) = ($1, $2, $3);
    } elsif ($detail =~ $DD_PLAIN_RE) {
        ($area, $desc) = ($1, $2);
    } else {
        # Unparsed detail: echo it, but still emit the row with what we have.
        print $detail;
    }

    # A row is written for every matched header.  The old code gated this on
    # one of the captures that could not be reconstructed being non-empty.
    print {$out} csv_row($qmnum, $qmgrade, $area, $desc, $descpage,
        $neareststation);
}

# Pass 2: qmoldd.html.
for (my $n = 0; $n < @qmold; $n++) {
    my ($qmnum, $qmgrade);

    if ($qmold[$n] =~ $OLD_HEAD_RE) {
        ($qmnum, $qmgrade) = ($1, $2);
    } else {
        # Echo the unmatched line from THIS file; the old code indexed the
        # qm.html array here and so printed an unrelated line.
        print $qmold[$n];
        next;
    }

    $n++;
    my $detail = $n < @qmold ? $qmold[$n] : '';

    my ($area, $desc, $completion, $neareststation) = ('', '', '', '');
    if ($detail =~ $OLD_BODY_RE) {
        ($area, $desc) = ($1, $2);
    } else {
        print $detail;
    }
    if ($detail =~ m/\((.*)\)/) {
        $completion = $1;
    }
    if ($detail =~ m/\[(.*)\]/) {
        $neareststation = $1;
    }

    print {$out} csv_row($qmnum, $qmgrade, $area, $desc, '',
        $neareststation, $completion);
}

# Buffered write errors only surface at close, so check it.
close($out) or die("Can not close output csv file $CSV_PATH: $!");