# Load both HTML inputs fully into memory (one array element per input line,
# line terminators retained) and open the CSV file that the loops below write
# to.  The bareword handle OUTPUT and the package arrays @qm / @qmold are
# shared with the rest of the script, so their names must not change here.
my $qm_path    = "qm.html";
# NOTE(review): on failure this script used to report "qmold.html" while the
# file it actually opens is "qmoldd.html".  The opened name is kept unchanged
# and the message now reports it; confirm which spelling is intended.
my $qmold_path = "qmoldd.html";
my $csv_path   = "qm.csv";

# Three-argument open keeps the mode separate from the file name, and $! says
# why the open failed (missing file, permissions, ...).
open(QM, '<', $qm_path) or die("Can not open file $qm_path: $!");
@qm=<QM>;
close QM;

open(QM, '<', $qmold_path) or die("Can not open file $qmold_path: $!");
@qmold=<QM>;
close QM;


# '>' creates the CSV file or truncates an existing one.
open(OUTPUT, '>', $csv_path) or die("Can not open output csv file $csv_path: $!");

# First pass: scan the lines held in @qm for <dt> header lines.  The line
# immediately after a header is treated as its <dd> detail line and is
# consumed together with it.  Each header whose "#q..." anchor fragment is
# non-empty produces one six-column row on OUTPUT:
#   link text, grade, area, description, page, nearest station
# Any line that is not recognised is echoed with a plain print so that it can
# be inspected by hand.

# <dt> header followed by a grade.
# Captures: page, anchor fragment, id attribute, link text, grade.
my $graded_dt = qr/\s*\<dt\>\<a href\=\"(\S*)#q(\S*)\" id\=\"(\S*)\"\>(\S*)\<\/a\>\s(\S*)\<\/dt\>/;

# <dt> header without a grade whose link text starts with a literal "C".
# Captures: page, anchor fragment, id attribute, link text after the "C".
my $ungraded_dt = qr/\s*\<dt\>\<a href\=\"(\S*)#q(\S*)\" id\=\"(\S*)\"\>C(\S*)\<\/a\>\<\/dt\>/;

# <dd> detail line "area: description [station]" and the same without the
# bracketed station.  The bracketed form has to be tried first because the
# plain form also matches lines that carry a station.
my $dd_with_station = qr/\s*\<dd\>(\S*)\:\s*(.*)\[(\S*)\]\<\/dd\>/;
my $dd_plain        = qr/\s*\<dd\>(\S*)\:\s*(.*)\<\/dd\>/;

for (my $i = 0; $i < scalar(@qm); $i++) {
    my $header = $qm[$i];
    my ($descpage, $anchor, $link_text, $qmgrade);

    if ($header =~ $graded_dt) {
        ($descpage, $anchor, $link_text, $qmgrade) = ($1, $2, $4, $5);
    }
    elsif ($header =~ $ungraded_dt) {
        ($descpage, $anchor, $link_text, $qmgrade) = ($1, $2, $4, "");
    }
    else {
        # Not a header line: echo it and carry on with the next line.
        print $header;
        next;
    }

    # Step onto the line after the header so the outer loop does not
    # examine it a second time.
    $i++;
    my $detail = $qm[$i];
    my ($area, $desc, $neareststation) = ("", "", "");

    if ($detail =~ $dd_with_station) {
        ($area, $desc, $neareststation) = ($1, $2, $3);
    }
    elsif ($detail =~ $dd_plain) {
        ($area, $desc) = ($1, $2);
    }
    else {
        # Detail line not understood: echo it; the row is still written
        # below with empty area/description/station columns.
        print $detail;
    }

    # Headers with an empty anchor fragment produce no row.
    next if $anchor eq "";

    my @fields = ($link_text, $qmgrade, $area, $desc, $descpage, $neareststation);
    print OUTPUT '"' . join('","', @fields) . '"' . "\n";
}


# Second pass: scan the lines held in @qmold for table-row header lines of
# the form "</td></tr><tr><td>NUMBER GRADE" (separated by spaces and/or
# "&nbsp;").  The line immediately after a header is treated as its detail
# line and is consumed together with it.  Every header produces one
# seven-column row on OUTPUT:
#   number, grade, area, description, (always empty), nearest station, completion
# Lines that are not recognised are echoed with a plain print so that they
# can be inspected by hand.
for (my $i = 0; $i < scalar(@qmold); $i++) {
    # Capture group 2 only holds the last separator; the grade is group 3.
    unless ($qmold[$i] =~ m/\<\/td\>\<\/tr\>\<tr\>\<td\>(\S*)( |\&nbsp;)+(\S+)/) {
        # Echo the unrecognised line from @qmold.  This used to print from
        # @qm, the array belonging to the first pass, which showed an
        # unrelated line (or nothing at all).
        print $qmold[$i];
        next;
    }
    my $qmnum   = $1;
    my $qmgrade = $3;

    my $area           = "";
    my $desc           = "";
    my $neareststation = "";
    my $completion     = "";

    # Step onto the detail line.  If the header was the last line of the
    # input there is no detail line; use an empty string so the matches
    # below simply fail instead of operating on an undefined value.
    $i++;
    my $detail = $i < scalar(@qmold) ? $qmold[$i] : "";

    # "area:description" -- the description runs up to the first "(", "^"
    # or "{" character, or to the end of the line if none is present.
    if ($detail =~ m/\<\/td\>\<td\>(\S*):([^\(^\{]*)/) {
        $area = $1;
        $desc = $2;
    }
    else {
        # Same fix as above: report the line that was actually examined.
        print $detail;
    }

    # Optional "(...)" completion note and "[...]" nearest station; both
    # matches are greedy, i.e. first opening to last closing delimiter.
    if ($detail =~ m/\((.*)\)/) {
        $completion = $1;
    }
    if ($detail =~ m/\[(.*)\]/) {
        $neareststation = $1;
    }

    print OUTPUT "\"$qmnum\",\"$qmgrade\",\"$area\",\"$desc\",\"\",\"$neareststation\",\"$completion\"\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked.
close OUTPUT or die("Can not close output csv file qm.csv: $!");