Moved from location in loser repo to here

2026-02-20 03:05:19 +00:00 · 2020-04-22 19:52:56 +01:00
parent 25f9430fac
commit 56c0db8c05
1 changed files with 295 additions and 0 deletions
--- a/scripts/svx2qm.py
+++ b/scripts/svx2qm.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright © 2018 Philip Withnall
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+import argparse
+import csv
+import sys
+
+
+class QmExtracter:
+    """
+    Class implementing the svx2qm command line tool.
+
+    This provides a way to extract question marks (QMs) from Survex files.
+
+    The code in this class is currently tightly tied to the command line tool.
+    """
+    def __init__(self, debug):
+        """
+        Create an extracter.
+
+        :param debug: flag stored on the instance as ``self.debug``
+        """
+        self.debug = debug
+
+    def extract_qms(self, svx_files):
+        """
+        Extract the QMs from a collection of Survex files.
+
+        A QM is a comment line made of five whitespace-separated fields:
+            ;QM1    A    surveyname.3    -    description of QM
+        which are the QM name, its grade (A–E or X), the nearest station, the
+        resolution station (‘-’ if there is none) and a free-text description.
+        The enclosing ``*begin`` names (joined with ‘.’) give the survey name,
+        and the first ``*date`` line in each file gives the survey date.
+
+        Lines which look like QMs but are malformed (unknown grade, unmodified
+        example, placeholder brackets, wrong survey prefix, duplicate name)
+        are reported on stderr and skipped.
+
+        :param svx_files: iterable of paths of the Survex files to read
+        :return: list of ``(survey_name, survey_date, name, grade,
+                 nearest_station, resolution_station, description)`` tuples,
+                 ordered by grade, then date, then survey name;
+                 ``survey_date`` and ``resolution_station`` may be ``None``
+        """
+        qms = []
+        # (survey name, QM name) pairs accepted so far, so that duplicates can
+        # be spotted without rescanning the whole list.
+        seen = set()
+        valid_grades = ('A', 'B', 'C', 'D', 'E', 'X')
+
+        # Extract the QMs from the various Survex files.
+        for svx_file in svx_files:
+            survey_name_stack = []
+            survey_date = None
+
+            with open(svx_file) as fd:
+                for line in fd:
+                    try:
+                        command = line.lower()
+                        if command.startswith('*begin'):
+                            parts = line.split()
+                            survey_name_stack.append(
+                                parts[1] if len(parts) > 1 else '')
+                            continue
+                        if command.startswith('*end'):
+                            # An unbalanced *end raises IndexError, which is
+                            # reported by the handler below.
+                            survey_name_stack.pop()
+                            continue
+                        if not survey_date and command.startswith('*date'):
+                            parts = line.split()
+                            if len(parts) > 1:
+                                survey_date = parts[1]
+                            continue
+
+                        # Look for a line matching:
+                        # ;[ QM1    A    surveyname.3    -    description of QM ]
+                        # or
+                        # ;QM1    A    surveyname.3    -    description of QM
+                        if not line.startswith(';'):
+                            continue
+                        is_placeholder = line.startswith((';[', '; ['))
+
+                        # Drop the comment marker and surrounding whitespace.
+                        # Stripping (rather than slicing off the last
+                        # character) keeps the final character of a last line
+                        # which has no trailing newline.
+                        body = line[1:].strip()
+                        if is_placeholder:
+                            # Remove the placeholder brackets so the fields
+                            # parse normally and the line can be reported
+                            # below instead of being silently dropped.
+                            body = body[1:]
+                            if body.endswith(']'):
+                                body = body[:-1]
+
+                        fields = body.split(None, 4)
+                        if len(fields) != 5:
+                            continue
+
+                        [name, grade, nearest_station,
+                         resolution_station, description] = fields
+                        if not name.lower().startswith('qm') or len(name) <= 2:
+                            continue
+
+                        # Sanitise the grade.
+                        grade = grade.upper()
+                        if grade not in valid_grades:
+                            self.__print_error(svx_file, line,
+                                               'Unknown QM grade ‘%s’' % grade)
+                            continue
+
+                        # Sanitise the resolution station.
+                        if resolution_station == '-':
+                            resolution_station = None
+
+                        # Sanitise the description.
+                        description = description.strip()
+
+                        # Warn about (and ignore) lines which are just the
+                        # example template.
+                        if nearest_station.startswith('surveyname.'):
+                            self.__print_error(svx_file, line,
+                                               'QM line is an unmodified '
+                                               'example line')
+                            continue
+
+                        # By this point we should have a survey name from a
+                        # *begin line (or series of them). If not, the survex
+                        # file is malformed.
+                        if not survey_name_stack:
+                            self.__print_error(svx_file, line,
+                                               'No *begin with survey name')
+                            continue
+
+                        survey_name = '.'.join(survey_name_stack)
+
+                        # Warn if the line was a placeholder
+                        if is_placeholder:
+                            self.__print_error(svx_file, line,
+                                               'QM line contains placeholder '
+                                               'square brackets')
+                            continue
+
+                        # Warn if the nearest-station’s name doesn’t match the
+                        # survey name.
+                        if not nearest_station.startswith(survey_name + '.'):
+                            self.__print_error(svx_file, line,
+                                               'QM nearest-station survey '
+                                               'name (‘%s’) doesn’t match '
+                                               '*begin statement in file '
+                                               '(‘%s’)' %
+                                               (nearest_station.split('.')[0],
+                                                survey_name))
+                            continue
+
+                        # Warn if this QM number has been used before, then
+                        # ignore it.
+                        key = (survey_name, name)
+                        if key in seen:
+                            self.__print_error(svx_file, line,
+                                               'QM number ‘%s’ already '
+                                               'used in this file' % name)
+                            continue
+                        seen.add(key)
+
+                        qms.append((survey_name, survey_date, name, grade,
+                                    nearest_station, resolution_station,
+                                    description))
+                    except (ValueError, IndexError) as e:
+                        self.__print_error(svx_file, line, e)
+                        continue
+
+        # Order them by grade, then date, and then by survey name. A missing
+        # date is None, which cannot be ordered against a string, so treat it
+        # as the empty string (undated surveys sort first within a grade).
+        qms.sort(key=lambda qm: (qm[3], qm[1] or '', qm[0]))
+        return qms
+
+    def format_qms(self, qms, format, include_resolved=False):
+        """
+        Write the QMs to stdout in the requested output format.
+
+        :param qms: list of QM tuples, as returned by extract_qms()
+        :param format: either ``'csv'`` or ``'human'``
+        :param include_resolved: if true, also output QMs which have a
+                                 resolution station
+        :raises ValueError: if ``format`` is not a supported format
+        """
+        if format == 'csv':
+            self.format_qms_csv(qms, include_resolved)
+        elif format == 'human':
+            self.format_qms_human(qms, include_resolved)
+        else:
+            # Input validation should normally prevent this, but fail loudly
+            # rather than relying on an assert, which is removed under -O.
+            raise ValueError('Unknown output format ‘%s’' % format)
+
+    def format_qms_csv(self, qms, include_resolved=False):
+        """
+        Write the QMs to stdout as CSV, preceded by a header row.
+
+        :param qms: list of QM tuples, as returned by extract_qms()
+        :param include_resolved: if true, also output QMs which have a
+                                 resolution station
+        """
+        header = ['Survey name', 'Survey date',
+                  'QM name', 'Grade', 'Nearest station',
+                  'Resolution station', 'Description']
+
+        out = csv.writer(sys.stdout)
+        out.writerow(header)
+        # Index 5 is the resolution station; resolved QMs are only wanted
+        # when explicitly requested.
+        out.writerows(row for row in qms if include_resolved or not row[5])
+
+    def format_qms_human(self, qms, include_resolved=False, colour=True):
+        """
+        Print the QMs to stdout as an aligned, human-readable table.
+
+        Each QM takes two lines: a row of padded columns, followed by its
+        description indented by two spaces. If nothing is printed, a summary
+        line saying so is printed instead.
+
+        :param qms: list of QM tuples, as returned by extract_qms()
+        :param include_resolved: if true, also print QMs which have a
+                                 resolution station
+        :param colour: if true, decorate the output with ANSI escape codes
+        """
+        field_names = ['Survey name', 'Survey date', 'QM name', 'Grade',
+                       'Nearest station', 'Resolution station']
+        # ANSI colour code used for each grade.
+        # See https://stackoverflow.com/a/33206814/2931197.
+        grade_colours = {
+            'A': '32',
+            'B': '33',
+            'C': '31',
+            'D': '31',
+            'E': '31',
+            'X': '37',
+        }
+
+        # Work out the maximum width of each field.
+        lens = [len(field) for field in field_names]
+        for qm in qms:
+            # Do we actually want this QM, if it’s been resolved?
+            if not include_resolved and qm[5]:
+                continue
+
+            # The description (the last tuple element) is not a column.
+            for (idx, field) in enumerate(qm[:len(field_names)]):
+                lens[idx] = max(lens[idx], len(field) if field else 0)
+
+        # Print a header (bold if possible).
+        if colour:
+            print('\033[1m', end='')
+        line_format = '  '.join(['{:<{}}'] * len(field_names))
+        flattened = [x for t in zip(field_names, lens) for x in t]
+        print(line_format.format(*flattened))
+        if colour:
+            print('\033[0m', end='')
+
+        print('─' * (sum(lens) + 2 * (len(lens) - 1)))
+
+        # Adjust the width of the grade, survey and QM name fields to account
+        # for the color escapes.
+        if colour:
+            lens[0] += 8
+            lens[2] += 8
+            lens[3] += 9
+
+        # Print out the rows.
+        n_printed = 0
+        for qm in qms:
+            (survey_name, survey_date, name, grade, nearest_station,
+             resolution_station, description) = qm
+
+            # Do we actually want this QM, if it’s been resolved?
+            if not include_resolved and resolution_station:
+                continue
+
+            # Missing values are stored as None, which cannot be padded by
+            # str.format(); print them as empty columns instead.
+            survey_date = survey_date or ''
+            resolution_station = resolution_station or ''
+
+            if colour:
+                grade_colour = grade_colours.get(grade, '00')
+                formatted_grade = '\033[{}m{}\033[0m'.format(grade_colour,
+                                                             grade)
+                formatted_survey_name = '\033[4m{}\033[0m'.format(survey_name)
+                formatted_name = '\033[4m{}\033[0m'.format(name)
+            else:
+                formatted_grade = grade
+                formatted_survey_name = survey_name
+                formatted_name = name
+
+            print(line_format.format(formatted_survey_name, lens[0],
+                                     survey_date, lens[1],
+                                     formatted_name, lens[2],
+                                     formatted_grade, lens[3],
+                                     nearest_station, lens[4],
+                                     resolution_station, lens[5]))
+            print('  ' + description)
+            n_printed += 1
+
+        # Have we finished all the QMs?
+        if n_printed == 0 and not qms:
+            print('No QMs found')
+        elif n_printed == 0:
+            print('No unresolved QMs found (but %u resolved ones were)' %
+                  len(qms))
+
+    def __print_error(self, svx_file, line, exc):
+        """
+        Report a problem with a line of a Survex file on stderr.
+
+        :param svx_file: path of the file the line came from
+        :param line: the offending line, printed indented below the message
+        :param exc: the message (or exception) describing the problem
+        """
+        report = '%s: %s\n  %s\n' % (svx_file, exc, line)
+        sys.stderr.write(report)
+
+
+def main():
+    """
+    Command line entry point for svx2qm.
+
+    Parses the command line arguments, extracts the QMs from the listed
+    Survex files and outputs them in the requested format.
+
+    Usage example:
+       find -name '*.svx' | xargs ./svx2qm.py --format human
+    """
+    description = (
+        'Extract question marks (QMs) from one or more Survex '
+        'files. The QMs must be formatted appropriately, and '
+        'currently this script only supports commented-out QMs, '
+        'as the format has not been standardised yet. The QMs can '
+        'be returned as a human-readable list or as a CSV.'
+    )
+
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('svx_files', nargs='+', metavar='SVX-FILE …',
+                        help='SVX files to extract QMs from')
+    parser.add_argument('--format', default='human', choices=['csv', 'human'],
+                        help='output format (default: human)')
+    # store_true options already default to False.
+    parser.add_argument('--debug', action='store_true',
+                        help='output debug information')
+    parser.add_argument('--include-resolved', action='store_true',
+                        help='include resolved QMs in the output')
+    options = parser.parse_args()
+
+    extracter = QmExtracter(options.debug)
+    found = extracter.extract_qms(options.svx_files)
+    extracter.format_qms(found, options.format, options.include_resolved)
+
+
+# Only run the command line tool when this file is executed as a script, not
+# when it is imported as a module.
+if __name__ == '__main__':
+    main()