#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright © 2018 Philip Withnall
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

import argparse
import csv
import sys


class QmExtracter:
    """
    Class implementing the svx2qm command line tool.

    This provides a way to extract question marks (QMs) from Survex files.

    Each extracted QM is a 7-tuple of:
    (survey name, survey date, QM name, grade, nearest station,
    resolution station, description)
    where the survey date and the resolution station may be None.

    The code in this class is currently tightly tied to the command line tool.
    """

    # Grades a QM line may carry; anything else is reported and skipped.
    _GRADES = ('A', 'B', 'C', 'D', 'E', 'X')

    # Column headings shared by the CSV and human-readable outputs. The
    # human-readable table omits the final (description) column and prints
    # the description on its own line instead.
    _FIELD_NAMES = ('Survey name', 'Survey date', 'QM name', 'Grade',
                    'Nearest station', 'Resolution station', 'Description')

    def __init__(self, debug):
        self.debug = debug

    def extract_qms(self, svx_files):
        """
        Extract the QMs from the given Survex files.

        Malformed QM lines are reported on stderr and skipped. A QM whose
        (survey name, QM name) pair has already been seen is reported and
        skipped as well.

        Returns the list of QM tuples ordered by grade, then survey date,
        then survey name.
        """
        qms = []
        # (survey name, QM name) pairs already accepted, for O(1) duplicate
        # detection across all the input files.
        seen = set()

        for svx_file in svx_files:
            with open(svx_file) as fd:
                self._extract_from_lines(svx_file, fd, qms, seen)

        # Order them by grade, then date, and then by survey name. A missing
        # date is sorted as an empty string, since None and str cannot be
        # compared with each other.
        qms.sort(key=lambda qm: (qm[3], qm[1] or '', qm[0]))
        return qms

    def _extract_from_lines(self, svx_file, lines, qms, seen):
        """
        Scan the lines of one Survex file, appending any QMs found to `qms`.

        `seen` is the set of (survey name, QM name) pairs accepted so far;
        it is updated in place.
        """
        survey_name_stack = []
        survey_date = None

        for line in lines:
            try:
                directive = line.lower()
                if directive.startswith('*begin'):
                    parts = line.split()
                    survey_name_stack.append(
                        parts[1] if len(parts) > 1 else '')
                    continue
                if directive.startswith('*end'):
                    # Raises IndexError on an unbalanced *end, which is
                    # reported by the handler below.
                    survey_name_stack.pop()
                    continue
                if not survey_date and directive.startswith('*date'):
                    # Only the first dated *date line in the file is used.
                    parts = line.split()
                    if len(parts) > 1:
                        survey_date = parts[1]
                    continue
                if not line.startswith(';'):
                    continue

                parsed = self._parse_qm_comment(svx_file, line,
                                                survey_name_stack)
                if parsed is None:
                    continue

                (survey_name, name, grade, nearest_station,
                 resolution_station, description) = parsed

                # Warn if this QM number has been used before, then
                # ignore it.
                key = (survey_name, name)
                if key in seen:
                    self.__print_error(svx_file, line,
                                       'QM number ‘%s’ already '
                                       'used in this file' % name)
                    continue
                seen.add(key)

                qms.append((survey_name, survey_date, name, grade,
                            nearest_station, resolution_station,
                            description))
            except (ValueError, IndexError) as e:
                self.__print_error(svx_file, line, e)

    def _parse_qm_comment(self, svx_file, line, survey_name_stack):
        """
        Parse one comment line which may hold a QM.

        Recognised forms are:
          ;[ QM1 A surveyname.3 - description of QM ]
        or
          ;QM1 A surveyname.3 - description of QM

        Returns (survey name, QM name, grade, nearest station,
        resolution station, description), or None if the line is not a QM
        or is an invalid one (in which case an error has been printed).
        """
        is_placeholder = line.startswith((';[', '; ['))

        # Drop the leading ‘;’ and the line terminator, if there is one: the
        # last line of a file may lack it.
        text = line.rstrip('\r\n')[1:]
        if is_placeholder:
            # Strip the square brackets so the fields can be parsed and the
            # placeholder can actually be diagnosed below.
            text = text.strip()[1:]
            if text.endswith(']'):
                text = text[:-1]

        fields = text.split(None, 4)
        if len(fields) != 5:
            return None

        (name, grade, nearest_station,
         resolution_station, description) = fields
        if not name.lower().startswith('qm') or len(name) <= 2:
            return None

        # Sanitise the grade.
        grade = grade.upper()
        if grade not in self._GRADES:
            self.__print_error(svx_file, line,
                               'Unknown QM grade ‘%s’' % grade)
            return None

        # Sanitise the resolution station.
        if resolution_station == '-':
            resolution_station = None

        # Sanitise the description.
        description = description.strip()

        # Warn about (and ignore) lines which are just the example template.
        if nearest_station.startswith('surveyname.'):
            self.__print_error(svx_file, line,
                               'QM line is an unmodified example line')
            return None

        # By this point we should have a survey name from a *begin line (or
        # series of them). If not, the survex file is malformed. Anonymous
        # *begin blocks contribute nothing to the name.
        survey_name = '.'.join(part for part in survey_name_stack if part)
        if not survey_name:
            self.__print_error(svx_file, line,
                               'No *begin with survey name')
            return None

        # Warn if the line was a placeholder.
        if is_placeholder:
            self.__print_error(svx_file, line,
                               'QM line contains placeholder '
                               'square brackets')
            return None

        # Warn if the nearest-station’s name doesn’t match the survey name.
        if not nearest_station.startswith(survey_name + '.'):
            self.__print_error(svx_file, line,
                               'QM nearest-station survey '
                               'name (‘%s’) doesn’t match '
                               '*begin statement in file '
                               '(‘%s’)' %
                               (nearest_station.split('.')[0],
                                survey_name))
            return None

        return (survey_name, name, grade, nearest_station,
                resolution_station, description)

    def format_qms(self, qms, format, include_resolved=False):
        """
        Print `qms` to stdout in the given format (‘csv’ or ‘human’).

        Raises ValueError for any other format name.
        """
        if format == 'csv':
            self.format_qms_csv(qms, include_resolved)
        elif format == 'human':
            self.format_qms_human(qms, include_resolved)
        else:
            # Should never be reached from the command line: argparse
            # validates the format. Raise rather than assert so the check
            # survives running under ‘python -O’.
            raise ValueError('Unknown output format ‘%s’' % format)

    def format_qms_csv(self, qms, include_resolved=False):
        """
        Write `qms` to stdout as CSV, preceded by a header row.

        Resolved QMs (those with a resolution station) are omitted unless
        `include_resolved` is true.
        """
        writer = csv.writer(sys.stdout)

        writer.writerow(self._FIELD_NAMES)
        for qm in qms:
            # Do we actually want this QM, if it’s been resolved?
            if not include_resolved and qm[5]:
                continue

            writer.writerow(qm)

    def format_qms_human(self, qms, include_resolved=False, colour=True):
        """
        Print `qms` to stdout as an aligned, human-readable table.

        Each QM takes two lines: its columns, then its description.
        Resolved QMs are omitted unless `include_resolved` is true. If
        `colour` is true, ANSI escape sequences are used for emphasis.
        """
        # The description is printed on its own line, not as a column.
        field_names = self._FIELD_NAMES[:-1]
        separator = ' '

        # Do we actually want each QM, if it’s been resolved?
        wanted = [qm for qm in qms if include_resolved or not qm[5]]

        # Work out the maximum width of each field.
        lens = [len(field) for field in field_names]
        for qm in wanted:
            for idx, field in enumerate(qm[:len(field_names)]):
                lens[idx] = max(lens[idx], len(field) if field else 0)

        # Print a header (bold if possible).
        line_format = separator.join(['{:<{}}'] * len(field_names))
        flattened = [x for pair in zip(field_names, lens) for x in pair]
        if colour:
            print('\033[1m', end='')
        print(line_format.format(*flattened))
        if colour:
            print('\033[0m', end='')

        # The rule is exactly as wide as the header line above it.
        print('─' * (sum(lens) + len(separator) * (len(lens) - 1)))

        # Adjust the width of the grade, survey and QM name fields to account
        # for the color escapes, which take up no space on screen.
        if colour:
            lens[0] += 8
            lens[2] += 8
            lens[3] += 9

        # See https://stackoverflow.com/a/33206814/2931197.
        grade_colours = {
            'A': '32',
            'B': '33',
            'C': '31',
            'D': '31',
            'E': '31',
            'X': '37',
        }

        # Print out the rows.
        for qm in wanted:
            (survey_name, survey_date, name, grade, nearest_station,
             resolution_station, description) = qm

            if colour:
                formatted_grade = '\033[{}m{}\033[0m'.format(
                    grade_colours.get(grade, '00'), grade)
                formatted_survey_name = '\033[4m{}\033[0m'.format(survey_name)
                formatted_name = '\033[4m{}\033[0m'.format(name)
            else:
                formatted_grade = grade
                formatted_survey_name = survey_name
                formatted_name = name

            # None cannot be formatted with an alignment spec, so missing
            # values are printed as empty strings.
            print(line_format.format(formatted_survey_name, lens[0],
                                     survey_date or '', lens[1],
                                     formatted_name, lens[2],
                                     formatted_grade, lens[3],
                                     nearest_station, lens[4],
                                     resolution_station or '', lens[5]))
            print(' ' + description)

        # Have we finished all the QMs?
        if not qms:
            print('No QMs found')
        elif not wanted:
            print('No unresolved QMs found (but %u resolved ones were)' %
                  len(qms))

    def __print_error(self, svx_file, line, exc):
        """Report a problem with `line` of `svx_file` on stderr."""
        sys.stderr.write('%s: %s\n %s\n' % (svx_file, exc, line))


def main():
    """
    Main entry point to svx2qm. Handles arguments.

    Usage example:
       find -name '*.svx' | xargs ./svx2qm.py --format human
    """
    parser = argparse.ArgumentParser(
        description='Extract question marks (QMs) from one or more Survex '
                    'files. The QMs must be formatted appropriately, and '
                    'currently this script only supports commented-out QMs, '
                    'as the format has not been standardised yet. The QMs can '
                    'be returned as a human-readable list or as a CSV.')
    parser.add_argument('svx_files', metavar='SVX-FILE …', nargs='+',
                        help='SVX files to extract QMs from')
    parser.add_argument('--format', choices=['csv', 'human'], default='human',
                        help='output format (default: human)')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='output debug information')
    parser.add_argument('--include-resolved', action='store_true',
                        default=False,
                        help='include resolved QMs in the output')

    args = parser.parse_args()

    extracter = QmExtracter(args.debug)
    qms = extracter.extract_qms(args.svx_files)
    extracter.format_qms(qms, args.format, args.include_resolved)


if __name__ == '__main__':
    main()