expoweb/scripts/svx2qm.py

296 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © 2018 Philip Withnall
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
import argparse
import csv
import sys
class QmExtracter:
    """
    Class implementing the svx2qm command line tool.

    This provides a way to extract question marks (QMs) from Survex files
    and to print them either as CSV or as a human-readable table.

    Each extracted QM is a 7-tuple:
        (survey name, survey date, QM name, grade, nearest station,
         resolution station, description)
    where the survey date and the resolution station may be None.

    The code in this class is currently tightly tied to the command line
    tool.
    """

    # Grades a QM line may carry; any other grade is reported and skipped.
    _VALID_GRADES = ('A', 'B', 'C', 'D', 'E', 'X')

    def __init__(self, debug):
        """Store the debug flag (nothing in this class reads it yet)."""
        self.debug = debug

    def extract_qms(self, svx_files):
        """
        Extract the QMs from the given Survex files.

        Only comment lines of the form
            ;QM1 A surveyname.3 - description of QM
        are accepted. Malformed or suspicious QM lines are reported on
        stderr and skipped.

        svx_files: iterable of paths of Survex files to read.

        Returns the list of QM tuples ordered by grade, then survey date
        (undated surveys first), then survey name.
        """
        qms = []
        # (survey name, QM name) pairs already accepted, for duplicate
        # detection without rescanning the whole list for every line.
        seen = set()

        # Extract the QMs from the various Survex files.
        for svx_file in svx_files:
            survey_name_stack = []
            survey_date = None

            with open(svx_file) as fd:
                for line in fd:
                    try:
                        lowered = line.lower()

                        if lowered.startswith('*begin'):
                            parts = line.split()
                            survey_name_stack.append(
                                parts[1] if len(parts) > 1 else '')
                            continue

                        if lowered.startswith('*end'):
                            # An unmatched *end raises IndexError, which is
                            # reported by the handler below.
                            survey_name_stack.pop()
                            continue

                        # Only the first *date in a file is used.
                        if not survey_date and lowered.startswith('*date'):
                            parts = line.split()
                            if len(parts) > 1:
                                survey_date = parts[1]
                            continue

                        # Look for a line matching:
                        #   ;[ QM1 A surveyname.3 - description of QM ]
                        # or
                        #   ;QM1 A surveyname.3 - description of QM
                        if not line.startswith(';'):
                            continue
                        is_placeholder = line.startswith((';[', '; ['))

                        # Strip only the line terminator: the last line of
                        # a file may not have one, and blindly dropping the
                        # final character would truncate its description.
                        text = line[1:].rstrip('\r\n')
                        if is_placeholder:
                            # Drop the opening bracket so the fields line up
                            # and the placeholder can actually be reported.
                            text = text.lstrip()[1:]

                        fields = text.split(None, 4)
                        if len(fields) != 5:
                            continue
                        (name, grade, nearest_station,
                         resolution_station, description) = fields

                        if not name.lower().startswith('qm') or \
                           len(name) <= 2:
                            continue

                        # Sanitise the grade.
                        grade = grade.upper()
                        if grade not in self._VALID_GRADES:
                            self.__print_error(svx_file, line,
                                               'Unknown QM grade %s' % grade)
                            continue

                        # Sanitise the resolution station.
                        if resolution_station == '-':
                            resolution_station = None

                        # Sanitise the description.
                        description = description.strip()

                        # Warn about (and ignore) lines which are just the
                        # example template.
                        if nearest_station.startswith('surveyname.'):
                            self.__print_error(svx_file, line,
                                               'QM line is an unmodified '
                                               'example line')
                            continue

                        # By this point we should have a survey name from a
                        # *begin line (or series of them). If not, the
                        # survex file is malformed.
                        if not survey_name_stack:
                            self.__print_error(svx_file, line,
                                               'No *begin with survey name')
                            continue
                        survey_name = '.'.join(survey_name_stack)

                        # Warn if the line was a placeholder.
                        if is_placeholder:
                            self.__print_error(svx_file, line,
                                               'QM line contains placeholder '
                                               'square brackets')
                            continue

                        # Warn if the nearest-station's name doesn't match
                        # the survey name.
                        if not nearest_station.startswith(survey_name + '.'):
                            self.__print_error(
                                svx_file, line,
                                'QM nearest-station survey '
                                'name (%s) doesn\'t match '
                                '*begin statement in file '
                                '(%s)' %
                                (nearest_station.split('.')[0],
                                 survey_name))
                            continue

                        # Warn if this QM number has been used before, then
                        # ignore it.
                        if (survey_name, name) in seen:
                            self.__print_error(svx_file, line,
                                               'QM number %s already '
                                               'used in this file' % name)
                            continue
                        seen.add((survey_name, name))

                        qms.append((survey_name, survey_date, name, grade,
                                    nearest_station, resolution_station,
                                    description))
                    except (ValueError, IndexError) as e:
                        self.__print_error(svx_file, line, e)
                        continue

        # Order them by grade, then date, and then by survey name. A survey
        # without a *date has a date of None, which cannot be compared with
        # a string, so it is sorted as the empty string.
        qms.sort(key=lambda qm: (qm[3], qm[1] or '', qm[0]))

        return qms

    def format_qms(self, qms, format, include_resolved=False):
        """
        Print the QMs to stdout in the requested format.

        qms: list of QM tuples as returned by extract_qms().
        format: 'csv' or 'human'.
        include_resolved: also print QMs which have a resolution station.

        Raises ValueError for any other format. Input validation should
        normally prevent that, but unlike an assert this check is not
        stripped when Python runs with -O.
        """
        if format == 'csv':
            self.format_qms_csv(qms, include_resolved)
        elif format == 'human':
            self.format_qms_human(qms, include_resolved)
        else:
            raise ValueError('Unknown output format %r' % (format,))

    def format_qms_csv(self, qms, include_resolved=False):
        """
        Write the QMs to stdout as CSV, preceded by a header row.

        Resolved QMs are left out unless include_resolved is true.
        """
        writer = csv.writer(sys.stdout)
        writer.writerow(['Survey name', 'Survey date',
                         'QM name', 'Grade', 'Nearest station',
                         'Resolution station', 'Description'])

        for qm in qms:
            # Do we actually want this QM, if it's been resolved?
            if not include_resolved and qm[5]:
                continue

            writer.writerow(qm)

    def format_qms_human(self, qms, include_resolved=False, colour=True):
        """
        Print the QMs to stdout as an aligned table.

        Each QM takes two lines: its fields in columns, then its
        description. Resolved QMs are left out unless include_resolved is
        true. If colour is true, ANSI escape sequences are used to embolden
        the header, underline the survey and QM names and colour the grade.
        """
        field_names = ['Survey name', 'Survey date', 'QM name', 'Grade',
                       'Nearest station', 'Resolution station']
        separator = ' '

        # Do we actually want each QM, if it's been resolved?
        wanted = [qm for qm in qms if include_resolved or not qm[5]]

        # Work out the maximum width of each field.
        lens = [len(field) for field in field_names]
        for qm in wanted:
            for idx, field in enumerate(qm[:len(field_names)]):
                lens[idx] = max(lens[idx], len(field) if field else 0)

        line_format = separator.join(['{:<{}}'] * len(field_names))

        # Print a header (bold if possible).
        if colour:
            print('\033[1m', end='')
        flattened = [x for pair in zip(field_names, lens) for x in pair]
        print(line_format.format(*flattened))
        if colour:
            print('\033[0m', end='')

        # Underline the header with a rule exactly as wide as the table.
        print('-' * (sum(lens) + len(separator) * (len(lens) - 1)))

        # Adjust the width of the grade, survey and QM name fields to account
        # for the colour escapes, which take up characters but no columns.
        if colour:
            lens[0] += 8
            lens[2] += 8
            lens[3] += 9

        # See https://stackoverflow.com/a/33206814/2931197.
        grade_colours = {
            'A': '32',
            'B': '33',
            'C': '31',
            'D': '31',
            'E': '31',
            'X': '37',
        }

        # Print out the rows.
        for qm in wanted:
            (survey_name, survey_date, name, grade, nearest_station,
             resolution_station, description) = qm

            # Neither of these can be formatted with a width while None.
            survey_date = survey_date or ''
            resolution_station = resolution_station or ''

            if colour:
                grade_colour = grade_colours.get(grade, '00')
                formatted_grade = '\033[{}m{}\033[0m'.format(grade_colour,
                                                             grade)
                formatted_survey_name = \
                    '\033[4m{}\033[0m'.format(survey_name)
                formatted_name = '\033[4m{}\033[0m'.format(name)
            else:
                formatted_grade = grade
                formatted_survey_name = survey_name
                formatted_name = name

            print(line_format.format(formatted_survey_name, lens[0],
                                     survey_date, lens[1],
                                     formatted_name, lens[2],
                                     formatted_grade, lens[3],
                                     nearest_station, lens[4],
                                     resolution_station, lens[5]))
            print(' ' + description)

        # Have we finished all the QMs?
        if not wanted:
            if not qms:
                print('No QMs found')
            else:
                print('No unresolved QMs found (but %u resolved ones were)' %
                      len(qms))

    def __print_error(self, svx_file, line, exc):
        """Report a problem with a line of a Survex file on stderr."""
        sys.stderr.write('%s: %s\n %s\n' % (svx_file, exc, line))
def main():
    """
    Command line entry point for svx2qm.

    Parses the command line, extracts the QMs from every Survex file named
    on it and prints them to stdout in the requested format.

    Usage example:
        find -name '*.svx' | xargs ./svx2qm.py --format human
    """
    summary = ('Extract question marks (QMs) from one or more Survex '
               'files. The QMs must be formatted appropriately, and '
               'currently this script only supports commented-out QMs, '
               'as the format has not been standardised yet. The QMs can '
               'be returned as a human-readable list or as a CSV.')

    arg_parser = argparse.ArgumentParser(description=summary)
    arg_parser.add_argument(
        'svx_files',
        metavar='SVX-FILE …',
        nargs='+',
        help='SVX files to extract QMs from',
    )
    arg_parser.add_argument(
        '--format',
        choices=['csv', 'human'],
        default='human',
        help='output format (default: human)',
    )
    arg_parser.add_argument(
        '--debug',
        action='store_true',
        default=False,
        help='output debug information',
    )
    arg_parser.add_argument(
        '--include-resolved',
        action='store_true',
        default=False,
        help='include resolved QMs in the output',
    )
    options = arg_parser.parse_args()

    # Extract first, then print: formatting needs the complete, sorted list.
    tool = QmExtracter(options.debug)
    found = tool.extract_qms(options.svx_files)
    tool.format_qms(found, options.format, options.include_resolved)
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()