#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © 2018 Philip Withnall
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

import argparse
import csv
import sys


class QmExtracter:
    """
    Class implementing the svx2qm command line tool. This provides a way to
    extract question marks (QMs) from Survex files.

    The code in this class is currently tightly tied to the command line tool.

    A QM is represented as a 7-tuple:
    (survey name, survey date or None, QM name, grade, nearest station,
    resolution station or None, description).
    """

    # Grades a QM line may carry; anything else is reported and skipped.
    _VALID_GRADES = ('A', 'B', 'C', 'D', 'E', 'X')

    # ANSI SGR colour code used for each grade in the human-readable output.
    # See https://stackoverflow.com/a/33206814/2931197.
    _GRADE_COLOURS = {
        'A': '32',
        'B': '33',
        'C': '31',
        'D': '31',
        'E': '31',
        'X': '37',
    }

    # Separator placed between columns of the human-readable table. The
    # width of the rule under the header is derived from it.
    _COLUMN_GAP = ' '

    def __init__(self, debug):
        """Store the *debug* flag given on the command line."""
        self.debug = debug

    def extract_qms(self, svx_files):
        """
        Extract the QMs from the given Survex files.

        Malformed or suspicious QM lines are reported on stderr and skipped.

        :param svx_files: iterable of paths of Survex files to read
        :return: list of QM tuples ordered by grade, then survey date (QMs
                 without a date first), then survey name
        """
        qms = []
        # (survey name, QM name) pairs accepted so far, for O(1) detection
        # of re-used QM numbers.
        seen = set()

        for svx_file in svx_files:
            survey_name_stack = []
            survey_date = None

            with open(svx_file) as fd:
                for line in fd:
                    try:
                        lowered = line.lower()

                        if lowered.startswith('*begin'):
                            parts = line.split()
                            survey_name_stack.append(
                                parts[1] if len(parts) > 1 else '')
                            continue
                        if lowered.startswith('*end'):
                            # An unbalanced *end raises IndexError, which is
                            # reported by the handler below.
                            survey_name_stack.pop()
                            continue
                        # Only the first dated *date line of a file is used.
                        if not survey_date and lowered.startswith('*date'):
                            parts = line.split()
                            if len(parts) > 1:
                                survey_date = parts[1]
                            continue

                        parsed = self._parse_qm_line(svx_file, line,
                                                     survey_name_stack)
                        if parsed is None:
                            continue
                        (survey_name, name, grade, nearest_station,
                         resolution_station, description) = parsed

                        # Warn if this QM number has been used before, then
                        # ignore it.
                        if (survey_name, name) in seen:
                            self.__print_error(svx_file, line,
                                               'QM number ‘%s’ already '
                                               'used in this file' % name)
                            continue
                        seen.add((survey_name, name))

                        qms.append((survey_name, survey_date, name, grade,
                                    nearest_station, resolution_station,
                                    description))
                    except (ValueError, IndexError) as e:
                        self.__print_error(svx_file, line, e)
                        continue

        # Order them by grade, then date, and then by survey name. A missing
        # date is None, which cannot be compared with a string, so it is
        # sorted as the empty string.
        qms.sort(key=lambda qm: (qm[3], qm[1] or '', qm[0]))

        return qms

    def _parse_qm_line(self, svx_file, line, survey_name_stack):
        """
        Parse one line of a Survex file as a QM comment.

        Looks for a line matching:
            ;[ QM1 A surveyname.3 - description of QM ]
        or
            ;QM1 A surveyname.3 - description of QM

        :param svx_file: path of the file being read (for error messages)
        :param line: the raw line, possibly including its trailing newline
        :param survey_name_stack: names of the currently open *begin blocks
        :return: tuple (survey name, QM name, grade, nearest station,
                 resolution station or None, description), or None if the
                 line is not a usable QM (a warning may have been printed)
        """
        if not line.startswith(';'):
            return None
        is_placeholder = line.startswith((';[', '; ['))

        # Drop the comment marker and the newline, if there is one: the last
        # line of a file may lack it and must not lose a character.
        content = line[1:].rstrip('\n')
        if is_placeholder:
            # Remove the surrounding square brackets so that the fields
            # line up and the placeholder can be reported below.
            content = content.strip()[1:]
            if content.endswith(']'):
                content = content[:-1]

        fields = content.split(None, 4)
        if len(fields) != 5:
            return None
        (name, grade, nearest_station, resolution_station,
         description) = fields

        if not name.lower().startswith('qm') or len(name) <= 2:
            return None

        # Sanitise the grade.
        grade = grade.upper()
        if grade not in self._VALID_GRADES:
            self.__print_error(svx_file, line,
                               'Unknown QM grade ‘%s’' % grade)
            return None

        # Sanitise the resolution station.
        if resolution_station == '-':
            resolution_station = None

        # Sanitise the description.
        description = description.strip()

        # Warn about (and ignore) lines which are just the example template.
        if nearest_station.startswith('surveyname.'):
            self.__print_error(svx_file, line,
                               'QM line is an unmodified '
                               'example line')
            return None

        # By this point we should have a survey name from a *begin line (or
        # series of them). If not, the survex file is malformed.
        if not survey_name_stack:
            self.__print_error(svx_file, line,
                               'No *begin with survey name')
            return None
        survey_name = '.'.join(survey_name_stack)

        # Warn if the line was a placeholder.
        if is_placeholder:
            self.__print_error(svx_file, line,
                               'QM line contains placeholder '
                               'square brackets')
            return None

        # Warn if the nearest-station’s name doesn’t match the survey name.
        if not nearest_station.startswith(survey_name + '.'):
            self.__print_error(svx_file, line,
                               'QM nearest-station survey '
                               'name (‘%s’) doesn’t match '
                               '*begin statement in file '
                               '(‘%s’)' %
                               (nearest_station.split('.')[0], survey_name))
            return None

        return (survey_name, name, grade, nearest_station,
                resolution_station, description)

    def format_qms(self, qms, format, include_resolved=False):
        """
        Print *qms* on stdout in the requested format.

        :param qms: list of QM tuples as returned by extract_qms()
        :param format: either 'csv' or 'human'
        :param include_resolved: whether to print QMs which have a
                                 resolution station
        :raises ValueError: if *format* is not a supported format
        """
        if format == 'csv':
            self.format_qms_csv(qms, include_resolved)
        elif format == 'human':
            self.format_qms_human(qms, include_resolved)
        else:
            # Should never be reached from the command line, as argparse
            # validates the format; raise rather than assert so the check
            # survives running under python -O.
            raise ValueError('Unknown output format ‘%s’' % format)

    def format_qms_csv(self, qms, include_resolved=False):
        """
        Print *qms* on stdout as CSV, preceded by a header row.

        :param qms: list of QM tuples as returned by extract_qms()
        :param include_resolved: whether to print QMs which have a
                                 resolution station
        """
        writer = csv.writer(sys.stdout)
        writer.writerow(['Survey name', 'Survey date', 'QM name', 'Grade',
                         'Nearest station', 'Resolution station',
                         'Description'])

        for qm in qms:
            # Do we actually want this QM, if it’s been resolved?
            if not include_resolved and qm[5]:
                continue

            writer.writerow(qm)

    def format_qms_human(self, qms, include_resolved=False, colour=True):
        """
        Print *qms* on stdout as an aligned table, with each description on
        its own line below the row it belongs to.

        :param qms: list of QM tuples as returned by extract_qms()
        :param include_resolved: whether to print QMs which have a
                                 resolution station
        :param colour: whether to emit ANSI escape sequences for bold,
                       underline and grade colours
        """
        # Work out the maximum width of each field.
        field_names = ['Survey name', 'Survey date', 'QM name', 'Grade',
                       'Nearest station', 'Resolution station']
        lens = [len(field) for field in field_names]

        for qm in qms:
            # Do we actually want this QM, if it’s been resolved?
            if not include_resolved and qm[5]:
                continue

            # The description is not a column, so only the leading fields
            # take part in the width calculation.
            for idx, field in enumerate(qm[:len(field_names)]):
                lens[idx] = max(lens[idx], len(field) if field else 0)

        # Print a header (bold if possible).
        if colour:
            print('\033[1m', end='')

        gap = self._COLUMN_GAP
        line_format = gap.join(['{:<{}}'] * len(field_names))
        flattened = [x for t in zip(field_names, lens) for x in t]
        print(line_format.format(*flattened))

        if colour:
            print('\033[0m', end='')

        # The rule is exactly as wide as the header above it.
        print('─' * (sum(lens) + len(gap) * (len(lens) - 1)))

        # Adjust the width of the grade, survey and QM name fields to account
        # for the color escapes.
        if colour:
            lens[0] += 8
            lens[2] += 8
            lens[3] += 9

        # Print out the rows.
        n_printed = 0

        for qm in qms:
            (survey_name, survey_date, name, grade, nearest_station,
             resolution_station, description) = qm

            # Do we actually want this QM, if it’s been resolved?
            if not include_resolved and resolution_station:
                continue

            # None does not accept an alignment format spec, so missing
            # values are shown as empty cells.
            survey_date = survey_date or ''
            resolution_station = resolution_station or ''

            if colour:
                grade_colour = self._GRADE_COLOURS.get(grade, '00')
                formatted_grade = '\033[{}m{}\033[0m'.format(grade_colour,
                                                             grade)
                formatted_survey_name = '\033[4m{}\033[0m'.format(survey_name)
                formatted_name = '\033[4m{}\033[0m'.format(name)
            else:
                formatted_grade = grade
                formatted_survey_name = survey_name
                formatted_name = name

            print(line_format.format(formatted_survey_name, lens[0],
                                     survey_date, lens[1],
                                     formatted_name, lens[2],
                                     formatted_grade, lens[3],
                                     nearest_station, lens[4],
                                     resolution_station, lens[5]))
            print(' ' + description)
            n_printed += 1

        # Have we finished all the QMs?
        if n_printed == 0 and not qms:
            print('No QMs found')
        elif n_printed == 0:
            print('No unresolved QMs found (but %u resolved ones were)' %
                  len(qms))

    def __print_error(self, svx_file, line, exc):
        """Report a problem with *line* of *svx_file* on stderr."""
        sys.stderr.write('%s: %s\n %s\n' % (svx_file, exc, line))


def main():
    """
    Main entry point to svx2qm. Handles arguments.

    Usage example:
        find -name '*.svx' | xargs ./svx2qm.py --format human
    """
    parser = argparse.ArgumentParser(
        description='Extract question marks (QMs) from one or more Survex '
                    'files. The QMs must be formatted appropriately, and '
                    'currently this script only supports commented-out QMs, '
                    'as the format has not been standardised yet. The QMs can '
                    'be returned as a human-readable list or as a CSV.')
    parser.add_argument('svx_files', metavar='SVX-FILE …', nargs='+',
                        help='SVX files to extract QMs from')
    parser.add_argument('--format', choices=['csv', 'human'],
                        default='human',
                        help='output format (default: human)')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='output debug information')
    parser.add_argument('--include-resolved', action='store_true',
                        default=False,
                        help='include resolved QMs in the output')

    args = parser.parse_args()

    extracter = QmExtracter(args.debug)
    qms = extracter.extract_qms(args.svx_files)
    extracter.format_qms(qms, args.format, args.include_resolved)


if __name__ == '__main__':
    main()