# Mirror of https://expo.survex.com/repositories/expoweb/.git/
# Synced 2024-11-27 01:32:03 +00:00
# 114 lines, 3.6 KiB, Python
"""
|
|
Quick and dirty Python script to find references to completed qms in the
|
|
cave description pages. Run this to find which bits of description
|
|
need updating.
|
|
|
|
The list of qms is read from the qm.csv file and any with an entry in the
|
|
"Completion description" column (column 7) are searched for in all the html
|
|
files.
|
|
|
|
The script prints a list of the completed qms that it found references to
|
|
and in which file.
|
|
|
|
Nial Peters - 2011
|
|
"""
|
|
import csv
import glob
import itertools
import os
import os.path
import re

# Tab-delimited file listing the QMs; column 7 holds the completion
# description.
QM_CSV_FILE = "qm.csv"
# Folder whose *.html description pages are searched for QM references.
DESC_FOLDER = "."
#####################################################################
# A few functions copied from std_ops - pasted here to save people
# having to install std_ops to use this script.
#####################################################################

def flatten(l, ltypes=(list, tuple)):
    """
    Reduce an iterable containing nested sequences to a single flat sequence.

    Elements that are instances of ``ltypes`` are expanded in place, to any
    depth, and empty nested sequences are dropped. Every other element is
    kept as it is. The algorithm is taken from:
    http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html

    l      -- the iterable to flatten; it is copied, never modified.
    ltypes -- a type, or tuple of types, whose instances are flattened.

    The result has the same type as ``l`` when that type can be built from
    a list (list, tuple, ...). For inputs whose type cannot be constructed
    that way (generators, iterators, ...) a plain list is returned.

    >>> flatten([list(range(3)), list(range(3, 6))])
    [0, 1, 2, 3, 4, 5]
    >>> flatten([1, 2, (3, 4)])
    [1, 2, 3, 4]
    >>> flatten((1, [2, (3,)]))
    (1, 2, 3)
    >>> flatten([1, [2, 3, [4, 5, [6, [7, 8, [9, [10]]]]]]])
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    >>> flatten([1, [2, 3, [4, 5, [6, [7, 8, [9, [10]]]]]]], ltypes=())
    [1, [2, 3, [4, 5, [6, [7, 8, [9, [10]]]]]]]
    >>> flatten([1, 2, (3, 4)], ltypes=(list,))
    [1, 2, (3, 4)]
    >>> flatten(iter([1, [2, [3]]]))
    [1, 2, 3]
    """
    ltype = type(l)
    items = list(l)
    i = 0
    while i < len(items):
        # Keep expanding position i until it holds a non-flattenable element.
        while isinstance(items[i], ltypes):
            if not items[i]:
                # Empty nested sequence: drop it and step back, so that the
                # increment below lands on the element that took its place.
                items.pop(i)
                i -= 1
                break
            # Splice the nested sequence's elements in where it stood.
            items[i:i + 1] = items[i]
        i += 1
    try:
        return ltype(items)
    except TypeError:
        # The input type cannot be built from a list (e.g. a generator).
        return items
def find_files(path, recursive=False, pattern='*', skip_links=True, full_paths=False):
    """
    Return a list of the files in the folder ``path`` that match ``pattern``.

    path       -- folder to search.
    recursive  -- if true, sub-folders of ``path`` are searched as well.
    pattern    -- glob pattern that the file names must match.
    skip_links -- if true, symbolic links are not followed when looking for
                  sub-folders to descend into. Links that match ``pattern``
                  and do not point at a folder are still returned.
    full_paths -- if true, absolute paths are returned; otherwise the paths
                  are as produced by glob, relative to ``path`` as given.

    Directories are never included in the result. Raises ValueError if
    ``path`` is not an existing folder.
    """
    if not os.path.isdir(path):
        raise ValueError("'%s' is not a recognised folder" % path)

    # Entries of this folder that match the pattern, minus any directories.
    matches = glob.glob(os.path.normpath(path + os.sep + pattern))
    found_files = [x for x in matches if not os.path.isdir(x)]

    if recursive:
        # The sub-folders are listed separately from the matches because
        # they need not match the file pattern themselves.
        path_contents = glob.glob(os.path.normpath(path + os.sep + '*'))
        if skip_links:
            path_contents = [x for x in path_contents if not os.path.islink(x)]
        for subdir in path_contents:
            if os.path.isdir(subdir):
                found_files.extend(
                    find_files(subdir, recursive, pattern, skip_links, full_paths))

    if full_paths:
        return [os.path.abspath(x) for x in found_files]
    return found_files
#####################################################################
#####################################################################
# Main script starts here.
#####################################################################
# Identifiers (first column of qm.csv) of the QMs that have a completion
# description.
completed_qms = []

# The csv module expects a binary-mode file on Python 2 and a text-mode file
# opened with newline='' on Python 3. ``str is bytes`` is only true on
# Python 2.
if str is bytes:
    ifp = open(QM_CSV_FILE, 'rb')
else:
    ifp = open(QM_CSV_FILE, newline='')

with ifp:
    # read the qm.csv file assuming it is tab delimited
    qm_reader = csv.reader(ifp, delimiter='\t')

    # NOTE(review): a header row, if qm.csv has one, is treated like any
    # other row - confirm whether it should be skipped.
    for row in qm_reader:
        # Skip rows that have no entry in the Completion description column
        # (column 7), including blank or short rows that lack the column.
        if len(row) < 7 or not row[6].strip():
            continue
        # An empty identifier would match every page, so ignore it.
        if not row[0].strip():
            continue
        completed_qms.append(row[0])

# One compiled pattern per completed QM, built once rather than per file.
# The identifier is escaped so that it is matched literally, and the
# lookahead rejects matches that are immediately followed by another digit
# (so an identifier ending in "1" is not found inside one ending in "12").
qm_patterns = [(qm, re.compile(re.escape(qm) + r"(?!\d)")) for qm in completed_qms]

# get a list of all the html files in the description folder
html_files = find_files(DESC_FOLDER, pattern="*.html")

# search each html file for references to each completed qm
for desc_file in html_files:
    # NOTE(review): on Python 3 this decodes using the default encoding, so
    # a page in a different encoding raises UnicodeDecodeError - confirm the
    # encoding of the description pages.
    with open(desc_file, "r") as f:
        contents = f.read()
    for qm, qm_pattern in qm_patterns:
        if qm_pattern.search(contents):
            print("Reference to " + qm + " found in " + desc_file)