faster db creation, safer file reading with 'with'

2025-01-31 15:32:35 +00:00 · 2023-01-28 14:04:32 +00:00 · 2023-01-28 14:04:32 +00:00 · 2704fc42d4
commit 2704fc42d4
parent d9a4069662
1 changed files with 90 additions and 97 deletions
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@ -1,6 +1,7 @@
 import csv
 import os
 import re
+from pathlib import Path

 from django.conf import settings

@ -65,112 +66,104 @@ def parseCaveQMs(cave, inputFile, ticked=False):
        return nqms

    # qmPath = settings.EXPOWEB+inputFile
-    qmPath = os.path.join(settings.EXPOWEB, inputFile)  # why not use the pathlib stuff ?
+    qmPath = Path(settings.EXPOWEB, inputFile)  

-    qmCSVContents = open(qmPath, "r")
-    dialect = csv.Sniffer().sniff(qmCSVContents.read())
-    qmCSVContents.seek(0, 0)
-    qmReader = csv.reader(qmCSVContents, dialect=dialect)
-    next(qmReader)  # Skip header row
-    n = 0
-    nqms = 0
-    for line in qmReader:
-        try:
-            n += 1
-            year = int(line[0][1:5])
-            f"PH_{int(year)}_{int(n):02d}"
-            QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
-            newQM = QM()
-            # newQM.found_by=placeholder
-            newQM.number = QMnum
-            newQM.cave = caveid
-            newQM.blockname = ""
-            if line[1] == "Dig":
-                newQM.grade = "D"
-            else:
-                newQM.grade = line[1]
-            newQM.area = line[2]
-            newQM.location_description = line[3]
-
-            # In the table, completion is indicated by the presence of a completion discription.
-            newQM.completion_description = line[4]
-            newQM.nearest_station_description = line[5]
-            if newQM.completion_description:
-                newQM.ticked = True
-            else:
-                newQM.ticked = False
-
-            newQM.comment = line[6]
+    with open(qmPath, "r") as qmCSVContents:
+        dialect = csv.Sniffer().sniff(qmCSVContents.read())
+        qmCSVContents.seek(0, 0)
+        qmReader = csv.reader(qmCSVContents, dialect=dialect)
+        next(qmReader)  # Skip header row
+        n = 0
+        nqms = 0
+        for line in qmReader:
            try:
-                # year and number are unique for a cave in CSV imports
-                preexistingQM = QM.objects.get(
-                    number=QMnum, found_by__date__year=year
-                )  # if we don't have this one in the DB, save it
-                if (
-                    preexistingQM.new_since_parsing is False
-                ):  # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
-                    preexistingQM.delete()
+                n += 1
+                year = int(line[0][1:5])
+                f"PH_{int(year)}_{int(n):02d}"
+                QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
+                newQM = QM()
+                # newQM.found_by=placeholder
+                newQM.number = QMnum
+                newQM.cave = caveid
+                newQM.blockname = ""
+                if line[1] == "Dig":
+                    newQM.grade = "D"
+                else:
+                    newQM.grade = line[1]
+                newQM.area = line[2]
+                newQM.location_description = line[3]
+
+                # In the table, completion is indicated by the presence of a completion discription.
+                newQM.completion_description = line[4]
+                newQM.nearest_station_description = line[5]
+                if newQM.completion_description:
+                    newQM.ticked = True
+                else:
+                    newQM.ticked = False
+
+                newQM.comment = line[6]
+                try:
+                    # year and number are unique for a cave in CSV imports
+                    preexistingQM = QM.objects.get(
+                        number=QMnum, found_by__date__year=year
+                    )  # if we don't have this one in the DB, save it
+                    if (
+                        preexistingQM.new_since_parsing is False
+                    ):  # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
+                        preexistingQM.delete()
+                        newQM.expoyear = year
+                        newQM.save()
+                    else:  # otherwise, print that it was ignored
+                        print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
+
+                except QM.DoesNotExist:  # if there is no pre-existing QM, save the new one
                    newQM.expoyear = year
                    newQM.save()
-                else:  # otherwise, print that it was ignored
-                    print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
-
-            except QM.DoesNotExist:  # if there is no pre-existing QM, save the new one
-                newQM.expoyear = year
-                newQM.save()
-            nqms += 1
-        except KeyError:  # check on this one
-            message = f" ! - {qmPath} KeyError {str(line)} "
-            print(message)
-            DataIssue.objects.create(parser="QMs", message=message)
-            continue
-        except IndexError:
-            message = f" ! - {qmPath} IndexError {str(line)} "
-            print(message)
-            DataIssue.objects.create(parser="QMs", message=message)
-            continue
+                nqms += 1
+            except KeyError:  # check on this one
+                message = f" ! - {qmPath} KeyError {str(line)} "
+                print(message)
+                DataIssue.objects.create(parser="QMs", message=message)
+                continue
+            except IndexError:
+                message = f" ! - {qmPath} IndexError {str(line)} "
+                print(message)
+                DataIssue.objects.create(parser="QMs", message=message)
+                continue
    return nqms


 def parse_KH_QMs(kh, inputFile, ticked):
    """import QMs from the 1623-161 (Kaninchenhohle) html pages, different format"""
-    khQMs = open(os.path.join(settings.EXPOWEB, inputFile), "r")
-    khQMs = khQMs.readlines()
-    nqms = 0
-    for line in khQMs:
-        res = re.search(
-            r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
-            line,
-        )
-        if res:
-            res = res.groupdict()
-            year = int(res["year"])
-            # logbook placeholder code was previously here. No longer needed.
-            # check if placeholder exists for given year, create it if not
-            # message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
-            # placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="161", title="placeholder for QMs in 161", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date((year), 1, 1),"cave_slug":str(kh)})
-            # # if hadToCreate:
-            # print(message)
-            # DataIssue.objects.create(parser='QMs', message=message)
-            lookupArgs = {
-                #'found_by':placeholder,
-                "blockname": "",
-                "expoyear": year,
-                "number": res["number"],
-                "cave": kh,
-                "grade": res["grade"],
-            }
-            nonLookupArgs = {
-                "ticked": ticked,
-                "nearest_station_name": res["nearest_station"],
-                "location_description": res["description"],
-            }
-            instance, created = save_carefully(QM, lookupArgs, nonLookupArgs)
-            # if created:
-            # message = f" ! - {instance.code()} QM entry for '161 KH' created. ticked: {ticked}"
-            # print(message)
-            # DataIssue.objects.create(parser='QMs', message=message)
-            nqms += 1
+    with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile:
+        khQMs = khQMfile.readlines()
+        nqms = 0
+        for line in khQMs:
+            res = re.search(
+                r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
+                line,
+            )
+            if res:
+                res = res.groupdict()
+                year = int(res["year"])
+               
+                lookupAttribs = {
+                    #'found_by':placeholder,
+                    "blockname": "",
+                    "expoyear": year,
+                    "number": res["number"],
+                    "cave": kh,
+                    "grade": res["grade"],
+                }
+                nonLookupAttribs = {
+                    "ticked": ticked,
+                    "nearest_station_name": res["nearest_station"],
+                    "location_description": res["description"],
+                }
+                # Create new. We know it doesn't exist as we deleted everything when we started.
+                instance = QM.objects.create(**nonLookupAttribs, **lookupAttribs)
+                
+                nqms += 1
    return nqms