attempt to simplify went horribly wrong

2026-02-20 21:35:20 +00:00 · 2020-06-06 22:51:55 +01:00
parent f8a3c8f5bc
commit fda50ed570
9 changed files with 135 additions and 88 deletions
--- a/core/TESTS/tests.py
+++ b/core/TESTS/tests.py
@@ -51,7 +51,16 @@ class SimpleTest(SimpleTestCase):
        from troggle.core.models_caves import CaveSlug, Cave, CaveAndEntrance, QM, CaveDescription, EntranceSlug, Entrance, Area, SurvexStation
        from troggle.core.forms import CaveForm, CaveAndEntranceFormSet, VersionControlCommentForm, EntranceForm, EntranceLetterForm
        from troggle.helper import login_required_if_public
-
+    def test_import_parses_mix(self):
+        import troggle.parsers.survex 
+        import troggle.parsers.caves
+        import troggle.settings
+        import troggle.flatpages.models
+        import troggle.logbooksdump
+        import troggle.parsers.people
+        import troggle.parsers.surveys
+        import troggle.parsers.logbooks
+        import troggle.parsers.QMs

 __test__ = {"doctest": """
 Another way to test that 1 + 1 is equal to 2.
--- a/core/views_other.py
+++ b/core/views_other.py
@@ -15,6 +15,7 @@ from troggle.core.models_survex import SurvexLeg
 from troggle.helper import login_required_if_public
 from troggle.core.forms import UploadFileForm

+print("** importing troggle/core/views_other.py")

 def showrequest(request):
    return HttpResponse(request.GET)
@@ -57,11 +58,12 @@ def controlPanel(request):
    jobs_completed=[]
    if request.method=='POST':
        if request.user.is_superuser:
+            # NONE of this works now that databaseReset has been so extensively rewritten
    
            #importlist is mostly here so that things happen in the correct order.
            #http post data seems to come in an unpredictable order, so we do it this way.
            importlist=['reinit_db', 'import_people', 'import_caves', 'import_logbooks',
-            'import_survexblks', 'import_QMs', 'import_survexpos', 'import_surveyscans', 'import_tunnelfiles']
+            'import_survexblks', 'import_QMs', 'import_surveyscans', 'import_tunnelfiles']
            databaseReset.dirsredirect()
            for item in importlist:
                if item in request.POST:
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -1,5 +1,4 @@
-from __future__ import (absolute_import, division,
-                        print_function)
+import sys
 import os
 import time
 import timeit
@@ -16,9 +15,18 @@ from django.http import HttpResponse
 from django.core.urlresolvers import reverse

 from troggle.core.models_caves import Cave, Entrance
-import troggle.settings
+import troggle.parsers.caves
+#import troggle.settings
 import troggle.flatpages.models
 import troggle.logbooksdump
+import troggle.parsers.people
+import troggle.parsers.surveys
+import troggle.parsers.logbooks
+import troggle.parsers.QMs
+
+import troggle.core.models
+import troggle.core.models_survex
+import django

 # NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel
 # presented there.
@@ -31,9 +39,9 @@ if os.geteuid() == 0:
 expouser=settings.EXPOUSER
 expouserpass=settings.EXPOUSERPASS
 expouseremail=settings.EXPOUSER_EMAIL
+print(" - SETTINGS: {} ({:.5}...) <{}> on module loading".format(expouser, expouserpass, expouseremail))
+

-def call_django_tests(n):
-    management.call_command('test', verbosity=n)

 def reinit_db():
    """Rebuild database from scratch. Deletes the file first if sqlite is used,
@@ -51,6 +59,7 @@ def reinit_db():
        cursor.execute("CREATE DATABASE %s" % currentdbname)
        cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
        cursor.execute("USE %s" % currentdbname)
+    print(" - SETTINGS: {} ({:.5}...) <{}> before calling syncuser()".format(expouser, expouserpass, expouseremail))
    syncuser()

 def syncuser():
@@ -74,37 +83,32 @@ def dirsredirect():
        f.save()

 def import_caves():
-    import troggle.parsers.caves
    print("Importing Caves")
    troggle.parsers.caves.readcaves()

 def import_people():
-    import troggle.parsers.people
    print("Importing People (folk.csv)")
    troggle.parsers.people.LoadPersonsExpos()

+def import_surveyscans():
+    print("Importing Survey Scans")
+    troggle.parsers.surveys.LoadListScans()
+
 def import_logbooks():
-    import troggle.parsers.logbooks
    print("Importing Logbooks")
    troggle.parsers.logbooks.LoadLogbooks()

 def import_QMs():
    print("Importing QMs (old caves)")
-    import troggle.parsers.QMs
-    # import process itself runs on qm.csv in only 3 old caves, not the modern ones!
-
-def import_surveyscans():
-    import troggle.parsers.surveys
-    print("Importing Survey Scans")
-    troggle.parsers.surveys.LoadListScans()
+    troggle.parsers.QMs.Load_QMs()

 def import_survexblks():
+    # when this import is moved to the top with the rest it all crashes horribly
    import troggle.parsers.survex 
    print("Importing Survex Blocks")
    troggle.parsers.survex.LoadAllSurvexBlocks()

 def import_survexpos(): 
-    import troggle.parsers.survex
    print("Importing Survex x/y/z Positions")
    troggle.parsers.survex.LoadPos()

@@ -117,7 +121,6 @@ def import_surveyimgs():
    #troggle.parsers.surveys.parseSurveys(logfile=settings.LOGFILE)

 def import_tunnelfiles():
-    import troggle.parsers.surveys
    print("Importing Tunnel files")
    troggle.parsers.surveys.LoadTunnelFiles()

@@ -140,6 +143,10 @@ class JobQueue():
    """A list of import operations to run. Always reports profile times
    in the same order. 
    """
+    dbengine = ""
+    dbname = ""
+    dbdefault =""
+    
    def __init__(self,run):
        self.runlabel = run
        self.queue = [] # tuples of (jobname, jobfunction)
@@ -217,48 +224,20 @@ class JobQueue():
        jobend = time.time()
        jobduration = jobend-jobstart
        print("** Ended job   %s  -  %.1f seconds total." % (self.runlabel,jobduration))
-        
        return True

+    def store_dbsettings(self):
+        self.dbengine = settings.DATABASES['default']['ENGINE']
+        self.dbname = settings.DATABASES['default']['NAME']
+        self.dbdefault = settings.DATABASES['default']

-    def run(self):
-        """First runs all the jobs in the queue against a scratch in-memory db
-        then re-runs the import against the db specified in settings.py
-        Default behaviour is to skip the in-memory phase.
-        When MySQL is the db the in-memory phase crashes as MySQL does not properly
-        relinquish some kind of db connection (not fixed yet)
-        """
-        self.loadprofiles()
-        # save db settings for later
-        dbengine = settings.DATABASES['default']['ENGINE']
-        dbname = settings.DATABASES['default']['NAME']
-        dbdefault = settings.DATABASES['default']
+    def restore_dbsettings(self):
+        settings.DATABASES['default'] = self.dbdefault
+        settings.DATABASES['default']['ENGINE'] = self.dbengine
+        settings.DATABASES['default']['NAME'] = self.dbname

-        skipmem = False
-        if self.runlabel:
-            if self.runlabel == "":
-                skipmem = True
-            elif self.runlabel[0:2] == "F-":
-                skipmem = True
-        else:
-            skipmem = True
-        
-        print("--  ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE'])
-        #print "--  DATABASES.default", settings.DATABASES['default']
-        
-        if dbname ==":memory:":
-            # just run, and save the sql file
-            self.runqonce()
-            self.memdumpsql() # saved contents of scratch db, could be imported later..
-            self.saveprofiles()
-        elif skipmem:
-            self.runqonce() 
-            self.saveprofiles()
-        else:
+    def set_in_memory_dbsettings(self):
        django.db.close_old_connections() # needed if MySQL running?
-            # run all the imports through :memory: first
-            settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
-            settings.DATABASES['default']['NAME'] = ":memory:"
        settings.DATABASES['default'] = {'ENGINE': 'django.db.backends.sqlite3', 
            'AUTOCOMMIT': True, 
            'ATOMIC_REQUESTS': False, 
@@ -271,9 +250,54 @@ class JobQueue():
            'TEST': {'COLLATION': None, 'CHARSET': None, 'NAME': None, 'MIRROR': None}, 
            'PASSWORD': '', 
            'PORT': ''}
+        settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
+        settings.DATABASES['default']['NAME'] = ":memory:"

+    def append_placeholders(self):
+        for j in self.results_order:
+            self.results[j].append(None) # append a placeholder

-            print("--  ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE'])
+    def run_now_django_tests(self,n):
+        self.store_dbsettings()
+        # this leaves the db set to :memory: whatever it was initially
+        management.call_command('test', verbosity=n)
+        django.db.close_old_connections() 
+        self.restore_dbsettings()
+
+    def skip_memory_phase(self):
+        if not self.runlabel:
+            return True
+        else:
+            if self.runlabel == "" or self.runlabel[0:2] == "F-":
+                return True
+        return False
+
+    def run(self):
+        """First runs all the jobs in the queue against a scratch in-memory db
+        then re-runs the import against the db specified in settings.py
+        Default behaviour is to skip the in-memory phase.
+        When MySQL is the db the in-memory phase crashes as MySQL does not properly
+        relinquish some kind of db connection (not fixed yet)
+        """
+        self.loadprofiles()
+        self.store_dbsettings()
+
+        print("-- phase 0 ",  settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME'])
+        #print "--  DATABASES.default", settings.DATABASES['default']
+        
+        if self.dbname ==":memory:":
+            # just run, and save the sql file
+            self.runqonce()
+            self.memdumpsql() # saved contents of scratch db, could be imported later..
+            self.saveprofiles()
+        elif self.skip_memory_phase():
+            self.runqonce() 
+            self.saveprofiles()
+        else:
+            # run all the imports through :memory: first
+            self.set_in_memory_dbsettings()
+
+            print("-- phase 1 ",  settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME'])
            #print("--  DATABASES.default", settings.DATABASES['default'])

            # but because the user may be expecting to add this to a db with lots of tables already there,
@@ -281,7 +305,6 @@ class JobQueue():
            # because we are using an empty :memory: database
            # But initiating twice crashes it; so be sure to do it once only.

-
            # Damn. syncdb() is still calling MySQL somehow **conn_params not sqlite3. So crashes on expo server.
            if ("reinit",reinit_db) not in self.queue:
                reinit_db()
@@ -301,20 +324,18 @@ class JobQueue():
            # restore the original db and import again
            # if we wanted to, we could re-import the SQL generated in the first pass to be
            # blazing fast. But for the present just re-import the lot.
-            settings.DATABASES['default'] = dbdefault
-            settings.DATABASES['default']['ENGINE'] = dbengine
-            settings.DATABASES['default']['NAME'] = dbname
-            print("--  ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE'])
+            self.restore_dbsettings()
+            print("-- phase 2 ",  settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME'])
            
            django.db.close_old_connections() # maybe not needed here
            for j in self.results_order:
                self.results[j].pop() # throw away results from :memory: run
-                self.results[j].append(None) # append a placeholder
+            self.append_placeholders()

            django.db.close_old_connections() # magic rune. works. found by looking in django.db__init__.py
            #django.setup()  # should this be needed?
            
-            self.runqonce() # crashes because it thinks it has no migrations to apply, when it does.
+            self.runqonce() 
            self.saveprofiles()
    
        return True
@@ -405,9 +426,6 @@ def usage():
             """)

 if __name__ == "__main__":
-    import troggle.core.models
-    import sys
-    import django
    django.setup()

    if os.geteuid() == 0:
@@ -419,16 +437,16 @@ if __name__ == "__main__":
    else: 
        runlabel=None

-    call_django_tests(1)
    jq = JobQueue(runlabel)
+    jq.run_now_django_tests(1) 
    
    if len(sys.argv)==1:
        usage()
        exit()
    elif "test" in sys.argv:
-        call_django_tests(2)
        jq.enq("caves",import_caves)
        jq.enq("people",import_people)
+        #jq.run_now_django_tests(2)
    elif "caves" in sys.argv:
        jq.enq("caves",import_caves)
    elif "logbooks" in sys.argv:
@@ -466,6 +484,9 @@ if __name__ == "__main__":
 #       writeCaves()
    elif "profile" in sys.argv: 
        jq.loadprofiles()
+        # need to increment everything runq does
+        print("!! - days before appears as 0.00 - to be fixed")
+        jq.append_placeholders()
        jq.showprofile()
        exit()
    elif "help" in sys.argv:
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -1,5 +1,3 @@
-# -*- coding: UTF-8 -*-
-
 import csv
 import os
 import re
@@ -113,7 +111,7 @@ def parse_KH_QMs(kh, inputFile):
 
            save_carefully(QM,lookupArgs,nonLookupArgs)
        
-
+def Load_QMs():
    parseCaveQMs(cave='stein',inputFile=r"1623/204/qm.csv")
    parseCaveQMs(cave='hauch',inputFile=r"1623/234/qm.csv")
    parseCaveQMs(cave='kh', inputFile="1623/161/qmtodo.htm")
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -8,8 +8,8 @@ import troggle.core.models as models
 import troggle.core.models_caves as models_caves

 def readcaves():
-
  # Clear the cave data issues as we are reloading
+  # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
  models.DataIssue.objects.filter(parser='caves').delete()

  area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
@@ -26,6 +26,7 @@ def readcaves():


 def readentrance(filename):
+  # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = "in file %s" % filename
@@ -89,6 +90,7 @@ def readentrance(filename):
                primary = False

 def readcave(filename):
+  # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = " in file %s" % filename
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -292,7 +292,7 @@ def SetDatesFromLogbookEntries(expedition):
            persontrip.save()


-def LoadLogbookForExpedition(expedition):
+def LoadLogbookForExpedition(expedition,numentries):
    """ Parses all logbook entries for one expedition 
    """
    global logentries
@@ -358,8 +358,6 @@ def LoadLogbookForExpedition(expedition):
        else:
            print("   ! NO TRIP entries found in logbook, check the syntax.")

-        logentries=[] # flush for next year
-
    if logbook_cached:
        i=0
        for entrytuple in range(len(logentries)):
@@ -368,19 +366,33 @@ def LoadLogbookForExpedition(expedition):
            EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground,\
                entry_type)
            i +=1
-
+    return len(logentries)

 def LoadLogbooks():
    """ This is the master function for parsing all logbooks into the Troggle database. 
    """
    DataIssue.objects.filter(parser='logbooks').delete()
    expos = Expedition.objects.all()
+    if len(expos) <= 1:
+        print(" ! No expeditions found. Load 'people' first.")
    nologbook = ["1976", "1977","1978","1979","1980","1980","1981","1983","1984",
    "1985","1986","1987","1988","1989","1990",]
+    entries = {"2020": 0, "2019": 40, "2018": 148, "2017": 120, "2016": 162, "2015": 158, 
+        "2014": 130, "2013": 102, "2012": 150, "2011": 136, "2010": 44, "2009": 104, 
+        "2008": 98, "2007": 222, "2006": 48, "2005": 110, "2004": 152, "2003": 80, "2002": 62, 
+        "2001": 96, "2000": 108, "1999": 158, "1998": 86, "1997": 106, "1996": 188, "1995": 82, 
+        "1994": 64, "1993": 82, "1992": 122, "1991": 76, "1982": 76}
+    try:
+        os.remove("loadlogbk.log")
+    except OSError:
+        pass
+    with open("loadlogbk.log", "a") as log:
        for expo in expos:
            if expo.year not in nologbook:
                print((" - Logbook for: " + expo.year))
-            LoadLogbookForExpedition(expo)
+                numentries = LoadLogbookForExpedition(expo, entries[expo.year])
+                log.write("{} {} should be {}\n".format(expo.year, numentries, entries[expo.year]))
+


 dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -1,5 +1,3 @@
-from __future__ import (absolute_import, division,
-                        print_function)
 import sys
 import os
 import re
--- a/settings.py
+++ b/settings.py
@@ -19,6 +19,8 @@ import urllib.parse

 import django

+print("** importing troggle/settings.py")
+
 # Note that this builds upon the django system installed
 # global settings in
 # django/conf/global_settings.py which is automatically loaded first.
@@ -158,3 +160,4 @@ TEST_RUNNER = 'django.test.runner.DiscoverRunner'
 from localsettings import *

 #localsettings needs to take precedence. Call it to override any existing vars.
+print("** finished importing troggle/settings.py")
--- a/urls.py
+++ b/urls.py
@@ -42,6 +42,7 @@ actualurlpatterns = patterns('',
    url(r'^newfile', views_other.newFile, name="newFile"), # oddly broken, needs investigating more

    url(r'^getEntrances/(?P<caveslug>.*)', views_caves.get_entrances, name = "get_entrances"), #works
+    # e.g. /getEntrances/1623-161
    url(r'^getQMs/(?P<caveslug>.*)', views_caves.get_qms, name = "get_qms"), # no template "get_qms"?
    url(r'^getPeople/(?P<expeditionslug>.*)', views_logbooks.get_people, name = "get_people"), # fails
    url(r'^getLogBookEntries/(?P<expeditionslug>.*)', views_logbooks.get_logbook_entries, name = "get_logbook_entries"), #works
@@ -122,6 +123,7 @@ actualurlpatterns = patterns('',
    #(r'^survey_scans/(?P<path>.*)$', 'django.views.static.serve', {'document_root': settings.SURVEY_SCANS, 'show_indexes':True}),
    url(r'^survey_scans/$',                        view_surveys.surveyscansfolders, name="surveyscansfolders"), 
    url(r'^survey_scans/(?P<path>[^/]+)/$',        view_surveys.surveyscansfolder,  name="surveyscansfolder"),
+    # This next line is beyond daft. If anyone uploads a file *anywhere* in SURVEY_SCANS which doesn't match, troggle crashes horribly. Has been failing for pdf and JPG files for years:
    url(r'^survey_scans/(?P<path>[^/]+)/(?P<file>[^/]+(?:png|jpg|pdf|jpeg|PNG|JPG|PDF|JPEG))$', 
                                                   view_surveys.surveyscansingle,   name="surveyscansingle"),