#!/bin/sh
# Progress banner; quoted so the shell prints "*ref" literally rather than glob-expanding it.
echo "preparing table of svx *ref links to wallets"
# Checks that *ref in .svx files link to valid surveyscans folders
# Just prepares the data as a table, does no error checking itself.
# The validation checking is done by check-refs.sh & check-refs.awk
# 2020-04-05 Philip Sargent
#
# usage:
# cd /loser/
# ./check-svx.sh 
#
# To make this run in a cron job on the server, we need to do:
# 1. make the paths correct for the server in chk-refs.sh
# 2020-04-13
#
# Many formats used in svx files for *ref statements:
# ; ref.:
# ;ref.:
# ; Ref.:
# ; Ref.
# ; Referenz.:
# ;Referenz.:
# ; ref:
# ; ref
# *Ref
# *ref
#   *ref
# *include galatica ; ref 
# *ref 2040#31 ; (additional comment....)
#
# extract_svx_refs - print the table of survey-scan wallet references.
#
# Scans every regular file named *.svx below the current directory and
# writes one line to stdout for each reference it finds:
#   <path> <reference text>               for "*ref" / "; ref" style lines
#   <path> <wallet> <included>  INCLUDE   for "*include xxx ; ref wallet" lines
# <path> is relative to the current directory (the leading "./" is removed).
# Takes no arguments. No validation of the references is done here.
extract_svx_refs() {
    # "-exec ... {} +" passes many files to each awk process (instead of
    # starting one awk per file) and copes with any character in a file
    # name; awk reports the file being read in FILENAME.
    find . -name "*.svx" -type f -exec awk '
    # Return s with leading and trailing blanks, tabs, CRs and newlines removed.
    function trim(s) {
        sub(/^[ \t\r\n]+/, "", s)
        sub(/[ \t\r\n]+$/, "", s)
        return s
    }

    BEGIN {
        # A ref tag at the start of a line: optional indentation, then ";"
        # or "*", then ref / Ref / Referenz with optional "." and ":".
        # Indentation is accepted before "*" as well as before ";".
        reftag  = "^[ \t]*[;*][ ]?[Rr]ef(erenz)?[.]?[ ]?[:]?"
        # Wallet code written with a space before the hash, e.g. 2014 #03
        spctag  = "[1-2][0-9][0-9][0-9] #X?[0-9][0-9]"
        # Canonical wallet code, e.g. 2014#03 or 2014#X03
        walltag = "[1-2][0-9][0-9][0-9]#X?[0-9][0-9]"
        # A reference given in a comment after an *include statement
        inctag  = "\\*include .*;[ ]*ref"
    }

    $0 ~ reftag {
        sub(reftag, "")            # drop the ref tag itself
        $0 = trim($0)

        # substr() positions are 1-based. A start position of 0 is handled
        # differently by different awk implementations (some return one
        # character fewer), so always start from 1.
        if (match($0, spctag)) {
            # RSTART..RSTART+3 is the year, RSTART+4 is the space to delete.
            $0 = substr($0, 1, RSTART + 3) substr($0, RSTART + 5)
        }
        if (match($0, walltag)) {
            # Put a space straight after the wallet code so that it is
            # always separated from whatever follows it.
            $0 = substr($0, 1, RSTART + RLENGTH - 1) " " substr($0, RSTART + RLENGTH)
        }

        print substr(FILENAME, 3), $0      # substr(..., 3) chops off the "./"
    }

    # Also report references given on *include statements. This pattern is
    # tested against the record as left by the rule above.
    $0 ~ inctag {
        $0 = trim($0)              # re-splits the fields
        # Expected layout: *include <file> ; ref <wallet>
        printf("%s %s %s  INCLUDE\n", substr(FILENAME, 3), $5, $2)
    }
    ' {} +
}

extract_svx_refs > svx-refs