Gawk horror (Was DiscoMarkSideways again.)

PJH

Continuing the discussion from 🕶 shadowmod queries:

apart from the apparent formatting that's going to be applied to that fenced block judging from the preview pane

Ok - I can't figure out what's going on there.

Suggestions please?

Sticking this in Bug unless someone can come up with a reasonable explanation, in which case it could be moved to Meta. Or something...

RaceProUK

[pjh@sofa ~]$ (for year in 2000 2001; do for month in {1..12}; do psql -d discourse -c "select timestamp '$month/1/$year' s, timestamp '$month/1/$year' + interval '1 month' e"; done; done) | grep 200
 2000-01-01 00:00:00 | 2000-02-01 00:00:00
 2000-02-01 00:00:00 | 2000-03-01 00:00:00
 2000-03-01 00:00:00 | 2000-04-01 00:00:00
 2000-04-01 00:00:00 | 2000-05-01 00:00:00
 2000-05-01 00:00:00 | 2000-06-01 00:00:00
 2000-06-01 00:00:00 | 2000-07-01 00:00:00
 2000-07-01 00:00:00 | 2000-08-01 00:00:00
 2000-08-01 00:00:00 | 2000-09-01 00:00:00
 2000-09-01 00:00:00 | 2000-10-01 00:00:00
 2000-10-01 00:00:00 | 2000-11-01 00:00:00
 2000-11-01 00:00:00 | 2000-12-01 00:00:00
 2000-12-01 00:00:00 | 2001-01-01 00:00:00
 2001-01-01 00:00:00 | 2001-02-01 00:00:00
 2001-02-01 00:00:00 | 2001-03-01 00:00:00
 2001-03-01 00:00:00 | 2001-04-01 00:00:00
 2001-04-01 00:00:00 | 2001-05-01 00:00:00
 2001-05-01 00:00:00 | 2001-06-01 00:00:00
 2001-06-01 00:00:00 | 2001-07-01 00:00:00
 2001-07-01 00:00:00 | 2001-08-01 00:00:00
 2001-08-01 00:00:00 | 2001-09-01 00:00:00
 2001-09-01 00:00:00 | 2001-10-01 00:00:00
 2001-10-01 00:00:00 | 2001-11-01 00:00:00
 2001-11-01 00:00:00 | 2001-12-01 00:00:00
 2001-12-01 00:00:00 | 2002-01-01 00:00:00
[pjh@sofa ~]$

RaceProUK

[pjh@sofa ~]$ (for year in 2000 2001; do for month in {1..12}; do psql -d discourse -c "select timestamp '$month/1/$year' s, timestamp '$month/1/$year' + interval '1 month' e"; done; done) | grep 200
 2000-01-01 00:00:00 | 2000-02-01 00:00:00
 2000-02-01 00:00:00 | 2000-03-01 00:00:00
 2000-03-01 00:00:00 | 2000-04-01 00:00:00
 2000-04-01 00:00:00 | 2000-05-01 00:00:00
 2000-05-01 00:00:00 | 2000-06-01 00:00:00
 2000-06-01 00:00:00 | 2000-07-01 00:00:00
 2000-07-01 00:00:00 | 2000-08-01 00:00:00
 2000-08-01 00:00:00 | 2000-09-01 00:00:00
 2000-09-01 00:00:00 | 2000-10-01 00:00:00
 2000-10-01 00:00:00 | 2000-11-01 00:00:00
 2000-11-01 00:00:00 | 2000-12-01 00:00:00
 2000-12-01 00:00:00 | 2001-01-01 00:00:00
 2001-01-01 00:00:00 | 2001-02-01 00:00:00
 2001-02-01 00:00:00 | 2001-03-01 00:00:00
 2001-03-01 00:00:00 | 2001-04-01 00:00:00
 2001-04-01 00:00:00 | 2001-05-01 00:00:00
 2001-05-01 00:00:00 | 2001-06-01 00:00:00
 2001-06-01 00:00:00 | 2001-07-01 00:00:00
 2001-07-01 00:00:00 | 2001-08-01 00:00:00
 2001-08-01 00:00:00 | 2001-09-01 00:00:00
 2001-09-01 00:00:00 | 2001-10-01 00:00:00
 2001-10-01 00:00:00 | 2001-11-01 00:00:00
 2001-11-01 00:00:00 | 2001-12-01 00:00:00
 2001-12-01 00:00:00 | 2002-01-01 00:00:00
[pjh@sofa ~]$

RaceProUK

Ah, there you go: you have a stray ` at the end ;)

Onyx

@RaceProUK said:

Ah, there you go: you have a stray ` at the end ;)

There... there were enough. Anything extra should just fucking render. Not break everything before it!

PJH

Ok - in my defence it's early, I've only just started on my 2nd pint of coffee, and I've been dealing with semi-manually importing logfiles into a SQL database which, among the greater WTFs that actually require this to be done, involves the following bit of bash script:

        gawk -v mysql_table="${mysql_table}" '
        # Reads delimited log records on stdin and prints one INSERT statement per
        # record, which the shell then pipes into the mysql client.
        # (No apostrophes in these comments: the program sits inside a
        # single-quoted shell string.)
        BEGIN {
        # https://www.gnu.org/software/gawk/manual/html_node/Splitting-By-Content.html - split the output from above
        # A field is either a run of characters other than comma/space, or a
        # double-quoted string (the quotes stay part of the field).
                FPAT = "([^, ]+)|(\"[^\"]+\")"
        }

        {
                # $5 is the date in the format
                # Wed Mar 18 09:51:03 GMT 2015
                # We need it in:
                # YYYY MM DD HH MM SS [DST]
                # for mktime() to translate into epoch

                # Drop the first and last character of $5 (presumably the
                # surrounding double quotes kept by FPAT - confirm against input).
                len=length($5)
                t = substr($5, 2, len - 2)
                # d[1]=weekday d[2]=month name d[3]=day d[4..6]=HH,MM,SS d[7]=zone d[8]=year
                split(t, d, "[ :]") # Wed Mar 18 09,53,44 GMT 2015
                # RSTART is where the month abbreviation starts (1, 4, 7, ...);
                # (RSTART+2)/3 maps that onto 1..12.
                match("JanFebMarAprMayJunJulAugSepOctNovDec",d[2])
                d[2] = (RSTART+2)/3
                # NOTE(review): d[7] is the zone name sitting in the optional DST
                # slot; mktime() is documented to work in the local time zone, so
                # confirm the logged zone matches the zone this runs in.
                epoch=mktime(d[8] " " d[2] " " d[3] " " d[4] " " d[5] " " d[6]  " " d[7])

        #       printf("%s, %s, %s, %s, %s, %s, %s, %s\n", $1, $2, $3, $4, epoch, $6, $7, mysql_table)
                # NOTE(review): field values are pasted into the SQL text verbatim
                # with no escaping - only safe for trusted log files. Also confirm
                # that the unquoted column name key parses on the target server
                # (KEY is a reserved word in MySQL).
                printf("INSERT IGNORE INTO %s (desig, device, key, t_power, t_report, rx, tx) VALUES (%s, %s, %s, %s, %s, %s, %s);\n",
                        mysql_table,
                        $1, $2, $3, $4, epoch, $6, $7);
        }' | mysql --host="${mysql_host}" --database="${mysql_db}" --user="${mysql_user}" --password="${mysql_pass}"

Wonder if the highlighting can do anything with that gawk script..

        # Turns each input record into a MySQL INSERT statement, converting the
        # textual timestamp in field 5 into epoch seconds on the way.
        BEGIN {
                # https://www.gnu.org/software/gawk/manual/html_node/Splitting-By-Content.html - split the output from above
                # A field is a run of characters other than comma/space, or a
                # double-quoted string (quotes included in the field).
                FPAT = "([^, ]+)|(\"[^\"]+\")"
        }

        {
                # Field 5 looks like: Wed Mar 18 09:51:03 GMT 2015
                # mktime() wants:     YYYY MM DD HH MM SS [DST]

                # Trim the first and last character, then break the rest apart on
                # spaces and colons: part[2] is the month name, part[8] the year.
                stamp = substr($5, 2, length($5) - 2)
                split(stamp, part, "[ :]")

                # Month name -> month number via its offset in the lookup string.
                match("JanFebMarAprMayJunJulAugSepOctNovDec", part[2])
                part[2] = (RSTART + 2) / 3

                when = part[8] " " part[2] " " part[3] " " part[4] " " part[5] " " part[6] " " part[7]
                epoch = mktime(when)

                # Debug alternative:
                # printf("%s, %s, %s, %s, %s, %s, %s, %s\n", $1, $2, $3, $4, epoch, $6, $7, mysql_table)
                printf("INSERT IGNORE INTO %s (desig, device, key, t_power, t_report, rx, tx) VALUES (%s, %s, %s, %s, %s, %s, %s);\n", mysql_table, $1, $2, $3, $4, epoch, $6, $7)
        }

Hmm. It chose Perl.

tar

Here's some awk I happened to have lying around...

#!/usr/bin/awk -f

# usage:
#   prog src_file dep_file obj_file [opts], where src_file is a .c or .cpp file.

# options:
#   -I   add search path for include files (like compiler option).
#   -G   set a path for autogenerated include files to be written to.
#        Dependency information will be generated so that make will try to build
#        these files.
#   -V   Print lots of debugging information

# This script scans the #include statements in the specified file, then scans
# all included files for further includes until no further includes can be found.
# only #include "xxx" directives are followed; #include <xxx> are ignored.

# The output is in make dependency format to the file specified by 'dep_file':
#   obj_file dep_file: src_file <discovered headers...>

# Report a fatal problem at the current input position and stop.
#   msg  text printed after the "file:line: " prefix
# Raises the global 'fail' flag, which the END rule checks before writing any
# output, then exits with status 1.
function error(msg) {
    fail = 1
    print FILENAME ":" FNR ": " msg
    exit 1
}


# Print a debug message prefixed with "..." when the global 'verbose' flag is
# set; otherwise do nothing.
function printvrb(msg) {
    if(!verbose) {
        return
    }
    print "..." msg
}


# Return 1 if 'path' can be opened and read, 0 otherwise.
#   path  file name to probe (must not be the empty string)
# The probe reads at most one line and closes the file again itself, so it
# leaves no open stream behind and callers do not need to close anything.
function file_exists(path,
    # locals
    line, status) {

#    return 0 == system("test -f " path) #this test is slower but may be more portable?

    # getline gives 1 (line read) or 0 (empty file) when the file could be
    # read, and -1 on error such as a missing file.
    status = (getline line < path)
    if(status >= 0) {
        close(path)
    }
    return status >= 0
}


# Return the directory part of 'file' (everything before the last "/"),
# without a trailing slash.
#   "src/lib/foo.c" -> "src/lib"
#   "/foo.c"        -> ""   (so that dir "/" name is still rooted)
#   "foo.c"         -> "."  (no directory component: current directory)
# The "." case matters because callers build paths as dir "/" name; returning
# "" for a bare file name would make them look in the filesystem root.
function path_to_file(file,
    # locals
    n, i, arr) {

    n = split(file, arr, "/")
    if(1 == n) {
        return "."
    }

    # Re-join every component except the last one.
    file = ""
    for(i = 1; i < n; i++) {
        file = file arr[i]
        if(i < (n - 1)) {
            file = file "/"
        }
    }
    return file
}


# Parse the command line.
#   ARGV[1]  source file; left in ARGV so awk scans it as the first input
#   ARGV[2]  dependency file to write
#   ARGV[3]  object file name used as a make target
#   rest     any mix of -I <path>, -G <path> and -V
# Every argument consumed here is blanked so awk does not treat it as an
# input file.
BEGIN {
    verbose = 0
    fail = 0
    src_file = ARGV[1]
    dep_file = ARGV[2]; ARGV[2] = ""
    obj_file = ARGV[3]; ARGV[3] = ""

    for(i = 1; i < ARGC; i++) {
        if("-I" == ARGV[i]) {
            # add header search path (slot 0 of search_path is reserved for
            # the directory of the file currently being scanned)
            search_path[++search_path_count] = ARGV[i + 1]
            ARGV[i] = ""
            ARGV[++i] = ""
        }
        else if("-G" == ARGV[i]) {
            # set autogen path; it is also searched like any -I path
            gen_path = ARGV[i + 1]
            search_path[++search_path_count] = ARGV[i + 1]
            ARGV[i] = ""
            ARGV[++i] = ""
        }
        else if("-V" == ARGV[i]) {
            # -V takes no value: blank only this slot. Advancing i here as
            # well would make the loop skip the following argument.
            verbose = 1
            ARGV[i] = ""
        }
    }

    print "[SRC] Calculating dependencies for '" src_file "'"
}


# Runs on the first line of every input file.
# Points search_path[0] at the directory of the file being scanned, so that
# #include "x" is looked up next to the including file before any other search
# path, as a C compiler would do.
FNR == 1 {
    printvrb("scanning '"FILENAME"'")

    scan_dir = path_to_file(FILENAME)
    printvrb("adding cwd to search_path: '" scan_dir "'")
    search_path[0] = scan_dir

    # Assume that autogenerated files can reference other autogenerated files
    # without needing an explicit comment.
    # Note: the #include rule assigns 'autogen' itself for every directive it
    # handles.
    if(index(FILENAME, gen_path) == 1) {
        autogen = 1
    }
}


# Handles every  #include "file"  line.
# Looks the header up along search_path (slot 0 is the directory of the
# including file). A header that is found is recorded in file_arr and appended
# to ARGV so that awk scans it too. A header that is not found is accepted only
# if it counts as autogenerated: either the including file lives under
# gen_path, or the directive carries a comment containing "autogen". Any other
# missing header is a fatal error.
/^[ \t]*#[ \t]*include[ \t]*"/ {
    found = 0

    # Files under gen_path may reference other autogenerated files without an
    # explicit comment. Derive this from FILENAME for each directive so the
    # flag can neither be lost nor carried over from an earlier directive.
    # (The gen_path != "" guard avoids awks where index(s, "") returns 1.)
    autogen = (gen_path != "" && 1 == index(FILENAME, gen_path))

    # find filename: the text between the first pair of double quotes
    split($0, arr, "\""); file = arr[2]

    if(!autogen) {
        # look for a comment after the include file containing the word 'autogen'
        # (case-insensitive match).
        split($0, arr, "//"); comment = arr[2] # C++-style // comment
        if("" == comment) {
            split($0, arr, "/\\*"); comment = arr[2] # C style /*...*/
        }
        autogen = (index(tolower(comment),"autogen") > 0)
    }

    printvrb("searching for '" file "'")
    for(i = 0; i <= search_path_count; i++) {
        newfile = search_path[i] "/" file

        if(!(newfile in file_arr)) {
            # Test if the file actually exists.

            if(file_exists(newfile)) {
                file_arr[newfile] = 1

                #add to list of files to scan
                printvrb("adding '" newfile "' to scan list")
                ARGV[ARGC++] = newfile
                close(newfile)
                found = 1
                break
            }
        }
        else {
            found = 1 # we've already found this file
            break
        }
    }

    printvrb("file '" file "' " (found ? "found as '" newfile "'" : "is autogen?"))

    if(!found) {
        # To support autogenerated headers, they should be added to the dependency
        # list, but don't actually try to scan them, unless they already exist.
        # (make is smart enough to run this script again to regenerate the
        # dependency files during the build process as new files become available.)
        # As we didn't find the file, we have to assume it will end up in gen_path

        if(autogen) {
            genfile = gen_path "/" file
            # Test membership BEFORE recording the file; recording it first
            # would make this branch unreachable.
            if(!(genfile in file_arr)) {
                file_arr[genfile] = 1
                if(file_exists(genfile)) {
                    # scan this if it exists
                    printvrb("adding '" genfile "' to scan list")
                    ARGV[ARGC++] = genfile
                    close(genfile)
                }
            }
        }
        else {
            # Missing and not tagged: this is the case the message describes.
            error(file " was not found and is not tagged as autogenerated.\n")
        }
    }
}


# Write the make dependency file, unless error() raised the 'fail' flag.
# Layout: a header comment, then one rule with obj_file and dep_file as targets
# and src_file plus every entry of file_arr as prerequisites (one per line,
# each ending in a backslash continuation), then a closing comment line.
END {
    if(!fail) {
        printvrb("generating '" dep_file "'")

        banner = "# Autogenerated dependencies for " src_file
        rule_head = obj_file " \\\n" dep_file ": \\\n\t" src_file "  \\"
        print banner > dep_file
        print rule_head > dep_file

        # Iteration order over file_arr is whatever awk chooses.
        for(dep in file_arr) {
            print "\t" dep " \\" > dep_file
        }

        print "# End of dependencies (" dep_file ")" > dep_file
    }
}

Indentation seems all kinds of messed up, though @discoursebot...

discoursebot

@tar - Last Day Without A Discourse Bug: null