#!/bin/sh
# see also `man awk`

## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## AWK
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# awk (1) - pattern scanning and text processing language

# print each line (implicit)
awk '{ print }' /etc/passwd | head
# root:x:0:0:root:/root:/bin/bash
# daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
# bin:x:2:2:bin:/bin:/usr/sbin/nologin
# sys:x:3:3:sys:/dev:/usr/sbin/nologin
# sync:x:4:65534:sync:/bin:/bin/sync
# games:x:5:60:games:/usr/games:/usr/sbin/nologin
# man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
# lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
# mail:x:8:8:mail:/var/mail:/usr/sbin/nologin
# news:x:9:9:news:/var/spool/news:/usr/sbin/nologin

# print each line (explicit)
awk '{ print $0 }' /etc/passwd | head
# root:x:0:0:root:/root:/bin/bash
# daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
# bin:x:2:2:bin:/bin:/usr/sbin/nologin
# sys:x:3:3:sys:/dev:/usr/sbin/nologin
# sync:x:4:65534:sync:/bin:/bin/sync
# games:x:5:60:games:/usr/games:/usr/sbin/nologin
# man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
# lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
# mail:x:8:8:mail:/var/mail:/usr/sbin/nologin
# news:x:9:9:news:/var/spool/news:/usr/sbin/nologin

# print each line (stdin)
head /etc/passwd | awk '{ print $0 }'
# root:x:0:0:root:/root:/bin/bash
# daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
# bin:x:2:2:bin:/bin:/usr/sbin/nologin
# sys:x:3:3:sys:/dev:/usr/sbin/nologin
# sync:x:4:65534:sync:/bin:/bin/sync
# games:x:5:60:games:/usr/games:/usr/sbin/nologin
# man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
# lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
# mail:x:8:8:mail:/var/mail:/usr/sbin/nologin
# news:x:9:9:news:/var/spool/news:/usr/sbin/nologin

# print 1st field at each line (fields separated with spaces by default)
head /etc/passwd | awk '{ print $1 }'
# root:x:0:0:root:/root:/bin/bash
# daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
# bin:x:2:2:bin:/bin:/usr/sbin/nologin
# sys:x:3:3:sys:/dev:/usr/sbin/nologin
# sync:x:4:65534:sync:/bin:/bin/sync
# games:x:5:60:games:/usr/games:/usr/sbin/nologin
# man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
# lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
# mail:x:8:8:mail:/var/mail:/usr/sbin/nologin
# news:x:9:9:news:/var/spool/news:/usr/sbin/nologin

# print 1st field at each line (fields separated with ':')
head /etc/passwd | awk -F: '{ print $1 }'
# root
# daemon
# bin
# sys
# sync
# games
# man
# lp
# mail
# news

# print 1st and 7th fields with given format
head /etc/passwd | awk -F: '{ printf "%-20s%s\n", $1, $7 }'
# root                /bin/bash
# daemon              /usr/sbin/nologin
# bin                 /usr/sbin/nologin
# sys                 /usr/sbin/nologin
# sync                /bin/sync
# games               /usr/sbin/nologin
# man                 /usr/sbin/nologin
# lp                  /usr/sbin/nologin
# mail                /usr/sbin/nologin
# news                /usr/sbin/nologin

# print 1st field at each line (fields separated with ':' using FS variable)
head /etc/passwd | awk 'BEGIN { FS=":" } { print $1 }'
# root
# daemon
# bin
# sys
# sync
# games
# man
# lp
# mail
# news

# print argument count at the beginning
awk 'BEGIN { print ARGC }' /etc/passwd
# 2

# print arguments at the beginning
awk 'BEGIN { for (i = 0; i < ARGC; i++) print ARGV[i] }' /etc/passwd
# awk
# /etc/passwd

# print arguments at the beginning (multi-line syntax)
awk '
BEGIN {
    for (i = 0; i < ARGC; i++) {
        print ARGV[i]
    }
}' /etc/passwd
# awk
# /etc/passwd

# print value of 'HOME' environment variable
awk 'BEGIN { print ENVIRON["HOME"] }'
# /home/gokce

# print number of records and number of fields (records are lines by default)
head /etc/passwd | awk -F: '{ printf "%d - %d\n", NR, NF }'
# 1 - 7
# 2 - 7
# 3 - 7
# 4 - 7
# 5 - 7
# 6 - 7
# 7 - 7
# 8 - 7
# 9 - 7
# 10 - 7

# print lines with 'bash' pattern (implicit printing)
cat /etc/passwd | awk -F: '/bash/'
# root:x:0:0:root:/root:/bin/bash
# gokce:x:1000:1000:gokce,,,:/home/gokce:/bin/bash

# print lines with 'bash' pattern (explicit printing)
cat /etc/passwd | awk -F: '/bash/ { print $0 }'
# root:x:0:0:root:/root:/bin/bash
# gokce:x:1000:1000:gokce,,,:/home/gokce:/bin/bash

# print 1st field of each line with 'bash' pattern
cat /etc/passwd | awk -F: '/bash/ { print $1 }'
# root
# gokce

# print 1st field of each line where 7th field is '/bin/bash'
cat /etc/passwd | awk -F: '$7 == "/bin/bash" { print $1 }'
# root
# gokce

# print 1st field of each line from 1st line to 5th line
cat /etc/passwd | awk -F: 'NR == 1, NR == 5 { print $1 }'
# root
# daemon
# bin
# sys
# sync

# increment 'numlines' at each line and print at the end
cat /etc/passwd | awk '
{ numlines++ }
END { print numlines }'
# 41

# count number of users for each shell
cat /etc/passwd | awk -F: '
{ shell[$7]++ }
END { for (i in shell) printf "%-20s%d\n", i, shell[i] }'
# /bin/sync           1
# /bin/bash           2
# /bin/false          22
# /usr/sbin/nologin   16

# run external 'date' command and read its stdout
awk 'BEGIN {
    "date" | getline line
    print line
    close("date")
}'
# Thu Oct 13 21:04:58 EEST 2016

# run external 'date -u' command and read its stdout
awk 'BEGIN {
    cmd = "date -u"
    cmd | getline line
    print line
    close(cmd)
}'
# Thu Oct 13 18:05:03 UTC 2016

# run external 'cat' command and write to its stdin
awk 'BEGIN {
    print "hello world" | "cat"
    close("cat")
}'
# hello world

# run a shell command by writing to 'sh' stdin
awk 'BEGIN {
    print "ls -l /bin/echo" | "sh"
    close("sh")
}'
# -rwxr-xr-x 1 root root 31376 Feb 18  2016 /bin/echo

# run a shell command
awk 'BEGIN { system("ls -l /bin/echo") }'
# -rwxr-xr-x 1 root root 31376 Feb 18  2016 /bin/echo

# define a 'fact' function and use it to calculate factorial of 5
awk '
BEGIN {
    print fact(5)
}

function fact(n) {
    if (n < 2)
        return 1;
    return n * fact(n-1);
}'
# 120

# calculate mean of random numbers
seq 100 | shuf | head | awk '
{ sum += $0 } 
END { print sum / NR }'
# 72.7

# define 'mean' as alias (escape '$' characters inside double quotes)
alias mean="awk '{ sum += \$0 } END { print sum / NR }'"

# use 'mean' alias
seq 100 | shuf | head | mean
# 49.2

# calculate stdev of random numbers
seq 100 | shuf | head | awk '
{ sum += $0; sumsq += $0^2 }
END { print sqrt(NR * sumsq - sum^2) / NR; }'
# 29.3966

# define 'stdev' as alias (escape '$' characters inside double quotes)
alias stdev="awk '{ sum += \$0; sumsq += \$0^2 } END { print sqrt(NR * sumsq - sum^2) / NR; }'"

# use 'stdev' alias
seq 100 | shuf | head | stdev
# 27.7027

# generate a text with 100 random words from 20 random words
shuf /usr/share/dict/words | head -20 | shuf -r | head -100 | fmt
# updating poignantly poignantly lobotomies Intel's seesaw's baas vise's
# hurray's Intel's Grampians Grampians settlement whooshing intertwines
# Terrell lobotomies baas tiny Turing hurray's lobotomies hurray's idea's
# underwriter's Grampians seesaw's Turing Intel's whooshing vise's idea's
# Terrell baas underwriter's whooshing Turing Gorbachev freshets pluckier
# settlement lobotomies baas Terrell settlement pluckier freshets pluckier
# underwriter's Terrell baas lobotomies pluckier poignantly intertwines
# tiny idea's settlement pluckier tiny Terrell baas settlement pluckier
# idea's freshets vise's updating Grampians underwriter's settlement
# intertwines hurray's poignantly vise's underwriter's updating baas idea's
# intertwines Turing underwriter's underwriter's Grampians vise's baas
# Terrell Terrell poignantly Grampians hurray's seesaw's pluckier Turing
# lobotomies poignantly Terrell freshets underwriter's tiny

# word frequency (adapted from 'Effective awk Programming' by 'Arnold Robbins')
shuf /usr/share/dict/words | head -20 | shuf -r | head -100 | fmt | awk '
{
    $0 = tolower($0)
    gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
    for (i = 1; i <= NF; i++) {
        freq[$i]++
        if (maxlen < length($i)) {
            maxlen = length($i)
        }
    }
}

END {
    sort = "sort -k2 -rn | head -n 5"
    for (word in freq) {
        printf "%-*s%d\n", maxlen, word, freq[word] | sort
    }
    close(sort)
}'
# walmarts    11
# hogs        7
# trousseaux  6
# trees       6
# priming     6