|
| 1 | +#lang pollen |
| 2 | + |
| 3 | +◊page-init{} |
| 4 | +◊define-meta[page-title]{Awk Micro-Primer} |
| 5 | +◊define-meta[page-description]{Awk Micro-Primer} |
| 6 | + |
| 7 | +Awk is a full-featured text processing language with a syntax |
| 8 | +reminiscent of C. While it possesses an extensive set of operators and |
| 9 | +capabilities, we will cover only a few of these here - the ones most |
| 10 | +useful in shell scripts. ◊footnote{Its name derives from the initials |
| 11 | +of its authors, Aho, Weinberger, and Kernighan.} |
| 12 | + |
| 13 | +Awk breaks each line of input passed to it into fields. By default, a |
| 14 | +field is a string of consecutive characters delimited by whitespace, |
| 15 | +though there are options for changing this. Awk parses and operates on |
| 16 | +each separate field. This makes it ideal for handling structured text |
| 17 | +files -- especially tables -- data organized into consistent chunks, |
| 18 | +such as rows and columns. |
| 19 | + |
| 20 | +Strong quoting and curly brackets enclose blocks of ◊command{awk} code |
| 21 | +within a shell script. |
| 22 | + |
| 23 | +◊example{ |
| 24 | +# $1 is field #1, $2 is field #2, etc. |
| 25 | + |
| 26 | +echo one two | awk '{print $1}' |
| 27 | +# one |
| 28 | + |
| 29 | +echo one two | awk '{print $2}' |
| 30 | +# two |
| 31 | + |
| 32 | +# But what is field #0 ($0)? |
| 33 | +echo one two | awk '{print $0}' |
| 34 | +# one two |
| 35 | +# All the fields! |
| 36 | + |
| 37 | + |
| 38 | +awk '{print $3}' $filename |
| 39 | +# Prints field #3 of file $filename to stdout. |
| 40 | + |
| 41 | +awk '{print $1 $5 $6}' $filename |
| 42 | +# Prints fields #1, #5, and #6 of file $filename. |
| 43 | + |
| 44 | +awk '{print $0}' $filename |
| 45 | +# Prints the entire file! |
| 46 | +# Same effect as: cat $filename . . . or . . . sed '' $filename |
| 47 | +} |
| 48 | + |
| 49 | +We have just seen the ◊command{awk} print command in action. The only |
| 50 | +other feature of ◊command{awk} we need to deal with here is |
| 51 | +variables. Awk handles variables similarly to shell scripts, though a |
| 52 | +bit more flexibly. |
| 53 | + |
| 54 | +◊example{ |
| 55 | +{ total += ${column_number} } |
| 56 | +} |
| 57 | + |
| 58 | +This adds the value of ◊code{column_number} to the running total of |
| 59 | +◊code{total}. Finally, to ◊code{print "total"}, there is an ◊code{END} |
| 60 | +command block, executed after the script has processed all its input. |
| 61 | + |
| 62 | +◊example{ |
| 63 | +END { print total } |
| 64 | +} |
| 65 | + |
| 66 | +Corresponding to the END, there is a BEGIN, for a code block to be |
| 67 | +performed before awk starts processing its input. |
| 68 | + |
| 69 | +◊anchored-example[#:anchor "awk_cnt1"]{Counting Letter Occurrences} |
| 70 | + |
| 71 | +The following example illustrates how awk can add text-parsing tools |
| 72 | +to a shell script. |
| 73 | + |
| 74 | +◊example{ |
| 75 | +#! /bin/sh |
| 76 | +# letter-count2.sh: Counting letter occurrences in a text file. |
| 77 | +# |
| 78 | +# Script by nyal [[email protected]]. |
| 79 | +# Used in ABS Guide with permission. |
| 80 | +# Recommented and reformatted by ABS Guide author. |
| 81 | +# Version 1.1: Modified to work with gawk 3.1.3. |
| 82 | +# (Will still work with earlier versions.) |
| 83 | + |
| 84 | + |
| 85 | +INIT_TAB_AWK="" |
| 86 | +# Parameter to initialize awk script. |
| 87 | +count_case=0 |
| 88 | +FILE_PARSE=$1 |
| 89 | + |
| 90 | +E_PARAMERR=85 |
| 91 | + |
| 92 | +usage() |
| 93 | +{ |
| 94 | + echo "Usage: letter-count.sh file letters" 2>&1 |
| 95 | + # For example: ./letter-count2.sh filename.txt a b c |
| 96 | + exit $E_PARAMERR # Too few arguments passed to script. |
| 97 | +} |
| 98 | + |
| 99 | +if [ ! -f "$1" ] ; then |
| 100 | + echo "$1: No such file." 2>&1 |
| 101 | + usage # Print usage message and exit. |
| 102 | +fi |
| 103 | + |
| 104 | +if [ -z "$2" ] ; then |
| 105 | + echo "$2: No letters specified." 2>&1 |
| 106 | + usage |
| 107 | +fi |
| 108 | + |
| 109 | +shift # Letters specified. |
| 110 | +for letter in `echo $@` # For each one . . . |
| 111 | + do |
| 112 | + INIT_TAB_AWK="$INIT_TAB_AWK tab_search[${count_case}] = \ |
| 113 | + \"$letter\"; final_tab[${count_case}] = 0; " |
| 114 | + # Pass as parameter to awk script below. |
| 115 | + count_case=`expr $count_case + 1` |
| 116 | +done |
| 117 | + |
| 118 | +# DEBUG: |
| 119 | +# echo $INIT_TAB_AWK; |
| 120 | + |
| 121 | +cat $FILE_PARSE | |
| 122 | +# Pipe the target file to the following awk script. |
| 123 | + |
| 124 | +# --------------------------------------------------------------------- |
| 125 | +# Earlier version of script: |
| 126 | +# awk -v tab_search=0 -v final_tab=0 -v tab=0 -v \ |
| 127 | +# nb_letter=0 -v chara=0 -v chara2=0 \ |
| 128 | + |
| 129 | +awk \ |
| 130 | +"BEGIN { $INIT_TAB_AWK } \ |
| 131 | +{ split(\$0, tab, \"\"); \ |
| 132 | +for (chara in tab) \ |
| 133 | +{ for (chara2 in tab_search) \ |
| 134 | +{ if (tab_search[chara2] == tab[chara]) { final_tab[chara2]++ } } } } \ |
| 135 | +END { for (chara in final_tab) \ |
| 136 | +{ print tab_search[chara] \" => \" final_tab[chara] } }" |
| 137 | +# --------------------------------------------------------------------- |
| 138 | +# Nothing all that complicated, just . . . |
| 139 | +#+ for-loops, if-tests, and a couple of specialized functions. |
| 140 | + |
| 141 | +exit $? |
| 142 | + |
| 143 | +# Compare this script to letter-count.sh. |
| 144 | +} |
0 commit comments