|
| 1 | +#lang pollen |
| 2 | + |
| 3 | +◊page-init{} |
| 4 | +◊define-meta[page-title]{Parsing Pathnames} |
| 5 | +◊define-meta[page-description]{Parsing and Managing Pathnames} |
| 6 | + |
| 7 | +This is an example of parsing and transforming filenames and, in |
| 8 | +particular, pathnames. It draws heavily on the functionality of |
| 9 | +◊command{awk}, ◊command{paste}, and ◊command{grep}. |
| 10 | + |
| 11 | +◊example{ |
| 12 | +#!/usr/bin/env bash |
| 13 | +#----------------------------------------------------------- |
| 14 | +# Management of PATH, LD_LIBRARY_PATH, MANPATH variables... |
| 15 | +# By Emmanuel Rouat <no-email> |
| 16 | +# (Inspired by the bash documentation 'pathfuncs' and by |
| 17 | +# discussions found on stackoverflow: |
| 18 | +# http://stackoverflow.com/questions/370047/ |
| 19 | +# http://stackoverflow.com/questions/273909/#346860 ) |
| 20 | +# Last modified: Sat Sep 22 12:01:55 CEST 2012 |
| 21 | +# |
| 22 | +# The following functions handle spaces correctly. |
| 23 | +# These functions belong in .bash_profile rather than in |
| 24 | +# .bashrc, I guess. |
| 25 | +# |
| 26 | +# The modular aspect of these functions should make it easy |
| 27 | +# to expand them to handle path substitutions instead |
| 28 | +# of path removal etc.... |
| 29 | +# |
| 30 | +# See http://www.catonmat.net/blog/awk-one-liners-explained-part-two/ |
| 31 | +# (item 43) for an explanation of the 'duplicate-entries' removal |
| 32 | +# (it's a nice trick!) |
| 33 | +#----------------------------------------------------------- |
| 34 | + |
| 35 | +# Show $@ (usually PATH) as list. |
| 36 | +function p_show() { local p="$@" && for p; do [[ ${!p} ]] && |
| 37 | +echo -e ${!p//:/\\n}; done } |
| 38 | + |
| 39 | +# Filter out empty lines, multiple/trailing slashes, and duplicate entries. |
| 40 | +function p_filter() |
| 41 | +{ awk '/^[ \t]*$/ {next} {sub(/\/+$/, "");gsub(/\/+/, "/")}!x[$0]++' ;} |
| 42 | + |
| 43 | +# Rebuild list of items into ':' separated word (PATH-like). |
| 44 | +function p_build() { paste -sd: ;} |
| 45 | + |
| 46 | +# Clean $1 (typically PATH) and rebuild it |
| 47 | +function p_clean() |
| 48 | +{ local p=${1} && eval ${p}='$(p_show ${p} | p_filter | p_build)' ;} |
| 49 | + |
| 50 | +# Remove $1 from $2 (found on stackoverflow, with modifications). |
| 51 | +function p_rm() |
| 52 | +{ local d=$(echo $1 | p_filter) p=${2} && |
| 53 | + eval ${p}='$(p_show ${p} | p_filter | grep -xv "${d}" | p_build)' ;} |
| 54 | + |
| 55 | +# Same as previous, but filters on a pattern (dangerous... |
| 56 | +#+ don't use 'bin' or '/' as pattern!). |
| 57 | +function p_rmpat() |
| 58 | +{ local d=$(echo $1 | p_filter) p=${2} && eval ${p}='$(p_show ${p} | |
| 59 | + p_filter | grep -v "${d}" | p_build)' ;} |
| 60 | + |
| 61 | +# Delete $1 from $2 and append it cleanly. |
| 62 | +function p_append() |
| 63 | +{ local d=$(echo $1 | p_filter) p=${2} && p_rm "${d}" ${p} && |
| 64 | + eval ${p}='$(p_show ${p} d | p_build)' ;} |
| 65 | + |
| 66 | +# Delete $1 from $2 and prepend it cleanly. |
| 67 | +function p_prepend() |
| 68 | +{ local d=$(echo $1 | p_filter) p=${2} && p_rm "${d}" ${p} && |
| 69 | + eval ${p}='$(p_show d ${p} | p_build)' ;} |
| 70 | + |
| 71 | +# Some tests: |
| 72 | +echo |
| 73 | +MYPATH="/bin:/usr/bin/:/bin://bin/" |
| 74 | +p_append "/project//my project/bin" MYPATH |
| 75 | +echo "Append '/project//my project/bin' to '/bin:/usr/bin/:/bin://bin/'" |
| 76 | +echo "(result should be: /bin:/usr/bin:/project/my project/bin)" |
| 77 | +echo $MYPATH |
| 78 | + |
| 79 | +echo |
| 80 | +MYOTHERPATH="/bin:/usr/bin/:/bin:/project//my project/bin" |
| 81 | +p_prepend "/project//my project/bin" MYOTHERPATH |
| 82 | +echo "Prepend '/project//my project/bin' \ |
| 83 | +to '/bin:/usr/bin/:/bin:/project//my project/bin/'" |
| 84 | +echo "(result should be: /project/my project/bin:/bin:/usr/bin)" |
| 85 | +echo $MYOTHERPATH |
| 86 | + |
| 87 | +echo |
| 88 | +p_prepend "/project//my project/bin" FOOPATH # FOOPATH doesn't exist. |
| 89 | +echo "Prepend '/project//my project/bin' to an unset variable" |
| 90 | +echo "(result should be: /project/my project/bin)" |
| 91 | +echo $FOOPATH |
| 92 | + |
| 93 | +echo |
| 94 | +BARPATH="/a:/b/://b c://a:/my local pub" |
| 95 | +p_clean BARPATH |
| 96 | +echo "Clean BARPATH='/a:/b/://b c://a:/my local pub'" |
| 97 | +echo "(result should be: /a:/b:/b c:/my local pub)" |
| 98 | +echo $BARPATH |
| 99 | +} |
| 100 | + |
| 101 | +How can you process filenames correctly in shell? |
| 102 | + |
| 103 | +◊example{ |
| 104 | +Doing it correctly: A quick summary |
| 105 | +by David Wheeler |
| 106 | +http://www.dwheeler.com/essays/filenames-in-shell.html |
| 107 | + |
| 108 | +So, how can you process filenames correctly in shell? Here's a quick |
| 109 | +summary about how to do it correctly, for the impatient who "just want the |
| 110 | +answer". In short: Double-quote to use "$variable" instead of $variable, |
| 111 | +set IFS to just newline and tab, prefix all globs/filenames so they cannot |
| 112 | +begin with "-" when expanded, and use one of a few templates that work |
| 113 | +correctly. Here are some of those templates that work correctly: |
| 114 | + |
| 115 | + |
| 116 | + IFS="$(printf '\n\t')" |
| 117 | + # Remove SPACE, so filenames with spaces work well. |
| 118 | + |
| 119 | + # Correct glob use: |
| 120 | + #+ always use "for" loop, prefix glob, check for existence: |
| 121 | + for file in ./* ; do # Use "./*" ... NEVER bare "*" ... |
| 122 | + if [ -e "$file" ] ; then # Make sure it isn't an empty match. |
| 123 | + COMMAND ... "$file" ... |
| 124 | + fi |
| 125 | + done |
| 126 | + |
| 127 | + |
| 128 | + |
| 129 | + # Correct glob use, but requires nonstandard bash extension. |
| 130 | + shopt -s nullglob # Bash extension, |
| 131 | + #+ so that empty glob matches will work. |
| 132 | + for file in ./* ; do # Use "./*", NEVER bare "*" |
| 133 | + COMMAND ... "$file" ... |
| 134 | + done |
| 135 | + |
| 136 | + |
| 137 | + |
| 138 | + # These handle all filenames correctly; |
| 139 | + #+ can be unwieldy if COMMAND is large: |
| 140 | + find ... -exec COMMAND... {} \; |
| 141 | + find ... -exec COMMAND... {} \+ # If multiple files are okay for COMMAND. |
| 142 | + |
| 143 | + |
| 144 | + |
| 145 | + # This skips filenames with control characters |
| 146 | + #+ (including tab and newline). |
| 147 | + IFS="$(printf '\n\t')" |
| 148 | + controlchars="$(printf '*[\001-\037\177]*')" |
| 149 | + for file in $(find . ! -name "$controlchars") ; do |
| 150 | + COMMAND "$file" ... |
| 151 | + done |
| 152 | + |
| 153 | + |
| 154 | + |
| 155 | + # Okay if filenames can't contain tabs or newlines -- |
| 156 | + #+ beware the assumption. |
| 157 | + IFS="$(printf '\n\t')" |
| 158 | + for file in $(find .) ; do |
| 159 | + COMMAND "$file" ... |
| 160 | + done |
| 161 | + |
| 162 | + |
| 163 | + |
| 164 | + # Requires nonstandard but common extensions in find and xargs: |
| 165 | + find . -print0 | xargs -0 COMMAND |
| 166 | + |
| 167 | + # Requires nonstandard extensions to find and to shell (bash works). |
| 168 | + # Variables set in the loop may be lost when it ends (pipeline subshell): |
| 169 | + find . -print0 | while IFS="" read -r -d "" file ; do ... |
| 170 | + COMMAND "$file" # Use quoted "$file", not $file, everywhere. |
| 171 | + done |
| 172 | + |
| 173 | + |
| 174 | + |
| 175 | + # Requires nonstandard extensions to find and to shell (bash works). |
| 176 | + # Underlying system must include named pipes (FIFOs) |
| 177 | + #+ or the /dev/fd mechanism. |
| 178 | + # In this version, variables *do* stay set after the loop ends, |
| 179 | + # and you can read from stdin. |
| 180 | + #+ (Change the 4 to another number if fd 4 is needed.) |
| 181 | + |
| 182 | + while IFS="" read -r -d "" file <&4 ; do |
| 183 | + COMMAND "$file" # Use quoted "$file" -- not $file, everywhere. |
| 184 | + done 4< <(find . -print0) |
| 185 | + |
| 186 | + |
| 187 | + # Named pipe version. |
| 188 | + # Requires nonstandard extensions to find and to shell's read (bash ok). |
| 189 | + # Underlying system must include named pipes (FIFOs). |
| 190 | + # Again, in this version, variables *do* stay set after the loop ends, |
| 191 | + # and you can read from stdin. |
| 192 | + # (Change the 4 to something else if fd 4 is needed). |
| 193 | + |
| 194 | + mkfifo mypipe |
| 195 | + |
| 196 | + find . -print0 > mypipe & |
| 197 | + while IFS="" read -r -d "" file <&4 ; do |
| 198 | + COMMAND "$file" # Use quoted "$file", not $file, everywhere. |
| 199 | + done 4< mypipe |
| 200 | +} |
0 commit comments