-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbackfill.sh
executable file
·109 lines (96 loc) · 3.42 KB
/
backfill.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env bash
# Working directories, relative to the directory the script is run from.
# IMPORT receives the downloaded dump files and the helper scripts;
# EXPORT receives the generated tab-separated files.
IMPORT=import
EXPORT=export

# Create both directories if needed. Previously copied *.sql / *.sh files are
# removed from IMPORT (downloaded data is kept), while EXPORT is emptied.
if mkdir -p "$IMPORT"; then
  rm -f "$IMPORT"/*.sql "$IMPORT"/*.sh
fi
if mkdir -p "$EXPORT"; then
  rm -rf "$EXPORT"/*
fi
#######################################
# Download the GeoNames dump files into $IMPORT and unpack the zip archives.
# wget -N only re-fetches a file when the remote copy is newer than the local
# one, and unzip -u only extracts members that are new or newer.
# Globals:   IMPORT (read) - directory the files are downloaded into
# Arguments: none
# Returns:   0 when every download and unzip succeeded, 1 on the first failure
#######################################
download_data() {
  local gn_base="http://download.geonames.org/export/dump"
  # iso-languagecodes.txt is read by gen_language, so it has to be fetched too.
  local -a txt_files=(countryInfo.txt featureCodes_en.txt iso-languagecodes.txt)
  local -a zip_files=(alternateNamesV2.zip allCountries.zip)
  local f
  (
    # Subshell keeps the caller's working directory untouched. Bail out if
    # the directory is missing rather than downloading into the wrong place.
    cd "$IMPORT" || exit 1
    for f in "${txt_files[@]}"; do
      wget -q -N --show-progress "$gn_base/$f" || exit 1
    done
    for f in "${zip_files[@]}"; do
      wget -q -N --show-progress "$gn_base/$f" || exit 1
      unzip -u -o "$f" || exit 1
    done
  )
}
# Filter stdin to stdout through the nulls.awk program, splitting each record
# on tab characters. nulls.awk is looked up relative to the current working
# directory, so the script must be run from the directory that contains it.
# NOTE(review): presumably nulls.awk rewrites empty fields as NULL markers for
# the database load - confirm against the awk script.
convert_nulls() {
  local -r field_sep=$'\t'
  awk -F "${field_sep}" -f nulls.awk
}
#######################################
# Produce "<name>_nulls.txt" next to each large dump file in $IMPORT by
# passing the file through convert_nulls. A file is only regenerated when the
# source is newer than the existing output (or the output does not exist yet).
# The conversion writes to a temporary file that is renamed on success, so a
# failed run never leaves a truncated output that would look up to date.
# Globals:   IMPORT (read)
# Arguments: none
# Returns:   0 when both outputs exist and are current, 1 on the first failure
#######################################
fix_nulls() {
  local name src dst tmp
  for name in alternateNamesV2 allCountries; do
    src="$IMPORT/$name.txt"
    dst="$IMPORT/${name}_nulls.txt"
    # -nt is also true when src exists and dst does not.
    if [[ "$src" -nt "$dst" ]]; then
      tmp="$dst.tmp"
      if ! convert_nulls < "$src" > "$tmp"; then
        rm -f -- "$tmp"
        return 1
      fi
      mv -f -- "$tmp" "$dst" || return 1
    fi
    # Covers the case where neither the source nor the output exists.
    [[ -f "$dst" ]] || return 1
  done
}
#######################################
# Build country_info.txt from the GeoNames countryInfo.txt dump.
# Drops '#' comment lines, keeps columns 1, 5-15 and 17 of the tab-separated
# input, re-emits the rows tab-delimited, passes them through convert_nulls
# and finally copies the result from $IMPORT to $EXPORT.
# Globals:   IMPORT (read), EXPORT (read)
# Arguments: none
# Returns:   0 on success; non-zero when any pipeline stage or the copy fails
#######################################
gen_country_info() {
  local -r out="$IMPORT/country_info.txt"
  (
    # pipefail is scoped to this subshell: a missing input file or a missing
    # csvkit tool must fail the step rather than export an empty file.
    set -o pipefail
    # csvformat -K 1 skips the first line, presumably the column header that
    # csvcut -H synthesises - confirm against the csvkit version in use.
    sed -e '/^#/d' "$IMPORT/countryInfo.txt" \
      | csvcut -t -H -c 1,5-15,17 \
      | csvformat -K 1 -T \
      | convert_nulls > "$out"
  ) || return 1
  cp "$out" "$EXPORT"
}
#######################################
# Build country_language.txt directly in $EXPORT from countryInfo.txt.
# Drops '#' comment lines, keeps columns 1 and 16, re-emits them
# tab-delimited, runs two_split_comma.awk (looked up in the current working
# directory) and passes the result through convert_nulls.
# NOTE(review): presumably two_split_comma.awk expands the comma-separated
# second column into one row per value - confirm against the awk script.
# Globals:   IMPORT (read), EXPORT (read)
# Arguments: none
# Returns:   0 on success; non-zero when any pipeline stage fails
#######################################
gen_country_language() {
  (
    # pipefail is scoped to this subshell so that a failure in any stage,
    # not just the last one, is reported to the caller.
    set -o pipefail
    sed -e '/^#/d' "$IMPORT/countryInfo.txt" \
      | csvcut -t -H -c 1,16 \
      | csvformat -K 1 -T \
      | awk -F $'\t' -f two_split_comma.awk \
      | convert_nulls > "$EXPORT/country_language.txt"
  )
}
#######################################
# Build country_neighbour.txt directly in $EXPORT from countryInfo.txt.
# Drops '#' comment lines, keeps columns 1 and 18, re-emits them
# tab-delimited, runs two_split_comma.awk (looked up in the current working
# directory) and passes the result through convert_nulls.
# NOTE(review): presumably two_split_comma.awk expands the comma-separated
# second column into one row per value - confirm against the awk script.
# Globals:   IMPORT (read), EXPORT (read)
# Arguments: none
# Returns:   0 on success; non-zero when any pipeline stage fails
#######################################
gen_country_neighbour() {
  (
    # pipefail is scoped to this subshell so that a failure in any stage,
    # not just the last one, is reported to the caller.
    set -o pipefail
    sed -e '/^#/d' "$IMPORT/countryInfo.txt" \
      | csvcut -t -H -c 1,18 \
      | csvformat -K 1 -T \
      | awk -F $'\t' -f two_split_comma.awk \
      | convert_nulls > "$EXPORT/country_neighbour.txt"
  )
}
#######################################
# Build feature_class_code.txt directly in $EXPORT.
# The file starts with nine hard-coded rows, one per single-letter feature
# class (code, name, description), followed by the rows of
# featureCodes_en.txt with the leading "<class letter>." prefix removed from
# each line and passed through convert_nulls.
# Globals:   IMPORT (read), EXPORT (read)
# Arguments: none
# Returns:   0 on success; non-zero when the file cannot be written or any
#            pipeline stage fails
#######################################
gen_feature_class_code() {
  local -r out="$EXPORT/feature_class_code.txt"
  # One format string reused for every (code, name, description) triple, so
  # the row text is never interpreted as a printf format.
  printf '%s\t%s\t%s\n' \
    'A' 'Administrative Regions' 'country, state, region, ...' \
    'H' 'Hydrographic Features' 'sea, river, lake, ...' \
    'L' 'Area Features' 'park, area, ..' \
    'P' 'Populated Places' 'city, town, ...' \
    'R' 'Road/Railroad Features' 'street, railroad, ...' \
    'S' 'Spot Features' 'airport, building, farm, ...' \
    'T' 'Hypsographic Features' 'mountain, hill, rock, ...' \
    'U' 'Undersea Features' 'undersea features' \
    'V' 'Vegetation Features' 'forest, heath, ...' \
    > "$out" || return 1
  (
    # pipefail is scoped to this subshell: a missing featureCodes_en.txt must
    # fail the step rather than leave only the class rows in the output.
    set -o pipefail
    sed -e 's/^[AHLPRSTUV]\.//' "$IMPORT/featureCodes_en.txt" \
      | convert_nulls >> "$out"
  )
}
#######################################
# Build language.txt directly in $EXPORT.
# The file starts with seven hard-coded (code, description) rows for codes
# that are not real languages, followed by iso-languagecodes.txt with its
# first (header) line removed, transformed by language.awk (looked up in the
# current working directory) and passed through convert_nulls.
# NOTE(review): the hard-coded codes are presumably the pseudo-language
# values GeoNames uses in its alternate-names data - confirm against the
# GeoNames readme.
# Globals:   IMPORT (read), EXPORT (read)
# Arguments: none
# Returns:   0 on success; non-zero when the file cannot be written or any
#            pipeline stage fails
#######################################
gen_language() {
  local -r out="$EXPORT/language.txt"
  # One format string reused for every (code, description) pair, so the row
  # text is never interpreted as a printf format.
  printf '%s\t%s\n' \
    'post' 'postal code' \
    'link' 'website link' \
    'iata' 'iata airport code' \
    'icao' 'icao airport code' \
    'faac' 'faac airport code' \
    'abbr' 'abbreviation' \
    'fr_1793' 'name used during French Revolution' \
    > "$out" || return 1
  (
    # pipefail is scoped to this subshell: a missing iso-languagecodes.txt
    # must fail the step rather than leave only the hard-coded rows.
    set -o pipefail
    sed -e '1d' "$IMPORT/iso-languagecodes.txt" \
      | awk -F $'\t' -f language.awk \
      | convert_nulls >> "$out"
  )
}
#######################################
# Run every backfill step in order, announcing each one on stdout and
# stopping at the first step that fails.
# Globals:   IMPORT (read), EXPORT (read)
# Arguments: none
# Returns:   0 when every step succeeded, 1 otherwise
#######################################
backfill() {
  echo "downloading data"
  download_data || return 1
  echo "fixing nulls"
  fix_nulls || return 1
  echo "gen country info"
  gen_country_info || return 1
  echo "gen country language"
  gen_country_language || return 1
  echo "gen country neighbour"
  gen_country_neighbour || return 1
  echo "gen feature class code"
  gen_feature_class_code || return 1
  echo "gen language"
  gen_language || return 1
  # NOTE(review): my.cnf and the numbered scripts are presumably picked up by
  # the database container through mounted directories - confirm against the
  # compose file.
  echo "cp to import"
  cp -f my.cnf 001-load.sh 002-export.sh "$IMPORT" || return 1
  echo "cp to export"
  cp -f my.cnf "$EXPORT" || return 1
  echo "docker down"
  docker-compose down || return 1
  # No -d flag: this call stays attached until the containers stop.
  echo "docker up"
  docker-compose up || return 1
}

# Report the outcome and make the exit status reflect it, so callers (cron,
# CI, other scripts) can detect a failed backfill.
if backfill; then
  echo "*** DONE BACKFILL ***"
else
  echo "XXX ERROR XXX" >&2
  exit 1
fi