Commit
Several updates to database prep system.
* added a setup script to install required software
* switched from config to a collection of Python scripts, each
  responsible for generating its own units layer
kueda committed Aug 7, 2016
1 parent 1d3d0f5 commit a1d9418
Showing 11 changed files with 413 additions and 0 deletions.
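
The second bullet is the heart of the change: prepare-database.py (below) now just globs sources/*.py, runs each script, and expects a units.shp to appear in that script's work directory. A minimal sketch of that implicit contract, using a hypothetical source name:

# sources/example_source.py (hypothetical; see mf2337c.py below for a real one)
import util
import os

# Every source script gets its own sources/work-<name>/ directory...
work_path = util.make_work_dir(__file__)
os.chdir(work_path)

# ...fetches and converts its data however it needs to (curl, tar,
# e00 conversion, ogr2ogr reprojection)...

# ...and must leave behind a units.shp here, which prepare-database.py
# then loads into PostGIS with shp2pgsql.
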
6 changes: 6 additions & 0 deletions .gitignore
@@ -0,0 +1,6 @@
sources/work-*
*__pycache__*
*.DS_Store
venv/
*.pyc
bin/
9 changes: 9 additions & 0 deletions consolidate.py
@@ -91,3 +91,12 @@ def run_sql(sql):

run_sql("DELETE FROM {} WHERE LOWER(PTYPE) IN ('h2o', 'water')".format(work_table_name))
run_sql("CREATE TABLE {} AS SELECT PTYPE, ST_Union(geom) AS geom FROM {} GROUP BY PTYPE".format(final_table_name, work_table_name))

for idx, source in enumerate(sources):
    print(source['url'])
    download_path = download(source)
    for path in source['paths']:
        if source['extract']:
            extracted_path = extract_e00(path)
        if source['polygonize']:
            poly_path = polygonize(path)
38 changes: 38 additions & 0 deletions prepare-database.py
@@ -0,0 +1,38 @@
from sources import util
from subprocess import call, Popen, PIPE
import glob
import os

work_table_name = "work"
final_table_name = "units"
dbname = "underfoot"

util.call_cmd(["createdb", dbname])
util.call_cmd(["psql", "-d", dbname, "-c", "CREATE EXTENSION postgis;"])

util.run_sql("DROP TABLE IF EXISTS {}".format(work_table_name), dbname=dbname)
util.run_sql("DROP TABLE IF EXISTS {}".format(final_table_name), dbname=dbname)

for idx, path in enumerate(glob.glob("sources/*.py")):
    work_path = util.make_work_dir(path)
    util.call_cmd(["python", path])
    units_path = os.path.join(work_path, "units.shp")
    shp2pgsql_cmd = [
        "shp2pgsql", ("-c" if idx == 0 else "-a"), "-s", "900913", "-I", units_path, work_table_name
    ]
    psql_cmd = [
        "psql", dbname
    ]
    print(shp2pgsql_cmd)
    p1 = Popen(shp2pgsql_cmd, stdout=PIPE)
    p2 = Popen(psql_cmd, stdin=p1.stdout, stdout=PIPE)
    p1.stdout.close()
    print("Loading {} into {} table...".format(units_path, work_table_name))
    output = p2.communicate()[0]

print("Deleting water units...")
util.run_sql("DELETE FROM {} WHERE LOWER(PTYPE) IN ('h2o', 'water')".format(work_table_name), dbname=dbname)
print("Dissolving units by PTYPE...")
util.run_sql("CREATE TABLE {} AS SELECT PTYPE, ST_Union(geom) AS geom FROM {} GROUP BY PTYPE".format(final_table_name, work_table_name), dbname=dbname)
util.run_sql("DROP TABLE {}".format(work_table_name), dbname=dbname)
print("Database {} created with table {}".format(dbname, final_table_name))
18 changes: 18 additions & 0 deletions setup
@@ -0,0 +1,18 @@
#!/bin/bash
#
# Install and set up required software
#
echo "INSTALLING PYTHON PACKAGES..."
pip install -r requirements.txt
echo "DOWNLOADING E00CONV..."
mkdir -p bin
cd bin
E00COMPR="e00compr-1.0.1"
curl -OL http://avce00.maptools.org/dl/$E00COMPR.tar.gz
tar xzvf $E00COMPR.tar.gz
ln -sf $E00COMPR e00compr
cd $E00COMPR
echo "MAKING E00CONV..."
make
cd ../../
echo "DONE!"
39 changes: 39 additions & 0 deletions sources/mf2337c.py
@@ -0,0 +1,39 @@
import util
import os
import glob
import subprocess

work_path = util.make_work_dir(__file__)
os.chdir(work_path)

# download the file if necessary
if not os.path.isfile("mf2337c.tgz"):
    url = "http://pubs.usgs.gov/mf/2000/2337/mf2337c.tgz"
    print("Downloading {}\n".format(url))
    util.call_cmd(["curl", "-OL", url])

# extract the archive if necessary
if not os.path.isdir("mageo"):
    print("\nExtracting archive...\n")
    util.call_cmd(["tar", "xzvf", "mf2337c.tgz"])

# convert the Arc Info coverages to shapefiles
polygons_path = "mageo/ma-geol-shapefiles/polygons.shp"
if not os.path.isfile(polygons_path):
    print("\nConverting e00 to shapefiles...\n")
    shapefiles_path = util.extract_e00("mageo/ma-geol.e00")
    polygons_path = util.polygonize_arcs(shapefiles_path)

# dissolve all the shapes by PTYPE and project them into Google Mercator
print("\nDissolving shapes and reprojecting...\n")
util.call_cmd([
    "ogr2ogr",
    "-s_srs", "+proj=lcc +lat_1=37.06666666666 +lat_2=38.43333333333 +lat_0=36.5 +lon_0=-120.5 +x_0=90 +y_0=10 +ellps=clrk66 +units=m +no_defs",
    "-t_srs", util.WEB_MERCATOR_PROJ4,
    "units.shp", polygons_path,
    "-overwrite",
    "-dialect", "sqlite",
    "-sql", "SELECT PTYPE,ST_Union(geometry) as geometry FROM 'polygons' GROUP BY PTYPE"
])

# TODO generate units.csv... or load that into the db?
46 changes: 46 additions & 0 deletions sources/mf2342c.py
@@ -0,0 +1,46 @@
import util
import os
import glob
import subprocess

work_path = util.make_work_dir(__file__)
os.chdir(work_path)

# download the file if necessary
if not os.path.isfile("mf2342c.tgz"):
    url = "http://pubs.usgs.gov/mf/2000/2342/mf2342c.tgz"
    print("Downloading {}\n".format(url))
    util.call_cmd(["curl", "-OL", url])

# extract the archive if necessary
if not os.path.isdir("oakdb"):
    print("\nExtracting archive...\n")
    util.call_cmd(["tar", "xzvf", "mf2342c.tgz"])

# convert the Arc Info coverages to shapefiles
print("\nConverting e00 to shapefiles...\n")
polygon_paths = []
for path in glob.glob("oakdb/*-geol.e00"):
    shapefiles_path = util.extract_e00(path)
    polygon_paths.append(util.polygonize_arcs(shapefiles_path))

# merge all the shapefiles into a single file
print("\nMerging shapefiles...\n")
merged_polygons_path = "merged-polygons.shp"
util.call_cmd(["ogr2ogr", "-overwrite", merged_polygons_path, polygon_paths.pop()])
for path in polygon_paths:
    util.call_cmd(["ogr2ogr", "-update", "-append", merged_polygons_path, path])

# dissolve all the shapes by PTYPE and project them into Google Mercator
print("\nDissolving shapes and reprojecting...\n")
util.call_cmd([
    "ogr2ogr",
    "-s_srs", "+proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=609601.2192024384 +y_0=0 +datum=NAD27 +units=m +no_defs",
    "-t_srs", util.WEB_MERCATOR_PROJ4,
    "units.shp", merged_polygons_path,
    "-overwrite",
    "-dialect", "sqlite",
    "-sql", "SELECT PTYPE,ST_Union(geometry) as geometry FROM 'merged-polygons' GROUP BY PTYPE"
])

# TODO generate units.csv... or load that into the db?
59 changes: 59 additions & 0 deletions sources/of94-622.py
@@ -0,0 +1,59 @@
import util
import os
import glob
import subprocess

work_path = util.make_work_dir(__file__)
os.chdir(work_path)

# download the file if necessary
if not os.path.isfile("cc_g1.tar.Z"):
    url = "http://pubs.usgs.gov/of/1994/of94-622/cc_g1.tar.Z"
    print("Downloading {}\n".format(url))
    util.call_cmd(["curl", "-OL", url])

# extract the archive if necessary
if not os.path.isdir("ccgeo/cc_utm/"):
    print("\nExtracting archive...\n")
    util.call_cmd(["tar", "xzvf", "cc_g1.tar.Z"])

# convert the Arc Info coverages to shapefiles
polygons_path = "ccgeo/cc_utm-shapefiles/polygons.shp"
if not os.path.isfile(polygons_path):
    print("\nConverting e00 to shapefiles...\n")
    shapefiles_path = util.extract_e00("ccgeo/cc_utm")
    polygons_path = util.polygonize_arcs(shapefiles_path)

# This dataset has one unit, Ku, that underlies all the other polygons,
# creating some annoying overlap, so first we need to get all the shapes that
# aren't that...
final_polygons_path = "polygons-separated.shp"
util.call_cmd([
    "ogr2ogr",
    "-overwrite",
    "-where", "POLY_ID != '1'",
    final_polygons_path, polygons_path
])

# ...and then append shapes cut out of the rest
util.call_cmd([
    "ogr2ogr",
    "-append",
    "-dialect", "sqlite",
    "-sql", "SELECT p1.PTYPE, ST_Difference(p1.geometry, ST_Union(p2.geometry)) AS geometry FROM polygons AS p1, polygons AS p2 WHERE p1.POLY_ID = '1' AND p2.POLY_ID != '1'",
    final_polygons_path, polygons_path
])

# dissolve all the shapes by PTYPE and project them into Google Mercator
print("\nDissolving shapes and reprojecting...\n")
util.call_cmd([
    "ogr2ogr",
    "-s_srs", "+proj=utm +zone=10 +datum=NAD27 +units=m +no_defs",
    "-t_srs", util.WEB_MERCATOR_PROJ4,
    "units.shp", final_polygons_path,
    "-overwrite",
    "-dialect", "sqlite",
    "-sql", "SELECT PTYPE,ST_Union(geometry) as geometry FROM 'polygons-separated' GROUP BY PTYPE"
])

# TODO generate units.csv... or load that into the db?
43 changes: 43 additions & 0 deletions sources/of97-489.py
@@ -0,0 +1,43 @@
import util
import os
import glob
import subprocess

work_path = util.make_work_dir(__file__)
os.chdir(work_path)

# download the file if necessary
if not os.path.isfile("sc-geol.e00.gz"):
    url = "http://pubs.usgs.gov/of/1997/of97-489/sc-geol.e00.gz"
    print("Downloading {}\n".format(url))
    util.call_cmd(["curl", "-OL", url])

# extract the archive if necessary
if not os.path.isfile("sc-geol.e00"):
    print("\nExtracting archive...\n")
    util.call_cmd("gunzip -c sc-geol.e00.gz > sc-geol.e00", shell=True)

# # uncompress the e00 itself
# if not os.path.isfile("sfs-geol-uncompressed.e00"):
# util.call_cmd(["../../bin/e00compr/e00conv", "sfs-geol.e00", "sfs-geol-uncompressed.e00"])

# convert the Arc Info coverages to shapefiles
polygons_path = "sc-geol-shapefiles/polygons.shp"
if not os.path.isfile(polygons_path):
    print("\nConverting e00 to shapefiles...\n")
    shapefiles_path = util.extract_e00("sc-geol.e00")
    polygons_path = util.polygonize_arcs(shapefiles_path)

# dissolve all the shapes by PTYPE and project them into Google Mercator
print("\nDissolving shapes and reprojecting...\n")
util.call_cmd([
    "ogr2ogr",
    "-s_srs", "+proj=utm +zone=10 +datum=NAD27 +units=m +no_defs",
    "-t_srs", util.WEB_MERCATOR_PROJ4,
    "units.shp", polygons_path,
    "-overwrite",
    "-dialect", "sqlite",
    "-sql", "SELECT PTYPE,ST_Union(geometry) as geometry FROM 'polygons' GROUP BY PTYPE"
])

# TODO generate units.csv... or load that into the db?
43 changes: 43 additions & 0 deletions sources/of98-354.py
@@ -0,0 +1,43 @@
import util
import os
import glob
import subprocess

work_path = util.make_work_dir(__file__)
os.chdir(work_path)

# download the file if necessary
if not os.path.isfile("sfs_data.tar.gz"):
    url = "http://pubs.usgs.gov/of/1998/of98-354/sfs_data.tar.gz"
    print("Downloading {}\n".format(url))
    util.call_cmd(["curl", "-OL", url])

# extract the archive if necessary
if not os.path.isfile("sfs-geol.e00"):
    print("\nExtracting archive...\n")
    util.call_cmd(["tar", "xzvf", "sfs_data.tar.gz"])

# uncompress the e00 itself
if not os.path.isfile("sfs-geol-uncompressed.e00"):
    util.call_cmd(["../../bin/e00compr/e00conv", "sfs-geol.e00", "sfs-geol-uncompressed.e00"])

# convert the Arc Info coverages to shapefiles
polygons_path = "sfs-geol-uncompressed-shapefiles/polygons.shp"
if not os.path.isfile(polygons_path):
    print("\nConverting e00 to shapefiles...\n")
    shapefiles_path = util.extract_e00("sfs-geol-uncompressed.e00")
    polygons_path = util.polygonize_arcs(shapefiles_path)

# dissolve all the shapes by PTYPE and project them into Google Mercator
print("\nDissolving shapes and reprojecting...\n")
util.call_cmd([
    "ogr2ogr",
    "-s_srs", "+proj=lcc +lat_1=37.066667 +lat_2=38.433333 +lat_0=36.5 +lon_0=-120.5 +x_0=609601.21920 +y_0=-6 +datum=NAD27 +units=m +no_defs",
    "-t_srs", util.WEB_MERCATOR_PROJ4,
    "units.shp", polygons_path,
    "-overwrite",
    "-dialect", "sqlite",
    "-sql", "SELECT PTYPE,ST_Union(geometry) as geometry FROM 'polygons' GROUP BY PTYPE"
])

# TODO generate units.csv... or load that into the db?
58 changes: 58 additions & 0 deletions sources/util/__init__.py
@@ -0,0 +1,58 @@
"""
Colleciton of methods and scripts for processing geologic unit data, mostly
from the USGS.
"""

from subprocess import call, run, Popen, PIPE
import os
import re
import shutil

WEB_MERCATOR_PROJ4="+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +over +no_defs"
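# Note: this is the spherical ("Google") mercator proj4 definition, i.e. the
# projection prepare-database.py assumes when loading shapefiles with
# shp2pgsql -s 900913 (the informal SRID for EPSG:3857); +nadgrids=@null
# suppresses datum transformation during reprojection.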

def extless_basename(path):
    if os.path.isfile(path):
        return os.path.splitext(os.path.basename(path))[0]
    return os.path.split(path)[-1]
def call_cmd(*args, **kwargs):
    # print("Calling `{}` with kwargs: {}".format(args[0], kwargs))
    run(*args, **kwargs)
def run_sql(sql, dbname="underfoot"):
    call_cmd([
        "psql", dbname, "-c", sql
    ])
def make_work_dir(path):
    work_path = os.path.join(os.path.dirname(os.path.realpath(path)), "work-{}".format(extless_basename(path)))
    if not os.path.isdir(work_path):
        os.makedirs(work_path)
    return work_path
def extract_e00(path):
    dirpath = os.path.join(os.path.realpath(os.path.dirname(path)), "{}-shapefiles".format(extless_basename(path)))
    if os.path.isfile(os.path.join(dirpath, "PAL.shp")):
        return dirpath
    shutil.rmtree(dirpath, ignore_errors=True)
    os.makedirs(dirpath)
    call(["ogr2ogr", "-f", "ESRI Shapefile", dirpath, path])
    return dirpath
def polygonize_arcs(shapefiles_path, polygon_pattern=".+-ID?$"):
    """Convert shapefile arcs from an extracted ArcINFO coverage into polygons.
    More often than not ArcINFO coverages seem to include arcs but not polygons
    when converted to shapefiles using ogr. A PAL.shp file gets created, but it
    only has polygon IDs, not geometries. This method walks through all the
    arcs and combines them into their relevant polygons.
    Why is it in its own Python script and not in this library? For reasons as
    mysterious as they are maddening, when you import fiona into this module, it
    causes subprocess calls to ogr2ogr to ignore the "+nadgrids=@null" proj
    flag, which results in datum shifts when attempting to project to web
    mercator. Yes, seriously. I have no idea why or how it does this, but that
    was the only explanation I could find for the datum shift.
    """
    polygons_path = os.path.join(shapefiles_path, "polygons.shp")
    if os.path.isfile(polygons_path):
        return polygons_path
    pal_path = os.path.join(shapefiles_path, "PAL.shp")
    arc_path = os.path.join(shapefiles_path, "ARC.shp")
    polygonize_arcs_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "polygonize_arcs.py")
    call_cmd(["python", polygonize_arcs_path, polygons_path, pal_path, arc_path, "--polygon-column-pattern", polygon_pattern])
    return polygons_path
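
The polygonize_arcs.py script this wrapper shells out to is not part of this commit. For orientation only, here is a minimal sketch of the general arc-to-polygon idea using fiona and shapely — not the repository's actual script, which presumably also joins PAL attributes like PTYPE onto the rebuilt polygons:

# polygonize_arcs_sketch.py (hypothetical name, illustration only)
import sys

import fiona
from shapely.geometry import shape, mapping
from shapely.ops import polygonize

def arcs_to_polygons(arc_shp_path, out_shp_path):
    # Read every coverage arc as a LineString...
    with fiona.open(arc_shp_path) as arcs:
        lines = [shape(f["geometry"]) for f in arcs]
    # ...node them into closed rings and write each ring out as a polygon
    schema = {"geometry": "Polygon", "properties": {"id": "int"}}
    with fiona.open(out_shp_path, "w", driver="ESRI Shapefile", schema=schema) as out:
        for i, poly in enumerate(polygonize(lines)):
            out.write({"geometry": mapping(poly), "properties": {"id": i}})

if __name__ == "__main__":
    arcs_to_polygons(sys.argv[1], sys.argv[2])

Running it as a separate process, as util.polygonize_arcs does, also sidesteps the fiona/proj interaction described in the docstring above.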