Skip to content

Commit c5aee2d

Browse files
author
Kasra Farmer
authored
Iss2 (#7)
* Adding the capability to assing CRS to input shapefile This is for standardazing the input shapefiles to be fed to various rasters available for this tool. * adding the capability to call landsat datasets * adding landsat scripts * adding details of the Landsat dataset to the README file
1 parent 44d0c4e commit c5aee2d

File tree

4 files changed

+335
-0
lines changed

4 files changed

+335
-0
lines changed

assets/stats.R

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@ renv::restore(lockfile=lockfile_path, prompt=FALSE);
4545
# produce necessary stats and print a csv file
4646
r <- raster::raster(vrt_path);
4747
p <- sf::st_read(shapefile_path, quiet=TRUE);
48+
49+
# check the CRS of the shapefile
50+
if (is.na(sf::st_crs(p)$epsg)){
51+
sf::st_crs(p) = 4326;
52+
print('Assuming EPSG is 4326');
53+
} else {
54+
sf::st_transform(p, 4326);
55+
print('Transforming EPSG to 4326');
56+
}
57+
4858
q <- as.double(unlist(strsplit(quantiles, ",")));
4959
s <- unlist(strsplit(stats, ","));
5060
df <- cbind(p[[1]], exactextractr::exact_extract(r, p, s, quantiles=q)); # assuming first column indicates ID

extract-gis.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,11 @@ case "${geotiff,,}" in
325325
call_processing_func "$(dirname $0)/modis/modis.sh"
326326
;;
327327

328+
# Landsar
329+
"landsat" | "landast" )
330+
call_processing_func "$(dirname $0)/landsat/landsat.sh"
331+
;;
332+
328333
# dataset not included above
329334
*)
330335
echo "$(basename $0): missing/unknown dataset";

landsat/README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# `Landsat` Geospatial Dataset
2+
In this file, the necessary technical details of the dataset is explained.
3+
4+
## Location of the `Landsat` Dataset Files
5+
The `LandSat` geospatial dataset files are located under the following directory accessible from Digital Alliance (formerly Compute Canada) Graham cluster:
6+
7+
```console
8+
/project/rpp-kshook/Model_Output/Landsat/
9+
```
10+
11+
And the structure of the files is as following:
12+
13+
```console
14+
/project/rpp-kshook/Model_Output/Landsat/
15+
├── NA_NALCMS_2010_v2_land_cover_30m.zip
16+
├── .
17+
├── .
18+
└── .
19+
```
20+
21+
## Spatial and Temporal Extents
22+
23+
The spatial extent of this dataset (so far only `NALCMS` that is a land cover dataset) covers longitudes from approximately `-180` to `-50` degress and latitudes from approximately `+14` to `+84` degress. This dataset is static and does not vary with time.
24+
25+
## Dataset Variables
26+
This variables of this dataset are detailed in the table below:
27+
28+
|# |Variable Name (used in `gistool`) |Description |Comments |
29+
|-------|---------------------------------------|---------------------------------------|---------------|
30+
|1 |NA_NALCMS_2010_v2_land_cover_30m |Land cover classes |[link](http://www.cec.org/north-american-environmental-atlas/land-cover-2010-landsat-30m/)|

landsat/landsat.sh

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
#!/bin/bash
2+
# GIS Data Processing Workflow
3+
# Copyright (C) 2022, University of Saskatchewan
4+
#
5+
# This file is part of GIS Data Processing Workflow
6+
#
7+
# This program is free software: you can redistribute it and/or modify
8+
# it under the terms of the GNU General Public License as published by
9+
# the Free Software Foundation, either version 3 of the License, or
10+
# (at your option) any later version.
11+
#
12+
# This program is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
20+
# =========================
21+
# Credits and contributions
22+
# =========================
23+
# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
24+
# 2. General ideas of GeoTIFF subsetting are taken from https://github.com/CH-Earth/CWARHM
25+
# developed mainly by Wouter Knoben (hence the header copyright credit). See the preprint
26+
# at: https://www.essoar.org/doi/10.1002/essoar.10509195.1
27+
28+
29+
# ================
30+
# General comments
31+
# ================
32+
# * All variables are camelCased for distinguishing from function names;
33+
# * function names are all in lower_case with words seperated by underscore for legibility;
34+
# * shell style is based on Google Open Source Projects'
35+
# Style Guide: https://google.github.io/styleguide/shellguide.html
36+
37+
38+
# ===============
39+
# Usage Functions
40+
# ===============
41+
short_usage() {
42+
echo "usage: $(basename $0) -cio DIR -v var1[,var2[...]] [-r INT] [-se DATE] [-ln REAL,REAL] [-f PATH] [-t BOOL] [-a stat1[,stat2,[...]] [-q q1[,q2[...]]]] [-p STR] "
43+
}
44+
45+
46+
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
47+
parsedArguments=$(getopt -a -n landsat -o i:o:v:r:s:e:l:n:f:t:a:q:p:c: --long dataset-dir:,output-dir:,variable:,crs:,start-date:,end-date:,lat-lims:,lon-lims:,shape-file:,print-geotiff:,stat:,quantile:,prefix:,cache: -- "$@")
48+
validArguments=$?
49+
if [ "$validArguments" != "0" ]; then
50+
short_usage;
51+
exit 1;
52+
fi
53+
54+
# check if no options were passed
55+
if [ $# -eq 0 ]; then
56+
echo "$(basename $0): ERROR! arguments missing";
57+
exit 1;
58+
fi
59+
60+
# check long and short options passed
61+
eval set -- "$parsedArguments"
62+
while :
63+
do
64+
case "$1" in
65+
-i | --dataset-dir) geotiffDir="$2" ; shift 2 ;; # required
66+
-o | --output-dir) outputDir="$2" ; shift 2 ;; # required
67+
-v | --variable) variables="$2" ; shift 2 ;; # required
68+
-r | --crs) crs="$2" ; shift 2 ;; # required
69+
-s | --start-date) startDate="$2" ; shift 2 ;; # redundant - added for compatibility
70+
-e | --end-date) endDate="$2" ; shift 2 ;; # redundant - added for compatibility
71+
-l | --lat-lims) latLims="$2" ; shift 2 ;; # required - could be redundant
72+
-n | --lon-lims) lonLims="$2" ; shift 2 ;; # required - could be redundant
73+
-f | --shape-file) shapefile="$2" ; shift 2 ;; # required - could be redundant
74+
-t | --print-geotiff) printGeotiff="$2" ; shift 2 ;; # required
75+
-a | --stat) stats="$2" ; shift 2 ;; # optional
76+
-q | --quantile) quantiles="$2" ; shift 2 ;; # optional
77+
-p | --prefix) prefix="$2" ; shift 2 ;; # optional
78+
-c | --cache) cache="$2" ; shift 2 ;; # required
79+
80+
# -- means the end of the arguments; drop this, and break out of the while loop
81+
--) shift; break ;;
82+
83+
# in case of invalid option
84+
*)
85+
echo "$(basename $0): ERROR! invalid option '$1'";
86+
short_usage; exit 1 ;;
87+
esac
88+
done
89+
90+
# check if $ensemble is provided
91+
if [[ -n "$startDate" ]] || [[ -n "$endDate" ]]; then
92+
echo "$(basename $0): ERROR! redundant argument (time extents) provided";
93+
exit 1;
94+
fi
95+
96+
# check the prefix if not set
97+
if [[ -z $prefix ]]; then
98+
prefix="landsat"
99+
fi
100+
101+
# parse comma-delimited variables
102+
IFS=',' read -ra variables <<< "${variables}"
103+
104+
105+
# =====================
106+
# Necessary Assumptions
107+
# =====================
108+
# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
109+
alias date='TZ=UTC date'
110+
# expand aliases for the one stated above
111+
shopt -s expand_aliases
112+
# hard-coded paths
113+
renvCache="/project/rpp-kshook/Climate_Forcing_Data/assets/r-envs/" # general renv cache path
114+
exactextractrCache="${renvCache}/exact-extract-env" # exactextractr renv cache path
115+
renvPackagePath="${renvCache}/renv_0.15.5.tar.gz" # renv_0.15.5 source path
116+
117+
118+
# ==========================
119+
# Necessary Global Variables
120+
# ==========================
121+
# the structure of the original file names are one of the following:
122+
# * %{var}_M_sl%{soil_layer_depth_number}_250m_ll.tif
123+
# * %{var}_250m_ll.tif
124+
# but the user is expected to include complete variable (file) name in the
125+
# `--variable` input argument comma-separated list.
126+
127+
128+
# ===================
129+
# Necessary Functions
130+
# ===================
131+
# Modules below available on Compute Canada (CC) Graham Cluster Server
132+
load_core_modules () {
133+
module -q purge
134+
module -q load gcc/9.3.0
135+
module -q load r/4.1.2
136+
module -q load gdal/3.0.4
137+
module -q load udunits/2.2.28
138+
module -q load geos/3.10.2
139+
module -q load proj/9.0.0
140+
}
141+
load_core_modules
142+
143+
144+
# =================
145+
# Useful One-liners
146+
# =================
147+
# sorting a comma-delimited string of real numbers
148+
sort_comma_delimited () { IFS=',' read -ra arr <<< "$*"; echo ${arr[*]} | tr " " "\n" | sort -n | tr "\n" " "; }
149+
150+
# log date format
151+
logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
152+
153+
154+
#######################################
155+
# subset GeoTIFFs
156+
#
157+
# Globals:
158+
# latLims: comma-delimited latitude
159+
# limits
160+
# lonLims: comma-delimited longitude
161+
# limits
162+
#
163+
# Arguments:
164+
# sourceVrt: source vrt file
165+
# destPath: destionation path (inclu-
166+
# ding file name)
167+
#
168+
# Outputs:
169+
# one mosaiced (merged) GeoTIFF under
170+
# the $destDir
171+
#######################################
172+
subset_geotiff () {
173+
# local variables
174+
local latMin
175+
local latMax
176+
local lonMin
177+
local lonMax
178+
local sortedLats
179+
local sortedLons
180+
# reading arguments
181+
local sourceVrt="$1"
182+
local destPath="$2"
183+
184+
# extracting minimum and maximum of latitude and longitude respectively
185+
## latitude
186+
sortedLats=($(sort_comma_delimited "$latLims"))
187+
latMin="${sortedLats[0]}"
188+
latMax="${sortedLats[1]}"
189+
## longitude
190+
sortedLons=($(sort_comma_delimited "$lonLims"))
191+
lonMin="${sortedLons[0]}"
192+
lonMax="${sortedLons[1]}"
193+
194+
# subset based on lat/lon - flush to disk at 500MB
195+
GDAL_CACHEMAX=500
196+
gdal_translate --config GDAL_CACHEMAX 500 \
197+
-co COMPRESS="DEFLATE" \
198+
-co BIGTIFF="YES" \
199+
-projwin $lonMin $latMax $lonMax $latMin "${sourceVrt}" "${destPath}" \
200+
> /dev/null;
201+
}
202+
203+
204+
# ===============
205+
# Data Processing
206+
# ===============
207+
# display info
208+
echo "$(logDate)$(basename $0): processing Landsat GeoTIFF(s)..."
209+
210+
# make the output directory
211+
echo "$(logDate)$(basename $0): creating output directory under $outputDir"
212+
mkdir -p "$cache" # making the cache directory
213+
mkdir -p "$outputDir" # making the output directory
214+
215+
# extracting the zip file
216+
for var in "${variables[@]}"; do
217+
unzip "${geotiffDir}/${var}" -d "$cache"
218+
done
219+
220+
# if shapefile is provided extract the extents from it
221+
if [[ -n $shapefile ]]; then
222+
# extract the shapefile extent
223+
IFS=' ' read -ra shapefileExtents <<< "$(ogrinfo -so -al "$shapefile" | sed 's/[),(]//g' | grep Extent)"
224+
# transform the extents in case they are not in EPSG:4326
225+
IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" # source Proj4 value
226+
if [[ -n $sourceProj4 ]]; then
227+
:
228+
else
229+
echo "$(logDate)$(basename $0): WARNING! Assuming WSG84 CRS for the input ESRI shapefile"
230+
sourceProj4=("PROJ.4" " +proj=longlat +datum=WGS84 +no_defs") # made an array for compatibility with the following statements
231+
fi
232+
233+
# transform limits and assing to variables
234+
IFS=' ' read -ra leftBottomLims <<< $(echo "${shapefileExtents[@]:1:2}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
235+
IFS=' ' read -ra rightTopLims <<< $(echo "${shapefileExtents[@]:4:5}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
236+
# define $latLims and $lonLims from $shapefileExtents
237+
lonLims="${leftBottomLims[0]},${rightTopLims[0]}"
238+
latLims="${leftBottomLims[1]},${rightTopLims[1]}"
239+
fi
240+
241+
# subset and produce stats if needed
242+
if [[ "$printGeotiff" == "true" ]]; then
243+
echo "$(logDate)$(basename $0): subsetting GeoTIFFs under $outputDir"
244+
for var in "${variables[@]}"; do
245+
# subset based on lat and lon values
246+
subset_geotiff "${geotiffDir}/${var}.tif" "${outputDir}/${prefix}${var}.tif"
247+
done
248+
fi
249+
250+
## make R renv project directory
251+
if [[ -n "$shapefile" ]] && [[ -n $stats ]]; then
252+
echo "$(logDate)$(basename $0): Extracting stats under $outputDir"
253+
mkdir -p "$cache/r-virtual-env/"
254+
## make R renv in $cache
255+
virtualEnvPath="$cache/r-virtual-env/"
256+
cp "$(dirname $0)/../assets/renv.lock" "$virtualEnvPath"
257+
## load necessary modules - excessive, mainly for clarification
258+
load_core_modules
259+
260+
## make the temporary directory for installing r packages
261+
tempInstallPath="$cache/r-packages"
262+
mkdir -p "$tempInstallPath"
263+
export R_LIBS_USER="$tempInstallPath"
264+
265+
# extract given stats for each variable
266+
for var in "${variables[@]}"; do
267+
## build renv and create stats
268+
Rscript "$(dirname $0)/../assets/stats.R" \
269+
"$tempInstallPath" \
270+
"$exactextractrCache" \
271+
"$renvPackagePath" \
272+
"$virtualEnvPath" \
273+
"$virtualEnvPath" \
274+
"${virtualEnvPath}/renv.lock" \
275+
"${cache}/${var}/${var}.tif" \
276+
"$shapefile" \
277+
"$outputDir/${prefix}stats_${var}.csv" \
278+
"$stats" \
279+
"$quantiles" >> "${outputDir}/${prefix}stats_${var}.log" 2>&1;
280+
done
281+
fi
282+
283+
# remove unnecessary files
284+
mkdir "$HOME/empty_dir"
285+
echo "$(logDate)$(basename $0): deleting temporary files from $cache"
286+
rsync --quiet -aP --delete "$HOME/empty_dir/" "$cache"
287+
rm -r "$cache"
288+
echo "$(logDate)$(basename $0): temporary files from $cache are removed"
289+
echo "$(logDate)$(basename $0): results are produced under $outputDir"
290+

0 commit comments

Comments
 (0)