Skip to content

Commit 67b273f

Browse files
committed
refactor
* refactor common functions to a separate file
1 parent 662e5cc commit 67b273f

File tree

4 files changed

+149
-216
lines changed

4 files changed

+149
-216
lines changed

README.md

+7-2
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,10 @@ This script takes the product title (including prefix which was set on creation
4242
from the STAC catalogue.
4343

4444
### Configuration
45-
The configuration file is again in the **sentinel_config.yml**, where STAC host needs to be set and authentication
46-
in the **~/.netrc** file must be provided.
45+
The **configuration file** is again **sentinel_config.yml**, where the STAC host and, optionally, the prefix need to be set.
46+
47+
**Authentication**: Basic auth is resolved automatically by the Requests library by reading a **~/.netrc** file. Make sure
48+
to set up the correct entry (matching the host name of the STAC host URL) there.
49+
50+
**Command-line arguments**: Command-line options override the corresponding configuration file settings. Run the help command to list all
51+
configurable parameters: `./remove_stac.py -h`

register_stac.py

+27-107
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,23 @@
11
#!/usr/bin/python3
22

33
import argparse
4-
import netrc
5-
import os
6-
import re
74
import sys
85
import tempfile
9-
from datetime import datetime
10-
from urllib.parse import urlparse
116

127
import defusedxml.ElementTree
138
import pystac
14-
import requests
159
import stactools.sentinel1.grd.stac
1610
import stactools.sentinel1.slc.stac
1711
import stactools.sentinel2.stac
1812
import stactools.sentinel3.stac
1913
from stactools.sentinel3 import constants
2014
import stactools.sentinel5p.stac
21-
import yaml
22-
from requests import Session
2315
from stactools.sentinel3.file_extension_updated import FileExtensionUpdated
2416
from tqdm import tqdm
2517

26-
import sentinel_stac
18+
from sentinel_stac import *
2719

2820
CONFIG_FILE = "sentinel_config.yml"
29-
ERR_PREFIX = ""
3021
SUCC_PREFIX = ""
3122
PRODUCT_ID = None
3223
COLLECTION = None
@@ -88,39 +79,10 @@ def parse_arguments():
8879

8980
args = parser.parse_args()
9081
if not args.push and not args.save:
91-
die_with_error('--push or --save required to take any action')
82+
die_with_error(PRODUCT_ID, '--push or --save required to take any action')
9283
return args
9384

9485

95-
def die_with_error(msg, detailed_msg="", code=-1):
96-
"""
97-
Before terminating with exception, writes message to error file.
98-
Known HTTP error code should be used, otherwise -1 is used.
99-
"""
100-
rundate = datetime.now().strftime('%Y-%m-%d')
101-
err_file = ERR_PREFIX + rundate
102-
create_missing_dir(os.path.dirname(err_file))
103-
with open(err_file, 'a') as f:
104-
f.write(f"{COLLECTION},{PRODUCT_ID},{code}:{msg}\n")
105-
raise Exception("\n".join([f"{code}: {msg}", detailed_msg]))
106-
107-
108-
def read_configuration():
109-
"""
110-
Read configuration file.
111-
"""
112-
with open(CONFIG_FILE, "r") as f:
113-
return yaml.safe_load(f)
114-
115-
116-
def create_missing_dir(dir_path):
117-
"""
118-
Creates directory, if it does not exist yet (including all missing directories in the path).
119-
"""
120-
if not os.path.exists(dir_path):
121-
os.makedirs(dir_path, exist_ok=True)
122-
123-
12486
def request_with_progress(url, output_path):
12587
"""
12688
Downloads a file from a URL and saves it to the specified output path, with a progress bar.
@@ -131,7 +93,7 @@ def request_with_progress(url, output_path):
13193
block_size = 1024 # Size of each block (1 KB)
13294

13395
if not response.ok:
134-
die_with_error(f"Request to fetch file {url} failed.", response.text, response.status_code)
96+
die_with_error(PRODUCT_ID, f"Request to fetch file {url} failed.", response.text, response.status_code)
13597

13698
progress_bar = tqdm(total=total_size,
13799
unit='iB',
@@ -170,7 +132,7 @@ def fetch_product_data(sentinel_host, metadata_dir):
170132
COLLECTION = map_to_collection(title)
171133

172134
if not title or not product_url:
173-
die_with_error("Missing required title or product url for product.")
135+
die_with_error(PRODUCT_ID, "Missing required title or product url for product.")
174136

175137
print(f"Parsed product data for product (UUID {PRODUCT_ID}):\n"
176138
f"* Title ID: {title}\n"
@@ -184,46 +146,25 @@ def check_hosts(sentinel_host, stac_host, push):
184146
"""
185147
Checks sentinel_host and stac_host variables were resolved and .netrc file contains authentication credentials.
186148
"""
187-
if not sentinel_host:
188-
die_with_error("Sentinel host not configured properly!")
189-
if not stac_host and push:
190-
die_with_error("STAC host not configured properly!")
191-
192-
try:
193-
auth_info = netrc.netrc()
194-
if not auth_info.authenticators(urlparse(sentinel_host).netloc):
195-
die_with_error(
196-
f"Host {urlparse(sentinel_host)} not found in authentication credentials in the .netrc file!")
197-
if push and not auth_info.authenticators(urlparse(stac_host).netloc):
198-
die_with_error(f"Host {urlparse(stac_host)} not found in authentication credentials in the .netrc file!")
199-
except (FileNotFoundError, netrc.NetrcParseError) as e:
200-
die_with_error(f"Error parsing authentication file .netrc in the home directory.")
201-
202-
203-
def map_to_collection(product_name):
204-
"""
205-
Returns the normalized collection name for a given product.
206-
"""
207-
for pattern, collection in sentinel_stac.product_collection_mapping.items():
208-
if re.match(pattern, product_name):
209-
return collection
210-
die_with_error("Could not match product to collection name! Probably missing in the sentinel_stac.py mappings.")
149+
check_host(PRODUCT_ID, sentinel_host)
150+
if push:
151+
check_host(PRODUCT_ID, stac_host)
211152

212153

213154
def fetch_platform_metadata(product_url, metadata_dir, platform):
214155
"""
215156
Fetches metadata from product's /Nodes data and stores them in the metadata directory.
216157
"""
217158
if platform.lower() == "s1":
218-
platform_files = sentinel_stac.s1_files
159+
platform_files = S1_FILES
219160
elif platform.lower() == "s2":
220-
platform_files = sentinel_stac.s2_files
161+
platform_files = S2_FILES
221162
elif platform.lower() == "s3":
222-
platform_files = sentinel_stac.s3_files
163+
platform_files = S3_FILES
223164
elif platform.lower() == "s5":
224-
platform_files = sentinel_stac.s5_files
165+
platform_files = S5_FILES
225166
else:
226-
die_with_error(f"Platform {platform} not supported!")
167+
die_with_error(PRODUCT_ID, f"Platform {platform} not supported!")
227168
for file in platform_files:
228169
source_url = f"{product_url}/Nodes('{file}')/$value"
229170
output_file = os.path.join(metadata_dir, file)
@@ -302,39 +243,19 @@ def regenerate_href_links(stacfile_path, metadata_dir, product_url, salt):
302243
os.replace(new_file, stacfile_path)
303244

304245

305-
def get_auth_token(token_url):
306-
"""
307-
Gets token for communication with API from token url.
308-
"""
309-
response = requests.get(token_url)
310-
if not response.ok:
311-
die_with_error(f"Could not obtain API token from {token_url}", response.text, response.status_code)
312-
return response.json()["token"]
313-
314-
315-
def get_auth_session(token):
316-
"""
317-
Creates session which overwrites the BA credentials set in the ~/.netrc file by auth token.
318-
"""
319-
token_session = Session()
320-
token_session.trust_env = False # need to overwrite the authorization header, otherwise BA is used
321-
token_session.headers.update({"Authorization": f"Bearer {token}"})
322-
return token_session
323-
324-
325246
def update_catalogue_entry(stac_host, entry_id, json_data, auth_token=None):
326247
"""
327248
Updates stac entry by fully rewriting it
328249
"""
329250
url = f"{stac_host}/collections/{COLLECTION}/items/{entry_id}"
330251
print(f"Overwriting existing product entry in STAC catalogue.")
331252

332-
token = auth_token or get_auth_token(f"{stac_host}/auth")
253+
token = auth_token or get_auth_token(f"{stac_host}/auth", PRODUCT_ID)
333254
token_session = get_auth_session(token)
334255

335256
response = token_session.put(url, data=json_data)
336257
if not response.ok:
337-
die_with_error(f"Could not remove existing product from catalogue.", response.text, response.status_code)
258+
die_with_error(PRODUCT_ID, f"Could not remove existing product from catalogue.", response.text, response.status_code)
338259

339260

340261
def upload_to_catalogue(stac_host, stac_filepath, overwrite=False):
@@ -345,7 +266,7 @@ def upload_to_catalogue(stac_host, stac_filepath, overwrite=False):
345266
url = f"{stac_host}/collections/{COLLECTION}/items"
346267
print(f"Uploading STAC data to {url}")
347268

348-
token = get_auth_token(f"{stac_host}/auth")
269+
token = get_auth_token(f"{stac_host}/auth", PRODUCT_ID)
349270

350271
with open(stac_filepath, 'r') as file:
351272
json_data = file.read()
@@ -361,52 +282,51 @@ def upload_to_catalogue(stac_host, stac_filepath, overwrite=False):
361282
elif response.status_code == 409:
362283
if not overwrite:
363284
# don't die
364-
err_file = ERR_PREFIX + rundate
365-
create_missing_dir(os.path.dirname(err_file))
366-
with open(err_file, 'a') as f:
285+
create_missing_dir(os.path.dirname(ERR_FILE))
286+
with open(ERR_FILE, 'a') as f:
367287
f.write(f"{COLLECTION},{PRODUCT_ID},0,Skipped existing product\n")
368288
print("Product already registered, skipping.")
369289
else:
370290
if response.text and "Feature" in response.text and "ErrorMessage" in response.text:
371291
stac_product_id = response.json().get("ErrorMessage").split(" ")[1]
372-
update_catalogue_entry(stac_host, COLLECTION, stac_product_id, json_data, token)
292+
update_catalogue_entry(stac_host, stac_product_id, json_data, token)
373293
else:
374-
die_with_error("Cannot update existing entry, feature id expected in response not found.")
294+
die_with_error(PRODUCT_ID, "Cannot update existing entry, feature id expected in response not found.")
375295
elif response.status_code == 404:
376-
die_with_error("Wrong URL, or collection does not exist.", response.text, response.status_code)
296+
die_with_error(PRODUCT_ID, "Wrong URL, or collection does not exist.", response.text, response.status_code)
377297
else:
378-
die_with_error(f"Request to upload STAC file failed", response.text, response.status_code)
298+
die_with_error(PRODUCT_ID, f"Request to upload STAC file failed", response.text, response.status_code)
379299

380300

381301
def main():
382302
args = parse_arguments()
383-
config = read_configuration()
303+
config = read_configuration(CONFIG_FILE)
384304
global PRODUCT_ID
385305
PRODUCT_ID = args.productId
386306

387307
sentinel_host = args.sentinelHost or config.get("SENTINEL_HOST")
388308
stac_host = args.stacHost or config.get("STAC_HOST")
389309

390310
if args.save and config.get("LOCAL_DIR") is None and args.localDir is None:
391-
die_with_error("Flag --save was provided, but LOCAL_DIR option not configured and not specified "
311+
die_with_error(PRODUCT_ID, "Flag --save was provided, but LOCAL_DIR option not configured and not specified "
392312
"in the --localDir argument!")
393313

394314
stac_storage = args.localDir or os.path.join(config.get("LOCAL_DIR"), "register_stac")
395315
if stac_storage is not None:
396316
if not os.path.isabs(stac_storage):
397-
die_with_error("Valid path not used for the stac storage argument - expected an absolute directory path!")
317+
die_with_error(PRODUCT_ID, "Valid path not used for the stac storage argument - expected an absolute directory path!")
398318
create_missing_dir(os.path.dirname(stac_storage))
399319

400320
global SUCC_PREFIX, ERR_PREFIX
401321
SUCC_PREFIX = config.get("SUCC_PREFIX")
402322
ERR_PREFIX = config.get("ERR_PREFIX")
403323
if args.push and (SUCC_PREFIX is None or ERR_PREFIX is None):
404-
die_with_error("Flag --push was provided, but SUCC_PREFIX and ERR_PREFIX need to be set in the configuration "
324+
die_with_error(PRODUCT_ID, "Flag --push was provided, but SUCC_PREFIX and ERR_PREFIX need to be set in the configuration "
405325
"file for logging!")
406326

407327
salt = config.get("SALT")
408328
if args.push and not stac_host:
409-
die_with_error('--push requires --stacHost argument or STAC_HOST configuration option to be set!')
329+
die_with_error(PRODUCT_ID, '--push requires --stacHost argument or STAC_HOST configuration option to be set!')
410330

411331
check_hosts(sentinel_host, stac_host, args.push)
412332

@@ -443,7 +363,7 @@ def main():
443363
else:
444364
raise Exception(f"Unknown platform {platform}")
445365
except Exception as e:
446-
die_with_error(e.args[0] if e.args and len(str(e.args[0])) > 5 else str(e))
366+
die_with_error(PRODUCT_ID, e.args[0] if e.args and len(str(e.args[0])) > 5 else str(e))
447367

448368
stac_storage = stac_storage if args.save else metadata_dir
449369
stac_filepath = os.path.join(stac_storage, "{}.json".format(item.id))

0 commit comments

Comments
 (0)