|
1 | 1 | from datetime import date
|
2 | 2 | from uuid import uuid4 as uuid
|
| 3 | + |
| 4 | +import watchtower |
3 | 5 | from s3fs import S3FileSystem as s3
|
4 | 6 | from floorist.config import get_config
|
5 |
| -from os import environ |
| 7 | +import os |
| 8 | +from boto3.session import Session |
6 | 9 |
|
7 | 10 | import logging
|
8 | 11 | import pandas.io.sql as sqlio
|
9 | 12 | import psycopg2
|
10 | 13 | import yaml
|
11 | 14 |
|
12 | 15 |
|
13 |
| -def _configure_loglevel(): |
| 16 | +def _get_logger(config): |
| 17 | + logging.basicConfig(level=config.loglevel) |
| 18 | + logger = logging.getLogger(__name__) |
| 19 | + logger.setLevel(config.loglevel) |
14 | 20 |
|
15 |
| - LOGLEVEL = environ.get('LOGLEVEL', 'INFO').upper() |
16 |
| - logging.basicConfig(level=LOGLEVEL) |
| 21 | + if config.cloudwatch_config: |
| 22 | + logger.info("Configuring Cloudwatch logging") |
| 23 | + logger.addHandler(_get_cloudwatch_handler(config.cloudwatch_config)) |
| 24 | + else: |
| 25 | + logger.info("Cloudwatch config not found - skipping") |
17 | 26 |
|
| 27 | + return logger |
18 | 28 |
|
19 |
| -def main(): |
20 | 29 |
|
21 |
| - _configure_loglevel() |
22 |
| - config = get_config() |
23 |
| - |
24 |
| - # Fails if can't connect to S3 or the bucket does not exist |
25 |
| - s3(secret=config.bucket_secret_key, key=config.bucket_access_key, |
26 |
| - client_kwargs={'endpoint_url': config.bucket_url }).ls(config.bucket_name) |
27 |
| - logging.debug('Successfully connected to the S3 bucket') |
28 |
| - |
29 |
| - conn = psycopg2.connect( |
30 |
| - host=config.database_hostname, |
31 |
| - user=config.database_username, |
32 |
| - password=config.database_password, |
33 |
| - database=config.database_name |
34 |
| - ) |
35 |
| - logging.debug('Successfully connected to the database') |
36 |
| - |
37 |
| - dump_count = 0 |
38 |
| - dumped_count = 0 |
39 |
| - |
40 |
| - with open(config.floorplan_filename, 'r') as stream: |
41 |
| - # This try block allows us to proceed if a single SQL query fails |
42 |
| - for row in yaml.safe_load(stream): |
43 |
| - dump_count += 1 |
44 |
| - |
45 |
| - try: |
46 |
| - logging.debug(f"Dumping #{dump_count}: {row['query']} to {row['prefix']}") |
47 |
| - |
48 |
| - data = sqlio.read_sql_query(row['query'], conn) |
49 |
| - target = '/'.join([ |
50 |
| - f"s3://{config.bucket_name}", |
51 |
| - row['prefix'], |
52 |
| - date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d'), |
53 |
| - f"{uuid()}.parquet" |
54 |
| - ]) |
55 |
| - |
56 |
| - data.to_parquet( |
57 |
| - path=target, |
58 |
| - compression='gzip', |
59 |
| - index=False, |
60 |
| - storage_options={ |
61 |
| - 'secret': config.bucket_secret_key, |
62 |
| - 'key' : config.bucket_access_key, |
63 |
| - 'client_kwargs':{'endpoint_url': config.bucket_url } |
64 |
| - } |
65 |
| - ) |
66 |
| - |
67 |
| - logging.debug(f"Dumped #{dumped_count}: {row['query']} to {row['prefix']}") |
68 |
| - |
69 |
| - dumped_count += 1 |
70 |
| - except Exception as ex: |
71 |
| - logging.exception(ex) |
72 |
| - |
73 |
| - logging.info(f'Dumped {dumped_count} from total of {dump_count}') |
74 |
| - |
75 |
| - conn.close() |
76 |
| - |
77 |
| - if dumped_count != dump_count: |
78 |
| - exit(1) |
def _get_cloudwatch_handler(config):
    """Build a watchtower Cloudwatch log handler from the given config.

    The stream name comes from the AWS_LOG_STREAM environment variable,
    falling back to this host's node name.
    """
    log_group = config.logGroup
    stream_name = os.getenv("AWS_LOG_STREAM", _get_hostname())

    logging.info(f"Configuring watchtower logging (log_group={log_group}, "
                 f"stream_name={stream_name})")

    # A dedicated boto3 session so the handler uses the Cloudwatch-specific
    # credentials rather than any ambient AWS configuration.
    session = Session(
        aws_access_key_id=config.accessKeyId,
        aws_secret_access_key=config.secretAccessKey,
        region_name=config.region,
    )

    return watchtower.CloudWatchLogHandler(boto3_session=session,
                                           stream_name=stream_name,
                                           log_group=log_group,
                                           create_log_group=False)
| 50 | + |
| 51 | + |
| 52 | +def _get_hostname(): |
| 53 | + return os.uname().nodename |
| 54 | + |
| 55 | + |
def main():
    """Dump every SQL query listed in the floorplan file to parquet files in S3.

    Reads the floorplan YAML, runs each query against the configured database,
    and writes the result as a gzip-compressed parquet file under a
    date-partitioned S3 prefix. Exits with status 1 if any query failed.
    """
    config = get_config()
    logger = _get_logger(config)

    # Single source of truth for the S3 credentials/options, shared by the
    # connectivity check and every parquet upload below.
    storage_options = {
        'secret': config.bucket_secret_key,
        'key': config.bucket_access_key,
        'client_kwargs': {'endpoint_url': config.bucket_url}
    }

    # Fails if can't connect to S3 or the bucket does not exist
    s3(**storage_options).ls(config.bucket_name)
    logger.debug('Successfully connected to the S3 bucket')

    conn = psycopg2.connect(
        host=config.database_hostname,
        user=config.database_username,
        password=config.database_password,
        database=config.database_name
    )
    logger.debug('Successfully connected to the database')

    dump_count = 0
    dumped_count = 0

    try:
        with open(config.floorplan_filename, 'r') as stream:
            for row in yaml.safe_load(stream):
                dump_count += 1

                # This try block allows us to proceed if a single SQL query fails
                try:
                    logger.debug(f"Dumping #{dump_count}: {row['query']} to {row['prefix']}")

                    data = sqlio.read_sql_query(row['query'], conn)
                    target = '/'.join([
                        f"s3://{config.bucket_name}",
                        row['prefix'],
                        date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d'),
                        f"{uuid()}.parquet"
                    ])

                    data.to_parquet(
                        path=target,
                        compression='gzip',
                        index=False,
                        storage_options=storage_options
                    )

                    # Increment before logging so the "Dumped #N" index lines up
                    # with the corresponding "Dumping #N" line (was off by one).
                    dumped_count += 1
                    logger.debug(f"Dumped #{dumped_count}: {row['query']} to {row['prefix']}")
                except Exception as ex:
                    logger.exception(ex)
    finally:
        # Always release the DB connection, even if opening or parsing the
        # floorplan file raises before/while iterating.
        conn.close()

    logger.info(f'Dumped {dumped_count} from total of {dump_count}')

    if dumped_count != dump_count:
        exit(1)
0 commit comments