Skip to content

Commit 2cddc33

Browse files
authored
add stats reports to tenant (#152)
* add stats reports to tenant * Update contanerize.yaml * add reports * update graph summary endpoint * add csv * Update tenants.py * Update tenants.py * Update tenants.py * Update tenants.py * update yamls * update url * add csv config
1 parent 2248232 commit 2cddc33

File tree

6 files changed

+29
-4
lines changed

6 files changed

+29
-4
lines changed

.github/workflows/contanerize.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ on:
1313
- dev_eco
1414
- v0_generated_code
1515
- 133_dev_sitemaps
16+
- 151-integrate-community-stats-codes
1617
tags:
1718
- "v*.*.*"
1819

dagster/implnets/deployment/compose_local_eco_override.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ services:
5252
- GLEANERIO_SOURCES_FILENAME=${GLEANERIO_SOURCES_FILENAME:-gleanerconfig.yaml}
5353
- GLEANERIO_TENANT_FILENAME=${GLEANERIO_TENANT_FILENAME:-tenant.yaml}
5454
- GLEANERIO_WORKSPACE_CONFIG_PATH=${GLEANERIO_WORKSPACE_CONFIG_PATH}
55+
- GLEANERIO_CSV_CONFIG_URL=${GLEANERIO_CSV_CONFIG_URL:-https://docs.google.com/spreadsheets/d/e/2PACX-1vTt_45dYd5LMFK9Qm_lCg6P7YxG-ae0GZEtrHMZmNbI-y5tVDd8ZLqnEeIAa-SVTSztejfZeN6xmRZF/pub?gid=1340502269&single=true&output=csv}
5556
- ECRR_MINIO_BUCKET=${ECRR_MINIO_BUCKET}
5657
- ECRR_GRAPH_NAMESPACE=${ECRR_GRAPH_NAMESPACE}
5758
- PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

dagster/implnets/deployment/compose_project.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ services:
122122
- GLEANERIO_SOURCES_FILENAME=${GLEANERIO_SOURCES_FILENAME:-gleanerconfig.yaml}
123123
- GLEANERIO_TENANT_FILENAME=${GLEANERIO_TENANT_FILENAME:-tenant.yaml}
124124
- GLEANERIO_WORKSPACE_CONFIG_PATH=${GLEANERIO_WORKSPACE_CONFIG_PATH}
125+
- GLEANERIO_CSV_CONFIG_URL=${GLEANERIO_CSV_CONFIG_URL:-https://docs.google.com/spreadsheets/d/e/2PACX-1vTt_45dYd5LMFK9Qm_lCg6P7YxG-ae0GZEtrHMZmNbI-y5tVDd8ZLqnEeIAa-SVTSztejfZeN6xmRZF/pub?gid=1340502269&single=true&output=csv}
125126
- PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
126127
- SLACK_CHANNEL=${SLACK_CHANNEL:-"#twitterfeed"}
127128
- SLACK_TOKEN=${SLACK_TOKEN}

dagster/implnets/deployment/compose_project_eco_override.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ services:
4646
- GLEANERIO_SOURCES_FILENAME=${GLEANERIO_SOURCES_FILENAME:-gleanerconfig.yaml}
4747
- GLEANERIO_TENANT_FILENAME=${GLEANERIO_TENANT_FILENAME:-tenant.yaml}
4848
- GLEANERIO_WORKSPACE_CONFIG_PATH=${GLEANERIO_WORKSPACE_CONFIG_PATH}
49+
- GLEANERIO_CSV_CONFIG_URL=${GLEANERIO_CSV_CONFIG_URL:-https://docs.google.com/spreadsheets/d/e/2PACX-1vTt_45dYd5LMFK9Qm_lCg6P7YxG-ae0GZEtrHMZmNbI-y5tVDd8ZLqnEeIAa-SVTSztejfZeN6xmRZF/pub?gid=1340502269&single=true&output=csv}
4950
- ECRR_MINIO_BUCKET=${ECRR_MINIO_BUCKET}
5051
- ECRR_GRAPH_NAMESPACE=${ECRR_GRAPH_NAMESPACE}
5152
- PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

dagster/implnets/deployment/envFile.env

+3
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,6 @@ ECRR_MINIO_BUCKET=ecrr
107107
SLACK_CHANNEL="#production_discussion"
108108
#SLACK_CHANNEL="#twitterfeed"
109109
SLACK_TOKEN=
110+
111+
GLEANERIO_CSV_CONFIG_URL=https://docs.google.com/spreadsheets/d/e/2PACX-1vTt_45dYd5LMFK9Qm_lCg6P7YxG-ae0GZEtrHMZmNbI-y5tVDd8ZLqnEeIAa-SVTSztejfZeN6xmRZF/pub?gid=1340502269&single=true&output=csv
112+

dagster/implnets/workflows/tasks/tasks/assets/tenants.py

+22-4
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,34 @@
1818
from ec.datastore import s3
1919
from distutils import util
2020
from ..resources.gleanerS3 import _pythonMinioAddress
21+
from ec.reporting.report import generateReportStats
2122

2223
GLEANER_MINIO_ADDRESS = os.environ.get('GLEANERIO_MINIO_ADDRESS')
2324
GLEANER_MINIO_PORT = os.environ.get('GLEANERIO_MINIO_PORT')
2425
GLEANER_MINIO_USE_SSL = bool(util.strtobool(os.environ.get('GLEANERIO_MINIO_USE_SSL', 'true')))
2526
GLEANER_MINIO_SECRET_KEY = os.environ.get('GLEANERIO_MINIO_SECRET_KEY')
2627
GLEANER_MINIO_ACCESS_KEY = os.environ.get('GLEANERIO_MINIO_ACCESS_KEY')
2728
GLEANER_MINIO_BUCKET = os.environ.get('GLEANERIO_MINIO_BUCKET')
29+
GLEANERIO_GRAPH_URL = os.environ.get('GLEANERIO_GRAPH_URL')
30+
GLEANERIO_GRAPH_SUMMARY_NAMESPACE = os.environ.get('GLEANERIO_GRAPH_SUMMARY_NAMESPACE')
31+
GLEANERIO_CSV_CONFIG_URL = os.environ.get('GLEANERIO_CSV_CONFIG_URL')
2832

2933
MINIO_OPTIONS={"secure":GLEANER_MINIO_USE_SSL
3034

3135
,"access_key": GLEANER_MINIO_ACCESS_KEY
3236
,"secret_key": GLEANER_MINIO_SECRET_KEY
3337
}
38+
39+
def _graphSummaryEndpoint(community):
    """Return the SPARQL endpoint URL of the summary namespace for a community.

    The community code "all" selects the namespace named by the module-level
    GLEANERIO_GRAPH_SUMMARY_NAMESPACE; any other code selects the
    "<community>_summary" namespace. In both cases the URL is rooted at the
    module-level GLEANERIO_GRAPH_URL.
    """
    # Pick the namespace first so the URL template is written only once.
    if community == "all":
        namespace = GLEANERIO_GRAPH_SUMMARY_NAMESPACE
    else:
        namespace = f"{community}_summary"
    return f"{GLEANERIO_GRAPH_URL}/namespace/{namespace}/sparql"
3445
@asset(group_name="community",key_prefix="task",
3546
required_resource_keys={"triplestore"})
3647
def task_tenant_sources(context) ->Any:
3748
s3_resource = context.resources.triplestore.s3
38-
3949
t=s3_resource.getTennatInfo()
4050
tenants = t['tenant']
4151
listTenants = map (lambda a: {a['community']}, tenants)
@@ -144,11 +154,9 @@ def loadstatsCommunity(context, task_tenant_sources) -> str:
144154
ts = task_tenant_sources
145155
t =list(filter ( lambda a: a['community']== community_code, ts["tenant"] ))
146156
s = t[0]["sources"]
147-
for source in s:
148157

158+
for source in s:
149159
dirs = s3Minio.listPath(GLEANER_MINIO_BUCKET,path=f"{REPORT_PATH}{source}/",recursive=False )
150-
151-
152160
for d in dirs:
153161
latestpath = f"{REPORT_PATH}{source}/latest/"
154162
if (d.object_name.casefold() == latestpath.casefold()) or (d.is_dir == False):
@@ -210,4 +218,14 @@ def loadstatsCommunity(context, task_tenant_sources) -> str:
210218
# s3.upload_fileobj(f, s3.GLEANERIO_MINIO_BUCKET, f"data/all/all_stats.csv")
211219
context.log.info(f"all_stats.csv uploaded using ec.datastore.putReportFile {s3_config.GLEANERIO_MINIO_BUCKET}tenant/{community_code} ")
212220
#return df_csv # now checking return types
221+
222+
context.log.info(f"GLEANERIO_CSV_CONFIG_URL {GLEANERIO_CSV_CONFIG_URL} ")
223+
224+
report = generateReportStats(GLEANERIO_CSV_CONFIG_URL, s3_config.GLEANERIO_MINIO_BUCKET, s3Minio,
225+
_graphSummaryEndpoint(community_code), community_code)
226+
bucket, object = s3Minio.putReportFile(s3_config.GLEANERIO_MINIO_BUCKET, f"tenant/{community_code}",
227+
f"report_stats.json", report)
228+
context.log.info(
229+
f"report_stats.json uploaded using ec.datastore.putReportFile {s3_config.GLEANERIO_MINIO_BUCKET}tenant/{community_code} ")
230+
213231
return df_csv

0 commit comments

Comments
 (0)