Skip to content

Commit

Permalink
add github-envs for tinybird upload scripts (localstack#8131)
Browse files Browse the repository at this point in the history
  • Loading branch information
steffyP authored Apr 19, 2023
1 parent 13a1419 commit 86a5da8
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 37 deletions.
3 changes: 2 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,8 @@ jobs:
source .venv/bin/activate
mkdir parity_metrics && mv target/metric_reports/metric-report-raw-data-*amd64*.csv parity_metrics
METRIC_REPORT_DIR_PATH=parity_metrics \
COMMUNITY_IMPL_COV_PATH=scripts/implementation_coverage_full.csv \
IMPLEMENTATION_COVERAGE_FILE=scripts/implementation_coverage_full.csv \
SOURCE_TYPE=community \
python -m scripts.tinybird.upload_raw_test_metrics_and_coverage
- store_artifacts:
path: parity_metrics/
Expand Down
4 changes: 3 additions & 1 deletion scripts/tinybird/retrieve_legacy_data_from_circleci.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,9 @@ def collect_workflows_past_30_days():
os.environ["CIRCLE_WORKFLOW_ID"] = str(workflow_id)

# trigger the tinybird_upload
send_metric_report(metric_report_file_path, timestamp)
send_metric_report(
metric_report_file_path, source_type="community", timestamp=timestamp
)
send_implemented_coverage(
community_coverage_file_path, timestamp=timestamp, type="community"
)
Expand Down
103 changes: 68 additions & 35 deletions scripts/tinybird/upload_raw_test_metrics_and_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,18 @@
service, operation, status_code, error_code, is_implemented*
Additionally, we add the following information:
- *build_id*: the workflow-id (for CircleCI)
- *build_id*: the workflow-id (for CircleCI) or run-id (for GitHub)
- *timestamp*: a timestamp as string which will be the same for the whole CI run
- *ls_source*: “community” for the CircleCI run, “pro” for the GitHub action
In order to get more metadata from the build, we also send some general information to tests_raw_builds.datasource:
- *build_id:* the workflow-id (for CircleCI)
- *timestamp:* a timestamp as string which will be the same for the CircleCI run
- *branch:* env value from *`CIRCLE_BRANCH`*
- *build_url:* env value from *`CIRCLE_BUILD_URL`*
- *pull_requests:* env value from *`CIRCLE_PULL_REQUESTS`*
- *build_num:* env value from *`CIRCLE_BUILD_NUM`*
- *workflow_id:* env value from *`CIRCLE_WORKFLOW_ID`*
- *build_id:* the workflow-id (for CircleCI) or run-id (for GitHub)
- *timestamp:* a timestamp as string which will be the same for the CI run
- *branch:* env value from *`CIRCLE_BRANCH`* or *`GITHUB_HEAD_REF`* (only set for pull_requests) or *`GITHUB_REF_NAME`*
- *build_url:* env value from *`CIRCLE_BUILD_URL`* or *$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID*
- *pull_requests:* env value from *`CIRCLE_PULL_REQUESTS`* or *`GITHUB_REF`*
- *build_num:* env value from *`CIRCLE_BUILD_NUM`* (empty for GitHub, there seems to be no equivalent)
- *workflow_id:* env value from *`CIRCLE_WORKFLOW_ID`* or *`GITHUB_RUN_ID`*
"""

import csv
Expand Down Expand Up @@ -138,21 +138,53 @@ def send_metadata_for_build(build_id: str, timestamp: str):
CIRCLE_BUILD_NUM=78206
CIRCLE_BUILD_URL=https://circleci.com/gh/localstack/localstack/78206
CIRCLE_WORKFLOW_ID=b86a4bc4-bcd1-4170-94d6-4af66846c1c1
GitHub env examples:
GITHUB_REF=refs/heads/master or refs/pull/<pr_number>/merge (will be used for 'pull_requests')
GITHUB_HEAD_REF=tinybird_data (used for 'branch', set only for pull_requests)
GITHUB_REF_NAME=feature-branch-1 (will be used for 'branch' if GITHUB_HEAD_REF is not set)
GITHUB_RUN_ID=1658821493 (will be used for 'workflow_id')
workflow run's URL (will be used for 'build_url'):
$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID
there seems to be no GitHub equivalent for "build_num" (the number of the current job in CircleCI)
-> leaving it blank for GitHub
"""
# on GitHub, GITHUB_HEAD_REF is only set for pull requests; otherwise we fall back to GITHUB_REF_NAME
branch = (
os.environ.get("CIRCLE_BRANCH", "")
or os.environ.get("GITHUB_HEAD_REF", "")
or os.environ.get("GITHUB_REF_NAME", "")
)
workflow_id = os.environ.get("CIRCLE_WORKFLOW_ID", "") or os.environ.get("GITHUB_RUN_ID", "")

build_url = os.environ.get("CIRCLE_BUILD_URL", "")
if not build_url and os.environ.get("GITHUB_SERVER_URL"):
# construct the build-url for GitHub
server = os.environ.get("GITHUB_SERVER_URL", "")
repo = os.environ.get("GITHUB_REPOSITORY", "")
build_url = f"{server}/{repo}/actions/runs/{workflow_id}"

pull_requests = os.environ.get("CIRCLE_PULL_REQUESTS", "") or os.environ.get("GITHUB_REF", "")
build_num = os.environ.get(
"CIRCLE_BUILD_NUM", ""
) # TODO could not find equivalent job-id ENV in github

data = {
"build_id": build_id,
"timestamp": timestamp,
"branch": os.environ.get("CIRCLE_BRANCH", ""),
"build_url": os.environ.get("CIRCLE_BUILD_URL", ""),
"pull_requests": os.environ.get("CIRCLE_PULL_REQUESTS", ""),
"build_num": os.environ.get("CIRCLE_BUILD_NUM", ""),
"workflow_id": os.environ.get("CIRCLE_WORKFLOW_ID", ""),
"branch": branch,
"build_url": build_url,
"pull_requests": pull_requests,
"build_num": build_num,
"workflow_id": workflow_id,
}
data_to_send = [json.dumps(data)]
send_data_to_tinybird(data_to_send, data_name=DATA_SOURCE_RAW_BUILDS)


def send_metric_report(metric_path: str, timestamp: str):
def send_metric_report(metric_path: str, source_type: str, timestamp: str):
"""
SCHEMA >
Expand All @@ -172,7 +204,7 @@ def send_metric_report(metric_path: str, timestamp: str):
"""
tmp: list[str] = []
count: int = 0
build_id = os.environ.get("CIRCLE_WORKFLOW_ID", "")
build_id = os.environ.get("CIRCLE_WORKFLOW_ID", "") or os.environ.get("GITHUB_RUN_ID", "")
send_metadata_for_build(build_id, timestamp)

pathlist = Path(metric_path).rglob("metric-report-raw-data-*.csv")
Expand All @@ -187,7 +219,7 @@ def send_metric_report(metric_path: str, timestamp: str):
# add timestamp, build_id, ls_source
row["timestamp"] = timestamp
row["build_id"] = build_id
row["ls_source"] = "community"
row["ls_source"] = source_type

# remove data we are currently not interested in
for field in data_to_remove:
Expand Down Expand Up @@ -226,6 +258,7 @@ def send_implemented_coverage(file: str, timestamp: str, type: str):
tmp: list[str] = []
count: int = 0

build_id = os.environ.get("CIRCLE_WORKFLOW_ID", "") or os.environ.get("GITHUB_RUN_ID", "")
with open(file, "r") as csv_obj:
reader_obj = csv.DictReader(csv_obj)
for row in reader_obj:
Expand All @@ -240,7 +273,7 @@ def send_implemented_coverage(file: str, timestamp: str, type: str):
# add timestamp and source
row["timestamp"] = timestamp
row["ls_source"] = type
row["build_id"] = os.environ.get("CIRCLE_WORKFLOW_ID", "")
row["build_id"] = build_id

tmp.append(json.dumps(row))
if len(tmp) == 500:
Expand All @@ -257,37 +290,37 @@ def send_implemented_coverage(file: str, timestamp: str, type: str):
def main():
token = os.environ.get("TINYBIRD_PARITY_ANALYTICS_TOKEN", "")
metric_report_dir = os.environ.get("METRIC_REPORT_DIR_PATH", "")
community_impl_coverage = os.environ.get("COMMUNITY_IMPL_COV_PATH", "")

impl_coverage_file = os.environ.get("IMPLEMENTATION_COVERAGE_FILE", "")
source_type = os.environ.get("SOURCE_TYPE", "")
missing_info = (
"missing data, please check the available ENVs that are required to run the script"
)
if not token:
print(missing_info)
print("missing TINYBIRD_PARITY_ANALYTICS_TOKEN")
return
print(
f"METRIC_REPORT_DIR_PATH={metric_report_dir}, IMPLEMENTATION_COVERAGE_FILE={impl_coverage_file}, "
f"SOURCE_TYPE={source_type}"
)
if not metric_report_dir:
print(missing_info)
print("missing METRIC_REPORT_DIR_PATH")
return
if not community_impl_coverage:
if not impl_coverage_file:
print(missing_info)
print("missing IMPLEMENTATION_COVERAGE_FILE")
return
if not source_type:
print(missing_info)
print("missing COMMUNITY_IMPL_COV_PATH")
print("missing SOURCE_TYPE")
return
if not token:
print(missing_info)
print("missing TINYBIRD_PARITY_ANALYTICS_TOKEN")
return

# create one timestamp that will be used for all the data sent
timestamp: str = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")

# TODO re-enable pro tests (but these should be sent by the pro pipeline)
# pro_impl_coverage = os.environ.get("PRO_IMPL_COV_PATH", "")
# if not pro_impl_coverage:
# print(missing_info)
# print("missing PRO_IMPL_COV_PATH")
# return
# send_implemented_coverage(pro_impl_coverage, timestamp=timestamp, type="pro")

send_metric_report(metric_report_dir, timestamp)
send_implemented_coverage(community_impl_coverage, timestamp=timestamp, type="community")
send_metric_report(metric_report_dir, source_type=source_type, timestamp=timestamp)
send_implemented_coverage(impl_coverage_file, timestamp=timestamp, type=source_type)


if __name__ == "__main__":
Expand Down

0 comments on commit 86a5da8

Please sign in to comment.