Skip to content
This repository was archived by the owner on Feb 26, 2025. It is now read-only.

Commit ef7dc4f

Browse files
committed
Polish and add code comments
1 parent 858d89e commit ef7dc4f

File tree

3 files changed

+74
-27
lines changed

3 files changed

+74
-27
lines changed

commands/__init__.py

+9
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import concurrent.futures
12
import os
23

34
import backoff
@@ -70,3 +71,11 @@ def records_equal(a, b):
7071
ra = {k: v for k, v in a.items() if k not in ignored_fields}
7172
rb = {k: v for k, v in b.items() if k not in ignored_fields}
7273
return ra == rb
74+
75+
76+
def call_parallel(func, args_list, max_workers=4):
77+
results = []
78+
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
79+
futures = [executor.submit(func, *args) for args in args_list]
80+
results = [future.result() for future in futures]
81+
return results

commands/build_bundles.py

+42-21
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,10 @@
1-
import concurrent.futures
1+
"""
2+
This command will create Zip files in order to bundle all collections data,
3+
and all attachments of collections that have the `attachment.bundle` flag in
4+
their metadata.
5+
It then uploads these zip files to Google Cloud Storage.
6+
"""
7+
28
import io
39
import json
410
import os
@@ -8,27 +14,26 @@
814
import requests
915
from google.cloud import storage
1016

11-
from . import KintoClient, retry_timeout
17+
from . import KintoClient, call_parallel, retry_timeout
1218

1319

1420
SERVER = os.getenv("SERVER")
15-
REQUESTS_PARALLEL_COUNT = int(os.getenv("REQUESTS_PARALLEL_COUNT", "4"))
21+
REQUESTS_PARALLEL_COUNT = int(os.getenv("REQUESTS_PARALLEL_COUNT", "8"))
1622
BUNDLE_MAX_SIZE_BYTES = int(os.getenv("BUNDLE_MAX_SIZE_BYTES", "20_000_000"))
17-
BUILD_ALL = os.getenv("BUILD_ALL", "0") in "1yY"
18-
STORAGE_BUCKET_NAME = os.getenv("STORAGE_BUCKET_NAME", "rs-attachments")
23+
STORAGE_BUCKET_NAME = os.getenv("STORAGE_BUCKET_NAME", "remote-settings-nonprod-stage-attachments")
1924
DESTINATION_FOLDER = os.getenv("DESTINATION_FOLDER", "bundles")
25+
# Flags for local development
26+
BUILD_ALL = os.getenv("BUILD_ALL", "0") in "1yY"
2027
SKIP_UPLOAD = os.getenv("SKIP_UPLOAD", "0") in "1yY"
2128

2229

23-
def call_parallel(func, args_list):
24-
results = []
25-
with concurrent.futures.ThreadPoolExecutor(max_workers=REQUESTS_PARALLEL_COUNT) as executor:
26-
futures = [executor.submit(func, *args) for args in args_list]
27-
results = [future.result() for future in futures]
28-
return results
29-
30-
3130
def fetch_all_changesets(client):
31+
"""
32+
Return the `/changeset` responses for all collections listed
33+
in the `monitor/changes` endpoint.
34+
The result contains the metadata and all the records of all collections
35+
for both preview and main buckets.
36+
"""
3237
random_cache_bust = random.randint(999999000000, 999999999999)
3338
monitor_changeset = client.get_changeset("monitor", "changes", random_cache_bust)
3439
print("%s collections" % len(monitor_changeset["changes"]))
@@ -37,7 +42,7 @@ def fetch_all_changesets(client):
3742
(c["bucket"], c["collection"], c["last_modified"]) for c in monitor_changeset["changes"]
3843
]
3944
all_changesets = call_parallel(
40-
lambda bid, cid, ts: client.get_changeset(bid, cid, ts), args_list
45+
lambda bid, cid, ts: client.get_changeset(bid, cid, ts), args_list, REQUESTS_PARALLEL_COUNT
4146
)
4247
return [
4348
{"bucket": bid, **changeset} for (bid, _, _), changeset in zip(args_list, all_changesets)
@@ -51,7 +56,11 @@ def fetch_attachment(url):
5156
return resp.content
5257

5358

54-
def write_zip(output_path, content):
59+
def write_zip(output_path: str, content: list[tuple[str, bytes]]):
60+
"""
61+
Write a Zip at the specified `output_path` location with the specified `content`.
62+
The content is specified as a list of file names and their binary content.
63+
"""
5564
parent_folder = os.path.dirname(output_path)
5665
os.makedirs(parent_folder, exist_ok=True)
5766

@@ -64,11 +73,15 @@ def write_zip(output_path, content):
6473
print("Wrote %r" % output_path)
6574

6675

67-
def sync_cloud_storage(folder):
76+
def sync_cloud_storage(folder, storage_bucket):
77+
"""
78+
Synchronizes a local folder (eg. `bundles/`) with a remote one in the specified
79+
`storage_bucket` name.
80+
"""
6881
# Ensure you have set the GOOGLE_APPLICATION_CREDENTIALS environment variable
6982
# to the path of your Google Cloud service account key file before running this script.
7083
client = storage.Client()
71-
bucket = client.bucket(STORAGE_BUCKET_NAME)
84+
bucket = client.bucket(storage_bucket)
7285
local_files = set()
7386
for root, _, files in os.walk(folder):
7487
for file in files:
@@ -77,17 +90,25 @@ def sync_cloud_storage(folder):
7790

7891
blob = bucket.blob(remote_file_path)
7992
blob.upload_from_filename(local_file_path)
80-
print(f"Uploaded {local_file_path} to gs://{STORAGE_BUCKET_NAME}/{remote_file_path}")
93+
print(f"Uploaded {local_file_path} to gs://{storage_bucket}/{remote_file_path}")
8194
local_files.add(remote_file_path)
8295

8396
blobs = bucket.list_blobs(prefix=folder)
8497
for blob in blobs:
8598
if blob.name not in local_files:
8699
blob.delete()
87-
print(f"Deleted gs://{STORAGE_BUCKET_NAME}/{blob.name}")
100+
print(f"Deleted gs://{storage_bucket}/{blob.name}")
88101

89102

90103
def build_bundles(event, context):
104+
"""
105+
Main command entry point that:
106+
- fetches all collections changesets
107+
- builds a `bundles/changesets.zip`
108+
- fetches attachments of all collections with bundle flag
109+
- builds `bundles/{bid}--{cid}.zip` for each of them
110+
- synchronizes the `bundles/` folder with a remote Cloud storage bucket
111+
"""
91112
rs_server = event.get("server") or SERVER
92113

93114
client = KintoClient(server_url=rs_server)
@@ -127,12 +148,12 @@ def build_bundles(event, context):
127148

128149
# Fetch all attachments and build "{bid}--{cid}.zip"
129150
args_list = [(f'{base_url}{r["attachment"]["location"]}',) for r in records]
130-
all_attachments = call_parallel(fetch_attachment, args_list)
151+
all_attachments = call_parallel(fetch_attachment, args_list, REQUESTS_PARALLEL_COUNT)
131152
write_zip(
132153
f"{DESTINATION_FOLDER}/{bid}--{cid}.zip",
133154
[(f'{record["id"]}.meta.json', json.dumps(record)) for record in records]
134155
+ [(record["id"], attachment) for record, attachment in zip(records, all_attachments)],
135156
)
136157

137158
if not SKIP_UPLOAD:
138-
sync_cloud_storage(DESTINATION_FOLDER)
159+
sync_cloud_storage(DESTINATION_FOLDER, STORAGE_BUCKET_NAME)

tests/test_build_bundles.py

+23-6
Original file line number | Diff line number | Diff line change
@@ -129,21 +129,36 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_clou
129129
mock_fetch_all_changesets.return_value = [
130130
{
131131
"bucket": "bucket1",
132-
"changes": [{"id": "record1", "attachment": {"location": "file.jpg", "size": 10}}],
132+
"changes": [
133+
{"id": "record1", "attachment": {"location": "file.jpg", "size": 10}},
134+
{"id": "record2"},
135+
],
133136
"metadata": {"id": "collection1", "attachment": {"bundle": True}},
134-
}
137+
},
138+
{ # collection without bundle flag
139+
"bucket": "bucket2",
140+
"changes": [{"id": "record2"}],
141+
"metadata": {"id": "collection2"},
142+
},
143+
{ # collection without attachments
144+
"bucket": "bucket3",
145+
"changes": [{"id": "record3"}],
146+
"metadata": {"id": "collection3", "attachment": {"bundle": True}},
147+
},
135148
]
136149

137150
build_bundles(event, context={})
138151

139-
assert mock_write_zip.call_count == 2 # One for changesets and one for the attachments
152+
assert mock_write_zip.call_count == 2 # One for changesets and only one for the attachments
140153
calls = mock_write_zip.call_args_list
141154

142155
# Assert the first call (changesets.zip)
143156
changesets_zip_path, changesets_zip_files = calls[0][0]
144157
assert changesets_zip_path == "bundles/changesets.zip"
145-
assert len(changesets_zip_files) == 1
158+
assert len(changesets_zip_files) == 3
146159
assert changesets_zip_files[0][0] == "bucket1--collection1.json"
160+
assert changesets_zip_files[1][0] == "bucket2--collection2.json"
161+
assert changesets_zip_files[2][0] == "bucket3--collection3.json"
147162

148163
# Assert the second call (attachments zip)
149164
attachments_zip_path, attachments_zip_files = calls[1][0]
@@ -153,7 +168,9 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_clou
153168
assert attachments_zip_files[1][0] == "record1"
154169
assert attachments_zip_files[1][1] == b"jpeg_content"
155170

156-
mock_sync_cloud_storage.assert_called_once_with("bundles")
171+
mock_sync_cloud_storage.assert_called_once_with(
172+
"bundles", "remote-settings-nonprod-stage-attachments"
173+
)
157174

158175

159176
@patch("commands.build_bundles.os.walk")
@@ -173,7 +190,7 @@ def test_sync_cloud_storage_upload_and_delete(mock_os_walk, mock_storage_client,
173190
mock_blob3.name = f"{local_folder}/file3.txt"
174191
bucket.list_blobs.return_value = [mock_blob1, mock_blob2, mock_blob3]
175192

176-
sync_cloud_storage(local_folder)
193+
sync_cloud_storage(local_folder, "remote-bucket")
177194

178195
# Check uploads
179196
mock_blob1.upload_from_filename.assert_called_once_with(f"{local_folder}/file1.txt")

0 commit comments

Comments (0)