tasks_export.py

from datetime import datetime
import json
import os
from typing import Optional

import click
from flask import current_app as app
from sqlalchemy_continuum.utils import version_class, is_versioned

from main import db
from apps.common.json_export import ExportEncoder
from models import event_year

from . import base


def get_export_data(table_filter: Optional[str] = None):
    """Export data to archive using the `get_export_data` method in the model class."""
    # As we go, we check against the list of all tables, in case we forget about some
    # new object type (e.g. association table).

    # Exclude tables we know will never be exported
    ignore = ["alembic_version", "transaction"]
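    # (alembic_version only records the current migration revision, and "transaction"
    # is SQLAlchemy-Continuum's bookkeeping table, so neither holds event data.)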

    all_model_classes = {
        cls
        for cls in db.Model.registry._class_registry.values()
        if isinstance(cls, type) and issubclass(cls, db.Model)
    }
    all_version_classes = {
        version_class(c) for c in all_model_classes if is_versioned(c)
    }
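    # (version_class() gives the Continuum-generated history model for each versioned
    # parent; these are tracked so the loop below can skip them, since their rows are
    # dumped as part of the parent model's export.)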

    seen_model_classes = set()
    remaining_tables = set(db.metadata.tables)

    for model_class in sorted(all_model_classes, key=lambda c: c.__name__):
        if model_class in seen_model_classes:
            continue
        seen_model_classes.add(model_class)

        table = model_class.__table__.name  # type: ignore[attr-defined]
        model = model_class.__name__

        if table_filter and table != table_filter:
            continue

        if table in ignore:
            app.logger.debug("Ignoring %s", model)
            remaining_tables.remove(table)
            continue

        if not getattr(model_class, "__export_data__", True):
            # We don't remove the version table, as we want
            # to be explicit about chucking away edit stats
            app.logger.debug("Skipping %s", model)
            remaining_tables.remove(table)
            continue

        if model_class in all_version_classes:
            # Version tables are explicitly dumped by their parents,
            # as they don't make sense to be exported on their own
            app.logger.debug("Ignoring version model %s", model)
            continue

        if hasattr(model_class, "get_export_data"):
            try:
                export = model_class.get_export_data()
                yield model, export
            except Exception:
                app.logger.error("Error exporting %s", model)
                raise

            exported_tables = export.get("tables", [table])
            remaining_tables -= set(exported_tables)
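
    # Sanity check: anything still in remaining_tables was neither ignored, skipped,
    # nor claimed by a model's "tables" key, so it would otherwise be lost silently.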
    if remaining_tables and not table_filter:
        app.logger.warning("Remaining tables: %s", ", ".join(remaining_tables))
    elif table_filter in remaining_tables:
        app.logger.warning("Table %s not exported", table_filter)


@base.cli.command("export")
@click.argument("table", required=False)
def export_db(table):
    """Export data from the DB to disk.

    This command is run as a last step before wiping the DB after an event, to export
    all the data we want to save. It saves a private and a public export to the
    exports directory.

    Model classes should implement get_export_data, which returns a dict with keys:

        public   Public data to save in git
        private  Private data that should be stored for a limited amount of time
        tables   Tables this method exported, used to sanity check the export process

    Alternatively, add __export_data__ = False to a class to state that get_export_data
    shouldn't be called, and that its associated table doesn't need to be checked.
    """
    year = event_year()
    path = os.path.join("exports", str(year))
    for dirname in ["public", "private"]:
        os.makedirs(os.path.join(path, dirname), exist_ok=True)

    for model, export in get_export_data(table):
        for dirname in ["public", "private"]:
            if dirname in export:
                filename = os.path.join(path, dirname, "{}.json".format(model))
                try:
                    with open(filename, "w") as f:
                        json.dump(
                            export[dirname],
                            f,
                            indent=4,
                            cls=ExportEncoder,
                        )
                except Exception:
                    app.logger.exception("Error encoding export for %s", model)
                    raise click.Abort()
                app.logger.info("Exported data from %s to %s", model, filename)
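
    # Write a top-level summary so the export run itself is timestamped.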
    data = {
        "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
    }
    filename = os.path.join(path, "export.json")
    with open(filename, "w") as f:
        json.dump(data, f, indent=4, cls=ExportEncoder)

    if table:
        return
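
    # app.test_client() issues in-process requests to the schedule endpoints, so the
    # rendered frab/JSON/iCal exports can be captured without a running web server.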
    with app.test_client() as client:
        for file_type, file_url in (
            ("frab", f"frab-{year}.xml"),
            ("frab_json", f"frab-{year}.json"),
            ("json", f"{year}.json"),
            ("ics", f"{year}.ics"),
        ):
            url = f"/schedule/{file_url}"
            dest_path = os.path.join(path, "public", f"schedule.{file_type}")

            response = client.get(url)
            if response.status_code != 200:
                app.logger.error(
                    "Error fetching schedule from %s: %s", url, response.status
                )
                raise click.Abort()

            with open(dest_path, "wb") as f:
                f.write(response.data)
            app.logger.info("Fetched schedule from %s to %s", url, dest_path)

    app.logger.info("Export complete, summary written to %s", filename)