
Commit 57427de

Merge pull request #28 from aiven/tsharju-db-size-check
add check for maximum databases size to migrate #28
2 parents a6bae8b + f6d6b4d commit 57427de

5 files changed: +206 -7 lines changed

README.md

Lines changed: 2 additions & 0 deletions
@@ -122,6 +122,8 @@ Use `--no-replicate-extension-tables` to skip extension tables. By default it a
 
 With `--force-method` you can specify if you wish to use either replication or dump method. Otherwise the most suitable method is chosen automatically.
 
+Using `--dbs-max-total-size` together with `--validate` you can check whether the total size of the source databases is below some threshold.
+
 ### API example
 
 Migrating from AWS RDS to Aiven for PostgreSQL. Logical replication is enabled in source AWS RDS PostgreSQL
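
The same check can be exercised through the Python API. A minimal sketch, assuming the PGMigrate constructor arguments shown in the tests added by this commit (the connection strings and the 1 GiB limit are made-up placeholders):

    from aiven_db_migrate.migrate.errors import PGMigrateValidationFailedError
    from aiven_db_migrate.migrate.pgmigrate import PGMigrate

    # Hypothetical source/target connection strings.
    pg_mig = PGMigrate(
        source_conn_info="postgres://user:pass@source.example.com:5432/defaultdb",
        target_conn_info="postgres://user:pass@target.example.com:5432/defaultdb",
    )
    try:
        pg_mig.validate(dbs_max_total_size=1024 ** 3)  # fail validation above ~1 GiB
    except PGMigrateValidationFailedError as err:
        print(err)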

aiven_db_migrate/migrate/pgmigrate.py

Lines changed: 40 additions & 6 deletions
@@ -389,6 +389,18 @@ def mangle_db_name(self, db_name: str) -> str:
 
 class PGSource(PGCluster):
     """Source PostgreSQL cluster"""
+    def get_size(self, *, dbname, only_tables: Optional[List[str]] = None) -> float:
+        if only_tables == []:
+            return 0
+        if only_tables is not None:
+            query = "SELECT SUM(pg_total_relation_size(tablename)) AS size FROM UNNEST(%s) AS tablename"
+            args = [only_tables]
+        else:
+            query = "SELECT pg_database_size(oid) AS size FROM pg_catalog.pg_database WHERE datname = %s"
+            args = [dbname]
+        result = self.c(query, args=args, dbname=dbname)
+        return result[0]["size"] or 0
+
     def create_publication(self, *, dbname: str, only_tables: Optional[List[str]] = None) -> str:
         mangled_name = self.mangle_db_name(dbname)
         pubname = f"aiven_db_migrate_{mangled_name}_pub"
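
Note on get_size(): with an explicit table list it sums pg_total_relation_size() over those tables, otherwise it reports pg_database_size() for the whole database. A standalone sketch of the same two queries with plain psycopg2 (the DSN and table names are placeholders):

    import psycopg2

    # Placeholder DSN; the queries mirror PGSource.get_size() above.
    conn = psycopg2.connect("postgres://user:pass@source.example.com:5432/mydb")
    with conn, conn.cursor() as cur:
        # Explicit table list: psycopg2 adapts the Python list to a SQL array for UNNEST.
        cur.execute(
            "SELECT SUM(pg_total_relation_size(tablename)) AS size FROM UNNEST(%s) AS tablename",
            (["table_0", "table_1"],),
        )
        tables_size = cur.fetchone()[0] or 0
        # No table list: size of the whole database.
        cur.execute("SELECT pg_database_size(oid) AS size FROM pg_catalog.pg_database WHERE datname = %s", ("mydb",))
        db_size = cur.fetchone()[0]
    print(tables_size, db_size)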
@@ -766,7 +778,7 @@ def _convert_table_names(self, tables: Optional[List[str]]) -> Set[PGTable]:
             )
         return ret
 
-    def filter_tables(self, db: PGDatabase) -> List[str]:
+    def filter_tables(self, db: PGDatabase) -> Optional[List[str]]:
         """
         Given a database, it will attempt to return a list of tables that should be data dumped / replicated
         based on the skip table list, with table list and the replicate extensions flag
@@ -779,7 +791,7 @@ def filter_tables(self, db: PGDatabase) -> List[str]:
             "Filtering tables for db %r, and skip tables %r and with tables %r", db, self.skip_tables, self.with_tables
         )
         if not self.skip_tables and not self.with_tables and self.replicate_extensions:
-            return []
+            return None
         if not db.tables:
             return []
         ret: Set[PGTable] = set()
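
The return-type change is behavioural: None now means "no filter, include the whole database", while an empty list still means "include nothing". An illustrative helper (not part of the codebase) showing how callers can distinguish the two:

    from typing import List, Optional

    def describe_filter(only_tables: Optional[List[str]]) -> str:
        # Mirrors the convention used by filter_tables() and get_size() above.
        if only_tables is None:
            return "no table filter: include the whole database"
        if not only_tables:
            return "empty filter: include nothing"
        return f"explicit filter: include {len(only_tables)} table(s)"

    print(describe_filter(None))         # whole database
    print(describe_filter([]))           # nothing
    print(describe_filter(["table_0"]))  # one table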
@@ -852,6 +864,17 @@ def _check_databases(self):
             else:
                 self.log.info("Database %r already exists in target", dbname)
 
+    def _check_database_size(self, max_size: float):
+        dbs_size = 0
+        for dbname, source_db in self.source.databases.items():
+            only_tables = self.filter_tables(db=source_db)
+            db_size = self.source.get_size(dbname=dbname, only_tables=only_tables)
+            dbs_size += db_size
+        if dbs_size > max_size:
+            raise PGMigrateValidationFailedError(
+                f"Databases do not fit to the required maximum size ({dbs_size} > {max_size})"
+            )
+
     def _check_pg_lang(self):
         source_lang = {lan["lanname"] for lan in self.source.pg_lang}
         target_lang = {lan["lanname"] for lan in self.target.pg_lang}
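
Note: the error reports both the accumulated source size and the limit. For example, validating a roughly 100 MiB source against max_size=0 would raise something like (illustrative numbers):

    PGMigrateValidationFailedError: Databases do not fit to the required maximum size (104857600 > 0)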
@@ -1053,7 +1076,8 @@ def _dump_data(self, *, db: PGDatabase) -> PGMigrateStatus:
             "--data-only",
             self.source.conn_str(dbname=dbname),
         ]
-        pg_dump_cmd.extend([f"--table={w}" for w in self.filter_tables(db)])
+        tables = self.filter_tables(db) or []
+        pg_dump_cmd.extend([f"--table={w}" for w in tables])
         subtask: PGSubTask = self._pg_dump_pipe_psql(
             pg_dump_cmd=pg_dump_cmd, target_conn_str=self.target.conn_str(dbname=dbname)
         )
@@ -1076,7 +1100,8 @@ def _db_replication(self, *, db: PGDatabase) -> PGMigrateStatus:
         dbname = db.dbname
         pubname = slotname = subname = None
         try:
-            pubname = self.source.create_publication(dbname=dbname, only_tables=self.filter_tables(db))
+            tables = self.filter_tables(db) or []
+            pubname = self.source.create_publication(dbname=dbname, only_tables=tables)
             slotname = self.source.create_replication_slot(dbname=dbname)
             subname = self.target.create_subscription(
                 conn_str=self.source.conn_str(dbname=dbname), pubname=pubname, slotname=slotname, dbname=dbname
@@ -1129,7 +1154,7 @@ def _db_migrate(self, *, pgtask: PGMigrateTask) -> PGMigrateStatus:
             pgtask.method = PGMigrateMethod.dump
             return self._dump_data(db=pgtask.source_db)
 
-    def validate(self):
+    def validate(self, dbs_max_total_size: Optional[float] = None):
         """
         Do best effort validation whether all the bits and pieces are in place for migration to succeed.
         * Migrating to same server is not supported (doable but requires obviously different dbname)
@@ -1154,6 +1179,8 @@ def validate(self):
             # but it can be newer than the source version: source <= pgdump <= target
             self.pgbin = find_pgbin_dir(str(self.source.version), max_pgversion=str(self.target.version))
             self._check_databases()
+            if dbs_max_total_size is not None:
+                self._check_database_size(max_size=dbs_max_total_size)
             self._check_pg_lang()
             self._check_pg_ext()
         except KeyError as err:
@@ -1302,6 +1329,12 @@ def main(args=None, *, prog="pg_migrate"):
         default=None,
         help="Force the migration method to be used as either replication or dump.",
     )
+    parser.add_argument(
+        "--dbs-max-total-size",
+        type=int,
+        default=-1,
+        help="Max total size of databases to be migrated, ignored by default",
+    )
 
     args = parser.parse_args(args)
     log_format = "%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s"
@@ -1332,7 +1365,8 @@ def main(args=None, *, prog="pg_migrate"):
        raise ValueError(f"Unsupported migration method '{args.force_method}'") from e
 
    if args.validate:
-        pg_mig.validate()
+        dbs_max_total_size = None if args.dbs_max_total_size == -1 else args.dbs_max_total_size
+        pg_mig.validate(dbs_max_total_size=dbs_max_total_size)
    else:
        result: PGMigrateResult = pg_mig.migrate(force_method=method)
        print()
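
The -1 default acts as a sentinel for "no limit": main() forwards a size to validate() only when the flag was actually given. A self-contained sketch of that pattern (the parser below is illustrative, not the real pg_migrate parser):

    import argparse

    parser = argparse.ArgumentParser(prog="pg_migrate-sketch")
    parser.add_argument(
        "--dbs-max-total-size",
        type=int,
        default=-1,
        help="Max total size of databases to be migrated, ignored by default",
    )

    args = parser.parse_args(["--dbs-max-total-size", "1073741824"])
    dbs_max_total_size = None if args.dbs_max_total_size == -1 else args.dbs_max_total_size
    print(dbs_max_total_size)  # 1073741824 here; None when the flag is omitted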

test/conftest.py

Lines changed: 4 additions & 1 deletion
@@ -4,6 +4,7 @@
 
 from aiven_db_migrate.migrate.pgutils import find_pgbin_dir
 from contextlib import contextmanager
+from copy import copy
 from datetime import datetime
 from distutils.version import LooseVersion
 from pathlib import Path
@@ -423,7 +424,9 @@ def generate_fixtures():
            assert version in SUPPORTED_PG_VERSIONS, f"Supported pg versions are: {SUPPORTED_PG_VERSIONS}"
            pg_target_versions.append(version)
    else:
-        pg_target_versions = SUPPORTED_PG_VERSIONS
+        # We do not support PG 9.5 as target
+        pg_target_versions = copy(SUPPORTED_PG_VERSIONS)
+        pg_target_versions.remove("9.5")
 
    for source in pg_source_versions:
        name_prefix = "pg{}".format(source.replace(".", ""))
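
The copy() matters because SUPPORTED_PG_VERSIONS is a shared module-level list: calling .remove("9.5") on it directly would mutate the list for every other user of the constant. A minimal illustration of the aliasing pitfall (the version list is made up):

    from copy import copy

    SUPPORTED_PG_VERSIONS = ["9.5", "9.6", "10", "11", "12"]  # illustrative values

    filtered = copy(SUPPORTED_PG_VERSIONS)  # independent shallow copy
    filtered.remove("9.5")

    print(SUPPORTED_PG_VERSIONS)  # unchanged: ['9.5', '9.6', '10', '11', '12']
    print(filtered)               # ['9.6', '10', '11', '12']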

test/test_db_size_check.py

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+from aiven_db_migrate.migrate.pgmigrate import PGMigrate
+from test.conftest import PGRunner
+from test.utils import random_string
+from typing import Tuple
+
+import psycopg2
+import pytest
+
+
+def test_db_size(pg_source_and_target_replication: Tuple[PGRunner, PGRunner]):
+    source, target = pg_source_and_target_replication
+
+    db_name = random_string(6)
+    other_db_name = random_string(6)
+
+    source.create_db(dbname=db_name)
+    source.create_db(dbname=other_db_name)
+
+    pg_mig = PGMigrate(
+        source_conn_info=source.super_conn_info(),
+        target_conn_info=target.super_conn_info(),
+        verbose=True,
+    )
+
+    # Create few tables and insert some data
+    tables = [f'table_{i}' for i in range(4)]
+    for dbname in {db_name, other_db_name}:
+        with source.cursor(dbname=dbname) as c:
+            for t in tables:
+                c.execute(f"DROP TABLE IF EXISTS {t}")
+                c.execute(f"CREATE TABLE {t} (foo INT)")
+                c.execute(f"INSERT INTO {t} (foo) VALUES (1), (2), (3)")
+
+    size = pg_mig.source.get_size(dbname=db_name, only_tables=[])
+    assert size == 0
+
+    size = pg_mig.source.get_size(dbname=db_name)
+    assert size >= 0  # returns slightly different values per pg version
+
+    size = pg_mig.source.get_size(dbname=db_name, only_tables=tables)
+    assert size == 32768
+
+    size = pg_mig.source.get_size(dbname=db_name, only_tables=tables[:1])
+    assert size == 8192
+
+    with pytest.raises(psycopg2.OperationalError):
+        size = pg_mig.source.get_size(dbname="notfound")
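
The expected values follow from PostgreSQL's default 8 kB block size: each freshly populated heap table occupies one page and has no indexes, so (assuming the default block size):

    BLOCK_SIZE = 8192       # default PostgreSQL page size in bytes
    print(4 * BLOCK_SIZE)   # 32768 -> expected size of the four test tables
    print(1 * BLOCK_SIZE)   # 8192  -> expected size of tables[:1]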

test/test_migrate_checks.py

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+# Copyright (c) 2021 Aiven, Helsinki, Finland. https://aiven.io/
+
+from aiven_db_migrate.migrate.errors import PGMigrateValidationFailedError
+from aiven_db_migrate.migrate.pgmigrate import PGMigrate
+from test.conftest import PGRunner
+from test.utils import random_string
+from typing import Tuple
+from unittest.mock import patch
+
+import pytest
+
+
+def test_dbs_max_total_size_check(pg_source_and_target: Tuple[PGRunner, PGRunner]):
+    source, target = pg_source_and_target
+    dbnames = {random_string() for _ in range(3)}
+
+    for dbname in dbnames:
+        source.create_db(dbname=dbname)
+        target.create_db(dbname=dbname)
+
+    # This DB seems to be created outside the tests
+    dbnames.add("postgres")
+
+    # Create few tables and insert some data
+    tables = [f'table_{i}' for i in range(4)]
+    for dbname in dbnames:
+        with source.cursor(dbname=dbname) as c:
+            for t in tables:
+                c.execute(f"DROP TABLE IF EXISTS {t}")
+                c.execute(f"CREATE TABLE {t} (foo INT)")
+                c.execute(f"INSERT INTO {t} (foo) VALUES (1), (2), (3)")
+
+    pg_mig = PGMigrate(
+        source_conn_info=source.conn_info(),
+        target_conn_info=target.conn_info(),
+        createdb=False,
+        verbose=True,
+    )
+
+    with patch(
+        "aiven_db_migrate.migrate.pgmigrate.PGMigrate._check_database_size", side_effect=pg_mig._check_database_size
+    ) as mock_db_size_check:
+        # DB size check is not run
+        pg_mig.validate()
+        mock_db_size_check.assert_not_called()
+
+        mock_db_size_check.reset_mock()
+
+        # DB size check with max size of zero
+        with pytest.raises(PGMigrateValidationFailedError) as e:
+            pg_mig.validate(dbs_max_total_size=0)
+        assert "Databases do not fit to the required maximum size" in str(e)
+        mock_db_size_check.assert_called_once_with(max_size=0)
+
+        mock_db_size_check.reset_mock()
+
+        # DB size check with enough size
+        pg_mig.validate(dbs_max_total_size=1073741824)
+        mock_db_size_check.assert_called_once_with(max_size=1073741824)
+
+    # Test with DB name filtering
+    pg_mig = PGMigrate(
+        source_conn_info=source.conn_info(),
+        target_conn_info=target.conn_info(),
+        createdb=False,
+        verbose=True,
+        filtered_db=",".join(dbnames),
+    )
+
+    with patch(
+        "aiven_db_migrate.migrate.pgmigrate.PGMigrate._check_database_size", side_effect=pg_mig._check_database_size
+    ) as mock_db_size_check:
+        # Should pass as all DBs are filtered out from size calculations
+        pg_mig.validate(dbs_max_total_size=0)
+        mock_db_size_check.assert_called_once_with(max_size=0)
+
+    # Test with table filtering
+
+    # Include all tables in "skip_tables"
+    pg_mig = PGMigrate(
+        source_conn_info=source.conn_info(),
+        target_conn_info=target.conn_info(),
+        createdb=False,
+        verbose=True,
+        skip_tables=tables,  # skip all tables
+    )
+
+    with patch(
+        "aiven_db_migrate.migrate.pgmigrate.PGMigrate._check_database_size", side_effect=pg_mig._check_database_size
+    ) as mock_db_size_check:
+        # Should pass as all tables are filtered out from size calculations
+        pg_mig.validate(dbs_max_total_size=0)
+        mock_db_size_check.assert_called_once_with(max_size=0)
+
+    # Only the first table is included
+    pg_mig = PGMigrate(
+        source_conn_info=source.conn_info(),
+        target_conn_info=target.conn_info(),
+        createdb=False,
+        verbose=True,
+        with_tables=tables[:1],  # include only one table
+    )
+    with patch(
+        "aiven_db_migrate.migrate.pgmigrate.PGMigrate._check_database_size", side_effect=pg_mig._check_database_size
+    ) as mock_db_size_check:
+        # This fails as one table is included in check and it should have data
+        with pytest.raises(PGMigrateValidationFailedError) as e:
+            pg_mig.validate(dbs_max_total_size=0)
+        assert "Databases do not fit to the required maximum size" in str(e)
+        mock_db_size_check.assert_called_once_with(max_size=0)
+
+    # Should easily fit
+    pg_mig.validate(dbs_max_total_size=1073741824)
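
The tests patch PGMigrate._check_database_size with side_effect bound to the real instance method, so the check still runs while the mock records whether validate() called it and with which arguments. A generic sketch of that pattern with stand-in classes:

    from unittest.mock import patch

    class Greeter:
        def greet(self, name):
            return f"hello {name}"

    g = Greeter()
    # Patch the method on the class but keep the real behaviour via side_effect.
    with patch.object(Greeter, "greet", side_effect=g.greet) as mock_greet:
        assert g.greet("world") == "hello world"     # real code still runs
        mock_greet.assert_called_once_with("world")  # and the call was recorded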
