Skip to content

Commit

Permalink
Merge pull request #147 from Aiven-Open/kmichel-nested-restore
Browse files Browse the repository at this point in the history
  • Loading branch information
joelynch authored Oct 9, 2023
2 parents 77a2590 + c806009 commit 20e809d
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 5 deletions.
16 changes: 11 additions & 5 deletions astacus/coordinator/plugins/clickhouse/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,16 +481,22 @@ async def run_step(self, cluster: Cluster, context: StepsContext) -> None:
)
# Tables creation is not parallelized with gather since they can depend on each other
# (although, we could use graphlib more subtly and parallelize what we can).

# If any table was initially created with custom global settings,
# we need to re-enable these custom global settings when creating the table again.
# We can enable these settings unconditionally because they are harmless
# for tables not needing them.
session_id = secrets.token_hex()
for query in [
# If any table was initially created with custom global settings,
# we need to re-enable these custom global settings when creating the table again.
# We can enable these settings unconditionally because they are harmless
# for tables not needing them.
b"SET allow_experimental_geo_types=true",
b"SET allow_experimental_object_type=true",
b"SET allow_suspicious_low_cardinality_types=true",
# If a table was created with flatten_nested=0, we must be careful to not re-create the
# table with flatten_nested=1, since this would recreate the table with a different schema.
# If a table was created with flatten_nested=1, the query in system.tables.create_table_query
# shows the flattened fields, not the original user query with nested fields. This means that
# when re-executing the query, we don't need to re-flatten the fields.
# In both cases, flatten_nested=0 is the right choice when restoring a backup.
b"SET flatten_nested=0",
]:
try:
await self.clients[0].execute(query, session_id=session_id)
Expand Down
43 changes: 43 additions & 0 deletions tests/integration/coordinator/plugins/clickhouse/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,31 @@ async def setup_cluster_content(clients: List[HttpClickHouseClient], use_named_c
b"SETTINGS index_granularity=8192 "
b"SETTINGS allow_experimental_object_type=1"
)
# test that we correctly restore nested fields
await clients[0].execute(
b"CREATE TABLE default.nested_not_flatten (thekey UInt32, thedata Nested(a UInt32, b UInt32)) "
b"ENGINE = ReplicatedMergeTree ORDER BY (thekey) "
b"SETTINGS index_granularity=8192 "
b"SETTINGS flatten_nested=0"
)
await clients[0].execute(
b"CREATE TABLE default.nested_flatten (thekey UInt32, thedata Nested(a UInt32, b UInt32)) "
b"ENGINE = ReplicatedMergeTree ORDER BY (thekey) "
b"SETTINGS index_granularity=8192 "
b"SETTINGS flatten_nested=1"
)
await clients[0].execute(
b"CREATE TABLE default.array_tuple_not_flatten (thekey UInt32, thedata Array(Tuple(a UInt32, b UInt32))) "
b"ENGINE = ReplicatedMergeTree ORDER BY (thekey) "
b"SETTINGS index_granularity=8192 "
b"SETTINGS flatten_nested=0"
)
await clients[0].execute(
b"CREATE TABLE default.array_tuple_flatten (thekey UInt32, thedata Array(Tuple(a UInt32, b UInt32))) "
b"ENGINE = ReplicatedMergeTree ORDER BY (thekey) "
b"SETTINGS index_granularity=8192 "
b"SETTINGS flatten_nested=1"
)
# add a table with data in object storage
await clients[0].execute(
b"CREATE TABLE default.in_object_storage (thekey UInt32, thedata String) "
Expand All @@ -170,6 +195,11 @@ async def setup_cluster_content(clients: List[HttpClickHouseClient], use_named_c
await clients[0].execute(b"INSERT INTO default.with_experimental_types VALUES (123, '{\"a\":1}')")
await clients[1].execute(b"INSERT INTO default.with_experimental_types VALUES (456, '{\"b\":2}')")
await clients[2].execute(b"INSERT INTO default.with_experimental_types VALUES (789, '{\"c\":3}')")
# Sample data for nested fields
await clients[0].execute(b"INSERT INTO default.nested_not_flatten VALUES (123, [(4, 5)])")
await clients[0].execute(b"INSERT INTO default.nested_flatten VALUES (123, [4], [5])")
await clients[0].execute(b"INSERT INTO default.array_tuple_not_flatten VALUES (123, [(4, 5)])")
await clients[0].execute(b"INSERT INTO default.array_tuple_flatten VALUES (123, [4], [5])")
# And some object storage data
await clients[0].execute(b"INSERT INTO default.in_object_storage VALUES (123, 'foo')")
await clients[1].execute(b"INSERT INTO default.in_object_storage VALUES (456, 'bar')")
Expand Down Expand Up @@ -235,6 +265,19 @@ async def test_restores_table_with_experimental_types(restored_cluster: List[Cli
assert response == expected_data


@pytest.mark.asyncio
async def test_restores_table_with_nested_fields(restored_cluster: List[ClickHouseClient]) -> None:
    """Verify that nested/tuple-array tables survive a restore with their original schema.

    Tables created with flatten_nested=0 return the nested column as a single
    list-of-dicts value, while tables created with flatten_nested=1 expose one
    array per flattened sub-column (thedata.a, thedata.b).
    """
    first_client = restored_cluster[0]
    # (query, expected rows) pairs covering both Nested() and Array(Tuple()) columns,
    # each in the flattened and non-flattened variant.
    cases = [
        (b"SELECT thekey, thedata FROM default.nested_not_flatten ORDER BY thekey", [[123, [{"a": 4, "b": 5}]]]),
        (b"SELECT thekey, thedata.a, thedata.b FROM default.nested_flatten ORDER BY thekey", [[123, [4], [5]]]),
        (b"SELECT thekey, thedata FROM default.array_tuple_not_flatten ORDER BY thekey", [[123, [{"a": 4, "b": 5}]]]),
        (b"SELECT thekey, thedata.a, thedata.b FROM default.array_tuple_flatten ORDER BY thekey", [[123, [4], [5]]]),
    ]
    for query, expected in cases:
        assert await first_client.execute(query) == expected


async def check_object_storage_data(cluster: List[ClickHouseClient]) -> None:
s1_data = [[123, "foo"], [456, "bar"]]
s2_data = [[789, "baz"]]
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/coordinator/plugins/clickhouse/test_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,7 @@ async def test_creates_all_replicated_databases_and_tables_in_manifest() -> None
b"SET allow_experimental_geo_types=true",
b"SET allow_experimental_object_type=true",
b"SET allow_suspicious_low_cardinality_types=true",
b"SET flatten_nested=0",
b"CREATE TABLE db-one.table-uno ...",
b"CREATE TABLE db-one.table-dos ...",
b"CREATE TABLE db-two.table-eins ...",
Expand Down Expand Up @@ -846,6 +847,7 @@ async def test_creates_all_replicated_databases_and_tables_in_manifest_with_cust
b"SET allow_experimental_geo_types=true",
b"SET allow_experimental_object_type=true",
b"SET allow_suspicious_low_cardinality_types=true",
b"SET flatten_nested=0",
b"CREATE TABLE db-one.table-uno ...",
b"CREATE TABLE db-one.table-dos ...",
b"CREATE TABLE db-two.table-eins ...",
Expand Down Expand Up @@ -894,6 +896,7 @@ async def test_drops_each_database_on_all_servers_before_recreating_it() -> None
b"SET allow_experimental_geo_types=true",
b"SET allow_experimental_object_type=true",
b"SET allow_suspicious_low_cardinality_types=true",
b"SET flatten_nested=0",
b"CREATE TABLE db-one.table-uno ...",
b"CREATE TABLE db-one.table-dos ...",
b"CREATE TABLE db-two.table-eins ...",
Expand Down

0 comments on commit 20e809d

Please sign in to comment.