Skip to content

Commit 2bbb5eb

Browse files
authored
adapter,storage: fix replacement collection_metadata leak (#36172)
This commit fixes a leak of storage `collection_metadata` entries when dropping replacement MVs without applying them. The previous code intentionally avoided adding the IDs of dropped replacements to `storage_collections_to_drop`, to avoid finalizing the shard, but causing the leak in doing so. The solution implemented here is to make `prepare_state` smarter and only finalize the shard if the dropped collection is a primary. ### Motivation Fixes MaterializeInc/database-issues#11322
1 parent d7ba94d commit 2bbb5eb

3 files changed

Lines changed: 60 additions & 10 deletions

File tree

src/adapter/src/catalog/transact.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,11 +1714,7 @@ impl Catalog {
17141714
for item_id in delta.items {
17151715
let entry = state.get_entry(&item_id);
17161716

1717-
// Drop associated storage collections, unless the dropped item is a
1718-
// replacement, in which case the replacement target owns the storage
1719-
// collection.
1720-
if entry.item().is_storage_collection() && entry.replacement_target().is_none()
1721-
{
1717+
if entry.item().is_storage_collection() {
17221718
storage_collections_to_drop.extend(entry.global_ids());
17231719
}
17241720

src/storage-client/src/storage_collections.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,11 +1644,18 @@ impl StorageCollections for StorageCollectionsImpl {
16441644
// Delete the metadata for any dropped collections.
16451645
let dropped_mappings = txn.delete_collection_metadata(ids_to_drop);
16461646

1647-
let dropped_shards = dropped_mappings
1648-
.into_iter()
1649-
.map(|(_id, shard)| shard)
1650-
.collect();
1651-
1647+
// Only finalize the shards of dropped collections that don't have a primary.
1648+
// Otherwise the shard might still be in use by the primary.
1649+
let mut dropped_shards = BTreeSet::new();
1650+
{
1651+
let collections = self.collections.lock().expect("poisoned");
1652+
for (id, shard) in dropped_mappings {
1653+
let coll = collections.get(&id).expect("must exist");
1654+
if coll.primary.is_none() {
1655+
dropped_shards.insert(shard);
1656+
}
1657+
}
1658+
}
16521659
txn.insert_unfinalized_shards(dropped_shards)?;
16531660

16541661
// Reconcile any shards we've successfully finalized with the shard

test/cluster/mzcompose.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6431,6 +6431,53 @@ def workflow_github_10086(c: Composition) -> None:
64316431
assert not upper_empty
64326432

64336433

6434+
def workflow_github_11322(c: Composition) -> None:
6435+
"""
6436+
Regression test for database-issues#11322, in which dropping a
6437+
replacement MV leaks a `collection_metadata` entry.
6438+
"""
6439+
6440+
def storage_collection_metadata() -> dict[str, str]:
6441+
port = c.port("materialized", 6878)
6442+
resp = requests.get(f"http://localhost:{port}/api/catalog/dump")
6443+
resp.raise_for_status()
6444+
return resp.json()["storage_metadata"]["collection_metadata"]
6445+
6446+
c.up("materialized")
6447+
6448+
# Create a materialized view and a replacement.
6449+
c.sql("""
6450+
CREATE TABLE t (a int);
6451+
CREATE MATERIALIZED VIEW mv AS SELECT * FROM t;
6452+
CREATE REPLACEMENT MATERIALIZED VIEW rp FOR mv AS SELECT * FROM t;
6453+
SELECT * FROM mv;
6454+
""")
6455+
6456+
[(mv_id,)] = c.sql_query("SELECT id FROM mz_materialized_views WHERE name = 'mv'")
6457+
[(rp_id,)] = c.sql_query("SELECT id FROM mz_materialized_views WHERE name = 'rp'")
6458+
6459+
collection_metadata = storage_collection_metadata()
6460+
mv_shard = collection_metadata[mv_id]
6461+
rp_shard = collection_metadata[rp_id]
6462+
assert mv_shard == rp_shard
6463+
6464+
# Drop the replacement without applying it.
6465+
c.sql("DROP MATERIALIZED VIEW rp")
6466+
6467+
# Verify replacement's collection metadata has been removed.
6468+
collection_metadata = storage_collection_metadata()
6469+
assert collection_metadata[mv_id] == mv_shard
6470+
assert rp_id not in collection_metadata
6471+
6472+
c.kill("materialized")
6473+
c.up("materialized")
6474+
6475+
# Verify replacement's collection metadata has been removed durably.
6476+
collection_metadata = storage_collection_metadata()
6477+
assert collection_metadata[mv_id] == mv_shard
6478+
assert rp_id not in collection_metadata
6479+
6480+
64346481
def workflow_test_github_10102(c: Composition) -> None:
64356482
"""
64366483
Regression test for database-issues#10102:

0 commit comments

Comments
 (0)