From 8854254fbb7ff075232ebea195fa33a7cd56a859 Mon Sep 17 00:00:00 2001 From: TCeason Date: Fri, 5 Jun 2026 12:33:02 +0800 Subject: [PATCH 1/2] fix(query): refresh ATTACH table schema in system.columns/statistics system.columns, system.statistics and the information_schema.columns view expose per-column metadata via table.schema(). For read-only ATTACH tables the schema is not persisted on the meta server; it is derived from the source table's latest snapshot and only becomes current after a refresh. dump_tables disabled refresh for performance, so these tables reported the schema frozen at ATTACH time, hiding columns added to the source afterwards (while DESC, which refreshes, showed them). Naively dropping disable_catalog_refresh would regress the resilience added in "avoid SHOW TABLES refresh failures": a single unreachable ATTACH table makes its whole-database refresh fail, dropping the columns of healthy sibling tables. So keep listing through the refresh-disabled catalog (fast, resilient, zero S3 for normal tables), then refresh ATTACH tables individually through the original catalog, falling back to the cached schema with a warning on failure so one broken table never drops its siblings' columns. system.tables keeps refresh disabled on purpose: it only needs meta-server level table info and never reads the snapshot. --- src/query/service/tests/it/storages/system.rs | 89 +++++++++++++++++++ .../storages/system/src/columns_table.rs | 43 +++++++-- .../02_0005_attach_table_read_only.result | 8 ++ .../02_0005_attach_table_read_only.sh | 5 ++ 4 files changed, 140 insertions(+), 5 deletions(-) diff --git a/src/query/service/tests/it/storages/system.rs b/src/query/service/tests/it/storages/system.rs index 7db5dc909d93d..1e7e93fbd6a28 100644 --- a/src/query/service/tests/it/storages/system.rs +++ b/src/query/service/tests/it/storages/system.rs @@ -651,3 +651,92 @@ async fn test_show_tables_ignores_broken_attached_table_refresh() -> anyhow::Res Ok(()) } + +// `system.columns` refreshes ATTACH table schemas, unlike `SHOW TABLES`. This guards the +// resilience part of that: one ATTACH table with unreachable storage must not drop the columns +// of healthy tables in the same database. (New-column visibility is covered by the EE SLT.) +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn test_system_columns_tolerates_broken_attached_table() -> anyhow::Result<()> { + let fixture = TestFixture::setup().await?; + let ctx = fixture.new_query_ctx().await?; + let tenant = ctx.get_tenant(); + let catalog = ctx.get_catalog("default").await?; + let database = catalog.get_database(&tenant, "default").await?; + + execute_command(ctx.clone(), "create table default.healthy(a int)").await?; + + // Simulate a broken attached-table storage: any refresh from this S3 endpoint fails with 403. + let mock_server = MockServer::start().await; + Mock::given(any()) + .respond_with(ResponseTemplate::new(403)) + .mount(&mock_server) + .await; + + let broken_schema = Arc::new(TableSchema::new(vec![TableField::new( + "a", + TableDataType::Number(NumberDataType::Int32), + )])); + + catalog + .create_table(CreateTableReq { + create_option: CreateOption::Create, + catalog_name: None, + name_ident: TableNameIdent { + tenant: tenant.clone(), + db_name: "default".to_string(), + table_name: "broken_attached".to_string(), + }, + table_meta: TableMeta { + schema: broken_schema, + engine: "FUSE".to_string(), + options: [ + ( + OPT_KEY_DATABASE_ID.to_string(), + database.get_db_info().database_id.db_id.to_string(), + ), + ( + FUSE_OPT_KEY_ENABLE_AUTO_ANALYZE.to_string(), + "0".to_string(), + ), + ( + OPT_KEY_TABLE_ATTACHED_DATA_URI.to_string(), + "s3://broken-bucket/broken-attached/".to_string(), + ), + ] + .into(), + storage_params: Some(StorageParams::S3(StorageS3Config { + region: "us-east-2".to_string(), + endpoint_url: mock_server.uri(), + bucket: "broken-bucket".to_string(), + root: "/".to_string(), + access_key_id: "access_key_id".to_string(), + secret_access_key: "secret_access_key".to_string(), + disable_credential_loader: true, + ..Default::default() + })), + ..TableMeta::default() + }, + as_dropped: false, + table_properties: None, + table_partition: None, + }) + .await?; + + let result = execute_query( + ctx.clone(), + "select database, table, name from system.columns \ + where database = 'default' order by table, name", + ) + .await?; + let blocks = result.try_collect::>().await?; + let output = pretty_format_blocks(&blocks)?; + println!("{}", output); + + assert!( + output.contains("healthy"), + "system.columns must still expose healthy table columns despite a broken ATTACH table: {output}" + ); + + Ok(()) +} + diff --git a/src/query/storages/system/src/columns_table.rs b/src/query/storages/system/src/columns_table.rs index fec4ce493373b..71e551415a722 100644 --- a/src/query/storages/system/src/columns_table.rs +++ b/src/query/storages/system/src/columns_table.rs @@ -37,6 +37,7 @@ use databend_common_meta_app::tenant::Tenant; use databend_common_sql::Planner; use databend_common_storages_basic::view_table::QUERY; use databend_common_storages_basic::view_table::VIEW_ENGINE; +use databend_common_storages_fuse::FuseTable; use databend_common_storages_stream::stream_table::STREAM_ENGINE; use databend_common_storages_stream::stream_table::StreamTable; use log::warn; @@ -269,8 +270,9 @@ pub(crate) async fn dump_tables( push_downs: Option, catalog: &Arc, ) -> Result>)>> { - // For performance considerations, we do not require the most up-to-date table information here - let catalog = disable_catalog_refresh(catalog.clone())?; + // List through a refresh-disabled catalog: refreshing every ATTACH table here costs one S3 + // round-trip each, and a single unreachable one would fail the whole listing (#19759). + let disabled_catalog = disable_catalog_refresh(catalog.clone())?; // Extract filters from push_downs let func_ctx = ctx.get_function_context()?; @@ -278,15 +280,46 @@ pub(crate) async fn dump_tables( // Use unified visibility collection from util.rs let db_with_tables = - collect_visible_tables(ctx, &catalog, &filtered_db_names, &filtered_table_names).await?; + collect_visible_tables(ctx, &disabled_catalog, &filtered_db_names, &filtered_table_names) + .await?; - // Convert to the expected return format + // A read-only ATTACH table keeps its current schema in the source snapshot, not on the meta + // server, so the disabled catalog above hands back the schema frozen at ATTACH time. Refresh + // each one through the original catalog to pick up source schema changes; a failure here must + // not drop sibling tables, so fall back to the cached handle. Ok(db_with_tables .into_iter() - .map(|db| (db.name, db.tables)) + .map(|db| { + let tables = db + .tables + .into_iter() + .map(|table| refresh_attach_table(catalog, table)) + .collect(); + (db.name, tables) + }) .collect()) } +/// Refresh an ATTACH table's schema from its source storage, returning the cached handle on +/// failure. Other tables are returned as-is; their schema already comes from the meta server. +fn refresh_attach_table(catalog: &Arc, table: Arc) -> Arc { + if !FuseTable::is_table_attached(&table.get_table_info().meta.options) { + return table; + } + + match catalog.get_table_by_info(table.get_table_info()) { + Ok(refreshed) => refreshed, + Err(e) => { + warn!( + "failed to refresh schema of attach table {}, fallback to cached schema: {}", + table.get_table_info().desc, + e + ); + table + } + } +} + fn extract_filters( push_downs: &Option, func_ctx: &databend_common_expression::FunctionContext, diff --git a/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.result b/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.result index 275afc1f8ffe5..19aa636c7aecd 100755 --- a/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.result +++ b/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.result @@ -23,6 +23,14 @@ c1 VARCHAR NO 'c1' c2 VARCHAR NO 'c2' expects one row, 3 columns 0 c1 c2 +system.columns should reflect the added columns +c1 +c2 +number +information_schema.columns should reflect the added columns +c1 +c2 +number alter table drop column expects new columns: number, c2 number VARCHAR YES NULL diff --git a/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.sh b/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.sh index fcc919a17a383..36f5fbb31ae99 100755 --- a/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.sh +++ b/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.sh @@ -49,6 +49,11 @@ echo "desc attach_read_only;" | $BENDSQL_CLIENT_CONNECT echo "expects one row, 3 columns" echo "select * from attach_read_only order by number;" | $BENDSQL_CLIENT_CONNECT +echo "system.columns should reflect the added columns" +echo "select name from system.columns where database='default' and table='attach_read_only' order by name;" | $BENDSQL_CLIENT_CONNECT +echo "information_schema.columns should reflect the added columns" +echo "select column_name from information_schema.columns where table_schema='default' and table_name='attach_read_only' order by column_name;" | $BENDSQL_CLIENT_CONNECT + echo "alter table drop column" echo "alter table base drop column c1;" | $BENDSQL_CLIENT_CONNECT From 99533a418f46a9d44b37d57aeef641d023d19d19 Mon Sep 17 00:00:00 2001 From: TCeason Date: Fri, 5 Jun 2026 14:51:54 +0800 Subject: [PATCH 2/2] add settings --- src/query/service/tests/it/storages/system.rs | 5 ++- src/query/settings/src/settings_default.rs | 7 +++++ .../settings/src/settings_getter_setter.rs | 4 +++ .../storages/system/src/columns_table.rs | 31 ++++++++++++------- .../02_0005_attach_table_read_only.result | 4 +++ .../02_0005_attach_table_read_only.sh | 9 ++++-- 6 files changed, 46 insertions(+), 14 deletions(-) diff --git a/src/query/service/tests/it/storages/system.rs b/src/query/service/tests/it/storages/system.rs index 1e7e93fbd6a28..e9e24b37d4dbb 100644 --- a/src/query/service/tests/it/storages/system.rs +++ b/src/query/service/tests/it/storages/system.rs @@ -663,6 +663,10 @@ async fn test_system_columns_tolerates_broken_attached_table() -> anyhow::Result let catalog = ctx.get_catalog("default").await?; let database = catalog.get_database(&tenant, "default").await?; + // ATTACH schema refresh is opt-in; enable it so the resilience path under test is exercised. + ctx.get_settings() + .set_setting("enable_table_schema_refresh".to_string(), "1".to_string())?; + execute_command(ctx.clone(), "create table default.healthy(a int)").await?; // Simulate a broken attached-table storage: any refresh from this S3 endpoint fails with 403. @@ -739,4 +743,3 @@ async fn test_system_columns_tolerates_broken_attached_table() -> anyhow::Result Ok(()) } - diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index b5d7f62841780..b9eb863a0ba48 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -1121,6 +1121,13 @@ impl DefaultSettings { scope: SettingScope::Both, range: Some(SettingRange::Numeric(0..=1)), }), + ("enable_table_schema_refresh", DefaultSettingValue { + value: UserSettingValue::UInt64(0), + desc: "Refresh table schema from storage when listing system.columns/statistics, so schema changes invisible to the meta server (currently only read-only ATTACH tables) are reflected. Disabled by default; each refreshed table costs one storage round-trip.", + mode: SettingMode::Both, + scope: SettingScope::Both, + range: Some(SettingRange::Numeric(0..=1)), + }), ("enable_experimental_row_access_policy", DefaultSettingValue { value: UserSettingValue::UInt64(0), desc: "experiment setting enable row access policy(disable by default).", diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs index 3673f20e0594a..075269ff02cf5 100644 --- a/src/query/settings/src/settings_getter_setter.rs +++ b/src/query/settings/src/settings_getter_setter.rs @@ -197,6 +197,10 @@ impl Settings { Ok(self.try_get_u64("enable_auto_fix_missing_bloom_index")? != 0) } + pub fn get_enable_table_schema_refresh(&self) -> Result { + Ok(self.try_get_u64("enable_table_schema_refresh")? != 0) + } + // Get max_block_size. pub fn get_max_block_size(&self) -> Result { self.try_get_u64("max_block_size") diff --git a/src/query/storages/system/src/columns_table.rs b/src/query/storages/system/src/columns_table.rs index 71e551415a722..785f298963004 100644 --- a/src/query/storages/system/src/columns_table.rs +++ b/src/query/storages/system/src/columns_table.rs @@ -279,22 +279,31 @@ pub(crate) async fn dump_tables( let (filtered_db_names, filtered_table_names) = extract_filters(&push_downs, &func_ctx)?; // Use unified visibility collection from util.rs - let db_with_tables = - collect_visible_tables(ctx, &disabled_catalog, &filtered_db_names, &filtered_table_names) - .await?; + let db_with_tables = collect_visible_tables( + ctx, + &disabled_catalog, + &filtered_db_names, + &filtered_table_names, + ) + .await?; // A read-only ATTACH table keeps its current schema in the source snapshot, not on the meta - // server, so the disabled catalog above hands back the schema frozen at ATTACH time. Refresh - // each one through the original catalog to pick up source schema changes; a failure here must - // not drop sibling tables, so fall back to the cached handle. + // server, so the disabled catalog above hands back the schema frozen at ATTACH time. Refreshing + // each one through the original catalog picks up source schema changes, but costs one S3 + // round-trip per ATTACH table, so it is opt-in via enable_table_schema_refresh. A refresh + // failure must not drop sibling tables, so fall back to the cached handle. + let refresh = ctx.get_settings().get_enable_table_schema_refresh()?; Ok(db_with_tables .into_iter() .map(|db| { - let tables = db - .tables - .into_iter() - .map(|table| refresh_attach_table(catalog, table)) - .collect(); + let tables = if refresh { + db.tables + .into_iter() + .map(|table| refresh_attach_table(catalog, table)) + .collect() + } else { + db.tables + }; (db.name, tables) }) .collect()) diff --git a/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.result b/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.result index 19aa636c7aecd..d01808cb989b5 100755 --- a/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.result +++ b/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.result @@ -23,6 +23,8 @@ c1 VARCHAR NO 'c1' c2 VARCHAR NO 'c2' expects one row, 3 columns 0 c1 c2 +system.columns hides source-side added columns when refresh is disabled (default) +number system.columns should reflect the added columns c1 c2 @@ -31,6 +33,8 @@ information_schema.columns should reflect the added columns c1 c2 number +refresh does NOT persist to meta server: disable refresh again, should still see frozen schema +number alter table drop column expects new columns: number, c2 number VARCHAR YES NULL diff --git a/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.sh b/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.sh index 36f5fbb31ae99..06770244fe6af 100755 --- a/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.sh +++ b/tests/suites/5_ee/04_attach_read_only/02_0005_attach_table_read_only.sh @@ -49,10 +49,15 @@ echo "desc attach_read_only;" | $BENDSQL_CLIENT_CONNECT echo "expects one row, 3 columns" echo "select * from attach_read_only order by number;" | $BENDSQL_CLIENT_CONNECT -echo "system.columns should reflect the added columns" +echo "system.columns hides source-side added columns when refresh is disabled (default)" echo "select name from system.columns where database='default' and table='attach_read_only' order by name;" | $BENDSQL_CLIENT_CONNECT +echo "system.columns should reflect the added columns" +echo "set enable_table_schema_refresh=1; select name from system.columns where database='default' and table='attach_read_only' order by name;" | $BENDSQL_CLIENT_CONNECT echo "information_schema.columns should reflect the added columns" -echo "select column_name from information_schema.columns where table_schema='default' and table_name='attach_read_only' order by column_name;" | $BENDSQL_CLIENT_CONNECT +echo "set enable_table_schema_refresh=1; select column_name from information_schema.columns where table_schema='default' and table_name='attach_read_only' order by column_name;" | $BENDSQL_CLIENT_CONNECT + +echo "refresh does NOT persist to meta server: disable refresh again, should still see frozen schema" +echo "select name from system.columns where database='default' and table='attach_read_only' order by name;" | $BENDSQL_CLIENT_CONNECT echo "alter table drop column"