Skip to content

Commit a688ae4

Browse files
authored
Merge pull request #8 from athossampayo/fix/iceberg-missing-metadata-resilience
fix: handle missing metadata files gracefully in Iceberg analysis
2 parents 5919138 + f03cb63 commit a688ae4

1 file changed

Lines changed: 63 additions & 8 deletions

File tree

src/iceberg.rs

Lines changed: 63 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -363,6 +363,29 @@ impl IcebergAnalyzer {
363363
}
364364

365365
fn generate_recommendations(&self, metrics: &mut HealthMetrics) {
366+
// Add warnings about incomplete analysis sections
367+
let mut incomplete_sections = Vec::new();
368+
369+
if metrics.schema_evolution.is_none() {
370+
incomplete_sections.push("Schema Evolution");
371+
}
372+
if metrics.time_travel_metrics.is_none() {
373+
incomplete_sections.push("Time Travel");
374+
}
375+
if metrics.table_constraints.is_none() {
376+
incomplete_sections.push("Table Constraints");
377+
}
378+
if metrics.file_compaction.is_none() {
379+
incomplete_sections.push("File Compaction");
380+
}
381+
382+
if !incomplete_sections.is_empty() {
383+
metrics.recommendations.push(format!(
384+
"⚠️ Analysis incomplete: {} sections could not be analyzed due to missing/inaccessible metadata files (common in actively updated tables). Basic metrics are still accurate.",
385+
incomplete_sections.join(", ")
386+
));
387+
}
388+
366389
// Check for unreferenced files
367390
if !metrics.unreferenced_files.is_empty() {
368391
metrics.recommendations.push(format!(
@@ -698,8 +721,16 @@ impl IcebergAnalyzer {
698721
});
699722

700723
for metadata_file in &sorted_files {
701-
let content = self.s3_client.get_object(&metadata_file.key).await?;
702-
let metadata: Value = serde_json::from_slice(&content)?;
724+
// Best-effort fetch: skip this metadata file on any fetch error (e.g. it was removed mid-analysis — race condition)
725+
let content = match self.s3_client.get_object(&metadata_file.key).await {
726+
Ok(c) => c,
727+
Err(_) => continue,
728+
};
729+
730+
let metadata: Value = match serde_json::from_slice(&content) {
731+
Ok(m) => m,
732+
Err(_) => continue,
733+
};
703734

704735
// Check for schema changes in metadata
705736
if let Some(schema) = metadata.get("schema") {
@@ -930,8 +961,16 @@ impl IcebergAnalyzer {
930961

931962
// Analyze metadata files for time travel storage
932963
for metadata_file in metadata_files {
933-
let content = self.s3_client.get_object(&metadata_file.key).await?;
934-
let metadata: Value = serde_json::from_slice(&content)?;
964+
// Best-effort fetch: skip this metadata file on any fetch error (e.g. it was removed mid-analysis — race condition)
965+
let content = match self.s3_client.get_object(&metadata_file.key).await {
966+
Ok(c) => c,
967+
Err(_) => continue,
968+
};
969+
970+
let metadata: Value = match serde_json::from_slice(&content) {
971+
Ok(m) => m,
972+
Err(_) => continue,
973+
};
935974

936975
if let Some(timestamp_ms) = metadata.get("timestamp_ms") {
937976
let ts = timestamp_ms.as_u64().unwrap_or(0);
@@ -1089,8 +1128,16 @@ impl IcebergAnalyzer {
10891128

10901129
// Analyze metadata files for constraint information
10911130
for metadata_file in metadata_files {
1092-
let content = self.s3_client.get_object(&metadata_file.key).await?;
1093-
let metadata: Value = serde_json::from_slice(&content)?;
1131+
// Best-effort fetch: skip this metadata file on any fetch error (e.g. it was removed mid-analysis — race condition)
1132+
let content = match self.s3_client.get_object(&metadata_file.key).await {
1133+
Ok(c) => c,
1134+
Err(_) => continue,
1135+
};
1136+
1137+
let metadata: Value = match serde_json::from_slice(&content) {
1138+
Ok(m) => m,
1139+
Err(_) => continue,
1140+
};
10941141

10951142
if let Some(schema) = metadata.get("schema") {
10961143
let constraints = self.extract_iceberg_constraints_from_schema(schema);
@@ -1350,8 +1397,16 @@ impl IcebergAnalyzer {
13501397
) -> Result<(bool, Vec<String>)> {
13511398
// Look for sort order information that could benefit from Z-ordering
13521399
for metadata_file in metadata_files {
1353-
let content = self.s3_client.get_object(&metadata_file.key).await?;
1354-
let metadata: Value = serde_json::from_slice(&content)?;
1400+
// Best-effort fetch: skip this metadata file on any fetch error (e.g. it was removed mid-analysis — race condition)
1401+
let content = match self.s3_client.get_object(&metadata_file.key).await {
1402+
Ok(c) => c,
1403+
Err(_) => continue,
1404+
};
1405+
1406+
let metadata: Value = match serde_json::from_slice(&content) {
1407+
Ok(m) => m,
1408+
Err(_) => continue,
1409+
};
13551410

13561411
if let Some(sort_order) = metadata.get("sort-order") {
13571412
if let Some(sort_order_array) = sort_order.as_array() {

0 commit comments

Comments
 (0)