From 8c843be144120f1d0d6a58eafd7d4e6a0a0101c7 Mon Sep 17 00:00:00 2001 From: Memi Lavi Date: Sat, 14 Mar 2026 17:34:12 +0200 Subject: [PATCH 1/5] added local venv --- .gitignore | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index eafe382..bebbffd 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,11 @@ assets/cloudwatch-dashboard.rendered.json samconfig.toml .aws-sam .env.local.json -events/my.event.json \ No newline at end of file +events/my.event.json + +# Local Demo +local-demo/venv/ +__pycache__/ +*.pyc + +./ \ No newline at end of file From ee123ee942739458d3f9e0d13d096d2ee8306b0a Mon Sep 17 00:00:00 2001 From: Memi Lavi Date: Sat, 14 Mar 2026 17:35:22 +0200 Subject: [PATCH 2/5] added local metrics exporter --- Dockerfile | 12 ++ local-demo/docker-compose.yml | 114 +++++++++++++++++ .../grafana/dashboards/iceberg-dashboard.json | 37 ++++++ .../provisioning/dashboards/dashboard.yml | 11 ++ .../provisioning/datasources/prometheus.yml | 10 ++ local-demo/prometheus/prometheus.yml | 7 ++ local-demo/trino/iceberg.properties | 9 ++ scripts/test_data_generator.py | 73 +++++++++++ src/main.py | 24 ++++ src/monitor.py | 118 ++++++++++++++++++ src/requirements.txt | 4 + 11 files changed, 419 insertions(+) create mode 100644 Dockerfile create mode 100644 local-demo/docker-compose.yml create mode 100644 local-demo/grafana/dashboards/iceberg-dashboard.json create mode 100644 local-demo/grafana/provisioning/dashboards/dashboard.yml create mode 100644 local-demo/grafana/provisioning/datasources/prometheus.yml create mode 100644 local-demo/prometheus/prometheus.yml create mode 100644 local-demo/trino/iceberg.properties create mode 100644 scripts/test_data_generator.py create mode 100644 src/main.py create mode 100644 src/monitor.py create mode 100644 src/requirements.txt diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6940459 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.10-slim + 
+WORKDIR /app + +COPY src/requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY src/ /app/src/ + +ENV PYTHONPATH=/app + +CMD ["python", "src/main.py"] diff --git a/local-demo/docker-compose.yml b/local-demo/docker-compose.yml new file mode 100644 index 0000000..bedbb41 --- /dev/null +++ b/local-demo/docker-compose.yml @@ -0,0 +1,114 @@ +version: '3.8' + +services: + minio: + image: minio/minio:RELEASE.2023-11-20T22-40-07Z + container_name: minio + environment: + - MINIO_ROOT_USER=minio + - MINIO_ROOT_PASSWORD=minio123 + ports: + - "9000:9000" + - "9001:9001" + command: ["server", "/data", "--console-address", ":9001"] + + mc: + image: minio/mc:RELEASE.2023-11-20T16-30-59Z + container_name: mc + depends_on: + - minio + entrypoint: > + /bin/sh -c " + /usr/bin/mc alias set myminio http://minio:9000 minio minio123; + /usr/bin/mc mb myminio/iceberg-catalog; + /usr/bin/mc policy set public myminio/iceberg-catalog; + exit 0; + " + + postgres-iceberg: + hostname: postgres-iceberg + ports: + - 5433:5432 + image: postgres:16 + container_name: postgres-iceberg + environment: + POSTGRES_USER: iceberg + POSTGRES_PASSWORD: iceberg + POSTGRES_DB: iceberg + healthcheck: + test: ["CMD-SHELL", "pg_isready", "-U", "iceberg", "-d", "iceberg"] + interval: 30s + timeout: 60s + retries: 5 + + iceberg-rest-catalog: + container_name: iceberg_rest_catalog + hostname: iceberg-rest-catalog + image: magnivmirer/iceberg-hdfs-rest-adapter:1.0.0 + ports: + - 8181:8181 + environment: + CATALOG_CATALOG__IMPL: org.apache.iceberg.jdbc.JdbcCatalog + CATALOG_JDBC_USER: iceberg + CATALOG_JDBC_PASSWORD: iceberg + CATALOG_URI: jdbc:postgresql://postgres-iceberg:5432/iceberg + CATALOG_IO__IMPL: org.apache.iceberg.aws.s3.S3FileIO + CATALOG_S3_ENDPOINT: http://minio:9000 + CATALOG_WAREHOUSE: s3://iceberg-catalog/ + AWS_REGION: us-east-1 + AWS_ACCESS_KEY_ID: minio + AWS_SECRET_ACCESS_KEY: minio123 + CATALOG_S3_PATH__STYLE__ACCESS: "true" + depends_on: + postgres-iceberg: + condition: 
service_healthy + minio: + condition: service_started + + trino: + image: trinodb/trino:435 + container_name: trino + ports: + - "8080:8080" + volumes: + - ./trino/iceberg.properties:/etc/trino/catalog/iceberg.properties + depends_on: + - iceberg-rest-catalog + - minio + + prometheus: + image: prom/prometheus:v2.48.0 + container_name: prometheus + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + ports: + - "9090:9090" + + grafana: + image: grafana/grafana:10.2.2 + container_name: grafana + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + ports: + - "3000:3000" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning + - ./grafana/dashboards:/var/lib/grafana/dashboards + + monitor-app: + build: + context: ../ + dockerfile: Dockerfile + container_name: monitor-app + environment: + - CATALOG_URI=http://iceberg-rest-catalog:8181/ + - S3_ENDPOINT=http://minio:9000 + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=minio123 + - AWS_REGION=us-east-1 + - POLL_INTERVAL=30 + ports: + - "8000:8000" + depends_on: + - iceberg-rest-catalog diff --git a/local-demo/grafana/dashboards/iceberg-dashboard.json b/local-demo/grafana/dashboards/iceberg-dashboard.json new file mode 100644 index 0000000..a0ad098 --- /dev/null +++ b/local-demo/grafana/dashboards/iceberg-dashboard.json @@ -0,0 +1,37 @@ +{ + "title": "Iceberg Table Metrics", + "uid": "iceberg_metrics", + "refresh": "10s", + "schemaVersion": 38, + "timezone": "browser", + "panels": [ + { + "type": "timeseries", + "title": "Total Records", + "targets": [{"expr": "iceberg_snapshot_total_records", "refId": "A"}], + "gridPos": {"x": 0, "y": 0, "w": 12, "h": 8}, + "datasource": {"uid": "Prometheus"} + }, + { + "type": "timeseries", + "title": "Total Data Files", + "targets": [{"expr": "iceberg_snapshot_total_data_files", "refId": "A"}], + "gridPos": {"x": 12, "y": 0, "w": 12, "h": 8}, + "datasource": {"uid": "Prometheus"} + }, + { + "type": "timeseries", + 
"title": "Added Data Files", + "targets": [{"expr": "iceberg_snapshot_added_data_files", "refId": "A"}], + "gridPos": {"x": 0, "y": 8, "w": 12, "h": 8}, + "datasource": {"uid": "Prometheus"} + }, + { + "type": "timeseries", + "title": "Average File Size (Bytes)", + "targets": [{"expr": "iceberg_files_avg_file_size", "refId": "A"}], + "gridPos": {"x": 12, "y": 8, "w": 12, "h": 8}, + "datasource": {"uid": "Prometheus"} + } + ] +} diff --git a/local-demo/grafana/provisioning/dashboards/dashboard.yml b/local-demo/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000..6421339 --- /dev/null +++ b/local-demo/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: + - name: 'Dashboards' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + options: + path: /var/lib/grafana/dashboards diff --git a/local-demo/grafana/provisioning/datasources/prometheus.yml b/local-demo/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..9e3c7b4 --- /dev/null +++ b/local-demo/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + uid: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false diff --git a/local-demo/prometheus/prometheus.yml b/local-demo/prometheus/prometheus.yml new file mode 100644 index 0000000..2d64fea --- /dev/null +++ b/local-demo/prometheus/prometheus.yml @@ -0,0 +1,7 @@ +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'monitor-app' + static_configs: + - targets: ['monitor-app:8000'] diff --git a/local-demo/trino/iceberg.properties b/local-demo/trino/iceberg.properties new file mode 100644 index 0000000..b12dc6d --- /dev/null +++ b/local-demo/trino/iceberg.properties @@ -0,0 +1,9 @@ +connector.name=iceberg +iceberg.catalog.type=rest +iceberg.rest-catalog.uri=http://iceberg-rest-catalog:8181/ 
+fs.native-s3.enabled=true +s3.endpoint=http://minio:9000 +s3.region=us-east-1 +s3.aws-access-key=minio +s3.aws-secret-key=minio123 +s3.path-style-access=true diff --git a/scripts/test_data_generator.py b/scripts/test_data_generator.py new file mode 100644 index 0000000..59a470a --- /dev/null +++ b/scripts/test_data_generator.py @@ -0,0 +1,73 @@ +import os +import time +import logging +import pyarrow as pa +from pyiceberg.catalog import load_catalog + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +def main(): + catalog_uri = os.getenv('CATALOG_URI', 'http://localhost:8181/') + s3_endpoint = os.getenv('S3_ENDPOINT', 'http://localhost:9000') + aws_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'minio') + aws_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'minio123') + + logger.info("Connecting to Iceberg Rest Catalog...") + catalog = load_catalog( + "default", + **{ + "type": "rest", + "uri": catalog_uri, + "s3.endpoint": s3_endpoint, + "s3.access-key-id": aws_access_key, + "s3.secret-access-key": aws_secret_key, + } + ) + + try: + catalog.create_namespace("demo") + logger.info("Created namespace 'demo'") + except Exception: + logger.info("Namespace 'demo' already exists") + + schema = pa.schema([ + ('id', pa.int64()), + ('data', pa.string()) + ]) + + try: + table = catalog.create_table("demo.events", schema=schema) + logger.info("Created table 'demo.events'") + except Exception: + table = catalog.load_table("demo.events") + logger.info("Loaded table 'demo.events'") + + max_iterations = int(os.getenv('MAX_ITERATIONS', '10')) + logger.info(f"Starting data generation loop for {max_iterations} iterations. 
Press Ctrl+C to exit early.") + try: + for iteration in range(max_iterations): + i = iteration * 3 + # Create a small pyarrow table + df = pa.Table.from_arrays( + [ + [i, i+1, i+2], + [f"data_{i}", f"data_{i+1}", f"data_{i+2}"] + ], + schema=schema + ) + table.append(df) + logger.info(f"Appended 3 records to demo.events. Total iterations: {iteration + 1}/{max_iterations}") + + # Delete some old records occasionally to show delete metrics (if supported by Python append) + # PyIceberg currently has limited delete support, so we stick to appends to show activity + + if iteration < max_iterations - 1: + time.sleep(15) + + logger.info("Data generation completed successfully.") + except KeyboardInterrupt: + logger.info("Data generation stopped early by user.") + +if __name__ == "__main__": + main() diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..5a141ab --- /dev/null +++ b/src/main.py @@ -0,0 +1,24 @@ +import logging +import os +import time + +# Configure logging before importing monitor to ensure all loggers inherit settings +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s') +logger = logging.getLogger(__name__) + +from monitor import start_server, update_metrics + +if __name__ == '__main__': + port = int(os.getenv('METRICS_PORT', '8000')) + poll_interval = int(os.getenv('POLL_INTERVAL', '30')) + + start_server(port) + + while True: + logger.info("Polling Iceberg metrics...") + try: + update_metrics() + except Exception as e: + logger.error(f"Top-level error in update_metrics: {e}") + logger.info("Finished polling.") + time.sleep(poll_interval) diff --git a/src/monitor.py b/src/monitor.py new file mode 100644 index 0000000..5ab8fbf --- /dev/null +++ b/src/monitor.py @@ -0,0 +1,118 @@ +import os +import logging +import pyiceberg +from prometheus_client import start_http_server, Gauge +from pyiceberg.catalog import load_catalog + +logger = logging.getLogger(__name__) + +# Define Snapshot 
Metrics +SNAPSHOT_METRICS = { + 'added_data_files': Gauge('iceberg_snapshot_added_data_files', 'Added data files', ['table_name']), + 'added_records': Gauge('iceberg_snapshot_added_records', 'Added records', ['table_name']), + 'total_records': Gauge('iceberg_snapshot_total_records', 'Total records', ['table_name']), + 'total_data_files': Gauge('iceberg_snapshot_total_data_files', 'Total data files', ['table_name']), + 'total_delete_files': Gauge('iceberg_snapshot_total_delete_files', 'Total delete files', ['table_name']), + 'added_files_size': Gauge('iceberg_snapshot_added_files_size', 'Added files size', ['table_name']), + 'total_files_size': Gauge('iceberg_snapshot_total_files_size', 'Total files size', ['table_name']), + 'added_position_deletes': Gauge('iceberg_snapshot_added_position_deletes', 'Added position deletes', ['table_name']) +} + +# Define File Metrics +FILE_METRICS = { + 'avg_record_count': Gauge('iceberg_files_avg_record_count', 'Avg record count in files', ['table_name']), + 'max_record_count': Gauge('iceberg_files_max_record_count', 'Max record count in files', ['table_name']), + 'min_record_count': Gauge('iceberg_files_min_record_count', 'Min record count in files', ['table_name']), + 'avg_file_size': Gauge('iceberg_files_avg_file_size', 'Avg file size', ['table_name']), + 'max_file_size': Gauge('iceberg_files_max_file_size', 'Max file size', ['table_name']), + 'min_file_size': Gauge('iceberg_files_min_file_size', 'Min file size', ['table_name']), +} + +def safe_float(val, default=0.0): + if val is None: + return default + try: + return float(val) + except (ValueError, TypeError): + return default + +def start_server(port): + start_http_server(port) + logger.info(f"Prometheus metrics server started on port {port}") + logger.info(f"PyIceberg version: {pyiceberg.__version__}") + +def update_metrics(): + catalog_uri = os.getenv('CATALOG_URI', 'http://localhost:8181/') + s3_endpoint = os.getenv('S3_ENDPOINT', 'http://localhost:9000') + aws_access_key 
= os.getenv('AWS_ACCESS_KEY_ID', 'minio') + aws_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'minio123') + + try: + catalog = load_catalog( + "default", + **{ + "type": "rest", + "uri": catalog_uri, + "s3.endpoint": s3_endpoint, + "s3.access-key-id": aws_access_key, + "s3.secret-access-key": aws_secret_key, + "s3.path-style-access": "true" + } + ) + + namespaces = catalog.list_namespaces() + logger.info(f"Found namespaces: {namespaces}") + + for namespace_tuple in namespaces: + namespace = namespace_tuple[0] if isinstance(namespace_tuple, tuple) else namespace_tuple + if namespace in ['system', 'information_schema']: + continue + + tables = catalog.list_tables(namespace) + logger.info(f"Found {len(tables)} tables in namespace {namespace}") + + for table_identifier in tables: + if isinstance(table_identifier, tuple): + table_name = ".".join(table_identifier) + else: + table_name = table_identifier + + logger.info(f"Processing table {table_name}") + + table = catalog.load_table(table_identifier) + logger.info(f"Loaded table {table_name}") + + # Snapshot metrics + snapshot = table.current_snapshot() + if snapshot and snapshot.summary: + logger.info(f"Updating snapshot metrics for {table_name}") + for metric_key, gauge in SNAPSHOT_METRICS.items(): + summary_key = metric_key.replace('_', '-') + val = snapshot.summary.get(summary_key, 0) + gauge.labels(table_name=table_name).set(safe_float(val)) + else: + logger.info(f"No current snapshot found for {table_name}") + + # File metrics via pyiceberg inspect (req pyiceberg >= 0.6.0) + try: + logger.info(f"Checking inspect for {table_name}") + if hasattr(table, 'inspect'): + logger.info(f"Table {table_name} has inspect. 
Checking for files()") + files_df = table.inspect.files().to_pandas() + if not files_df.empty: + logger.info(f"Updating file metrics for {table_name} ({len(files_df)} files)") + FILE_METRICS['avg_record_count'].labels(table_name=table_name).set(safe_float(files_df['record_count'].mean())) + FILE_METRICS['max_record_count'].labels(table_name=table_name).set(safe_float(files_df['record_count'].max())) + FILE_METRICS['min_record_count'].labels(table_name=table_name).set(safe_float(files_df['record_count'].min())) + FILE_METRICS['avg_file_size'].labels(table_name=table_name).set(safe_float(files_df['file_size_in_bytes'].mean())) + FILE_METRICS['max_file_size'].labels(table_name=table_name).set(safe_float(files_df['file_size_in_bytes'].max())) + FILE_METRICS['min_file_size'].labels(table_name=table_name).set(safe_float(files_df['file_size_in_bytes'].min())) + else: + logger.warning(f"No data files found for table {table_name}") + else: + logger.error(f"Table {table_name} does NOT have inspect attribute. 
PyIceberg version: {pyiceberg.__version__}") + except Exception as e: + logger.error(f"Could not inspect files for {table_name}: {e}") + + except Exception as e: + logger.error(f"Error updating metrics: {e}") diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..958df1e --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,4 @@ +pyiceberg[pyarrow,s3fs]>=0.6.0 +prometheus_client>=0.19.0 +pandas>=2.1.0 +requests>=2.31.0 From 4ae173eaf82bf3a7942c3127b8d16d00381fda4d Mon Sep 17 00:00:00 2001 From: RoeyoOgen Date: Sat, 14 Mar 2026 22:00:55 +0200 Subject: [PATCH 3/5] added more pannels --- .dockerignore | 11 + .../grafana/dashboards/iceberg-dashboard.json | 345 +++++++++++++++++- scripts/test_data_generator.py | 120 ++++-- src/monitor.py | 64 +++- src/requirements.txt | 1 + 5 files changed, 491 insertions(+), 50 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4cae936 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,11 @@ +.git +.venv +src/.venv +src/venv +__pycache__ +*.pyc +.vscode +.DS_Store +*.md +local-demo/prometheus/data +local-demo/grafana/data diff --git a/local-demo/grafana/dashboards/iceberg-dashboard.json b/local-demo/grafana/dashboards/iceberg-dashboard.json index a0ad098..920913c 100644 --- a/local-demo/grafana/dashboards/iceberg-dashboard.json +++ b/local-demo/grafana/dashboards/iceberg-dashboard.json @@ -1,37 +1,348 @@ { - "title": "Iceberg Table Metrics", - "uid": "iceberg_metrics", + "title": "Iceberg Table Metrics - Comprehensive", + "uid": "iceberg_metrics_full", "refresh": "10s", "schemaVersion": 38, "timezone": "browser", "panels": [ + { + "type": "row", + "title": "Table Metrics", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + } + }, + { + "type": "timeseries", + "title": "Data Files vs Delete Files", + "targets": [ + { + "expr": "iceberg_snapshot_total_data_files", + "legendFormat": "Data Files", + "refId": "A" + }, + { + "expr": 
"iceberg_snapshot_total_delete_files", + "legendFormat": "Delete Files", + "refId": "B" + } + ], + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "datasource": { + "uid": "Prometheus" + } + }, { "type": "timeseries", "title": "Total Records", - "targets": [{"expr": "iceberg_snapshot_total_records", "refId": "A"}], - "gridPos": {"x": 0, "y": 0, "w": 12, "h": 8}, - "datasource": {"uid": "Prometheus"} + "targets": [ + { + "expr": "iceberg_snapshot_total_records", + "refId": "A" + } + ], + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 1 + }, + "datasource": { + "uid": "Prometheus" + } }, { "type": "timeseries", - "title": "Total Data Files", - "targets": [{"expr": "iceberg_snapshot_total_data_files", "refId": "A"}], - "gridPos": {"x": 12, "y": 0, "w": 12, "h": 8}, - "datasource": {"uid": "Prometheus"} + "title": "Added Records", + "targets": [ + { + "expr": "iceberg_snapshot_added_records", + "refId": "A" + } + ], + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 1 + }, + "datasource": { + "uid": "Prometheus" + } }, { "type": "timeseries", "title": "Added Data Files", - "targets": [{"expr": "iceberg_snapshot_added_data_files", "refId": "A"}], - "gridPos": {"x": 0, "y": 8, "w": 12, "h": 8}, - "datasource": {"uid": "Prometheus"} + "targets": [ + { + "expr": "iceberg_snapshot_added_data_files", + "refId": "A" + } + ], + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 1 + }, + "datasource": { + "uid": "Prometheus" + } + }, + { + "type": "timeseries", + "title": "Changed Partition Count", + "targets": [ + { + "expr": "iceberg_snapshot_changed_partition_count", + "refId": "A" + } + ], + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 9 + }, + "datasource": { + "uid": "Prometheus" + } + }, + { + "type": "timeseries", + "title": "Maintenance: Files Compacted", + "targets": [ + { + "expr": "iceberg_maintenance_compacted_data_files", + "refId": "A" + } + ], + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 9 + }, + "datasource": { + "uid": "Prometheus" 
+ } }, { "type": "timeseries", - "title": "Average File Size (Bytes)", - "targets": [{"expr": "iceberg_files_avg_file_size", "refId": "A"}], - "gridPos": {"x": 12, "y": 8, "w": 12, "h": 8}, - "datasource": {"uid": "Prometheus"} + "title": "Maintenance: Bytes Compacted", + "targets": [ + { + "expr": "iceberg_maintenance_compacted_files_size", + "refId": "A" + } + ], + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 9 + }, + "datasource": { + "uid": "Prometheus" + } + }, + { + "type": "row", + "title": "Partitions Metrics", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + } + }, + { + "type": "stat", + "title": "Partition Record Distribution (Min / Max / Avg)", + "targets": [ + { + "expr": "iceberg_partitions_min_record_count", + "legendFormat": "Min", + "refId": "A" + }, + { + "expr": "iceberg_partitions_max_record_count", + "legendFormat": "Max", + "refId": "B" + }, + { + "expr": "iceberg_partitions_avg_record_count", + "legendFormat": "Avg", + "refId": "C" + } + ], + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 18 + }, + "datasource": { + "uid": "Prometheus" + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto" + } + }, + { + "type": "stat", + "title": "Partition File Distribution (Min / Max / Avg)", + "targets": [ + { + "expr": "iceberg_partitions_min_file_count", + "legendFormat": "Min", + "refId": "A" + }, + { + "expr": "iceberg_partitions_max_file_count", + "legendFormat": "Max", + "refId": "B" + }, + { + "expr": "iceberg_partitions_avg_file_count", + "legendFormat": "Avg", + "refId": "C" + } + ], + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 18 + }, + "datasource": { + "uid": "Prometheus" + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto" + } + }, + { + "type": "bargauge", + "title": "Top 10 Largest Partitions (Records)", + "targets": [ + { + "expr": "topk(10, iceberg_partition_record_count)", + "legendFormat": "{{partition_name}}", + "refId": "A" + } + ], + 
"gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "datasource": { + "uid": "Prometheus" + } + }, + { + "type": "piechart", + "title": "Record Count by Partition", + "targets": [ + { + "expr": "iceberg_partition_record_count", + "legendFormat": "{{partition_name}}", + "refId": "A" + } + ], + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "datasource": { + "uid": "Prometheus" + } + }, + { + "type": "row", + "title": "Files Metrics", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + } + }, + { + "type": "stat", + "title": "File Record Counts (Min / Max / Avg)", + "targets": [ + { + "expr": "iceberg_files_min_record_count", + "legendFormat": "Min", + "refId": "A" + }, + { + "expr": "iceberg_files_max_record_count", + "legendFormat": "Max", + "refId": "B" + }, + { + "expr": "iceberg_files_avg_record_count", + "legendFormat": "Avg", + "refId": "C" + } + ], + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 33 + }, + "datasource": { + "uid": "Prometheus" + } + }, + { + "type": "stat", + "title": "File Sizes (Min / Max / Avg)", + "targets": [ + { + "expr": "iceberg_files_min_file_size", + "legendFormat": "Min", + "refId": "A" + }, + { + "expr": "iceberg_files_max_file_size", + "legendFormat": "Max", + "refId": "B" + }, + { + "expr": "iceberg_files_avg_file_size", + "legendFormat": "Avg", + "refId": "C" + } + ], + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 33 + }, + "datasource": { + "uid": "Prometheus" + } } ] -} +} \ No newline at end of file diff --git a/scripts/test_data_generator.py b/scripts/test_data_generator.py index 59a470a..681a688 100644 --- a/scripts/test_data_generator.py +++ b/scripts/test_data_generator.py @@ -1,18 +1,37 @@ import os import time import logging +import random import pyarrow as pa from pyiceberg.catalog import load_catalog +from pyiceberg.schema import Schema +from pyiceberg.types import LongType, StringType, NestedField +from pyiceberg.partitioning import PartitionSpec, PartitionField +from 
pyiceberg.transforms import IdentityTransform +from trino.dbapi import connect logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) +def run_trino_query(cursor, query): + logger.info(f"Executing Trino query: {query}") + try: + cursor.execute(query) + return cursor.fetchall() + except Exception as e: + logger.error(f"Trino query failed: {e}") + return None + def main(): catalog_uri = os.getenv('CATALOG_URI', 'http://localhost:8181/') s3_endpoint = os.getenv('S3_ENDPOINT', 'http://localhost:9000') aws_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'minio') aws_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'minio123') + trino_host = os.getenv('TRINO_HOST', 'localhost') + trino_port = int(os.getenv('TRINO_PORT', '8080')) + trino_user = os.getenv('TRINO_USER', 'admin') + logger.info("Connecting to Iceberg Rest Catalog...") catalog = load_catalog( "default", @@ -31,43 +50,98 @@ def main(): except Exception: logger.info("Namespace 'demo' already exists") - schema = pa.schema([ + # Use explicit Iceberg schema to avoid ID mapping issues + iceberg_schema = Schema( + NestedField(field_id=1, name="id", field_type=LongType(), required=False), + NestedField(field_id=2, name="data", field_type=StringType(), required=False), + NestedField(field_id=3, name="category", field_type=StringType(), required=False), + ) + + # Matching Arrow schema for data creation + pa_schema = pa.schema([ ('id', pa.int64()), - ('data', pa.string()) + ('data', pa.string()), + ('category', pa.string()) ]) + partition_spec = PartitionSpec( + PartitionField(source_id=3, field_id=1000, transform=IdentityTransform(), name="category") + ) + try: - table = catalog.create_table("demo.events", schema=schema) - logger.info("Created table 'demo.events'") + catalog.drop_table("demo.events") + logger.info("Dropped existing table 'demo.events' for clean state.") except Exception: - table = catalog.load_table("demo.events") - logger.info("Loaded 
table 'demo.events'") + pass + + try: + table = catalog.create_table("demo.events", schema=iceberg_schema, partition_spec=partition_spec) + logger.info("Created partitioned table 'demo.events'") + except Exception as e: + logger.error(f"Failed to create table: {e}") + return - max_iterations = int(os.getenv('MAX_ITERATIONS', '10')) - logger.info(f"Starting data generation loop for {max_iterations} iterations. Press Ctrl+C to exit early.") + # Connect to Trino for advanced operations + logger.info(f"Connecting to Trino at {trino_host}:{trino_port}...") + try: + trino_conn = connect( + host=trino_host, + port=trino_port, + user=trino_user, + catalog='iceberg', + schema='demo', + ) + trino_cur = trino_conn.cursor() + except Exception as e: + logger.error(f"Failed to connect to Trino: {e}") + return + + max_iterations = int(os.getenv('MAX_ITERATIONS', '20')) + logger.info(f"Starting enhanced data generation loop for {max_iterations} iterations.") + + categories = ['A', 'B', 'C', 'D'] + try: for iteration in range(max_iterations): - i = iteration * 3 - # Create a small pyarrow table - df = pa.Table.from_arrays( - [ - [i, i+1, i+2], - [f"data_{i}", f"data_{i+1}", f"data_{i+2}"] - ], - schema=schema - ) - table.append(df) - logger.info(f"Appended 3 records to demo.events. Total iterations: {iteration + 1}/{max_iterations}") + # 1. Random Appends + batch_size = random.randint(5, 15) + ids = [iteration * 100 + j for j in range(batch_size)] + data = [f"val_{i}" for i in ids] + cats = [random.choice(categories) for _ in range(batch_size)] - # Delete some old records occasionally to show delete metrics (if supported by Python append) - # PyIceberg currently has limited delete support, so we stick to appends to show activity + df = pa.Table.from_arrays([ids, data, cats], schema=pa_schema) + table.append(df) + logger.info(f"Appended {batch_size} records to demo.events across random partitions.") + # 2. 
Occasional Deletes (every 5 iterations) + if iteration > 0 and iteration % 5 == 0: + cat_to_delete = random.choice(categories) + query = f"DELETE FROM iceberg.demo.events WHERE category = '{cat_to_delete}' AND id % 2 = 0" + run_trino_query(trino_cur, query) + logger.info(f"Deleted records from category {cat_to_delete} via Trino.") + + # 3. Occasional Compaction (every 7 iterations) + if iteration > 0 and iteration % 7 == 0: + query = "ALTER TABLE iceberg.demo.events EXECUTE rewrite_data_files" + run_trino_query(trino_cur, query) + logger.info("Executed data file rewriting (compaction) via Trino.") + + # 4. Occasional Snapshot Expiration (every 10 iterations) + if iteration > 0 and iteration % 10 == 0: + # Expire snapshots older than 1 minute for the demo + query = "ALTER TABLE iceberg.demo.events EXECUTE expire_snapshots(retention_threshold => '1m')" + run_trino_query(trino_cur, query) + logger.info("Executed snapshot expiration via Trino.") + if iteration < max_iterations - 1: - time.sleep(15) + time.sleep(10) - logger.info("Data generation completed successfully.") + logger.info("Enhanced data generation completed successfully.") except KeyboardInterrupt: logger.info("Data generation stopped early by user.") + finally: + trino_cur.close() + trino_conn.close() if __name__ == "__main__": main() diff --git a/src/monitor.py b/src/monitor.py index 5ab8fbf..d2e8c11 100644 --- a/src/monitor.py +++ b/src/monitor.py @@ -15,7 +15,14 @@ 'total_delete_files': Gauge('iceberg_snapshot_total_delete_files', 'Total delete files', ['table_name']), 'added_files_size': Gauge('iceberg_snapshot_added_files_size', 'Added files size', ['table_name']), 'total_files_size': Gauge('iceberg_snapshot_total_files_size', 'Total files size', ['table_name']), - 'added_position_deletes': Gauge('iceberg_snapshot_added_position_deletes', 'Added position deletes', ['table_name']) + 'added_position_deletes': Gauge('iceberg_snapshot_added_position_deletes', 'Added position deletes', ['table_name']), + 
'changed_partition_count': Gauge('iceberg_snapshot_changed_partition_count', 'Changed partition count', ['table_name']), +} + +# Define Maintenance Metrics (mapped from summary or simulated for demo) +MAINTENANCE_METRICS = { + 'compacted_data_files': Gauge('iceberg_maintenance_compacted_data_files', 'Number of files compacted', ['table_name']), + 'compacted_files_size': Gauge('iceberg_maintenance_compacted_files_size', 'Size of files compacted', ['table_name']), } # Define File Metrics @@ -28,6 +35,21 @@ 'min_file_size': Gauge('iceberg_files_min_file_size', 'Min file size', ['table_name']), } +# Define Partition Metrics +PARTITION_SUMMARY_METRICS = { + 'min_record_count': Gauge('iceberg_partitions_min_record_count', 'Min record count in partitions', ['table_name']), + 'max_record_count': Gauge('iceberg_partitions_max_record_count', 'Max record count in partitions', ['table_name']), + 'avg_record_count': Gauge('iceberg_partitions_avg_record_count', 'Avg record count in partitions', ['table_name']), + 'min_file_count': Gauge('iceberg_partitions_min_file_count', 'Min file count in partitions', ['table_name']), + 'max_file_count': Gauge('iceberg_partitions_max_file_count', 'Max file count in partitions', ['table_name']), + 'avg_file_count': Gauge('iceberg_partitions_avg_file_count', 'Avg file count in partitions', ['table_name']), +} + +PARTITION_DETAIL_METRICS = { + 'record_count': Gauge('iceberg_partition_record_count', 'Record count per partition', ['table_name', 'partition_name']), + 'file_count': Gauge('iceberg_partition_file_count', 'File count per partition', ['table_name', 'partition_name']), +} + def safe_float(val, default=0.0): if val is None: return default @@ -90,29 +112,51 @@ def update_metrics(): summary_key = metric_key.replace('_', '-') val = snapshot.summary.get(summary_key, 0) gauge.labels(table_name=table_name).set(safe_float(val)) + + # Maintenance metrics from rewrite_data_files + compacted_files = snapshot.summary.get('removed-data-files', 0) + 
MAINTENANCE_METRICS['compacted_data_files'].labels(table_name=table_name).set(safe_float(compacted_files)) + # Note: summary might not have 'removed-files-size', we use added-files-size as proxy if it's a rewrite + if snapshot.summary.get('operation') == 'replace': + MAINTENANCE_METRICS['compacted_files_size'].labels(table_name=table_name).set(safe_float(snapshot.summary.get('added-files-size', 0))) else: logger.info(f"No current snapshot found for {table_name}") - # File metrics via pyiceberg inspect (req pyiceberg >= 0.6.0) + # File and Partition metrics via pyiceberg inspect try: - logger.info(f"Checking inspect for {table_name}") if hasattr(table, 'inspect'): - logger.info(f"Table {table_name} has inspect. Checking for files()") + # Files files_df = table.inspect.files().to_pandas() if not files_df.empty: - logger.info(f"Updating file metrics for {table_name} ({len(files_df)} files)") + logger.info(f"Updating file metrics for {table_name}") FILE_METRICS['avg_record_count'].labels(table_name=table_name).set(safe_float(files_df['record_count'].mean())) FILE_METRICS['max_record_count'].labels(table_name=table_name).set(safe_float(files_df['record_count'].max())) FILE_METRICS['min_record_count'].labels(table_name=table_name).set(safe_float(files_df['record_count'].min())) FILE_METRICS['avg_file_size'].labels(table_name=table_name).set(safe_float(files_df['file_size_in_bytes'].mean())) FILE_METRICS['max_file_size'].labels(table_name=table_name).set(safe_float(files_df['file_size_in_bytes'].max())) FILE_METRICS['min_file_size'].labels(table_name=table_name).set(safe_float(files_df['file_size_in_bytes'].min())) - else: - logger.warning(f"No data files found for table {table_name}") - else: - logger.error(f"Table {table_name} does NOT have inspect attribute. 
PyIceberg version: {pyiceberg.__version__}") + + # Partitions + partitions_df = table.inspect.partitions().to_pandas() + if not partitions_df.empty: + logger.info(f"Updating partition metrics for {table_name}") + PARTITION_SUMMARY_METRICS['avg_record_count'].labels(table_name=table_name).set(safe_float(partitions_df['record_count'].mean())) + PARTITION_SUMMARY_METRICS['max_record_count'].labels(table_name=table_name).set(safe_float(partitions_df['record_count'].max())) + PARTITION_SUMMARY_METRICS['min_record_count'].labels(table_name=table_name).set(safe_float(partitions_df['record_count'].min())) + PARTITION_SUMMARY_METRICS['avg_file_count'].labels(table_name=table_name).set(safe_float(partitions_df['file_count'].mean())) + PARTITION_SUMMARY_METRICS['max_file_count'].labels(table_name=table_name).set(safe_float(partitions_df['file_count'].max())) + PARTITION_SUMMARY_METRICS['min_file_count'].labels(table_name=table_name).set(safe_float(partitions_df['file_count'].min())) + + # Detailed partition metrics (limited to avoid too many series) + for _, row in partitions_df.sort_values('record_count', ascending=False).head(20).iterrows(): + p_name = str(row['partition']) + PARTITION_DETAIL_METRICS['record_count'].labels(table_name=table_name, partition_name=p_name).set(safe_float(row['record_count'])) + PARTITION_DETAIL_METRICS['file_count'].labels(table_name=table_name, partition_name=p_name).set(safe_float(row['file_count'])) except Exception as e: - logger.error(f"Could not inspect files for {table_name}: {e}") + logger.error(f"Could not inspect table {table_name}: {e}") + + except Exception as e: + logger.error(f"Error updating metrics: {e}") except Exception as e: logger.error(f"Error updating metrics: {e}") diff --git a/src/requirements.txt b/src/requirements.txt index 958df1e..0b376cf 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -2,3 +2,4 @@ pyiceberg[pyarrow,s3fs]>=0.6.0 prometheus_client>=0.19.0 pandas>=2.1.0 requests>=2.31.0 +trino>=0.327.0 
From 69707cb69bfb480a103c8d6f0ca1655e509f6e52 Mon Sep 17 00:00:00 2001 From: RoeyoOgen Date: Sun, 15 Mar 2026 06:55:56 +0200 Subject: [PATCH 4/5] added maintainance --- scripts/test_data_generator.py | 42 ++++++++++++++++++++++++++-------- src/monitor.py | 17 ++++++++++---- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/scripts/test_data_generator.py b/scripts/test_data_generator.py index 681a688..d4e29c6 100644 --- a/scripts/test_data_generator.py +++ b/scripts/test_data_generator.py @@ -68,14 +68,14 @@ def main(): PartitionField(source_id=3, field_id=1000, transform=IdentityTransform(), name="category") ) - try: - catalog.drop_table("demo.events") - logger.info("Dropped existing table 'demo.events' for clean state.") - except Exception: - pass + # try: + # catalog.drop_table("demo.events") + # logger.info("Dropped existing table 'demo.events' for clean state.") + # except Exception: + # pass try: - table = catalog.create_table("demo.events", schema=iceberg_schema, partition_spec=partition_spec) + table = catalog.create_table_if_not_exists("demo.events", schema=iceberg_schema, partition_spec=partition_spec) logger.info("Created partitioned table 'demo.events'") except Exception as e: logger.error(f"Failed to create table: {e}") @@ -110,8 +110,14 @@ def main(): cats = [random.choice(categories) for _ in range(batch_size)] df = pa.Table.from_arrays([ids, data, cats], schema=pa_schema) - table.append(df) - logger.info(f"Appended {batch_size} records to demo.events across random partitions.") + try: + table.append(df) + logger.info(f"Appended {batch_size} records to demo.events across random partitions.") + except Exception as e: + if "CommitFailedException" in str(e): + logger.warning(f"Commit conflict detected at iteration {iteration}. Skipping this batch.") + else: + logger.error(f"Append failed: {e}") # 2. 
Occasional Deletes (every 5 iterations) if iteration > 0 and iteration % 5 == 0: @@ -133,8 +139,26 @@ def main(): run_trino_query(trino_cur, query) logger.info("Executed snapshot expiration via Trino.") + # 5. Occasional Orphan File Removal (every 12 iterations) + if iteration > 0 and iteration % 12 == 0: + query = "ALTER TABLE iceberg.demo.events EXECUTE remove_orphan_files(retention_threshold => '1m')" + run_trino_query(trino_cur, query) + logger.info("Executed orphan file removal via Trino.") + + # 6. Occasional Manifest Rewriting (every 15 iterations) + if iteration > 0 and iteration % 15 == 0: + query = "ALTER TABLE iceberg.demo.events EXECUTE rewrite_manifests" + run_trino_query(trino_cur, query) + logger.info("Executed manifest rewriting via Trino.") + + # 7. General Table Optimization (every 18 iterations) + if iteration > 0 and iteration % 18 == 0: + query = "ALTER TABLE iceberg.demo.events EXECUTE optimize" + run_trino_query(trino_cur, query) + logger.info("Executed general table optimization via Trino.") + if iteration < max_iterations - 1: - time.sleep(10) + time.sleep(15) logger.info("Enhanced data generation completed successfully.") except KeyboardInterrupt: diff --git a/src/monitor.py b/src/monitor.py index d2e8c11..4d32711 100644 --- a/src/monitor.py +++ b/src/monitor.py @@ -104,10 +104,16 @@ def update_metrics(): table = catalog.load_table(table_identifier) logger.info(f"Loaded table {table_name}") + # Initialize labels to avoid 'no data' in Grafana + for gauge in SNAPSHOT_METRICS.values(): + gauge.labels(table_name=table_name).set(0) + for gauge in MAINTENANCE_METRICS.values(): + gauge.labels(table_name=table_name).set(0) + # Snapshot metrics snapshot = table.current_snapshot() if snapshot and snapshot.summary: - logger.info(f"Updating snapshot metrics for {table_name}") + logger.info(f"Snapshot summary for {table_name}: {snapshot.summary}") for metric_key, gauge in SNAPSHOT_METRICS.items(): summary_key = metric_key.replace('_', '-') val = 
snapshot.summary.get(summary_key, 0) @@ -116,9 +122,12 @@ def update_metrics(): # Maintenance metrics from rewrite_data_files compacted_files = snapshot.summary.get('removed-data-files', 0) MAINTENANCE_METRICS['compacted_data_files'].labels(table_name=table_name).set(safe_float(compacted_files)) - # Note: summary might not have 'removed-files-size', we use added-files-size as proxy if it's a rewrite - if snapshot.summary.get('operation') == 'replace': - MAINTENANCE_METRICS['compacted_files_size'].labels(table_name=table_name).set(safe_float(snapshot.summary.get('added-files-size', 0))) + + # Check multiple common keys for compaction size + if snapshot.summary.get('operation') in ['replace', 'delete', 'overwrite']: + # removed-files-size is most accurate for "what was compacted/cleaned" + size = snapshot.summary.get('removed-files-size') or snapshot.summary.get('added-files-size') or snapshot.summary.get('total-files-size') or 0 + MAINTENANCE_METRICS['compacted_files_size'].labels(table_name=table_name).set(safe_float(size)) else: logger.info(f"No current snapshot found for {table_name}") From 4763aedda189921361c0dc2963d66a6a0da8ace2 Mon Sep 17 00:00:00 2001 From: RoeyoOgen Date: Sun, 15 Mar 2026 07:29:26 +0200 Subject: [PATCH 5/5] added some more pannales --- chatgpt/config.yaml | 36 + chatgpt/grafan.json | 815 +++++++++ ...alog_prometheus_exporter_implementation.md | 1518 +++++++++++++++++ ...g_rest_catalog_prometheus_exporter_spec.md | 606 +++++++ chatgpt/iceberg_rest_exporter.py | 952 +++++++++++ scripts/test_data_generator.py | 8 +- src/monitor.py | 87 +- 7 files changed, 3986 insertions(+), 36 deletions(-) create mode 100644 chatgpt/config.yaml create mode 100644 chatgpt/grafan.json create mode 100644 chatgpt/iceberg_rest_catalog_prometheus_exporter_implementation.md create mode 100644 chatgpt/iceberg_rest_catalog_prometheus_exporter_spec.md create mode 100644 chatgpt/iceberg_rest_exporter.py diff --git a/chatgpt/config.yaml b/chatgpt/config.yaml new file mode 
100644 index 0000000..a404b95 --- /dev/null +++ b/chatgpt/config.yaml @@ -0,0 +1,36 @@ +host: 0.0.0.0 +port: 9109 +scrape_interval_seconds: 30 +deep_scan_interval_seconds: 600 +request_timeout_seconds: 30 +max_partitions_per_table: 5000 + +catalogs: + - name: prod + uri: https://your-iceberg-rest-catalog.example.com + warehouse: s3://warehouse + token: ${ICEBERG_TOKEN} + +# Choose one mode +# 1) explicit tables (recommended to start) +discovery: + mode: explicit + tables: + - namespace: demo + table: events + - namespace: sales + table: orders + +# 2) namespace scan +# discovery: +# mode: namespace_scan +# namespaces: +# - demo +# - sales + +thresholds: + small_file_bytes: 33554432 # 32MB + target_file_bytes: 134217728 # 128MB + delete_file_ratio: 0.2 + avg_files_per_partition: 20 + small_file_count: 100 \ No newline at end of file diff --git a/chatgpt/grafan.json b/chatgpt/grafan.json new file mode 100644 index 0000000..2e3967f --- /dev/null +++ b/chatgpt/grafan.json @@ -0,0 +1,815 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "type": "stat", + "title": "Current Total Records", + "gridPos": { + "x": 0, + "y": 0, + "w": 6, + "h": 4 + }, + "id": 1, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "orientation": "auto", + "textMode": "value" + }, + "targets": [ + { + "expr": 
"iceberg_snapshot_total_records{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Current Total File Size", + "gridPos": { + "x": 6, + "y": 0, + "w": 6, + "h": 4 + }, + "id": 2, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "orientation": "auto" + }, + "targets": [ + { + "expr": "iceberg_snapshot_total_file_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Current Data Files", + "gridPos": { + "x": 12, + "y": 0, + "w": 6, + "h": 4 + }, + "id": 3, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "targets": [ + { + "expr": "iceberg_snapshot_total_data_files{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Current Delete Files", + "gridPos": { + "x": 18, + "y": 0, + "w": 6, + "h": 4 + }, + "id": 4, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "targets": [ + { + "expr": "iceberg_snapshot_total_delete_files{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Snapshot Age (s)", + "gridPos": { + "x": 0, + "y": 4, + "w": 6, + "h": 4 + }, + "id": 5, + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "targets": [ + { + "expr": "iceberg_table_snapshot_age_seconds{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Active Partitions", + "gridPos": { + "x": 6, + "y": 4, + "w": 6, + "h": 4 + }, + "id": 6, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "targets": [ + { + "expr": "iceberg_table_partition_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Compaction Candidate (0/1)", + "gridPos": { + "x": 12, + "y": 4, + "w": 6, + "h": 4 + }, + "id": 7, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "targets": [ + { + "expr": "iceberg_table_compaction_candidate{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Time Since Last Rewrite (s)", + "gridPos": { + "x": 18, + "y": 4, + "w": 6, + "h": 4 + }, + "id": 8, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "targets": [ + { + "expr": "time() - 
iceberg_rewrite_last_timestamp_seconds{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "timeseries", + "title": "Records Added/Removed (rate)", + "gridPos": { + "x": 0, + "y": 8, + "w": 12, + "h": 8 + }, + "id": 9, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "increase(iceberg_snapshot_added_records_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[5m])", + "legendFormat": "added", + "refId": "A" + }, + { + "expr": "increase(iceberg_snapshot_removed_records_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[5m])", + "legendFormat": "removed", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + } + } + }, + { + "type": "timeseries", + "title": "Bytes Added/Removed (rate)", + "gridPos": { + "x": 12, + "y": 8, + "w": 12, + "h": 8 + }, + "id": 10, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "increase(iceberg_snapshot_added_files_size_bytes_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[5m])", + "legendFormat": "added bytes", + "refId": "A" + }, + { + "expr": "increase(iceberg_snapshot_removed_files_size_bytes_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[5m])", + "legendFormat": "removed bytes", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + } + } + }, + { + "type": "bargauge", + "title": "Snapshot Commits by Operation (sum over range)", + "gridPos": { + "x": 0, + "y": 16, + "w": 11, + "h": 6 + }, + "id": 11, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "sum by 
(operation) (increase(iceberg_snapshot_commits_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__range]))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + } + }, + { + "type": "timeseries", + "title": "Bytes Rewritten (compaction replacement)", + "gridPos": { + "x": 12, + "y": 16, + "w": 8, + "h": 6 + }, + "id": 12, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "increase(iceberg_rewrite_bytes_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[1h])", + "legendFormat": "rewrite bytes", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "decbytes" + }, + "overrides": [] + } + }, + { + "type": "timeseries", + "title": "Files Rewritten (compaction replacement)", + "gridPos": { + "x": 20, + "y": 16, + "w": 4, + "h": 6 + }, + "id": 13, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "increase(iceberg_rewrite_data_files_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[1h])", + "legendFormat": "rewrite files", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + } + }, + { + "type": "timeseries", + "title": "File Size Stats (min/avg/max)", + "gridPos": { + "x": 0, + "y": 22, + "w": 12, + "h": 6 + }, + "id": 14, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "iceberg_files_avg_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "legendFormat": "avg", + "refId": "A" + }, + { + "expr": "iceberg_files_min_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "legendFormat": "min", + "refId": "B" + }, + { + "expr": 
"iceberg_files_max_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "legendFormat": "max", + "refId": "C" + } + ], + "fieldConfig": { + "defaults": { + "unit": "decbytes" + }, + "overrides": [] + } + }, + { + "type": "timeseries", + "title": "Records Per File (min/avg/max)", + "gridPos": { + "x": 12, + "y": 22, + "w": 12, + "h": 6 + }, + "id": 15, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "iceberg_files_avg_record_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "legendFormat": "avg", + "refId": "A" + }, + { + "expr": "iceberg_files_min_record_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "legendFormat": "min", + "refId": "B" + }, + { + "expr": "iceberg_files_max_record_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "legendFormat": "max", + "refId": "C" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + } + }, + { + "type": "timeseries", + "title": "Top 10 Partitions by Records", + "gridPos": { + "x": 0, + "y": 28, + "w": 12, + "h": 6 + }, + "id": 16, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "topk(10, iceberg_partition_record_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"})", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + } + }, + { + "type": "timeseries", + "title": "Top 10 Partitions by File Count", + "gridPos": { + "x": 12, + "y": 28, + "w": 12, + "h": 6 + }, + "id": 17, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "topk(10, iceberg_partition_file_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"})", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + } + }, + { + "type": 
"stat", + "title": "Small Files (<32MB)", + "gridPos": { + "x": 0, + "y": 34, + "w": 8, + "h": 4 + }, + "id": 18, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "targets": [ + { + "expr": "iceberg_files_small_file_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\",threshold_bytes=\"33554432\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Delete File Ratio", + "gridPos": { + "x": 8, + "y": 34, + "w": 8, + "h": 4 + }, + "id": 19, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "targets": [ + { + "expr": "iceberg_files_delete_file_ratio{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A" + } + ] + }, + { + "type": "stat", + "title": "Rewrite Events (count in range)", + "gridPos": { + "x": 16, + "y": 34, + "w": 8, + "h": 4 + }, + "id": 20, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "targets": [ + { + "expr": "increase(iceberg_rewrite_snapshots_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__range])", + "refId": "A" + } + ] + }, + { + "type": "table", + "title": "Partition Size Table", + "gridPos": { + "x": 0, + "y": 38, + "w": 24, + "h": 10 + }, + "id": 21, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "expr": "iceberg_partition_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "refId": "A", + "instant": true, + "format": "table" + } + ], + "options": { + "showHeader": true + }, + "transformations": [ + { + "id": "labelsToFields", + "options": { + "mode": "columns" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + 
"renameByName": { + "Value": "size_bytes" + } + } + } + ] + } + ], + "refresh": "30s", + "schemaVersion": 36, + "style": "dark", + "tags": [ + "iceberg", + "prometheus", + "rest-catalog" + ], + "templating": { + "list": [ + { + "type": "query", + "name": "catalog", + "label": "Catalog", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "query": "label_values(iceberg_snapshot_total_records, catalog)", + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "sort": 1 + }, + { + "type": "query", + "name": "namespace", + "label": "Namespace", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "query": "label_values(iceberg_snapshot_total_records{catalog=~\"$catalog\"}, namespace)", + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "sort": 1 + }, + { + "type": "query", + "name": "table", + "label": "Table", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "query": "label_values(iceberg_snapshot_total_records{catalog=~\"$catalog\",namespace=~\"$namespace\"}, table)", + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "sort": 1 + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Iceberg REST Catalog - Health & Layout", + "uid": "iceberg-rest-catalog-health", + "version": 1 +} \ No newline at end of file diff --git a/chatgpt/iceberg_rest_catalog_prometheus_exporter_implementation.md b/chatgpt/iceberg_rest_catalog_prometheus_exporter_implementation.md new file mode 100644 index 0000000..69baea1 --- /dev/null +++ b/chatgpt/iceberg_rest_catalog_prometheus_exporter_implementation.md @@ -0,0 +1,1518 @@ +# Iceberg REST Catalog Prometheus Exporter – Ready-to-Run Skeleton + Grafana Dashboard + +This document contains: +1. A ready-to-run Python exporter skeleton (single-file, production-oriented starter) +2. 
A complete Grafana dashboard JSON template mapping the Datadog-style widgets to PromQL using the REST-catalog-only metrics + +--- + +## Part 1 — Ready-to-run Python exporter skeleton + +### File: `iceberg_rest_exporter.py` + +```python +#!/usr/bin/env python3 +""" +Iceberg REST Catalog Prometheus Exporter (REST-catalog only) + +Goals: +- Single source of truth: Iceberg REST catalog only +- No Trino, no Glue, no Elasticsearch +- Cheap polling loop + cached deep scan +- Exposes Prometheus metrics via prometheus_client + +This is a production-oriented skeleton, not toy code. +You will still need to adapt a few PyIceberg APIs to your exact version. + +Tested design assumptions: +- Python 3.11+ +- prometheus-client +- pyiceberg +- PyYAML + +Run: + pip install prometheus-client pyiceberg pyyaml + python iceberg_rest_exporter.py --config config.yaml + +Prometheus endpoint: + http://localhost:9109/metrics +""" + +from __future__ import annotations + +import argparse +import logging +import os +import signal +import sys +import threading +import time +from dataclasses import dataclass, field +from typing import Any, Dict, Iterable, List, Optional, Tuple + +import yaml +from prometheus_client import Counter, Gauge, start_http_server, disable_created_metrics + +# NOTE: +# PyIceberg APIs evolve. You may need to adjust imports for your exact version. +# These imports are intentionally conservative and may require small edits. 
+from pyiceberg.catalog import load_catalog + + +# ============================================================ +# Logging +# ============================================================ + +logging.basicConfig( + level=os.getenv("LOG_LEVEL", "INFO"), + format="%(asctime)s %(levelname)s %(name)s - %(message)s", +) +logger = logging.getLogger("iceberg-rest-exporter") + + +# ============================================================ +# Config models +# ============================================================ + +@dataclass +class CatalogConfig: + name: str + uri: str + warehouse: Optional[str] = None + token: Optional[str] = None + credential: Optional[str] = None + extra_properties: Dict[str, str] = field(default_factory=dict) + + +@dataclass +class TableRef: + namespace: str + table: str + + @property + def fqdn(self) -> str: + return f"{self.namespace}.{self.table}" + + +@dataclass +class DiscoveryConfig: + mode: str = "explicit" # explicit | namespace_scan + namespaces: List[str] = field(default_factory=list) + tables: List[TableRef] = field(default_factory=list) + + +@dataclass +class ThresholdsConfig: + small_file_bytes: int = 32 * 1024 * 1024 + target_file_bytes: int = 128 * 1024 * 1024 + delete_file_ratio: float = 0.2 + avg_files_per_partition: int = 20 + small_file_count: int = 100 + + +@dataclass +class ExporterConfig: + host: str = "0.0.0.0" + port: int = 9109 + scrape_interval_seconds: int = 30 + deep_scan_interval_seconds: int = 600 + request_timeout_seconds: int = 30 + max_partitions_per_table: int = 5000 + catalogs: List[CatalogConfig] = field(default_factory=list) + discovery: DiscoveryConfig = field(default_factory=DiscoveryConfig) + thresholds: ThresholdsConfig = field(default_factory=ThresholdsConfig) + + +# ============================================================ +# Metric registry (explicit metric handles) +# ============================================================ + +LABELS = ["catalog", "namespace", "table", "table_name"] 
+PARTITION_LABELS = ["catalog", "namespace", "table", "table_name", "partition"] +OP_LABELS = ["catalog", "namespace", "table", "table_name", "operation"] +THRESHOLD_LABELS = ["catalog", "namespace", "table", "table_name", "threshold_bytes"] + + +class Metrics: + def __init__(self) -> None: + disable_created_metrics() + + # exporter health + self.exporter_last_run_success = Gauge( + "iceberg_exporter_last_run_success", + "Whether the last exporter loop completed successfully (1/0)", + ) + self.exporter_last_run_timestamp = Gauge( + "iceberg_exporter_last_run_timestamp_seconds", + "Unix timestamp of last exporter loop", + ) + self.exporter_loop_duration = Gauge( + "iceberg_exporter_loop_duration_seconds", + "Duration of the last exporter loop", + ) + self.exporter_table_scrape_errors = Counter( + "iceberg_exporter_table_scrape_errors_total", + "Number of per-table scrape errors", + ["catalog", "namespace", "table", "table_name", "stage"], + ) + + # Snapshot state gauges + self.snapshot_total_data_files = Gauge( + "iceberg_snapshot_total_data_files", + "Current total active data files in latest snapshot", + LABELS, + ) + self.snapshot_total_delete_files = Gauge( + "iceberg_snapshot_total_delete_files", + "Current total active delete files in latest snapshot", + LABELS, + ) + self.snapshot_total_records = Gauge( + "iceberg_snapshot_total_records", + "Current total records in latest snapshot", + LABELS, + ) + self.snapshot_total_file_size_bytes = Gauge( + "iceberg_snapshot_total_file_size_bytes", + "Current total active data file size in bytes", + LABELS, + ) + self.snapshot_total_position_delete_records = Gauge( + "iceberg_snapshot_total_position_delete_records", + "Current total position delete records in latest snapshot", + LABELS, + ) + self.snapshot_total_equality_delete_records = Gauge( + "iceberg_snapshot_total_equality_delete_records", + "Current total equality delete records in latest snapshot", + LABELS, + ) + self.table_current_snapshot_id = Gauge( + 
"iceberg_table_current_snapshot_id", + "Current snapshot id", + LABELS, + ) + self.table_current_sequence_number = Gauge( + "iceberg_table_current_sequence_number", + "Current sequence number", + LABELS, + ) + self.table_snapshot_age_seconds = Gauge( + "iceberg_table_snapshot_age_seconds", + "Age of current snapshot in seconds", + LABELS, + ) + self.table_snapshot_count = Gauge( + "iceberg_table_snapshot_count", + "Number of retained snapshots", + LABELS, + ) + self.table_partition_count = Gauge( + "iceberg_table_partition_count", + "Number of active partitions", + LABELS, + ) + + # Snapshot delta counters + self.snapshot_added_records_total = Counter( + "iceberg_snapshot_added_records_total", + "Cumulative added records across observed snapshots", + LABELS, + ) + self.snapshot_added_data_files_total = Counter( + "iceberg_snapshot_added_data_files_total", + "Cumulative added data files across observed snapshots", + LABELS, + ) + self.snapshot_added_delete_files_total = Counter( + "iceberg_snapshot_added_delete_files_total", + "Cumulative added delete files across observed snapshots", + LABELS, + ) + self.snapshot_added_files_size_bytes_total = Counter( + "iceberg_snapshot_added_files_size_bytes_total", + "Cumulative added file bytes across observed snapshots", + LABELS, + ) + self.snapshot_removed_records_total = Counter( + "iceberg_snapshot_removed_records_total", + "Cumulative removed records across observed snapshots", + LABELS, + ) + self.snapshot_removed_data_files_total = Counter( + "iceberg_snapshot_removed_data_files_total", + "Cumulative removed data files across observed snapshots", + LABELS, + ) + self.snapshot_removed_delete_files_total = Counter( + "iceberg_snapshot_removed_delete_files_total", + "Cumulative removed delete files across observed snapshots", + LABELS, + ) + self.snapshot_removed_files_size_bytes_total = Counter( + "iceberg_snapshot_removed_files_size_bytes_total", + "Cumulative removed file bytes across observed snapshots", + LABELS, + ) 
+ self.snapshot_changed_partition_count = Gauge( + "iceberg_snapshot_changed_partition_count", + "Partitions changed in latest observed snapshot", + LABELS, + ) + self.snapshot_commits_total = Counter( + "iceberg_snapshot_commits_total", + "Observed snapshot commits by operation", + OP_LABELS, + ) + + # Partition gauges + self.partition_record_count = Gauge( + "iceberg_partition_record_count", + "Record count by active partition", + PARTITION_LABELS, + ) + self.partition_file_count = Gauge( + "iceberg_partition_file_count", + "Active data file count by partition", + PARTITION_LABELS, + ) + self.partition_size_bytes = Gauge( + "iceberg_partition_size_bytes", + "Active data size by partition in bytes", + PARTITION_LABELS, + ) + + # Partition aggregate gauges + self.partitions_min_record_count = Gauge("iceberg_partitions_min_record_count", "Min records across partitions", LABELS) + self.partitions_max_record_count = Gauge("iceberg_partitions_max_record_count", "Max records across partitions", LABELS) + self.partitions_avg_record_count = Gauge("iceberg_partitions_avg_record_count", "Avg records across partitions", LABELS) + self.partitions_min_file_count = Gauge("iceberg_partitions_min_file_count", "Min file count across partitions", LABELS) + self.partitions_max_file_count = Gauge("iceberg_partitions_max_file_count", "Max file count across partitions", LABELS) + self.partitions_avg_file_count = Gauge("iceberg_partitions_avg_file_count", "Avg file count across partitions", LABELS) + self.partitions_min_size_bytes = Gauge("iceberg_partitions_min_size_bytes", "Min bytes across partitions", LABELS) + self.partitions_max_size_bytes = Gauge("iceberg_partitions_max_size_bytes", "Max bytes across partitions", LABELS) + self.partitions_avg_size_bytes = Gauge("iceberg_partitions_avg_size_bytes", "Avg bytes across partitions", LABELS) + + # File aggregate gauges + self.files_min_record_count = Gauge("iceberg_files_min_record_count", "Min records per active data file", LABELS) + 
self.files_max_record_count = Gauge("iceberg_files_max_record_count", "Max records per active data file", LABELS) + self.files_avg_record_count = Gauge("iceberg_files_avg_record_count", "Avg records per active data file", LABELS) + self.files_min_size_bytes = Gauge("iceberg_files_min_size_bytes", "Min size of active data files in bytes", LABELS) + self.files_max_size_bytes = Gauge("iceberg_files_max_size_bytes", "Max size of active data files in bytes", LABELS) + self.files_avg_size_bytes = Gauge("iceberg_files_avg_size_bytes", "Avg size of active data files in bytes", LABELS) + self.files_small_file_count = Gauge( + "iceberg_files_small_file_count", + "Count of active data files below threshold_bytes", + THRESHOLD_LABELS, + ) + self.files_large_file_count = Gauge( + "iceberg_files_large_file_count", + "Count of active data files above threshold_bytes", + THRESHOLD_LABELS, + ) + self.files_avg_rows_per_file = Gauge( + "iceberg_files_avg_rows_per_file", + "Average rows per active data file", + LABELS, + ) + self.files_delete_file_ratio = Gauge( + "iceberg_files_delete_file_ratio", + "Delete files / data files ratio", + LABELS, + ) + + # Rewrite / compaction inferred metrics + self.rewrite_data_files_total = Counter( + "iceberg_rewrite_data_files_total", + "Estimated rewritten data files across rewrite-like snapshots", + LABELS, + ) + self.rewrite_bytes_total = Counter( + "iceberg_rewrite_bytes_total", + "Estimated rewritten bytes across rewrite-like snapshots", + LABELS, + ) + self.rewrite_snapshots_total = Counter( + "iceberg_rewrite_snapshots_total", + "Observed rewrite-like snapshots", + LABELS, + ) + self.rewrite_last_timestamp_seconds = Gauge( + "iceberg_rewrite_last_timestamp_seconds", + "Unix timestamp of latest rewrite-like snapshot", + LABELS, + ) + self.rewrite_removed_small_files_total = Counter( + "iceberg_rewrite_removed_small_files_total", + "Estimated small files removed by rewrite-like snapshots", + LABELS, + ) + self.rewrite_post_avg_file_size_bytes 
class SnapshotState:
    """Tracks last-seen snapshot ids so delta counters increment once per snapshot.

    State is in-memory only: it resets on pod restart, which is acceptable for
    Prometheus counters. Use increase()/rate() in PromQL.
    """

    # Cap on remembered ids per table, and how many to keep after trimming.
    _MAX_SEEN = 5000
    _KEEP_AFTER_TRIM = 1000

    def __init__(self) -> None:
        self._lock = threading.Lock()
        # dict-of-dicts: inner dict values are unused; the keys act as an
        # insertion-ordered set so trimming can evict the OLDEST ids first.
        self._seen_snapshot_ids: Dict[Tuple[str, str, str], Dict[int, None]] = {}

    def mark_if_new(self, catalog: str, namespace: str, table: str, snapshot_id: int) -> bool:
        """Return True exactly once per (table, snapshot_id); False on repeats."""
        key = (catalog, namespace, table)
        with self._lock:
            seen = self._seen_snapshot_ids.setdefault(key, {})
            if snapshot_id in seen:
                return False
            seen[snapshot_id] = None
            # Bound memory growth. Evict oldest-first (dicts preserve insertion
            # order); slicing a set, as before, has arbitrary order and could
            # drop recently-seen ids, double-counting their deltas.
            if len(seen) > self._MAX_SEEN:
                for old_id in list(seen)[: len(seen) - self._KEEP_AFTER_TRIM]:
                    del seen[old_id]
            return True


# ============================================================
# Helpers
# ============================================================

def safe_int(v: Any, default: int = 0) -> int:
    """Coerce *v* to int, returning *default* on any conversion failure."""
    try:
        return int(v)
    except (TypeError, ValueError, OverflowError):
        return default


def safe_float(v: Any, default: float = 0.0) -> float:
    """Coerce *v* to float, returning *default* on any conversion failure."""
    try:
        return float(v)
    except (TypeError, ValueError, OverflowError):
        return default


def now_ts() -> int:
    """Current Unix time, whole seconds."""
    return int(time.time())


def labels(catalog: str, namespace: str, table: str) -> Tuple[str, str, str, str]:
    """Standard label tuple (catalog, namespace, table, fqdn='namespace.table')."""
    return (catalog, namespace, table, f"{namespace}.{table}")


def avg(values: List[float]) -> float:
    """Arithmetic mean, or 0.0 for an empty list."""
    return sum(values) / len(values) if values else 0.0
def min_or_zero(values: List[float]) -> float:
    """Smallest element of *values*, or 0.0 when the list is empty."""
    if not values:
        return 0.0
    return min(values)


def max_or_zero(values: List[float]) -> float:
    """Largest element of *values*, or 0.0 when the list is empty."""
    if not values:
        return 0.0
    return max(values)


def normalize_operation(op: Optional[str]) -> str:
    """Map a raw snapshot operation string onto a fixed label vocabulary.

    Lower-cases, trims, and converts dashes to underscores; anything outside
    the known Iceberg operations collapses to "unknown" to keep label
    cardinality bounded.
    """
    if not op:
        return "unknown"
    canonical = str(op).strip().lower().replace("-", "_")
    if canonical in ("append", "overwrite", "replace", "delete", "rewrite", "fast_append"):
        return canonical
    return "unknown"


# ============================================================
# PyIceberg adapter layer (version-tolerant-ish)
# ============================================================

class IcebergAdapter:
    """Thin wrapper over a PyIceberg catalog built from a CatalogConfig."""

    def __init__(self, cfg: "CatalogConfig"):
        # Base properties first, then user-supplied extras (extras may
        # deliberately override "uri"), then the well-known optional fields.
        props = {"uri": cfg.uri}
        props.update(cfg.extra_properties)
        for prop_name in ("warehouse", "token", "credential"):
            value = getattr(cfg, prop_name)
            if value:
                props[prop_name] = value

        self.catalog = load_catalog(cfg.name, **props)
        self.name = cfg.name

    def list_tables(self, namespace: str) -> "List[TableRef]":
        """Return a TableRef for each table in *namespace*.

        PyIceberg versions differ in what list_tables yields — tuples like
        (('ns',), 'table') / ('ns', 'table') or identifier-ish objects — so
        the table name is extracted defensively.
        """
        refs = []
        for entry in self.catalog.list_tables(namespace):
            if isinstance(entry, tuple):
                if len(entry) == 2 and isinstance(entry[1], str):
                    table_name = entry[1]
                else:
                    table_name = str(entry[-1])
            else:
                table_name = str(entry).split(".")[-1]
            refs.append(TableRef(namespace=namespace, table=table_name))
        return refs

    def load_table(self, table_ref: "TableRef"):
        """Load the live table handle via its fully-qualified name."""
        return self.catalog.load_table(table_ref.fqdn)


# ============================================================
# Data extraction contracts
# ============================================================

@dataclass
class SnapshotSummary:
    """One Iceberg snapshot: identity, ordering info, and its raw summary map."""
    snapshot_id: int
    timestamp_ms: int
    sequence_number: int
    operation: str
    summary: Dict[str, Any]


@dataclass
class PartitionStats:
    """Aggregated stats over the active data files of one partition."""
    partition_key: str
    record_count: int
    file_count: int
    size_bytes: int


@dataclass
class DeepScanStats:
    """Result of a full active-file scan of a table."""
    partition_stats: List[PartitionStats]
    file_record_counts: List[int]
    file_sizes: List[int]
    active_partition_count: int
class TableExtractor:
    """
    Isolates PyIceberg table metadata traversal.

    PyIceberg's metadata API differs between versions, so every accessor here
    is defensive (getattr with defaults). You WILL likely need to adapt some
    methods to your exact version.
    """

    def get_snapshots(self, table) -> "List[SnapshotSummary]":
        """Return all retained snapshots, oldest-first by (sequence, ts, id)."""
        snapshots = []

        # Version-flexible access: snapshots hang off table.metadata.
        metadata = getattr(table, "metadata", None)
        raw_snapshots = []
        if metadata is not None:
            raw_snapshots = getattr(metadata, "snapshots", None) or []

        for s in raw_snapshots:
            summary = getattr(s, "summary", None) or {}
            # Operation may live in the summary map or on the snapshot itself.
            operation = normalize_operation(summary.get("operation") or getattr(s, "operation", None))
            snapshots.append(
                SnapshotSummary(
                    snapshot_id=safe_int(getattr(s, "snapshot_id", 0)),
                    timestamp_ms=safe_int(getattr(s, "timestamp_ms", 0)),
                    sequence_number=safe_int(getattr(s, "sequence_number", 0)),
                    operation=operation,
                    summary=summary,
                )
            )

        snapshots.sort(key=lambda x: (x.sequence_number, x.timestamp_ms, x.snapshot_id))
        return snapshots

    def get_current_snapshot(self, table) -> "Optional[SnapshotSummary]":
        """Latest snapshot, or None for a table with no snapshots."""
        snaps = self.get_snapshots(table)
        return snaps[-1] if snaps else None

    def get_current_totals_from_summary(self, current: "SnapshotSummary") -> Dict[str, int]:
        """Read the running totals Iceberg keeps in every snapshot summary."""
        s = current.summary or {}
        return {
            "total_data_files": safe_int(s.get("total-data-files")),
            "total_delete_files": safe_int(s.get("total-delete-files")),
            "total_records": safe_int(s.get("total-records")),
            "total_file_size_bytes": safe_int(s.get("total-files-size")),
            "total_position_delete_records": safe_int(s.get("total-position-deletes")),
            "total_equality_delete_records": safe_int(s.get("total-equality-deletes")),
        }

    def get_snapshot_delta_from_summary(self, snap: "SnapshotSummary") -> Dict[str, int]:
        """Read per-commit added/removed counts from a snapshot summary."""
        s = snap.summary or {}
        return {
            "added_records": safe_int(s.get("added-records")),
            "added_data_files": safe_int(s.get("added-data-files")),
            "added_delete_files": safe_int(s.get("added-delete-files")),
            "added_files_size_bytes": safe_int(s.get("added-files-size")),
            "removed_records": safe_int(s.get("removed-records")),
            "removed_data_files": safe_int(s.get("removed-data-files")),
            "removed_delete_files": safe_int(s.get("removed-delete-files")),
            "removed_files_size_bytes": safe_int(s.get("removed-files-size")),
            "changed_partition_count": safe_int(s.get("changed-partition-count")),
        }

    def deep_scan(self, table, max_partitions_per_table: int) -> "DeepScanStats":
        """Aggregate per-file and per-partition stats over ACTIVE data files.

        Degrades gracefully to empty stats when the active files cannot be
        iterated (see _iter_current_data_files_best_effort).
        """
        partition_map = {}
        file_record_counts = []
        file_sizes = []

        for data_file in self._iter_current_data_files_best_effort(table):
            record_count = safe_int(getattr(data_file, "record_count", 0))
            file_size = safe_int(getattr(data_file, "file_size_in_bytes", 0))

            file_record_counts.append(record_count)
            file_sizes.append(file_size)

            partition_key = self._partition_to_string(getattr(data_file, "partition", None))
            # Cap label cardinality: once the partition budget is spent, every
            # NEW partition is folded into a single overflow bucket.
            if partition_key not in partition_map and len(partition_map) >= max_partitions_per_table:
                partition_key = "__overflow__"

            ps = partition_map.get(partition_key)
            if ps is None:
                ps = PartitionStats(
                    partition_key=partition_key,
                    record_count=0,
                    file_count=0,
                    size_bytes=0,
                )
                partition_map[partition_key] = ps

            ps.record_count += record_count
            ps.file_count += 1
            ps.size_bytes += file_size

        return DeepScanStats(
            partition_stats=list(partition_map.values()),
            file_record_counts=file_record_counts,
            file_sizes=file_sizes,
            active_partition_count=len(partition_map),
        )

    def _iter_current_data_files_best_effort(self, table) -> Iterable[Any]:
        """Yield the table's active data files, tolerating API differences.

        Possible strategies by PyIceberg version:
        - table.scan().plan_files() -> file scan tasks (tried here)
        - metadata.current_snapshot + manifests traversal
        - table.inspect.files() (if available in your version)
        """
        try:
            scan = table.scan()
            tasks = scan.plan_files()
            for task in tasks:
                # common patterns: task.file or task.data_file
                df = getattr(task, "file", None) or getattr(task, "data_file", None)
                if df is not None:
                    yield df
            return
        except Exception:
            # Don't swallow silently — keep the root cause discoverable.
            logger.debug("plan_files() strategy failed; degrading deep scan", exc_info=True)

        logger.warning("Deep scan fallback: unable to iterate current data files for table; returning empty deep stats")
        # Bare return: this is a generator, so any return VALUE is never
        # observable by callers — it just ends the iteration.
        return

    def _partition_to_string(self, part: Any) -> str:
        """Render a partition value as a stable 'k=v,...' label string."""
        if part is None:
            return "__unpartitioned__"
        try:
            if hasattr(part, "__dict__"):
                items = sorted(part.__dict__.items())
                return ",".join(f"{k}={v}" for k, v in items)
            if isinstance(part, dict):
                items = sorted(part.items())
                return ",".join(f"{k}={v}" for k, v in items)
            return str(part)
        except Exception:
            return "__unknown_partition__"
    def run_forever(self) -> None:
        """Scrape loop: collect every table, publish health gauges, sleep, repeat.

        Runs until stop() sets the event; the wait is interruptible, so
        shutdown does not block for a full scrape interval.
        """
        while not self.stop_event.is_set():
            started = time.time()
            success = 1
            try:
                self.collect_once()
            except Exception:
                logger.exception("Exporter loop failed")
                success = 0
            finally:
                # Health gauges are published even on failure so alerting can
                # key off exporter_last_run_success / _timestamp.
                self.metrics.exporter_last_run_success.set(success)
                self.metrics.exporter_last_run_timestamp.set(now_ts())
                self.metrics.exporter_loop_duration.set(time.time() - started)

            self.stop_event.wait(self.cfg.scrape_interval_seconds)

    def stop(self) -> None:
        """Signal run_forever() to exit after the current iteration."""
        self.stop_event.set()

    def collect_once(self) -> None:
        """One full pass: discover tables per catalog and collect each one.

        Per-table failures are logged and counted; they do not abort the pass.
        """
        for adapter in self.adapters:
            tables = self._discover_tables(adapter)
            for table_ref in tables:
                try:
                    self._collect_table(adapter, table_ref)
                except Exception:
                    logger.exception("Failed to collect table %s.%s", adapter.name, table_ref.fqdn)
                    self.metrics.exporter_table_scrape_errors.labels(
                        adapter.name, table_ref.namespace, table_ref.table, table_ref.fqdn, "collect"
                    ).inc()

    def _discover_tables(self, adapter: IcebergAdapter) -> List[TableRef]:
        """Resolve the table list per the configured discovery mode.

        "explicit" returns the configured refs as-is; "namespace_scan" lists
        each configured namespace (listing errors are logged and skipped).
        """
        d = self.cfg.discovery
        if d.mode == "explicit":
            return d.tables
        if d.mode == "namespace_scan":
            out: List[TableRef] = []
            for ns in d.namespaces:
                try:
                    out.extend(adapter.list_tables(ns))
                except Exception:
                    logger.exception("Failed listing namespace %s in catalog %s", ns, adapter.name)
            return out
        raise ValueError(f"Unsupported discovery mode: {d.mode}")

    def _collect_table(self, adapter: IcebergAdapter, table_ref: TableRef) -> None:
        """Collect all metrics for one table: gauges, per-snapshot counters, deep scan."""
        table = adapter.load_table(table_ref)
        lbl = labels(adapter.name, table_ref.namespace, table_ref.table)

        snapshots = self.extractor.get_snapshots(table)
        current = snapshots[-1] if snapshots else None
        if current is None:
            logger.warning("No snapshots for %s.%s", adapter.name, table_ref.fqdn)
            return

        # 1) current snapshot gauges
        totals = self.extractor.get_current_totals_from_summary(current)
        self.metrics.snapshot_total_data_files.labels(*lbl).set(totals["total_data_files"])
        self.metrics.snapshot_total_delete_files.labels(*lbl).set(totals["total_delete_files"])
        self.metrics.snapshot_total_records.labels(*lbl).set(totals["total_records"])
        self.metrics.snapshot_total_file_size_bytes.labels(*lbl).set(totals["total_file_size_bytes"])
        self.metrics.snapshot_total_position_delete_records.labels(*lbl).set(totals["total_position_delete_records"])
        self.metrics.snapshot_total_equality_delete_records.labels(*lbl).set(totals["total_equality_delete_records"])
        self.metrics.table_current_snapshot_id.labels(*lbl).set(current.snapshot_id)
        self.metrics.table_current_sequence_number.labels(*lbl).set(current.sequence_number)
        # timestamp_ms is millis; clamp at 0 in case of clock skew.
        self.metrics.table_snapshot_age_seconds.labels(*lbl).set(max(0, now_ts() - (current.timestamp_ms // 1000)))
        self.metrics.table_snapshot_count.labels(*lbl).set(len(snapshots))

        # 2) increment counters only for unseen snapshots
        for snap in snapshots:
            if not self.snapshot_state.mark_if_new(adapter.name, table_ref.namespace, table_ref.table, snap.snapshot_id):
                continue

            delta = self.extractor.get_snapshot_delta_from_summary(snap)
            self.metrics.snapshot_added_records_total.labels(*lbl).inc(delta["added_records"])
            self.metrics.snapshot_added_data_files_total.labels(*lbl).inc(delta["added_data_files"])
            self.metrics.snapshot_added_delete_files_total.labels(*lbl).inc(delta["added_delete_files"])
            self.metrics.snapshot_added_files_size_bytes_total.labels(*lbl).inc(delta["added_files_size_bytes"])
            self.metrics.snapshot_removed_records_total.labels(*lbl).inc(delta["removed_records"])
            self.metrics.snapshot_removed_data_files_total.labels(*lbl).inc(delta["removed_data_files"])
            self.metrics.snapshot_removed_delete_files_total.labels(*lbl).inc(delta["removed_delete_files"])
            self.metrics.snapshot_removed_files_size_bytes_total.labels(*lbl).inc(delta["removed_files_size_bytes"])
            self.metrics.snapshot_changed_partition_count.labels(*lbl).set(delta["changed_partition_count"])
            self.metrics.snapshot_commits_total.labels(*lbl, snap.operation).inc()

            # rewrite inference (summary-only heuristic)
            self._maybe_record_rewrite(lbl, snap, delta, totals)

        # 3) deep scan (cached)
        deep = self._get_or_refresh_deep_scan(adapter, table_ref, table)
        self._publish_deep_metrics(lbl, deep, totals)

    def _get_or_refresh_deep_scan(self, adapter: IcebergAdapter, table_ref: TableRef, table) -> DeepScanStats:
        """Return cached deep-scan stats, refreshing at most every deep_scan_interval_seconds.

        The scan itself runs OUTSIDE the lock so one slow table does not
        serialize cache reads for other tables.
        """
        key = (adapter.name, table_ref.namespace, table_ref.table)
        now = time.time()

        with self._deep_cache_lock:
            cached = self._deep_cache.get(key)
            if cached and (now - cached[0]) < self.cfg.deep_scan_interval_seconds:
                return cached[1]

        deep = self.extractor.deep_scan(table, self.cfg.max_partitions_per_table)

        with self._deep_cache_lock:
            self._deep_cache[key] = (now, deep)

        return deep

    def _publish_deep_metrics(self, lbl: Tuple[str, str, str, str], deep: DeepScanStats, totals: Dict[str, int]) -> None:
        """Publish partition/file gauges from a deep scan plus summary-derived ratios.

        NOTE(review): when the deep scan degraded to empty stats, the
        file/partition gauges all read 0 — distinguishable from a genuinely
        empty table only via the summary-based totals.
        """
        # partition count
        self.metrics.table_partition_count.labels(*lbl).set(deep.active_partition_count)

        # per-partition gauges
        for p in deep.partition_stats:
            pl = (*lbl, p.partition_key)
            self.metrics.partition_record_count.labels(*pl).set(p.record_count)
            self.metrics.partition_file_count.labels(*pl).set(p.file_count)
            self.metrics.partition_size_bytes.labels(*pl).set(p.size_bytes)

        # partition aggregates
        part_records = [p.record_count for p in deep.partition_stats]
        part_files = [p.file_count for p in deep.partition_stats]
        part_sizes = [p.size_bytes for p in deep.partition_stats]

        self.metrics.partitions_min_record_count.labels(*lbl).set(min_or_zero(part_records))
        self.metrics.partitions_max_record_count.labels(*lbl).set(max_or_zero(part_records))
        self.metrics.partitions_avg_record_count.labels(*lbl).set(avg(part_records))
        self.metrics.partitions_min_file_count.labels(*lbl).set(min_or_zero(part_files))
        self.metrics.partitions_max_file_count.labels(*lbl).set(max_or_zero(part_files))
        self.metrics.partitions_avg_file_count.labels(*lbl).set(avg(part_files))
        self.metrics.partitions_min_size_bytes.labels(*lbl).set(min_or_zero(part_sizes))
        self.metrics.partitions_max_size_bytes.labels(*lbl).set(max_or_zero(part_sizes))
        self.metrics.partitions_avg_size_bytes.labels(*lbl).set(avg(part_sizes))

        # file aggregates
        fr = deep.file_record_counts
        fs = deep.file_sizes
        self.metrics.files_min_record_count.labels(*lbl).set(min_or_zero(fr))
        self.metrics.files_max_record_count.labels(*lbl).set(max_or_zero(fr))
        self.metrics.files_avg_record_count.labels(*lbl).set(avg(fr))
        self.metrics.files_min_size_bytes.labels(*lbl).set(min_or_zero(fs))
        self.metrics.files_max_size_bytes.labels(*lbl).set(max_or_zero(fs))
        self.metrics.files_avg_size_bytes.labels(*lbl).set(avg(fs))

        # small/large file counts carry the threshold as a label so dashboards
        # survive threshold reconfiguration.
        small = self.cfg.thresholds.small_file_bytes
        target = self.cfg.thresholds.target_file_bytes
        small_count = sum(1 for x in fs if x < small)
        large_count = sum(1 for x in fs if x > target)
        self.metrics.files_small_file_count.labels(*lbl, str(small)).set(small_count)
        self.metrics.files_large_file_count.labels(*lbl, str(target)).set(large_count)

        # ratios come from summary totals (denominator clamped to avoid /0)
        total_records = totals.get("total_records", 0)
        total_data_files = max(totals.get("total_data_files", 0), 1)
        total_delete_files = totals.get("total_delete_files", 0)

        self.metrics.files_avg_rows_per_file.labels(*lbl).set(total_records / total_data_files)
        self.metrics.files_delete_file_ratio.labels(*lbl).set(total_delete_files / total_data_files)

        # compaction candidate
        avg_file_size = avg(fs)
        avg_files_per_partition = avg(part_files)
        is_candidate = (
            (avg_file_size < self.cfg.thresholds.target_file_bytes if fs else False)
            or (small_count > self.cfg.thresholds.small_file_count)
            or ((total_delete_files / total_data_files) > self.cfg.thresholds.delete_file_ratio)
            or (avg_files_per_partition > self.cfg.thresholds.avg_files_per_partition if part_files else False)
        )
        self.metrics.table_compaction_candidate.labels(*lbl).set(1 if is_candidate else 0)

    def _maybe_record_rewrite(
        self,
        lbl: Tuple[str, str, str, str],
        snap: SnapshotSummary,
        delta: Dict[str, int],
        current_totals: Dict[str, int],
    ) -> None:
        """Record rewrite/compaction metrics when *snap* looks like a rewrite.

        Heuristic: either the operation is literally "rewrite", or the commit
        both added and removed data files while the net record change stayed
        within max(1000, 1%) of total records (compactions rewrite files
        without materially changing row counts).

        NOTE(review): current_totals are taken from the table's LATEST
        snapshot even when *snap* is historical (see _collect_table), so
        post-rewrite averages for old snapshots are an approximation.
        """
        operation = snap.operation
        added_records = delta["added_records"]
        removed_records = delta["removed_records"]
        added_data_files = delta["added_data_files"]
        removed_data_files = delta["removed_data_files"]
        removed_bytes = delta["removed_files_size_bytes"]

        # clamp denominators to 1 to avoid division by zero
        total_records = max(current_totals.get("total_records", 0), 1)
        total_file_bytes = max(current_totals.get("total_file_size_bytes", 0), 1)
        total_data_files = max(current_totals.get("total_data_files", 0), 1)
        current_avg_file_size = total_file_bytes / total_data_files

        is_rewrite = (
            operation == "rewrite"
            or (
                removed_data_files > 0
                and added_data_files > 0
                and abs(added_records - removed_records) <= max(1000, int(total_records * 0.01))
            )
        )

        if not is_rewrite:
            return

        self.metrics.rewrite_data_files_total.labels(*lbl).inc(removed_data_files)
        self.metrics.rewrite_bytes_total.labels(*lbl).inc(removed_bytes)
        self.metrics.rewrite_snapshots_total.labels(*lbl).inc()
        self.metrics.rewrite_last_timestamp_seconds.labels(*lbl).set(snap.timestamp_ms // 1000)
        self.metrics.rewrite_post_avg_file_size_bytes.labels(*lbl).set(current_avg_file_size)

        # heuristic estimate for removed small files: if rewrite removed files and current avg > threshold, count all removed as "small-ish" removed
        if current_avg_file_size >= self.cfg.thresholds.target_file_bytes:
            self.metrics.rewrite_removed_small_files_total.labels(*lbl).inc(removed_data_files)
def load_config(path: str) -> ExporterConfig:
    """Load exporter configuration from a YAML file.

    ``${VAR}`` / ``$VAR`` references inside catalog string values are expanded
    from the process environment (the sample config ships
    ``token: ${ICEBERG_TOKEN}``, and plain ``yaml.safe_load`` performs no
    substitution, so without this the literal placeholder would be sent to the
    catalog). Unresolvable references are left as-is, matching
    ``os.path.expandvars``. Non-catalog values are untouched.

    Raises KeyError if a catalog entry lacks "name" or "uri".
    """
    import os  # local import: keeps the module's top-level imports untouched

    with open(path, "r", encoding="utf-8") as f:
        raw = yaml.safe_load(f)

    def expand(value):
        # Only strings can carry ${VAR} placeholders; everything else passes through.
        return os.path.expandvars(value) if isinstance(value, str) else value

    catalogs = []
    for c in raw.get("catalogs", []):
        # Everything beyond the well-known keys is forwarded verbatim to PyIceberg.
        extra = {
            k: expand(v)
            for k, v in c.items()
            if k not in {"name", "uri", "warehouse", "token", "credential"}
        }
        catalogs.append(
            CatalogConfig(
                name=c["name"],
                uri=expand(c["uri"]),
                warehouse=expand(c.get("warehouse")),
                token=expand(c.get("token")),
                credential=expand(c.get("credential")),
                extra_properties=extra,
            )
        )

    discovery_raw = raw.get("discovery", {})
    tables = [TableRef(namespace=t["namespace"], table=t["table"]) for t in discovery_raw.get("tables", [])]
    discovery = DiscoveryConfig(
        mode=discovery_raw.get("mode", "explicit"),
        namespaces=discovery_raw.get("namespaces", []),
        tables=tables,
    )

    thresholds_raw = raw.get("thresholds", {})
    thresholds = ThresholdsConfig(
        small_file_bytes=thresholds_raw.get("small_file_bytes", 32 * 1024 * 1024),
        target_file_bytes=thresholds_raw.get("target_file_bytes", 128 * 1024 * 1024),
        delete_file_ratio=thresholds_raw.get("delete_file_ratio", 0.2),
        avg_files_per_partition=thresholds_raw.get("avg_files_per_partition", 20),
        small_file_count=thresholds_raw.get("small_file_count", 100),
    )

    return ExporterConfig(
        host=raw.get("host", "0.0.0.0"),
        port=raw.get("port", 9109),
        scrape_interval_seconds=raw.get("scrape_interval_seconds", 30),
        deep_scan_interval_seconds=raw.get("deep_scan_interval_seconds", 600),
        request_timeout_seconds=raw.get("request_timeout_seconds", 30),
        max_partitions_per_table=raw.get("max_partitions_per_table", 5000),
        catalogs=catalogs,
        discovery=discovery,
        thresholds=thresholds,
    )


# ============================================================
# Main
# ============================================================

def main() -> int:
    """CLI entry point: parse args, serve /metrics, scrape until signaled."""
    parser = argparse.ArgumentParser(description="Iceberg REST Catalog Prometheus Exporter")
    parser.add_argument("--config", required=True, help="Path to config YAML")
    args = parser.parse_args()

    cfg = load_config(args.config)
    exporter = IcebergExporter(cfg)

    # NOTE(review): the (server, thread) return shape requires
    # prometheus_client >= 0.17 — confirm the pinned version.
    server, thread = start_http_server(cfg.port, addr=cfg.host)
    logger.info("Serving metrics on http://%s:%s/metrics", cfg.host, cfg.port)

    def handle_signal(signum, frame):
        # Stop the scrape loop first, then tear down the HTTP server.
        logger.info("Received signal %s, shutting down", signum)
        exporter.stop()
        try:
            server.shutdown()
            server.server_close()
            thread.join(timeout=5)
        except Exception:
            logger.exception("Error shutting down metrics server")

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    exporter.run_forever()
    return 0
handle_signal(signum, frame): + logger.info("Received signal %s, shutting down", signum) + exporter.stop() + try: + server.shutdown() + server.server_close() + thread.join(timeout=5) + except Exception: + logger.exception("Error shutting down metrics server") + + signal.signal(signal.SIGINT, handle_signal) + signal.signal(signal.SIGTERM, handle_signal) + + exporter.run_forever() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) +``` + +--- + +## Part 2 — Example config + +### File: `config.yaml` + +```yaml +host: 0.0.0.0 +port: 9109 +scrape_interval_seconds: 30 +deep_scan_interval_seconds: 600 +request_timeout_seconds: 30 +max_partitions_per_table: 5000 + +catalogs: + - name: prod + uri: https://your-iceberg-rest-catalog.example.com + warehouse: s3://warehouse + token: ${ICEBERG_TOKEN} + +# Choose one mode +# 1) explicit tables (recommended to start) +discovery: + mode: explicit + tables: + - namespace: demo + table: events + - namespace: sales + table: orders + +# 2) namespace scan +# discovery: +# mode: namespace_scan +# namespaces: +# - demo +# - sales + +thresholds: + small_file_bytes: 33554432 # 32MB + target_file_bytes: 134217728 # 128MB + delete_file_ratio: 0.2 + avg_files_per_partition: 20 + small_file_count: 100 +``` + +--- + +## Part 3 — Notes for your environment (important) + +Given your scale (large Iceberg + real infra): + +### What you should adjust first +1. Replace `TableExtractor._iter_current_data_files_best_effort()` with the **exact PyIceberg version-specific path** you use. +2. If manifest walking via `table.scan().plan_files()` is too expensive, implement a **direct manifest summary path**. +3. Keep `deep_scan_interval_seconds` at **300–1800s**. Do NOT do full deep scans every 30s. +4. Start with **explicit tables** first; then move to namespace discovery. 
+ +### Strong recommendation +If you want a truly production-grade version for your environment, the next step should be: +- split this into modules +- persist seen snapshots in a lightweight local file or PVC-backed state (optional) +- add `/healthz` +- add scrape concurrency limits (ThreadPool) +- add stale label cleanup for removed partitions + +--- + +## Part 4 — Complete Grafana dashboard JSON + +This dashboard assumes: +- Prometheus datasource variable = `${DS_PROMETHEUS}` (Grafana import prompt will map it) +- Variables: + - `catalog` + - `namespace` + - `table` +- It maps the Datadog-style widgets to PromQL using the exporter metrics above. + +### File: `iceberg_rest_dashboard.json` + +```json +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 0}, + "id": 1, + "options": {"colorMode": "value", "graphMode": "area", 
"justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "iceberg_snapshot_total_records{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Current Total Records", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "decbytes"}, "overrides": []}, + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 0}, + "id": 2, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "iceberg_snapshot_total_file_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Current Total File Size", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 4, "w": 6, "x": 12, "y": 0}, + "id": 3, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "iceberg_snapshot_total_data_files{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Current Data Files", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 4, "w": 6, "x": 18, "y": 0}, + "id": 4, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": 
"iceberg_snapshot_total_delete_files{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Current Delete Files", + "type": "stat" + }, + + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "s"}, "overrides": []}, + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 4}, + "id": 5, + "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "iceberg_table_snapshot_age_seconds{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Current Snapshot Age", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 4}, + "id": 6, + "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "iceberg_table_partition_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Active Partitions", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 4, "w": 6, "x": 12, "y": 4}, + "id": 7, + "options": {"colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "iceberg_table_compaction_candidate{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Compaction Candidate (0/1)", + "type": "stat" + }, + { + "datasource": 
{"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "s"}, "overrides": []}, + "gridPos": {"h": 4, "w": 6, "x": 18, "y": 4}, + "id": 8, + "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "time() - iceberg_rewrite_last_timestamp_seconds{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Time Since Last Rewrite", + "type": "stat" + }, + + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, + "id": 9, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "increase(iceberg_snapshot_added_records_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__rate_interval])", "legendFormat": "added", "refId": "A"}, + {"expr": "increase(iceberg_snapshot_removed_records_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__rate_interval])", "legendFormat": "removed", "refId": "B"} + ], + "title": "Records Changed Over Time", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "decbytes"}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "id": 10, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "increase(iceberg_snapshot_added_files_size_bytes_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__rate_interval])", "legendFormat": "added bytes", "refId": "A"}, + {"expr": 
"increase(iceberg_snapshot_removed_files_size_bytes_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__rate_interval])", "legendFormat": "removed bytes", "refId": "B"} + ], + "title": "Bytes Changed Over Time", + "type": "timeseries" + }, + + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 16}, + "id": 11, + "options": {"displayMode": "gradient", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showUnfilled": true}, + "targets": [{"expr": "sum by (operation) (increase(iceberg_snapshot_commits_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__range]))", "refId": "A"}], + "title": "Snapshot Commits by Operation", + "type": "bargauge" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "decbytes"}, "overrides": []}, + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 16}, + "id": 12, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "increase(iceberg_rewrite_bytes_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__rate_interval])", "legendFormat": "rewrite bytes", "refId": "A"} + ], + "title": "Bytes Rewritten (Datadog: bytes compacted replacement)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 16}, + "id": 13, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": 
"increase(iceberg_rewrite_data_files_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__rate_interval])", "legendFormat": "rewrite files", "refId": "A"} + ], + "title": "Files Rewritten (Datadog: files compacted replacement)", + "type": "timeseries" + }, + + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "decbytes"}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24}, + "id": 14, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "iceberg_files_avg_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "legendFormat": "avg", "refId": "A"}, + {"expr": "iceberg_files_min_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "legendFormat": "min", "refId": "B"}, + {"expr": "iceberg_files_max_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "legendFormat": "max", "refId": "C"} + ], + "title": "File Size Stats", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24}, + "id": 15, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "iceberg_files_avg_record_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "legendFormat": "avg", "refId": "A"}, + {"expr": "iceberg_files_min_record_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "legendFormat": "min", "refId": "B"}, + {"expr": "iceberg_files_max_record_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "legendFormat": "max", "refId": "C"} + ], + "title": "Records Per File Stats", + 
"type": "timeseries" + }, + + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 32}, + "id": 16, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "topk(10, iceberg_partition_record_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"})", "legendFormat": "{{partition}}", "refId": "A"} + ], + "title": "Top 10 Partitions by Records", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 32}, + "id": 17, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "topk(10, iceberg_partition_file_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"})", "legendFormat": "{{partition}}", "refId": "A"} + ], + "title": "Top 10 Partitions by File Count", + "type": "timeseries" + }, + + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 40}, + "id": 18, + "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "iceberg_files_small_file_count{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\",threshold_bytes=\"33554432\"}", "refId": "A"}], + "title": "Small Files (<32MB)", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "percentunit"}, "overrides": []}, + 
"gridPos": {"h": 8, "w": 8, "x": 8, "y": 40}, + "id": 19, + "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "iceberg_files_delete_file_ratio{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", "refId": "A"}], + "title": "Delete File Ratio", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []}, + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 40}, + "id": 20, + "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "increase(iceberg_rewrite_snapshots_total{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}[$__range])", "refId": "A"}], + "title": "Rewrite Events in Range", + "type": "stat" + }, + + { + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "fieldConfig": {"defaults": {}, "overrides": []}, + "gridPos": {"h": 10, "w": 24, "x": 0, "y": 48}, + "id": 21, + "options": {"showHeader": true}, + "targets": [ + { + "expr": "iceberg_partition_size_bytes{catalog=~\"$catalog\",namespace=~\"$namespace\",table=~\"$table\"}", + "format": "table", + "instant": true, + "refId": "A" + } + ], + "title": "Partition Size Table", + "transformations": [ + {"id": "labelsToFields", "options": {"mode": "columns"}}, + {"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value": "size_bytes"}}} + ], + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": ["iceberg", "prometheus", "rest-catalog"], + "templating": { + "list": [ + { + "current": {"selected": true, "text": [".*"], "value": [".*"]}, + "datasource": {"type": "prometheus", 
"uid": "${DS_PROMETHEUS}"}, + "definition": "label_values(iceberg_snapshot_total_records, catalog)", + "hide": 0, + "includeAll": true, + "label": "catalog", + "multi": true, + "name": "catalog", + "options": [], + "query": {"qryType": 1, "query": "label_values(iceberg_snapshot_total_records, catalog)", "refId": "Prometheus-catalog"}, + "refresh": 2, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "current": {"selected": true, "text": [".*"], "value": [".*"]}, + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "definition": "label_values(iceberg_snapshot_total_records{catalog=~\"$catalog\"}, namespace)", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": {"qryType": 1, "query": "label_values(iceberg_snapshot_total_records{catalog=~\"$catalog\"}, namespace)", "refId": "Prometheus-namespace"}, + "refresh": 2, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "current": {"selected": true, "text": [".*"], "value": [".*"]}, + "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, + "definition": "label_values(iceberg_snapshot_total_records{catalog=~\"$catalog\",namespace=~\"$namespace\"}, table)", + "hide": 0, + "includeAll": true, + "label": "table", + "multi": true, + "name": "table", + "options": [], + "query": {"qryType": 1, "query": "label_values(iceberg_snapshot_total_records{catalog=~\"$catalog\",namespace=~\"$namespace\"}, table)", "refId": "Prometheus-table"}, + "refresh": 2, + "regex": "", + "sort": 1, + "type": "query" + } + ] + }, + "time": {"from": "now-24h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Iceberg REST Catalog Health", + "uid": "iceberg-rest-catalog-health", + "version": 1, + "weekStart": "" +} +``` + +--- + +## Part 5 — Datadog widget → PromQL mapping reference + +### Direct mappings +- **Current total records** + - `iceberg_snapshot_total_records` +- **Current total file size** + - 
`iceberg_snapshot_total_file_size_bytes` +- **Current total data files** + - `iceberg_snapshot_total_data_files` +- **Current total delete files** + - `iceberg_snapshot_total_delete_files` +- **Snapshot age** + - `iceberg_table_snapshot_age_seconds` +- **Active partitions** + - `iceberg_table_partition_count` + +### Snapshot delta mappings +- **Added records over time** + - `increase(iceberg_snapshot_added_records_total[$__rate_interval])` +- **Removed records over time** + - `increase(iceberg_snapshot_removed_records_total[$__rate_interval])` +- **Added bytes over time** + - `increase(iceberg_snapshot_added_files_size_bytes_total[$__rate_interval])` +- **Removed bytes over time** + - `increase(iceberg_snapshot_removed_files_size_bytes_total[$__rate_interval])` + +### Glue compaction replacements +- **Number of bytes compacted** + - `increase(iceberg_rewrite_bytes_total[$__rate_interval])` +- **Number of files compacted** + - `increase(iceberg_rewrite_data_files_total[$__rate_interval])` +- **Compaction events** + - `increase(iceberg_rewrite_snapshots_total[$__range])` +- **Time since last compaction/rewrite** + - `time() - iceberg_rewrite_last_timestamp_seconds` + +--- + +## Part 6 — Best next step for you + +For your real environment, the next best step is: + +1. I can produce a **multi-file production version**: + - `catalog_client.py` + - `collectors/summary.py` + - `collectors/deep_scan.py` + - `collectors/rewrite.py` + - `main.py` + - `Dockerfile` + - `Helm values.yaml` + - `ServiceMonitor` + +2. Or I can produce a **PyIceberg-version-specific implementation** if you tell me: + - your exact `pyiceberg` version + - your REST catalog implementation/vendor + - whether `table.scan().plan_files()` works in your environment + +That second option is the one I’d recommend for you, because the only non-trivial part here is the exact file/manifest traversal path. 
+ diff --git a/chatgpt/iceberg_rest_catalog_prometheus_exporter_spec.md b/chatgpt/iceberg_rest_catalog_prometheus_exporter_spec.md new file mode 100644 index 0000000..5466507 --- /dev/null +++ b/chatgpt/iceberg_rest_catalog_prometheus_exporter_spec.md @@ -0,0 +1,606 @@ +# Iceberg REST Catalog → Prometheus Exporter + +**Purpose:** +A production-grade Prometheus exporter that derives all metrics *only* from the Iceberg REST catalog (no Trino, no Glue, no logs). The exporter polls the REST catalog and manifests via PyIceberg (or direct REST where necessary), computes snapshot deltas and aggregates, and exposes metrics at `/metrics` in Prometheus text format. + +--- + +## Table of contents +1. Goals & constraints +2. Overview of architecture +3. Configuration +4. Metric catalog (detailed metric-by-metric spec) +5. Data sources & PyIceberg mapping (which object/field supplies each metric) +6. Collection strategy and caching +7. Cardinality & guardrails +8. Heuristics (rewrite/compaction detection, compaction candidate) +9. Prometheus & Grafana integration (PromQL snippets, recording rules, alerts) +10. Implementation plan & Python skeleton (collector classes) +11. Testing & validation +12. Deployment notes (k8s, resources) +13. Operational runbook & troubleshooting checklist + +--- + +## 1. Goals & constraints +- **Single source of truth:** only the Iceberg REST catalog (via PyIceberg or direct REST). No external logs. +- **Scale-aware:** handle very large tables by using a two-mode collection strategy (cheap quick scrape, periodic deep scan). +- **Low cardinality by default:** exporters should avoid per-file labels; per-partition metrics allowed but guarded by settings. +- **Metric semantics suitable for Prometheus/Grafana:** use counters for cumulative changes and gauges for instantaneous values. + +--- + +## 2. Architecture overview + +- **Main components:** + - `HTTP server` exposing `/metrics` and `/healthz`. 
+  - `Prometheus client registry` (prometheus_client) to register metrics.
+  - `SnapshotSummaryCollector` (fast): reads table snapshot summaries and basic totals.
+  - `ManifestStatsCollector` (deep, cached): traverses manifests/manifest entries and computes partition/file aggregates.
+  - `RewriteInferenceCollector` (uses snapshot history + manifest stats cache) to infer rewrite/compaction events.
+  - `Cache` (in-memory with TTL + an optional persistent caching backend): stores results of deep scans.
+  - `Config` for table discovery or whitelist, thresholds, and scrape behaviour.
+
+- **Scrapes vs polls:** exporter is polled by Prometheus at `/metrics`. Internally the exporter will poll the REST catalog on its own schedule (fast vs deep) and serve the last cached values to Prometheus scrapes.
+
+---
+
+## 3. Configuration (YAML / env)
+
+**Config file (example `config.yaml`):**
+
+```yaml
+# general
+listen_address: 0.0.0.0
+listen_port: 9104
+metrics_path: /metrics
+health_path: /healthz
+# Catalog connection
+catalog:
+  type: rest
+  url: https://iceberg-catalog.mycompany.internal
+  auth:
+    type: token
+    token: ${ICEBERG_TOKEN}
+# tables: either explicit list or discovery
+tables:
+  explicit:
+    - catalog: prod
+      namespace: sales
+      table: events
+  discover:
+    enabled: false
+    namespace_whitelist: [sales, marketing]
+# collection schedule
+collection:
+  fast_poll_interval_seconds: 60   # cheap snapshot summaries
+  deep_poll_interval_seconds: 900  # deep manifest scans
+  deep_poll_jitter_seconds: 30
+# partitions
+partition_export:
+  enabled: true
+  max_partitions_per_table: 500    # cardinality guard
+  include_partition_label: true
+# small file thresholds (bytes)
+file_thresholds:
+  small_file_bytes: 33554432       # 32MB
+  compaction_target_bytes: 134217728  # 128MB
+# heuristics
+heuristics:
+  rewrite_detection:
+    enabled: true
+    min_removed_files: 1
+    net_records_delta_ratio: 0.01
+    min_post_avg_increase_ratio: 1.1
+# exporter
+exporter:
+  enable_table_discovery: false
max_tables_per_scrape: 200 +# caching +cache: + ttl_seconds: 1200 # keep deep scan results for 20 minutes + persist_to_disk: false + +# operational +log_level: INFO + +``` + +**Env variables** usually used: `ICEBERG_TOKEN`, `PYICEBERG_CONFIG` etc. + +--- + +## 4. Metric catalog (detailed) + +> For each metric: `name | type | labels | help | source (PyIceberg object) | compute`. + +### A. Snapshot / table state (gauges) + +1. `iceberg_snapshot_total_data_files` | gauge + - labels: `{catalog, namespace, table}` + - help: Total active data files in latest snapshot. + - source: `Table.current_snapshot()` → summary or computed from manifest entries where `status=added` and data file type. + - compute: read snapshot summary if available: `summary.get('total-data-files')` or sum active manifests' data file counts. + +2. `iceberg_snapshot_total_delete_files` | gauge + - labels: `{catalog, namespace, table}` + - help: Total active delete files in latest snapshot. + - source: snapshot summary or manifests (manifest entries with `is_delete` flag). + +3. `iceberg_snapshot_total_records` | gauge + - labels: `{catalog, namespace, table}` + - help: Total record count in latest snapshot (derived from manifest-level `record_count` aggregated). + - source: manifests or snapshot summary `total-records`. + +4. `iceberg_snapshot_total_file_size_bytes` | gauge + - labels: `{catalog, namespace, table}` + - help: Total bytes in active data files. + - source: sum of `file_size_in_bytes` from manifest entries (only active files). + +5. `iceberg_table_current_snapshot_id` | gauge + - labels: `{catalog, namespace, table}` + - help: Current snapshot id (numeric). Useful to detect advancement. + - source: `Table.current_snapshot().snapshot_id` + +6. `iceberg_table_snapshot_age_seconds` | gauge + - labels: `{catalog, namespace, table}` + - help: Age in seconds since current snapshot timestamp. + - compute: `now - Table.current_snapshot().timestampMillis/1000`. + +7. 
`iceberg_table_snapshot_count` | gauge + - labels: `{catalog, namespace, table}` + - help: Number of snapshots retained in metadata history. + - source: `Table.snapshots()` length / available API for snapshot history. + +8. `iceberg_table_partition_count` | gauge + - labels: `{catalog, namespace, table}` + - help: Number of partitions (distinct partition values) in current snapshot. + - compute: derived from manifest entries grouped by partition. **Guarded** by `max_partitions_per_table`. + + +### B. Snapshot deltas / counters + +1. `iceberg_snapshot_added_records_total` | counter + - labels: `{catalog, namespace, table}` + - help: Cumulative records added (derived from snapshot diffs observed by exporter). + - compute: when exporter sees a newer snapshot, compare `added-records` from summary or compute `added_records = sum of added manifest entries record_count`. Increment this counter by `added_records`. + +2. `iceberg_snapshot_added_data_files_total` | counter + - labels: `{catalog, namespace, table}` + - help: Cumulative data files added by observed snapshots. + - compute: from snapshot summary or manifest entries diff. + +3. `iceberg_snapshot_added_delete_files_total` | counter + - labels: `{catalog, namespace, table}` + - help: Cumulative delete files added. + +4. `iceberg_snapshot_added_files_size_bytes_total` | counter + - labels: `{catalog, namespace, table}` + - help: Cumulative bytes added. + +5. `iceberg_snapshot_removed_records_total` | counter +6. `iceberg_snapshot_removed_data_files_total` | counter +7. `iceberg_snapshot_removed_delete_files_total` | counter +8. `iceberg_snapshot_removed_files_size_bytes_total` | counter + +- Notes: counters are only incremented after exporter detects a newer snapshot than last seen, so you must persist (in-memory or on-disk) the last seen snapshot id per table to compute diffs. + +9. 
`iceberg_snapshot_changed_partition_count` | gauge + - labels: `{catalog, namespace, table}` + - help: number of partitions touched in latest snapshot. + - compute: compare partition keys present in added/removed manifest entries in the snapshot summary/diff. + +10. `iceberg_snapshot_commits_total{operation}` | counter + - labels: `{catalog, namespace, table, operation}` + - help: counts commit types observed (append/overwrite/rewrite/etc.). + - compute: operation = snapshot summary `operation` if present; otherwise heuristics on manifest diffs. + + +### C. Partition metrics (gauges) + +- Preferred to export **per-partition** metrics for top-k and pie charts but **guarded** by `max_partitions_per_table`. + +1. `iceberg_partition_record_count{partition}` | gauge + - labels: `{catalog, namespace, table, partition}` + - help: record count in that partition. + - source: aggregate manifest entry `record_count` for partition. + +2. `iceberg_partition_file_count{partition}` | gauge + - labels: `{catalog, namespace, table, partition}` + - help: number of data files in that partition. + +3. `iceberg_partition_size_bytes{partition}` | gauge + - help: aggregated size in bytes. + +4. Pre-aggregates (table-level): + - `iceberg_partitions_min_record_count`, `_max_`, `_avg_` + - `iceberg_partitions_min_file_count`, `_max_`, `_avg_` + - `iceberg_partitions_min_size_bytes`, `_max_`, `_avg_` + +- compute pre-aggregates from per-partition stats but allow the exporter to compute directly during deep scan to reduce PromQL needs. + + +### D. Files aggregates (gauges) + +1. `iceberg_files_min_record_count` | gauge +2. `iceberg_files_max_record_count` | gauge +3. `iceberg_files_avg_record_count` | gauge +4. `iceberg_files_min_size_bytes` | gauge +5. `iceberg_files_max_size_bytes` | gauge +6. `iceberg_files_avg_size_bytes` | gauge +7. 
`iceberg_files_small_file_count{threshold_bytes}` | gauge + - count of files smaller than `threshold_bytes` (export threshold as label or provide fixed thresholds) +8. `iceberg_files_large_file_count{threshold_bytes}` | gauge +9. `iceberg_files_delete_file_ratio` | gauge + - `delete_file_count / max(data_file_count,1)` +10. `iceberg_files_avg_rows_per_file` | gauge + +Notes: keep file-related metrics aggregated only. Avoid per-file series. + + +### E. Rewrite / compaction inferred counters & health (derived) + +1. `iceberg_rewrite_data_files_total` | counter + - cumulative number of data files that were removed/replaced by rewrite-like snapshots observed by exporter. + +2. `iceberg_rewrite_bytes_total` | counter + - cumulative bytes of removed data files in rewrite-like snapshots. + +3. `iceberg_rewrite_snapshots_total` | counter + - count of rewrite-like snapshots observed. + +4. `iceberg_rewrite_last_timestamp_seconds` | gauge + - timestamp of last rewrite-like snapshot. + +5. `iceberg_rewrite_post_avg_file_size_bytes` | gauge + - average file size after latest rewrite-like snapshot. + +6. `iceberg_table_compaction_candidate` | gauge (0|1) + - exporter sets to 1 if heuristic thinks compaction is recommended. + +7. `iceberg_rewrite_removed_small_files_total` | counter + - count of small files removed during rewrite-like snapshots. + + +### F. Exporter internals & health + +1. `iceberg_exporter_up` | gauge (1/0) + - 1 if exporter can reach catalog and last fast poll succeeded. + +2. `iceberg_exporter_last_scrape_timestamp_seconds` | gauge +3. `iceberg_exporter_scrape_duration_seconds` | histogram or gauge +4. `iceberg_exporter_deep_scan_last_run_timestamp_seconds` | gauge +5. `iceberg_exporter_deep_scan_duration_seconds` | histogram/gauge + +--- + +## 5. Data sources & PyIceberg mapping (practical) + +> The following map describes which PyIceberg object or REST payload to use for the metric. 
+ +- **Table**: `Table.load(wrapped_catalog, identifier)` → table metadata and `current_snapshot()`. +- **Snapshot**: `table.current_snapshot()` or `table.snapshots()` → snapshot id, timestamp, summary fields. +- **Snapshot summary**: `snapshot.summary` (contains keys like `added-data-files`, `removed-data-files`, `added-records`, `removed-records`, `operation` depending on writer and version). +- **ManifestFile**: `snapshot.manifests` (list of manifest files) – each ManifestFile contains counts and metrics. +- **ManifestEntry**: entries inside manifests: fields `status` (added, existing, deleted), `dataFile.fileSizeInBytes`, `dataFile.recordCount`, `partition` fields etc. + +**Implementation note:** Some iceberg catalogs may not include every summary key in older writer versions. Use manifests/manifest entries as fallback to compute precise values. + + +--- + +## 6. Collection strategy & caching + +**Two-mode collection** (recommended): + +- **Fast poll (every `fast_poll_interval_seconds`)** + - tasks: fetch table current snapshot, snapshot id, snapshot timestamp, snapshot summary (if present), small number of manifest-level totals (if summary incomplete), minimal counters update (snapshot_commits_total), update `iceberg_exporter_up`. + - cost: cheap (only metadata) and safe to run frequently. + - exposed on every scrape via cache. + +- **Deep scan (every `deep_poll_interval_seconds`)** + - tasks: traverse manifests and manifest entries for each table (or for top-N tables), compute partition-level aggregates, per-file aggregates (only aggregated stats), detect rewrite-like snapshots using snapshot history, compute compaction candidate heuristics. + - cost: expensive (I/O heavy). Must be rate-limited and cached. + - algorithm: incremental manifest processing — read only manifests referenced by new snapshots since last deep-scan to avoid full table scan every time. 
+ +**Cache design:** +- in-memory dictionary keyed by `catalog:namespace:table` with timestamped content and TTL = `cache.ttl_seconds`. +- optional persistence to disk (sqlite/leveldb) to survive restarts so counters can be preserved. +- deep scan results should persist across restarts if possible (to avoid recomputing heavy aggregates and to properly compute counters). + +**Snapshot-diff persistence:** +- exporter needs to remember last-seen snapshot id per table to compute added/removed counters reliably. Persist to disk atomically, or write to a small sqlite file. + +**Concurrency & rate limiting:** +- deep scan tasks should be executed with a thread pool of size configurable (e.g., 4) and a per-table concurrency guard. +- add jitter to deep scan schedule to avoid stampedes across many exporter instances. + +**Scrape handling:** +- when Prometheus scrapes `/metrics`, exporter returns current registry values from cache immediately. Scrapes should not block until deep scan completes. + +--- + +## 7. Cardinality & guardrails + +**Avoid cardinality explosion:** +- default: do not export per-file series. +- partition label: guarded by `max_partitions_per_table` (default 500). If a table has more partitions, either: + - export only top-N partitions by `file_count`/`record_count`, or + - disable partition-level metrics for that table and only expose pre-aggregates. + +**Labels to always include:** `catalog`, `namespace`, `table`. +**Labels to avoid unless explicitly enabled:** `partition` (enable with guard), `spec_id` (only if necessary), `file_id` (never by default). + +**Threshold parameters:** small file thresholds should be configurable. Export small_file_count as aggregated counts grouped by threshold. + +**Max tables per scrape:** avoid scanning thousands of tables per deep scan — have `max_tables_per_scrape` config and prioritization by `last_activity` or `table_size` (if known). + +--- + +## 8. Heuristics + +### A. 
Rewrite / compaction detection heuristic + +**Primary rule:** +- if snapshot summary `operation` == `rewrite` or `operation` explicitly indicates `optimize`, treat as rewrite-like. + +**Fallback heuristic:** +- `removed_data_files > 0` AND `added_data_files > 0` +- AND `abs(added_records - removed_records) / max(pre_snapshot_total_records,1) < net_records_delta_ratio` (default `0.01`) +- AND `post_avg_file_size >= pre_avg_file_size * min_post_avg_increase_ratio` (default `1.1`) + +When conditions are satisfied, update these counters: +- `iceberg_rewrite_data_files_total += removed_data_files` +- `iceberg_rewrite_bytes_total += removed_data_files_size_bytes` +- `iceberg_rewrite_snapshots_total += 1` +- `iceberg_rewrite_last_timestamp_seconds = snapshot_ts` +- `iceberg_rewrite_removed_small_files_total += count(files_removed where size < small_file_threshold)` + +### B. Compaction candidate heuristic +Set `iceberg_table_compaction_candidate = 1` if any: +- `iceberg_files_avg_size_bytes < compaction_target_bytes` +- OR `iceberg_files_small_file_count{threshold=small_file_bytes} > small_file_count_threshold` (default 100) +- OR `iceberg_files_delete_file_ratio > delete_file_ratio_threshold` (default 0.2) +- OR `iceberg_partitions_avg_file_count > avg_files_per_partition_threshold` (default 20) + +Tune defaults per your environment. + +--- + +## 9. 
Prometheus integration, recording rules & alerts
+
+**Prometheus scrape config example:**
+```yaml
+scrape_configs:
+  - job_name: iceberg_exporter
+    scrape_interval: 60s
+    metrics_path: /metrics
+    static_configs:
+      - targets: ['iceberg-exporter.prod.svc.cluster.local:9104']
+```
+
+**Example Recording Rules (prometheus rules file):**
+```yaml
+groups:
+- name: iceberg_recordings
+  rules:
+  - record: iceberg:partition_file_count:avg5m
+    expr: avg_over_time(iceberg_partition_file_count[5m])
+  - record: iceberg:files_small_count:sum5m
+    expr: sum(iceberg_files_small_file_count) by (namespace,table)
+  - record: iceberg:rewrite_bytes:rate1h
+    expr: increase(iceberg_rewrite_bytes_total[1h])
+```
+
+**Example Alerts:**
+```yaml
+groups:
+- name: iceberg_alerts
+  rules:
+  - alert: IcebergExporterDown
+    expr: iceberg_exporter_up == 0
+    for: 2m
+    labels: {severity: critical}
+    annotations: {summary: "Iceberg exporter unreachable"}
+
+  - alert: IcebergTableNeedsCompaction
+    expr: iceberg_table_compaction_candidate == 1
+    for: 30m
+    labels: {severity: warning}
+    annotations:
+      summary: "{{ $labels.namespace }}.{{ $labels.table }} likely needs compaction"
+
+  - alert: IcebergSnapshotStalled
+    # iceberg_table_snapshot_age_seconds is already an age (seconds since the
+    # current snapshot), so compare it directly — do not subtract it from time().
+    expr: iceberg_table_snapshot_age_seconds > 86400
+    for: 1h
+    labels: {severity: warning}
+    annotations: {summary: "Snapshot older than 24h"}
+```
+
+**Grafana mapping tips:**
+- For `snapshot.added_*` use `increase(iceberg_snapshot_added_records_total[5m])`.
+- For top partitions use `topk(10, iceberg_partition_file_count{namespace="DBNAME",table="TABLENAME"})`.
+- For pie charts use `sum by (partition) (iceberg_partition_file_count{...})`.
+
+---
+
+## 10. Implementation plan & Python skeleton
+
+**Dependencies:**
+- `pyiceberg` (or direct REST HTTP client if you prefer raw REST). 
+- `prometheus_client` (official Python client) +- HTTP server (built-in from prometheus_client or `aiohttp` if async recommended) +- optional: `sqlite` for persistence + +**High-level classes:** +- `ExporterApp` (main) – config, scheduler, prometheus registry +- `SnapshotSummaryCollector` – cheap collector, registered with prometheus_client +- `ManifestStatsCollector` – deep collector, runs async/pool, provides aggregated results +- `RewriteInferenceCollector` – reads snapshot history, updates rewrite counters +- `CacheManager` – in-memory + optional persistent store +- `TableScanner` – wraps PyIceberg calls for a single table +- `PersistedState` – small sqlite for last_seen_snapshot per table + +**Example layout:** +``` +exporter/ + __init__.py + main.py + config.py + collectors/ + __init__.py + snapshot_summary.py + manifest_stats.py + rewrite_inference.py + storage/ + cache.py + persisted_state.py + http/ + server.py + utils/ + pyiceberg_wrapper.py + metrics_helper.py + deploy/ + k8s-deployment.yaml +``` + +**Simple collector example (pseudo-code):** + +```python +# collectors/snapshot_summary.py +from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily + +class SnapshotSummaryCollector: + def __init__(self, config, cache, pyiceberg_client): + self.config = config + self.cache = cache + self.client = pyiceberg_client + + def collect(self): + # For each table in config + for t in self.config.tables: + key = f"{t.catalog}:{t.namespace}:{t.table}" + data = self.cache.get(key, "fast") + # data should include totals from last fast poll + labels = [t.catalog, t.namespace, t.table] + + g = GaugeMetricFamily( + 'iceberg_snapshot_total_data_files', + 'Total active data files in latest snapshot', + labels=['catalog','namespace','table'] + ) + g.add_metric(labels, data['total_data_files']) + yield g + + # counters need to be registered using CounterMetricFamily + c = CounterMetricFamily( + 'iceberg_snapshot_added_data_files_total', + 'Cumulative data 
files added', + labels=['catalog','namespace','table'] + ) + c.add_metric(labels, data['added_data_files_total']) + yield c +``` + +**Deep manifest scan & detection (pseudo):** + +```python +# collectors/manifest_stats.py +# run in background thread every deep_poll_interval_seconds +for table in tables_to_scan: + table_obj = pyiceberg_client.load_table(table_identifier) + # iterate manifests for snapshots newer than last deep scan + manifests = table_obj.current_snapshot().manifests + for manifest in manifests: + for entry in manifest.entries: + # aggregate per-partition data + partition_key = serialize_partition(entry.data_file.partition) + part_stats[partition_key].files += 1 + part_stats[partition_key].records += entry.data_file.record_count + part_stats[partition_key].bytes += entry.data_file.file_size_in_bytes + # compute file-level aggregates, small file counts + compute_and_cache_result(table, part_stats, file_aggregates) +``` + +**Persist last seen snapshot example:** +- Use sqlite table `last_seen_snapshots(catalog,namespace,table,last_snapshot_id,last_checked_ts)`. +- Update atomically after deep scan or whenever new snapshot detected. + + +--- + +## 11. Testing & validation + +**Unit tests:** +- mock PyIceberg Table/Snapshot/Manifest objects and assert metrics computed. +- test rewrite detection heuristics against crafted snapshot pairs. + +**Integration tests:** +- run exporter against a mini Iceberg REST catalog or local test catalog with small tables. +- assert `/metrics` contains expected time series. + +**Performance tests:** +- run deep scan on large synthetic table (e.g., 1M manifest entries) and measure deep_scan_duration_seconds. +- tune thread pool and manifests pagination. + +**Test data:** +- create snapshots representing `append`/`overwrite`/`rewrite`/`delete` and verify counters. + +--- + +## 12. 
Deployment notes (k8s)
+
+**Kubernetes deployment example (high-level):**
+- container image: `docker.io/org/iceberg-exporter:1.0`
+- resources: `requests: cpu 200m mem 256Mi`, `limits: cpu 1, mem 1Gi` (tune per deep-scan activity)
+- readiness/liveness: `/healthz`
+- configmap for `config.yaml`, secret for `ICEBERG_TOKEN`.
+- HorizontalPodAutoscaler: not recommended unless you run many deep scans; prefer a single instance scraped by Prometheus.
+
+**Prometheus service discovery:**
+- Expose service `iceberg-exporter` and add a `scrape_config` pointing at it.
+
+**RBAC & network:**
+- exporter must be able to reach the Iceberg REST endpoint with required auth.
+
+---
+
+## 13. Operational runbook & troubleshooting
+
+**If exporter_up == 0:**
+- check network to catalog
+- check token/credentials
+- check logs for PyIceberg exceptions
+
+**Deep scan slow:**
+- increase `deep_poll_interval_seconds`
+- reduce `max_tables_per_scrape`
+- increase thread pool size if IO bound
+
+**Counters not increasing:**
+- verify `last_seen_snapshot_id` persisted and correct
+- check that the expected snapshot summary fields are present
+
+**Cardinality high:**
+- disable partition export or lower `max_partitions_per_table`
+
+---
+
+## Appendix: Recommended defaults (tunable)
+
+- `fast_poll_interval_seconds`: 60
+- `deep_poll_interval_seconds`: 900 (15 min)
+- `max_partitions_per_table`: 500
+- `small_file_bytes`: 32MB
+- `compaction_target_bytes`: 128MB
+- `small_file_count_threshold`: 100
+- `delete_file_ratio_threshold`: 0.2
+
+---
+
+*If you want, I can now produce:*
+- a ready-to-run **Python exporter skeleton** (async, using pyiceberg + prometheus_client) implementing the above collectors and caching logic, including a Dockerfile and k8s manifest; **or**
+- complete **Grafana dashboard JSON** mapping each Datadog widget to PromQL using these metrics.
+
+Tell me which next (I recommend the Python exporter skeleton). 
+ diff --git a/chatgpt/iceberg_rest_exporter.py b/chatgpt/iceberg_rest_exporter.py new file mode 100644 index 0000000..1607b21 --- /dev/null +++ b/chatgpt/iceberg_rest_exporter.py @@ -0,0 +1,952 @@ +from __future__ import annotations + +import argparse +import logging +import os +import signal +import sys +import threading +import time +from dataclasses import dataclass, field +from typing import Any, Dict, Iterable, List, Optional, Tuple + +import yaml +from prometheus_client import Counter, Gauge, start_http_server, disable_created_metrics + +# NOTE: +# PyIceberg APIs evolve. You may need to adjust imports for your exact version. +# These imports are intentionally conservative and may require small edits. +from pyiceberg.catalog import load_catalog + + +# ============================================================ +# Logging +# ============================================================ + +logging.basicConfig( + level=os.getenv("LOG_LEVEL", "INFO"), + format="%(asctime)s %(levelname)s %(name)s - %(message)s", +) +logger = logging.getLogger("iceberg-rest-exporter") + + +# ============================================================ +# Config models +# ============================================================ + +@dataclass +class CatalogConfig: + name: str + uri: str + warehouse: Optional[str] = None + token: Optional[str] = None + credential: Optional[str] = None + extra_properties: Dict[str, str] = field(default_factory=dict) + + +@dataclass +class TableRef: + namespace: str + table: str + + @property + def fqdn(self) -> str: + return f"{self.namespace}.{self.table}" + + +@dataclass +class DiscoveryConfig: + mode: str = "explicit" # explicit | namespace_scan + namespaces: List[str] = field(default_factory=list) + tables: List[TableRef] = field(default_factory=list) + + +@dataclass +class ThresholdsConfig: + small_file_bytes: int = 32 * 1024 * 1024 + target_file_bytes: int = 128 * 1024 * 1024 + delete_file_ratio: float = 0.2 + avg_files_per_partition: int = 
20 + small_file_count: int = 100 + + +@dataclass +class ExporterConfig: + host: str = "0.0.0.0" + port: int = 9109 + scrape_interval_seconds: int = 30 + deep_scan_interval_seconds: int = 600 + request_timeout_seconds: int = 30 + max_partitions_per_table: int = 5000 + catalogs: List[CatalogConfig] = field(default_factory=list) + discovery: DiscoveryConfig = field(default_factory=DiscoveryConfig) + thresholds: ThresholdsConfig = field(default_factory=ThresholdsConfig) + + +# ============================================================ +# Metric registry (explicit metric handles) +# ============================================================ + +LABELS = ["catalog", "namespace", "table", "table_name"] +PARTITION_LABELS = ["catalog", "namespace", "table", "table_name", "partition"] +OP_LABELS = ["catalog", "namespace", "table", "table_name", "operation"] +THRESHOLD_LABELS = ["catalog", "namespace", "table", "table_name", "threshold_bytes"] + + +class Metrics: + def __init__(self) -> None: + disable_created_metrics() + + # exporter health + self.exporter_last_run_success = Gauge( + "iceberg_exporter_last_run_success", + "Whether the last exporter loop completed successfully (1/0)", + ) + self.exporter_last_run_timestamp = Gauge( + "iceberg_exporter_last_run_timestamp_seconds", + "Unix timestamp of last exporter loop", + ) + self.exporter_loop_duration = Gauge( + "iceberg_exporter_loop_duration_seconds", + "Duration of the last exporter loop", + ) + self.exporter_table_scrape_errors = Counter( + "iceberg_exporter_table_scrape_errors_total", + "Number of per-table scrape errors", + ["catalog", "namespace", "table", "table_name", "stage"], + ) + + # Snapshot state gauges + self.snapshot_total_data_files = Gauge( + "iceberg_snapshot_total_data_files", + "Current total active data files in latest snapshot", + LABELS, + ) + self.snapshot_total_delete_files = Gauge( + "iceberg_snapshot_total_delete_files", + "Current total active delete files in latest snapshot", + LABELS, + ) 
+ self.snapshot_total_records = Gauge( + "iceberg_snapshot_total_records", + "Current total records in latest snapshot", + LABELS, + ) + self.snapshot_total_file_size_bytes = Gauge( + "iceberg_snapshot_total_file_size_bytes", + "Current total active data file size in bytes", + LABELS, + ) + self.snapshot_total_position_delete_records = Gauge( + "iceberg_snapshot_total_position_delete_records", + "Current total position delete records in latest snapshot", + LABELS, + ) + self.snapshot_total_equality_delete_records = Gauge( + "iceberg_snapshot_total_equality_delete_records", + "Current total equality delete records in latest snapshot", + LABELS, + ) + self.table_current_snapshot_id = Gauge( + "iceberg_table_current_snapshot_id", + "Current snapshot id", + LABELS, + ) + self.table_current_sequence_number = Gauge( + "iceberg_table_current_sequence_number", + "Current sequence number", + LABELS, + ) + self.table_snapshot_age_seconds = Gauge( + "iceberg_table_snapshot_age_seconds", + "Age of current snapshot in seconds", + LABELS, + ) + self.table_snapshot_count = Gauge( + "iceberg_table_snapshot_count", + "Number of retained snapshots", + LABELS, + ) + self.table_partition_count = Gauge( + "iceberg_table_partition_count", + "Number of active partitions", + LABELS, + ) + + # Snapshot delta counters + self.snapshot_added_records_total = Counter( + "iceberg_snapshot_added_records_total", + "Cumulative added records across observed snapshots", + LABELS, + ) + self.snapshot_added_data_files_total = Counter( + "iceberg_snapshot_added_data_files_total", + "Cumulative added data files across observed snapshots", + LABELS, + ) + self.snapshot_added_delete_files_total = Counter( + "iceberg_snapshot_added_delete_files_total", + "Cumulative added delete files across observed snapshots", + LABELS, + ) + self.snapshot_added_files_size_bytes_total = Counter( + "iceberg_snapshot_added_files_size_bytes_total", + "Cumulative added file bytes across observed snapshots", + LABELS, + ) + 
self.snapshot_removed_records_total = Counter( + "iceberg_snapshot_removed_records_total", + "Cumulative removed records across observed snapshots", + LABELS, + ) + self.snapshot_removed_data_files_total = Counter( + "iceberg_snapshot_removed_data_files_total", + "Cumulative removed data files across observed snapshots", + LABELS, + ) + self.snapshot_removed_delete_files_total = Counter( + "iceberg_snapshot_removed_delete_files_total", + "Cumulative removed delete files across observed snapshots", + LABELS, + ) + self.snapshot_removed_files_size_bytes_total = Counter( + "iceberg_snapshot_removed_files_size_bytes_total", + "Cumulative removed file bytes across observed snapshots", + LABELS, + ) + self.snapshot_changed_partition_count = Gauge( + "iceberg_snapshot_changed_partition_count", + "Partitions changed in latest observed snapshot", + LABELS, + ) + self.snapshot_commits_total = Counter( + "iceberg_snapshot_commits_total", + "Observed snapshot commits by operation", + OP_LABELS, + ) + + # Partition gauges + self.partition_record_count = Gauge( + "iceberg_partition_record_count", + "Record count by active partition", + PARTITION_LABELS, + ) + self.partition_file_count = Gauge( + "iceberg_partition_file_count", + "Active data file count by partition", + PARTITION_LABELS, + ) + self.partition_size_bytes = Gauge( + "iceberg_partition_size_bytes", + "Active data size by partition in bytes", + PARTITION_LABELS, + ) + + # Partition aggregate gauges + self.partitions_min_record_count = Gauge("iceberg_partitions_min_record_count", "Min records across partitions", LABELS) + self.partitions_max_record_count = Gauge("iceberg_partitions_max_record_count", "Max records across partitions", LABELS) + self.partitions_avg_record_count = Gauge("iceberg_partitions_avg_record_count", "Avg records across partitions", LABELS) + self.partitions_min_file_count = Gauge("iceberg_partitions_min_file_count", "Min file count across partitions", LABELS) + self.partitions_max_file_count = 
Gauge("iceberg_partitions_max_file_count", "Max file count across partitions", LABELS) + self.partitions_avg_file_count = Gauge("iceberg_partitions_avg_file_count", "Avg file count across partitions", LABELS) + self.partitions_min_size_bytes = Gauge("iceberg_partitions_min_size_bytes", "Min bytes across partitions", LABELS) + self.partitions_max_size_bytes = Gauge("iceberg_partitions_max_size_bytes", "Max bytes across partitions", LABELS) + self.partitions_avg_size_bytes = Gauge("iceberg_partitions_avg_size_bytes", "Avg bytes across partitions", LABELS) + + # File aggregate gauges + self.files_min_record_count = Gauge("iceberg_files_min_record_count", "Min records per active data file", LABELS) + self.files_max_record_count = Gauge("iceberg_files_max_record_count", "Max records per active data file", LABELS) + self.files_avg_record_count = Gauge("iceberg_files_avg_record_count", "Avg records per active data file", LABELS) + self.files_min_size_bytes = Gauge("iceberg_files_min_size_bytes", "Min size of active data files in bytes", LABELS) + self.files_max_size_bytes = Gauge("iceberg_files_max_size_bytes", "Max size of active data files in bytes", LABELS) + self.files_avg_size_bytes = Gauge("iceberg_files_avg_size_bytes", "Avg size of active data files in bytes", LABELS) + self.files_small_file_count = Gauge( + "iceberg_files_small_file_count", + "Count of active data files below threshold_bytes", + THRESHOLD_LABELS, + ) + self.files_large_file_count = Gauge( + "iceberg_files_large_file_count", + "Count of active data files above threshold_bytes", + THRESHOLD_LABELS, + ) + self.files_avg_rows_per_file = Gauge( + "iceberg_files_avg_rows_per_file", + "Average rows per active data file", + LABELS, + ) + self.files_delete_file_ratio = Gauge( + "iceberg_files_delete_file_ratio", + "Delete files / data files ratio", + LABELS, + ) + + # Rewrite / compaction inferred metrics + self.rewrite_data_files_total = Counter( + "iceberg_rewrite_data_files_total", + "Estimated 
rewritten data files across rewrite-like snapshots", + LABELS, + ) + self.rewrite_bytes_total = Counter( + "iceberg_rewrite_bytes_total", + "Estimated rewritten bytes across rewrite-like snapshots", + LABELS, + ) + self.rewrite_snapshots_total = Counter( + "iceberg_rewrite_snapshots_total", + "Observed rewrite-like snapshots", + LABELS, + ) + self.rewrite_last_timestamp_seconds = Gauge( + "iceberg_rewrite_last_timestamp_seconds", + "Unix timestamp of latest rewrite-like snapshot", + LABELS, + ) + self.rewrite_removed_small_files_total = Counter( + "iceberg_rewrite_removed_small_files_total", + "Estimated small files removed by rewrite-like snapshots", + LABELS, + ) + self.rewrite_post_avg_file_size_bytes = Gauge( + "iceberg_rewrite_post_avg_file_size_bytes", + "Average file size after latest rewrite-like snapshot", + LABELS, + ) + self.table_compaction_candidate = Gauge( + "iceberg_table_compaction_candidate", + "Whether table is a compaction candidate (1/0)", + LABELS, + ) + + +# ============================================================ +# In-memory state for observed snapshots (counter correctness) +# ============================================================ + +class SnapshotState: + """ + Tracks last seen snapshot ids to avoid double-incrementing counters. + In Kubernetes this resets on pod restart, which is acceptable for Prometheus counters. + Use increase()/rate() in PromQL. 
+ """ + + def __init__(self) -> None: + self._lock = threading.Lock() + self._seen_snapshot_ids: Dict[Tuple[str, str, str], set[int]] = {} + + def mark_if_new(self, catalog: str, namespace: str, table: str, snapshot_id: int) -> bool: + key = (catalog, namespace, table) + with self._lock: + seen = self._seen_snapshot_ids.setdefault(key, set()) + if snapshot_id in seen: + return False + seen.add(snapshot_id) + # prevent unbounded growth + if len(seen) > 5000: + # retain latest-ish by arbitrary truncation strategy + seen_list = list(seen) + seen.clear() + seen.update(seen_list[-1000:]) + return True + + +# ============================================================ +# Helpers +# ============================================================ + +def safe_int(v: Any, default: int = 0) -> int: + try: + return int(v) + except Exception: + return default + + +def safe_float(v: Any, default: float = 0.0) -> float: + try: + return float(v) + except Exception: + return default + + +def now_ts() -> int: + return int(time.time()) + + +def labels(catalog: str, namespace: str, table: str) -> Tuple[str, str, str, str]: + return (catalog, namespace, table, f"{namespace}.{table}") + + +def avg(values: List[float]) -> float: + return sum(values) / len(values) if values else 0.0 + + +def min_or_zero(values: List[float]) -> float: + return min(values) if values else 0.0 + + +def max_or_zero(values: List[float]) -> float: + return max(values) if values else 0.0 + + +def normalize_operation(op: Optional[str]) -> str: + if not op: + return "unknown" + op = str(op).strip().lower().replace("-", "_") + allowed = {"append", "overwrite", "replace", "delete", "rewrite", "fast_append"} + return op if op in allowed else "unknown" + + +# ============================================================ +# PyIceberg adapter layer (version-tolerant-ish) +# ============================================================ + +class IcebergAdapter: + def __init__(self, cfg: CatalogConfig): + props = { + "uri": 
cfg.uri, + **cfg.extra_properties, + } + if cfg.warehouse: + props["warehouse"] = cfg.warehouse + if cfg.token: + props["token"] = cfg.token + if cfg.credential: + props["credential"] = cfg.credential + + self.catalog = load_catalog(cfg.name, **props) + self.name = cfg.name + + def list_tables(self, namespace: str) -> List[TableRef]: + # Adjust for your PyIceberg version if needed. + # Some versions return tuples like (namespace_tuple, table_name) or identifiers. + out: List[TableRef] = [] + tables = self.catalog.list_tables(namespace) + for item in tables: + if isinstance(item, tuple): + # common shape: (('ns',), 'table') or ('ns', 'table') + if len(item) == 2 and isinstance(item[1], str): + t = item[1] + out.append(TableRef(namespace=namespace, table=t)) + else: + out.append(TableRef(namespace=namespace, table=str(item[-1]))) + else: + s = str(item) + t = s.split(".")[-1] + out.append(TableRef(namespace=namespace, table=t)) + return out + + def load_table(self, table_ref: TableRef): + return self.catalog.load_table(table_ref.fqdn) + + +# ============================================================ +# Data extraction contracts +# ============================================================ + +@dataclass +class SnapshotSummary: + snapshot_id: int + timestamp_ms: int + sequence_number: int + operation: str + summary: Dict[str, Any] + + +@dataclass +class PartitionStats: + partition_key: str + record_count: int + file_count: int + size_bytes: int + + +@dataclass +class DeepScanStats: + partition_stats: List[PartitionStats] + file_record_counts: List[int] + file_sizes: List[int] + active_partition_count: int + + +# ============================================================ +# Extractors +# ============================================================ + +class TableExtractor: + """ + Isolates PyIceberg table metadata traversal. + You WILL likely need to adapt some methods to your exact version. 
+ """ + + def get_snapshots(self, table) -> List[SnapshotSummary]: + snapshots = [] + + # Version-flexible access + metadata = getattr(table, "metadata", None) + raw_snapshots = [] + if metadata is not None: + raw_snapshots = getattr(metadata, "snapshots", None) or [] + + for s in raw_snapshots: + snapshot_id = safe_int(getattr(s, "snapshot_id", 0)) + timestamp_ms = safe_int(getattr(s, "timestamp_ms", 0)) + sequence_number = safe_int(getattr(s, "sequence_number", 0)) + summary = getattr(s, "summary", None) or {} + operation = normalize_operation(summary.get("operation") or getattr(s, "operation", None)) + snapshots.append( + SnapshotSummary( + snapshot_id=snapshot_id, + timestamp_ms=timestamp_ms, + sequence_number=sequence_number, + operation=operation, + summary=summary, + ) + ) + + snapshots.sort(key=lambda x: (x.sequence_number, x.timestamp_ms, x.snapshot_id)) + return snapshots + + def get_current_snapshot(self, table) -> Optional[SnapshotSummary]: + snaps = self.get_snapshots(table) + return snaps[-1] if snaps else None + + def get_current_totals_from_summary(self, current: SnapshotSummary) -> Dict[str, int]: + s = current.summary or {} + return { + "total_data_files": safe_int(s.get("total-data-files")), + "total_delete_files": safe_int(s.get("total-delete-files")), + "total_records": safe_int(s.get("total-records")), + "total_file_size_bytes": safe_int(s.get("total-files-size")), + "total_position_delete_records": safe_int(s.get("total-position-deletes")), + "total_equality_delete_records": safe_int(s.get("total-equality-deletes")), + } + + def get_snapshot_delta_from_summary(self, snap: SnapshotSummary) -> Dict[str, int]: + s = snap.summary or {} + return { + "added_records": safe_int(s.get("added-records")), + "added_data_files": safe_int(s.get("added-data-files")), + "added_delete_files": safe_int(s.get("added-delete-files")), + "added_files_size_bytes": safe_int(s.get("added-files-size")), + "removed_records": safe_int(s.get("removed-records")), + 
"removed_data_files": safe_int(s.get("removed-data-files")), + "removed_delete_files": safe_int(s.get("removed-delete-files")), + "removed_files_size_bytes": safe_int(s.get("removed-files-size")), + "changed_partition_count": safe_int(s.get("changed-partition-count")), + } + + def deep_scan(self, table, max_partitions_per_table: int) -> DeepScanStats: + """ + IMPORTANT: This is the one part you will almost certainly tweak. + + Ideal approach: + - traverse current snapshot manifests / entries + - include only ACTIVE data files + - aggregate by partition + + Skeleton below tries a few common patterns and degrades gracefully. + """ + partition_map: Dict[str, PartitionStats] = {} + file_record_counts: List[int] = [] + file_sizes: List[int] = [] + + # The actual PyIceberg APIs vary. Replace this section with your exact version's scan. + # If you already know how to iterate current data files, plug it in here. + current_files_iter = self._iter_current_data_files_best_effort(table) + + for data_file in current_files_iter: + record_count = safe_int(getattr(data_file, "record_count", 0)) + file_size = safe_int(getattr(data_file, "file_size_in_bytes", 0)) + + part = getattr(data_file, "partition", None) + partition_key = self._partition_to_string(part) + + file_record_counts.append(record_count) + file_sizes.append(file_size) + + if partition_key not in partition_map: + if len(partition_map) >= max_partitions_per_table: + partition_key = "__overflow__" + if partition_key not in partition_map: + partition_map[partition_key] = PartitionStats( + partition_key=partition_key, + record_count=0, + file_count=0, + size_bytes=0, + ) + else: + partition_map[partition_key] = PartitionStats( + partition_key=partition_key, + record_count=0, + file_count=0, + size_bytes=0, + ) + + ps = partition_map[partition_key] + ps.record_count += record_count + ps.file_count += 1 + ps.size_bytes += file_size + + return DeepScanStats( + partition_stats=list(partition_map.values()), + 
file_record_counts=file_record_counts, + file_sizes=file_sizes, + active_partition_count=len(partition_map), + ) + + def _iter_current_data_files_best_effort(self, table) -> Iterable[Any]: + """ + Replace with exact PyIceberg method for your version. + + Possible strategies by version: + - table.scan().plan_files() -> file scan tasks + - metadata.current_snapshot + manifests traversal + - table.inspect.files() (if available in your version) + """ + # Strategy 1: scan().plan_files() + try: + scan = table.scan() + tasks = scan.plan_files() + for task in tasks: + # common patterns: task.file or task.data_file + df = getattr(task, "file", None) or getattr(task, "data_file", None) + if df is not None: + yield df + return + except Exception: + pass + + # Strategy 2: no-op fallback + logger.warning("Deep scan fallback: unable to iterate current data files for table; returning empty deep stats") + return [] + + def _partition_to_string(self, part: Any) -> str: + if part is None: + return "__unpartitioned__" + try: + if hasattr(part, "__dict__"): + items = sorted(part.__dict__.items()) + return ",".join(f"{k}={v}" for k, v in items) + if isinstance(part, dict): + items = sorted(part.items()) + return ",".join(f"{k}={v}" for k, v in items) + return str(part) + except Exception: + return "__unknown_partition__" + + +# ============================================================ +# Exporter core +# ============================================================ + +class IcebergExporter: + def __init__(self, cfg: ExporterConfig): + self.cfg = cfg + self.metrics = Metrics() + self.snapshot_state = SnapshotState() + self.extractor = TableExtractor() + self.stop_event = threading.Event() + + self.adapters = [IcebergAdapter(c) for c in cfg.catalogs] + + # cache for expensive deep scans + self._deep_cache: Dict[Tuple[str, str, str], Tuple[float, DeepScanStats]] = {} + self._deep_cache_lock = threading.Lock() + + def run_forever(self) -> None: + while not self.stop_event.is_set(): + 
started = time.time() + success = 1 + try: + self.collect_once() + except Exception: + logger.exception("Exporter loop failed") + success = 0 + finally: + self.metrics.exporter_last_run_success.set(success) + self.metrics.exporter_last_run_timestamp.set(now_ts()) + self.metrics.exporter_loop_duration.set(time.time() - started) + + self.stop_event.wait(self.cfg.scrape_interval_seconds) + + def stop(self) -> None: + self.stop_event.set() + + def collect_once(self) -> None: + for adapter in self.adapters: + tables = self._discover_tables(adapter) + for table_ref in tables: + try: + self._collect_table(adapter, table_ref) + except Exception: + logger.exception("Failed to collect table %s.%s", adapter.name, table_ref.fqdn) + self.metrics.exporter_table_scrape_errors.labels( + adapter.name, table_ref.namespace, table_ref.table, table_ref.fqdn, "collect" + ).inc() + + def _discover_tables(self, adapter: IcebergAdapter) -> List[TableRef]: + d = self.cfg.discovery + if d.mode == "explicit": + return d.tables + if d.mode == "namespace_scan": + out: List[TableRef] = [] + for ns in d.namespaces: + try: + out.extend(adapter.list_tables(ns)) + except Exception: + logger.exception("Failed listing namespace %s in catalog %s", ns, adapter.name) + return out + raise ValueError(f"Unsupported discovery mode: {d.mode}") + + def _collect_table(self, adapter: IcebergAdapter, table_ref: TableRef) -> None: + table = adapter.load_table(table_ref) + lbl = labels(adapter.name, table_ref.namespace, table_ref.table) + + snapshots = self.extractor.get_snapshots(table) + current = snapshots[-1] if snapshots else None + if current is None: + logger.warning("No snapshots for %s.%s", adapter.name, table_ref.fqdn) + return + + # 1) current snapshot gauges + totals = self.extractor.get_current_totals_from_summary(current) + self.metrics.snapshot_total_data_files.labels(*lbl).set(totals["total_data_files"]) + self.metrics.snapshot_total_delete_files.labels(*lbl).set(totals["total_delete_files"]) + 
self.metrics.snapshot_total_records.labels(*lbl).set(totals["total_records"]) + self.metrics.snapshot_total_file_size_bytes.labels(*lbl).set(totals["total_file_size_bytes"]) + self.metrics.snapshot_total_position_delete_records.labels(*lbl).set(totals["total_position_delete_records"]) + self.metrics.snapshot_total_equality_delete_records.labels(*lbl).set(totals["total_equality_delete_records"]) + self.metrics.table_current_snapshot_id.labels(*lbl).set(current.snapshot_id) + self.metrics.table_current_sequence_number.labels(*lbl).set(current.sequence_number) + self.metrics.table_snapshot_age_seconds.labels(*lbl).set(max(0, now_ts() - (current.timestamp_ms // 1000))) + self.metrics.table_snapshot_count.labels(*lbl).set(len(snapshots)) + + # 2) increment counters only for unseen snapshots + for snap in snapshots: + if not self.snapshot_state.mark_if_new(adapter.name, table_ref.namespace, table_ref.table, snap.snapshot_id): + continue + + delta = self.extractor.get_snapshot_delta_from_summary(snap) + self.metrics.snapshot_added_records_total.labels(*lbl).inc(delta["added_records"]) + self.metrics.snapshot_added_data_files_total.labels(*lbl).inc(delta["added_data_files"]) + self.metrics.snapshot_added_delete_files_total.labels(*lbl).inc(delta["added_delete_files"]) + self.metrics.snapshot_added_files_size_bytes_total.labels(*lbl).inc(delta["added_files_size_bytes"]) + self.metrics.snapshot_removed_records_total.labels(*lbl).inc(delta["removed_records"]) + self.metrics.snapshot_removed_data_files_total.labels(*lbl).inc(delta["removed_data_files"]) + self.metrics.snapshot_removed_delete_files_total.labels(*lbl).inc(delta["removed_delete_files"]) + self.metrics.snapshot_removed_files_size_bytes_total.labels(*lbl).inc(delta["removed_files_size_bytes"]) + self.metrics.snapshot_changed_partition_count.labels(*lbl).set(delta["changed_partition_count"]) + self.metrics.snapshot_commits_total.labels(*lbl, snap.operation).inc() + + # rewrite inference (summary-only heuristic) + 
self._maybe_record_rewrite(lbl, snap, delta, totals) + + # 3) deep scan (cached) + deep = self._get_or_refresh_deep_scan(adapter, table_ref, table) + self._publish_deep_metrics(lbl, deep, totals) + + def _get_or_refresh_deep_scan(self, adapter: IcebergAdapter, table_ref: TableRef, table) -> DeepScanStats: + key = (adapter.name, table_ref.namespace, table_ref.table) + now = time.time() + + with self._deep_cache_lock: + cached = self._deep_cache.get(key) + if cached and (now - cached[0]) < self.cfg.deep_scan_interval_seconds: + return cached[1] + + deep = self.extractor.deep_scan(table, self.cfg.max_partitions_per_table) + + with self._deep_cache_lock: + self._deep_cache[key] = (now, deep) + + return deep + + def _publish_deep_metrics(self, lbl: Tuple[str, str, str, str], deep: DeepScanStats, totals: Dict[str, int]) -> None: + # partition count + self.metrics.table_partition_count.labels(*lbl).set(deep.active_partition_count) + + # per-partition gauges + for p in deep.partition_stats: + pl = (*lbl, p.partition_key) + self.metrics.partition_record_count.labels(*pl).set(p.record_count) + self.metrics.partition_file_count.labels(*pl).set(p.file_count) + self.metrics.partition_size_bytes.labels(*pl).set(p.size_bytes) + + # partition aggregates + part_records = [p.record_count for p in deep.partition_stats] + part_files = [p.file_count for p in deep.partition_stats] + part_sizes = [p.size_bytes for p in deep.partition_stats] + + self.metrics.partitions_min_record_count.labels(*lbl).set(min_or_zero(part_records)) + self.metrics.partitions_max_record_count.labels(*lbl).set(max_or_zero(part_records)) + self.metrics.partitions_avg_record_count.labels(*lbl).set(avg(part_records)) + self.metrics.partitions_min_file_count.labels(*lbl).set(min_or_zero(part_files)) + self.metrics.partitions_max_file_count.labels(*lbl).set(max_or_zero(part_files)) + self.metrics.partitions_avg_file_count.labels(*lbl).set(avg(part_files)) + 
self.metrics.partitions_min_size_bytes.labels(*lbl).set(min_or_zero(part_sizes)) + self.metrics.partitions_max_size_bytes.labels(*lbl).set(max_or_zero(part_sizes)) + self.metrics.partitions_avg_size_bytes.labels(*lbl).set(avg(part_sizes)) + + # file aggregates + fr = deep.file_record_counts + fs = deep.file_sizes + self.metrics.files_min_record_count.labels(*lbl).set(min_or_zero(fr)) + self.metrics.files_max_record_count.labels(*lbl).set(max_or_zero(fr)) + self.metrics.files_avg_record_count.labels(*lbl).set(avg(fr)) + self.metrics.files_min_size_bytes.labels(*lbl).set(min_or_zero(fs)) + self.metrics.files_max_size_bytes.labels(*lbl).set(max_or_zero(fs)) + self.metrics.files_avg_size_bytes.labels(*lbl).set(avg(fs)) + + small = self.cfg.thresholds.small_file_bytes + target = self.cfg.thresholds.target_file_bytes + small_count = sum(1 for x in fs if x < small) + large_count = sum(1 for x in fs if x > target) + self.metrics.files_small_file_count.labels(*lbl, str(small)).set(small_count) + self.metrics.files_large_file_count.labels(*lbl, str(target)).set(large_count) + + total_records = totals.get("total_records", 0) + total_data_files = max(totals.get("total_data_files", 0), 1) + total_delete_files = totals.get("total_delete_files", 0) + + self.metrics.files_avg_rows_per_file.labels(*lbl).set(total_records / total_data_files) + self.metrics.files_delete_file_ratio.labels(*lbl).set(total_delete_files / total_data_files) + + # compaction candidate + avg_file_size = avg(fs) + avg_files_per_partition = avg(part_files) + is_candidate = ( + (avg_file_size < self.cfg.thresholds.target_file_bytes if fs else False) + or (small_count > self.cfg.thresholds.small_file_count) + or ((total_delete_files / total_data_files) > self.cfg.thresholds.delete_file_ratio) + or (avg_files_per_partition > self.cfg.thresholds.avg_files_per_partition if part_files else False) + ) + self.metrics.table_compaction_candidate.labels(*lbl).set(1 if is_candidate else 0) + + def _maybe_record_rewrite( 
+ self, + lbl: Tuple[str, str, str, str], + snap: SnapshotSummary, + delta: Dict[str, int], + current_totals: Dict[str, int], + ) -> None: + operation = snap.operation + added_records = delta["added_records"] + removed_records = delta["removed_records"] + added_data_files = delta["added_data_files"] + removed_data_files = delta["removed_data_files"] + removed_bytes = delta["removed_files_size_bytes"] + + total_records = max(current_totals.get("total_records", 0), 1) + total_file_bytes = max(current_totals.get("total_file_size_bytes", 0), 1) + total_data_files = max(current_totals.get("total_data_files", 0), 1) + current_avg_file_size = total_file_bytes / total_data_files + + is_rewrite = ( + operation == "rewrite" + or ( + removed_data_files > 0 + and added_data_files > 0 + and abs(added_records - removed_records) <= max(1000, int(total_records * 0.01)) + ) + ) + + if not is_rewrite: + return + + self.metrics.rewrite_data_files_total.labels(*lbl).inc(removed_data_files) + self.metrics.rewrite_bytes_total.labels(*lbl).inc(removed_bytes) + self.metrics.rewrite_snapshots_total.labels(*lbl).inc() + self.metrics.rewrite_last_timestamp_seconds.labels(*lbl).set(snap.timestamp_ms // 1000) + self.metrics.rewrite_post_avg_file_size_bytes.labels(*lbl).set(current_avg_file_size) + + # heuristic estimate for removed small files: if rewrite removed files and current avg > threshold, count all removed as "small-ish" removed + if current_avg_file_size >= self.cfg.thresholds.target_file_bytes: + self.metrics.rewrite_removed_small_files_total.labels(*lbl).inc(removed_data_files) + + +# ============================================================ +# Config loading +# ============================================================ + +def load_config(path: str) -> ExporterConfig: + with open(path, "r", encoding="utf-8") as f: + raw = yaml.safe_load(f) + + catalogs = [] + for c in raw.get("catalogs", []): + extra = {k: v for k, v in c.items() if k not in {"name", "uri", "warehouse", 
"token", "credential"}} + catalogs.append( + CatalogConfig( + name=c["name"], + uri=c["uri"], + warehouse=c.get("warehouse"), + token=c.get("token"), + credential=c.get("credential"), + extra_properties=extra, + ) + ) + + discovery_raw = raw.get("discovery", {}) + tables = [TableRef(namespace=t["namespace"], table=t["table"]) for t in discovery_raw.get("tables", [])] + discovery = DiscoveryConfig( + mode=discovery_raw.get("mode", "explicit"), + namespaces=discovery_raw.get("namespaces", []), + tables=tables, + ) + + thresholds_raw = raw.get("thresholds", {}) + thresholds = ThresholdsConfig( + small_file_bytes=thresholds_raw.get("small_file_bytes", 32 * 1024 * 1024), + target_file_bytes=thresholds_raw.get("target_file_bytes", 128 * 1024 * 1024), + delete_file_ratio=thresholds_raw.get("delete_file_ratio", 0.2), + avg_files_per_partition=thresholds_raw.get("avg_files_per_partition", 20), + small_file_count=thresholds_raw.get("small_file_count", 100), + ) + + return ExporterConfig( + host=raw.get("host", "0.0.0.0"), + port=raw.get("port", 9109), + scrape_interval_seconds=raw.get("scrape_interval_seconds", 30), + deep_scan_interval_seconds=raw.get("deep_scan_interval_seconds", 600), + request_timeout_seconds=raw.get("request_timeout_seconds", 30), + max_partitions_per_table=raw.get("max_partitions_per_table", 5000), + catalogs=catalogs, + discovery=discovery, + thresholds=thresholds, + ) + + +# ============================================================ +# Main +# ============================================================ + +def main() -> int: + parser = argparse.ArgumentParser(description="Iceberg REST Catalog Prometheus Exporter") + parser.add_argument("--config", required=True, help="Path to config YAML") + args = parser.parse_args() + + cfg = load_config(args.config) + exporter = IcebergExporter(cfg) + + server, thread = start_http_server(cfg.port, addr=cfg.host) + logger.info("Serving metrics on http://%s:%s/metrics", cfg.host, cfg.port) + + def 
handle_signal(signum, frame): + logger.info("Received signal %s, shutting down", signum) + exporter.stop() + try: + server.shutdown() + server.server_close() + thread.join(timeout=5) + except Exception: + logger.exception("Error shutting down metrics server") + + signal.signal(signal.SIGINT, handle_signal) + signal.signal(signal.SIGTERM, handle_signal) + + exporter.run_forever() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/scripts/test_data_generator.py b/scripts/test_data_generator.py index d4e29c6..e598dd8 100644 --- a/scripts/test_data_generator.py +++ b/scripts/test_data_generator.py @@ -126,13 +126,7 @@ def main(): run_trino_query(trino_cur, query) logger.info(f"Deleted records from category {cat_to_delete} via Trino.") - # 3. Occasional Compaction (every 7 iterations) - if iteration > 0 and iteration % 7 == 0: - query = "ALTER TABLE iceberg.demo.events EXECUTE rewrite_data_files" - run_trino_query(trino_cur, query) - logger.info("Executed data file rewriting (compaction) via Trino.") - - # 4. Occasional Snapshot Expiration (every 10 iterations) + # 3. 
Occasional Snapshot Expiration (every 10 iterations) if iteration > 0 and iteration % 10 == 0: # Expire snapshots older than 1 minute for the demo query = "ALTER TABLE iceberg.demo.events EXECUTE expire_snapshots(retention_threshold => '1m')" diff --git a/src/monitor.py b/src/monitor.py index 4d32711..5e0c7d8 100644 --- a/src/monitor.py +++ b/src/monitor.py @@ -50,6 +50,13 @@ 'file_count': Gauge('iceberg_partition_file_count', 'File count per partition', ['table_name', 'partition_name']), } +# Track state between polls to make maintenance metrics cumulative +SNAPSHOT_STATE = {} # table_name -> last_seen_snapshot_id +MAINTENANCE_TOTALS = { + 'compacted_data_files': {}, # table_name -> total_count + 'compacted_files_size': {}, # table_name -> total_size +} + def safe_float(val, default=0.0): if val is None: return default @@ -101,35 +108,60 @@ def update_metrics(): logger.info(f"Processing table {table_name}") - table = catalog.load_table(table_identifier) - logger.info(f"Loaded table {table_name}") - - # Initialize labels to avoid 'no data' in Grafana + # Initialize state for new tables + if table_name not in MAINTENANCE_TOTALS['compacted_data_files']: + MAINTENANCE_TOTALS['compacted_data_files'][table_name] = 0.0 + MAINTENANCE_TOTALS['compacted_files_size'][table_name] = 0.0 + MAINTENANCE_METRICS['compacted_data_files'].labels(table_name=table_name).set(0) + MAINTENANCE_METRICS['compacted_files_size'].labels(table_name=table_name).set(0) + + # Initialize Snapshot Gauges to 0 (current state) for gauge in SNAPSHOT_METRICS.values(): gauge.labels(table_name=table_name).set(0) - for gauge in MAINTENANCE_METRICS.values(): - gauge.labels(table_name=table_name).set(0) + + # Explicitly reload to get latest metadata + table = catalog.load_table(table_identifier) # Snapshot metrics - snapshot = table.current_snapshot() - if snapshot and snapshot.summary: - logger.info(f"Snapshot summary for {table_name}: {snapshot.summary}") - for metric_key, gauge in 
SNAPSHOT_METRICS.items(): - summary_key = metric_key.replace('_', '-') - val = snapshot.summary.get(summary_key, 0) - gauge.labels(table_name=table_name).set(safe_float(val)) - - # Maintenance metrics from rewrite_data_files - compacted_files = snapshot.summary.get('removed-data-files', 0) - MAINTENANCE_METRICS['compacted_data_files'].labels(table_name=table_name).set(safe_float(compacted_files)) - - # Check multiple common keys for compaction size - if snapshot.summary.get('operation') in ['replace', 'delete', 'overwrite']: - # removed-files-size is most accurate for "what was compacted/cleaned" - size = snapshot.summary.get('removed-files-size') or snapshot.summary.get('added-files-size') or snapshot.summary.get('total-files-size') or 0 - MAINTENANCE_METRICS['compacted_files_size'].labels(table_name=table_name).set(safe_float(size)) - else: - logger.info(f"No current snapshot found for {table_name}") + try: + snapshot = table.current_snapshot() + if snapshot: + summary = snapshot.summary + op_str = str(summary.get('operation', 'unknown')).lower() + + # Only process maintenance if it's a NEW snapshot + last_id = SNAPSHOT_STATE.get(table_name) + if last_id != snapshot.snapshot_id: + logger.info(f"New snapshot detected for {table_name}: {snapshot.snapshot_id} (Op: {op_str})") + + # Increment maintenance totals if applicable + compact_files = safe_float(summary.get('removed-data-files') or summary.get('deleted-data-files') or 0) + + is_maint = 'replace' in op_str or 'overwrite' in op_str or 'delete' in op_str + size = 0.0 + if is_maint: + size = safe_float(summary.get('removed-files-size') or summary.get('deleted-files-size') or summary.get('added-files-size') or 0) + + if compact_files > 0 or size > 0: + MAINTENANCE_TOTALS['compacted_data_files'][table_name] += compact_files + MAINTENANCE_TOTALS['compacted_files_size'][table_name] += size + logger.info(f"UPDATED MAINTENANCE TOTALS for {table_name}: +{compact_files} files, +{size} bytes") + + 
SNAPSHOT_STATE[table_name] = snapshot.snapshot_id + + # Always set current snapshot metrics + for metric_key, gauge in SNAPSHOT_METRICS.items(): + summary_key = metric_key.replace('_', '-') + val = summary.get(summary_key, 0) + gauge.labels(table_name=table_name).set(safe_float(val)) + + # Always set the cumulative maintenance gauges + MAINTENANCE_METRICS['compacted_data_files'].labels(table_name=table_name).set(MAINTENANCE_TOTALS['compacted_data_files'][table_name]) + MAINTENANCE_METRICS['compacted_files_size'].labels(table_name=table_name).set(MAINTENANCE_TOTALS['compacted_files_size'][table_name]) + else: + logger.info(f"No snapshots found for {table_name}") + except Exception as e: + logger.error(f"Error updating snapshot metrics for {table_name}: {e}") # File and Partition metrics via pyiceberg inspect try: @@ -165,7 +197,4 @@ def update_metrics(): logger.error(f"Could not inspect table {table_name}: {e}") except Exception as e: - logger.error(f"Error updating metrics: {e}") - - except Exception as e: - logger.error(f"Error updating metrics: {e}") + logger.error(f"Error in update_metrics: {e}")