From 0333e0b7ec44cf0c0b45c28ecf039e4ed6b3f8f9 Mon Sep 17 00:00:00 2001 From: Harish Kotra Date: Tue, 14 Apr 2026 19:49:27 +0530 Subject: [PATCH 1/5] Fixes #27348: handle Pinot DOUBLE mapping across SQLAlchemy versions --- .../ingestion/source/database/pinotdb/metadata.py | 8 +++++++- ingestion/tests/unit/topology/database/test_pinotdb.py | 7 +++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py b/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py index a8829cabcb2f..3156b5cd419b 100644 --- a/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py @@ -13,6 +13,7 @@ from pinotdb import sqlalchemy as pinot_sqlalchemy from sqlalchemy import types +from sqlalchemy.sql import sqltypes from metadata.generated.schema.entity.services.connections.database.pinotDBConnection import ( PinotDBConnection, @@ -26,11 +27,16 @@ def get_type_custom(data_type, field_size): + # SQLAlchemy 1.4 does not expose DOUBLE in sqlalchemy.types, but + # pinotdb returns "double". Prefer DOUBLE when available, then fall back + # to sqltypes.DOUBLE, and finally Float to avoid runtime crashes. + double_type = getattr(types, "DOUBLE", getattr(sqltypes, "DOUBLE", types.Float)) + type_map = { "int": types.BigInteger, "long": types.BigInteger, "float": types.Float, - "double": types.DOUBLE, + "double": double_type, # BOOLEAN, is added after release 0.7.1. # In release 0.7.1 and older releases, BOOLEAN is equivalent to STRING. "boolean": types.Boolean, diff --git a/ingestion/tests/unit/topology/database/test_pinotdb.py b/ingestion/tests/unit/topology/database/test_pinotdb.py index 4938d4651785..562d5fdbf3a7 100644 --- a/ingestion/tests/unit/topology/database/test_pinotdb.py +++ b/ingestion/tests/unit/topology/database/test_pinotdb.py @@ -33,7 +33,6 @@ def _resolve(pinot_type: str) -> str: @pytest.mark.parametrize( "pinot_type, expected_om_type", [ - ("double", "DOUBLE"), ("float", "FLOAT"), ("int", "BIGINT"), ("long", "BIGINT"), @@ -49,8 +48,8 @@ def test_pinot_type_mapping(pinot_type, expected_om_type): assert _resolve(pinot_type) == expected_om_type -def test_double_not_mapped_to_int(): - """Explicit regression test: Pinot DOUBLE must never resolve to INT.""" +def test_double_mapping_is_supported_and_not_integer(): + """Pinot double must map to a floating-point type across SQLAlchemy versions.""" result = _resolve("double") + assert result in {"DOUBLE", "FLOAT"} assert result != "INT", "Pinot DOUBLE is incorrectly mapped to INT" - assert result == "DOUBLE" From 2578139d5befc88729e4d27372b9cd024eee7c4a Mon Sep 17 00:00:00 2001 From: Harish Kotra Date: Tue, 14 Apr 2026 23:17:52 +0530 Subject: [PATCH 2/5] Apply review suggestions for Pinot DOUBLE mapping PR --- .../ingestion/source/database/pinotdb/metadata.py | 13 +++++++------ .../tests/unit/topology/database/test_pinotdb.py | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py b/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py index 3156b5cd419b..c5d4cc5ee8d7 100644 --- a/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py @@ -26,17 +26,18 @@ from metadata.ingestion.source.database.common_db_source import CommonDbSourceService -def get_type_custom(data_type, field_size): - # SQLAlchemy 1.4 does not expose DOUBLE in sqlalchemy.types, but - # pinotdb returns "double". Prefer DOUBLE when available, then fall back - # to sqltypes.DOUBLE, and finally Float to avoid runtime crashes. - double_type = getattr(types, "DOUBLE", getattr(sqltypes, "DOUBLE", types.Float)) +DOUBLE_TYPE = getattr(types, "DOUBLE", getattr(sqltypes, "DOUBLE", types.Float)) + +def get_type_custom(data_type, field_size): type_map = { "int": types.BigInteger, "long": types.BigInteger, "float": types.Float, - "double": double_type, + # SQLAlchemy 1.4 does not expose DOUBLE in sqlalchemy.types, but + # pinotdb returns "double". Prefer DOUBLE when available, then fall back + # to sqltypes.DOUBLE, and finally Float to avoid runtime crashes. + "double": DOUBLE_TYPE, # BOOLEAN, is added after release 0.7.1. # In release 0.7.1 and older releases, BOOLEAN is equivalent to STRING. "boolean": types.Boolean, diff --git a/ingestion/tests/unit/topology/database/test_pinotdb.py b/ingestion/tests/unit/topology/database/test_pinotdb.py index 562d5fdbf3a7..7270d0619d47 100644 --- a/ingestion/tests/unit/topology/database/test_pinotdb.py +++ b/ingestion/tests/unit/topology/database/test_pinotdb.py @@ -51,5 +51,5 @@ def test_pinot_type_mapping(pinot_type, expected_om_type): def test_double_mapping_is_supported_and_not_integer(): """Pinot double must map to a floating-point type across SQLAlchemy versions.""" result = _resolve("double") - assert result in {"DOUBLE", "FLOAT"} assert result != "INT", "Pinot DOUBLE is incorrectly mapped to INT" + assert result in {"DOUBLE", "FLOAT"} From 8c4b88ccf65072113d58f696e4b26af81711c4fc Mon Sep 17 00:00:00 2001 From: Harish Kotra Date: Tue, 14 Apr 2026 19:55:53 +0530 Subject: [PATCH 3/5] Add scheduled workflow to sync fork main with upstream --- .github/workflows/sync-fork-main.yml | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/sync-fork-main.yml diff --git a/.github/workflows/sync-fork-main.yml b/.github/workflows/sync-fork-main.yml new file mode 100644 index 000000000000..f0e6c9fea794 --- /dev/null +++ b/.github/workflows/sync-fork-main.yml @@ -0,0 +1,39 @@ +name: Sync Fork Main + +on: + schedule: + - cron: "15 * * * *" + workflow_dispatch: + +permissions: + contents: write + +jobs: + sync: + runs-on: ubuntu-latest + steps: + - name: Checkout fork main + uses: actions/checkout@v4 + with: + ref: main + fetch-depth: 0 + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + - name: Fetch upstream main + run: | + git remote add upstream https://github.com/open-metadata/OpenMetadata.git || true + git fetch upstream main + + - name: Merge upstream into fork main + run: | + if git merge-base --is-ancestor upstream/main HEAD; then + echo "Already up to date with upstream/main" + exit 0 + fi + + git merge --no-edit upstream/main + git push origin HEAD:main From a65568c9c578c3b186efb97c9c2b9be47e69e92b Mon Sep 17 00:00:00 2001 From: Harish Kotra Date: Tue, 14 Apr 2026 23:32:58 +0530 Subject: [PATCH 4/5] Remove fork upstream sync workflow --- .github/workflows/sync-fork-main.yml | 39 ---------------------------- 1 file changed, 39 deletions(-) delete mode 100644 .github/workflows/sync-fork-main.yml diff --git a/.github/workflows/sync-fork-main.yml b/.github/workflows/sync-fork-main.yml deleted file mode 100644 index f0e6c9fea794..000000000000 --- a/.github/workflows/sync-fork-main.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: Sync Fork Main - -on: - schedule: - - cron: "15 * * * *" - workflow_dispatch: - -permissions: - contents: write - -jobs: - sync: - runs-on: ubuntu-latest - steps: - - name: Checkout fork main - uses: actions/checkout@v4 - with: - ref: main - fetch-depth: 0 - - - name: Configure git - run: | - git config user.name "github-actions[bot]" - git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - - - name: Fetch upstream main - run: | - git remote add upstream https://github.com/open-metadata/OpenMetadata.git || true - git fetch upstream main - - - name: Merge upstream into fork main - run: | - if git merge-base --is-ancestor upstream/main HEAD; then - echo "Already up to date with upstream/main" - exit 0 - fi - - git merge --no-edit upstream/main - git push origin HEAD:main From 44945689dc1d0710db3ae5f9f970396a67b77e0a Mon Sep 17 00:00:00 2001 From: Harish Kotra Date: Thu, 16 Apr 2026 12:59:15 +0530 Subject: [PATCH 5/5] Apply Python formatting for Pinot fix branch --- .../src/metadata/ingestion/source/database/pinotdb/metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py b/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py index c5d4cc5ee8d7..40ab7692e93c 100644 --- a/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py @@ -25,7 +25,6 @@ from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.database.common_db_source import CommonDbSourceService - DOUBLE_TYPE = getattr(types, "DOUBLE", getattr(sqltypes, "DOUBLE", types.Float))