[GR-71176] Add Oracle DB direct path load macro benchmark

timfel · timfel · commit 44cf9974ea41 · 2026-05-21T08:12:33.000Z
PullRequest: graalpython/4551
diff --git a/ci.jsonnet b/ci.jsonnet
@@ -75,6 +75,47 @@
     local watchdog              = self.watchdog,
     local bench_task(bench=null, benchmarks=BENCHMARKS) = super.bench_task(bench=bench, benchmarks=benchmarks),
     local bisect_bench_task     = self.bisect_bench_task,
+    local oracledb_free_image    = "container-registry.oracle.com/database/free:23.26.0.0",
+    local oracledb_extra_index_urls = std.join(" ", [
+        "https://ol-graal.oraclecorp.com/mt_data/graalpy-25.0-repository/",
+        "https://artifactory.oci.oraclecorp.com/api/pypi/graalpy-wheels-internal-patches-dev-pypi-local/simple",
+        $.overlay_imports.PIP_EXTRA_INDEX_URL,
+    ]),
+    local oracledb_bench_env     = task_spec({
+        cacheVenv: false,
+        capabilities +: ["pinp"],
+        environment +: {
+            GRAALPY_ORACLEDB_QUIET_SECONDS: "60",
+            GRAALPY_ORACLEDB_WAIT_TIMEOUT: "600",
+            PYO_TEST_ADMIN_PASSWORD: "graalpy",
+            PYO_TEST_ADMIN_USER: "SYSTEM",
+            PYO_TEST_CONNECT_STRING: "127.0.0.1:1521/FREEPDB1",
+            PIP_EXTRA_INDEX_URL: oracledb_extra_index_urls,
+            PIP_ONLY_BINARY: ":all:",
+        },
+        setup: [
+            [
+                "podman", "run",
+                "--detach",
+                "--replace",
+                "--name", "graalpy-oracledb",
+                "-p", "1521:1521",
+                "-e", "ORACLE_PWD=graalpy",
+                oracledb_free_image,
+            ],
+        ] + super.setup,
+        teardown +: [
+            ["podman", "rm", "--force", "graalpy-oracledb"],
+        ],
+        evaluate_late +:: {
+            z_oracledb_podman: function(builder) {
+                docker: {
+                    image: "buildslave_ol8_podman_rootless",
+                    mount_modules: true,
+                },
+            },
+        },
+    }),
 
     local native_debug_build_env = task_spec({
        environment +: {
@@ -310,6 +351,10 @@
             "vm_name:pypy"                                              : {"linux:amd64:jdk-latest" : on_demand    + t("04:00:00")},
         }),
         for bench in ["micro", "meso", "macro"]
+    } + {
+        "macro_oracledb": bench_task("macro_oracledb") + platform_spec(no_jobs) + bench_variants({
+            "vm_name:graalpython_enterprise"                            : {"linux:amd64:jdk-latest" : on_demand  + t("02:00:00")},
+        }) + oracledb_bench_env,
     } + {
         [bench]: bench_task(bench) + platform_spec(no_jobs) + bench_variants({
             "vm_name:graalvm_ee_default"                                : {"linux:amd64:jdk-latest" : post_merge + t("08:00:00") + need_pgo},
diff --git a/ci/python-bench.libsonnet b/ci/python-bench.libsonnet
@@ -27,7 +27,8 @@
         micro: "micro-graalpython:*",
         micro_native: "micro-native-graalpython:*",
         meso: "meso-graalpython:*",
-        macro: "macro-graalpython:*",
+        macro: "macro-graalpython:~c-oracledb-load",
+        macro_oracledb: "macro-graalpython:c-oracledb-load",
         interop: "interop-graalpython:*",
         warmup: "python-warmup-graalpython:*",
         micro_small: "micro-small-graalpython:*",
diff --git a/graalpython/com.oracle.graal.python.benchmarks/python/macro/c-oracledb-load.py b/graalpython/com.oracle.graal.python.benchmarks/python/macro/c-oracledb-load.py
@@ -61,39 +61,39 @@
 #  python blog_load.py 500000
 #
 # Install:
-#   python -m pip install oracledb pyarrow sqlalchemy pandas
+#   python -m pip install oracledb pyarrow
 # Requires python-oracledb 3.4+
 
 
-ensure_packages(oracledb="3.4.1", pandas="2.2.3", pyarrow="20.0.0", sqlalchemy="2.0.45")
+ensure_packages(numpy="2.2.6", cryptography="45.0.7", oracledb="3.4.2", pyarrow="20.0.0")
 
 import csv
 from datetime import datetime
 import getpass
 import os
 import sys
+import tempfile
 import time
 
 import pyarrow.csv
-from sqlalchemy import create_engine
-import pandas
 
 import oracledb
 
 # startup database with
-# $ podman run --detach --replace --name oracledb -p 1521:1521 -e ORACLE_PWD=graalpy  container-registry.oracle.com/database/free:latest
-USERNAME = 'system'
-CONNECTSTRING = 'localhost:1521/freepdb1'
-PASSWORD = "graalpy"
+# $ podman run --detach --replace --name oracledb -p 1521:1521 -e ORACLE_PWD=graalpy \
+#       container-registry.oracle.com/database/free:23.26.0.0
+USERNAME = os.environ.get("PYO_TEST_ADMIN_USER", "system")
+CONNECTSTRING = os.environ.get("PYO_TEST_CONNECT_STRING", "127.0.0.1:1521/FREEPDB1")
+PASSWORD = os.environ.get("PYO_TEST_ADMIN_PASSWORD", "graalpy")
 
 # -----------------------------------------------------------------------------
 
-FILE_NAME = os.path.join(os.path.dirname(__file__), "sample.csv")
+FILE_NAME = os.path.join(tempfile.gettempdir(), "graalpy-c-oracledb-load-sample.csv")
+BATCH_SIZE = 2_000_000
+TABLES = ["mytabpya", "mytabdpl", "mytabpyaem", "mytabem", "mytabpd"]
 
-if (len(sys.argv) > 1):
-    BATCH_SIZE = int(sys.argv[1])
-else:
-    BATCH_SIZE = 2_000_000
+def __process_args__(batch_size=BATCH_SIZE):
+    return [int(str(batch_size).replace("_", ""))]
 
 # -----------------------------------------------------------------------------
 
@@ -148,6 +148,10 @@ def compare(connection, t1, t2):
 def pd(tab):
     print("\nPandas read_csv() - Pandas to_sql()")
 
+    ensure_packages(pandas="2.2.3", sqlalchemy="2.0.45")
+    import pandas
+    from sqlalchemy import create_engine
+
     engine = create_engine(
         "oracle+oracledb://@",
         connect_args={
@@ -326,7 +330,10 @@ def pya(connection, tab):
 BLOCK_SIZE = 0
 CONNECTION = None
 
-def __setup__(*args):
+def __setup__(batch_size=BATCH_SIZE):
+    global BATCH_SIZE
+    BATCH_SIZE = batch_size
+
     # blog_create.py
     #
     # christopher.jones@oracle.com, 2025
@@ -356,32 +363,49 @@ def __setup__(*args):
 
     global BLOCK_SIZE, CONNECTION
     BLOCK_SIZE = len(max(open(FILE_NAME, 'r'), key=len)) * BATCH_SIZE
-    CONNECTION = oracledb.connect(user=USERNAME, password=PASSWORD, dsn=CONNECTSTRING)
-
-
-def __benchmark__(num=1):
-    assert num == 1
-    t1 = "mytabpya"
+    timeout = float(os.environ.get("GRAALPY_ORACLEDB_WAIT_TIMEOUT", "0"))
+    deadline = time.monotonic() + timeout
+    attempt = 0
+    while True:
+        try:
+            CONNECTION = oracledb.connect(user=USERNAME, password=PASSWORD, dsn=CONNECTSTRING)
+            break
+        except oracledb.Error:
+            if time.monotonic() >= deadline:
+                raise
+            attempt += 1
+            print(f"Waiting for Oracle Database at {CONNECTSTRING} (attempt {attempt})")
+            time.sleep(5)
+
+    quiet_seconds = float(os.environ.get("GRAALPY_ORACLEDB_QUIET_SECONDS", "0"))
+    if quiet_seconds > 0:
+        print(f"Waiting {quiet_seconds:g} seconds for Oracle Database to settle")
+        time.sleep(quiet_seconds)
+
+
+def __benchmark__(batch_size=BATCH_SIZE):
+    assert batch_size == BATCH_SIZE
+    t1 = TABLES[0]
     createtab(CONNECTION, t1)
     pya(CONNECTION, t1)
     checkrowcount(CONNECTION, t1)
 
-    t2 = "mytabdpl"
+    t2 = TABLES[1]
     # createtab(CONNECTION, t2)
     # dpl(CONNECTION, t2)
     # checkrowcount(CONNECTION, t2)
 
-    t3 = "mytabpyaem"
+    t3 = TABLES[2]
     # createtab(CONNECTION, t3)
     # pyaem(CONNECTION, t3)
     # checkrowcount(CONNECTION, t3)
 
-    t4 = "mytabem"
+    t4 = TABLES[3]
     # createtab(CONNECTION, t4)
     # em(CONNECTION, t4)
     # checkrowcount(CONNECTION, t4)
 
-    t5 = "mytabpd"
+    t5 = TABLES[4]
     # createtab(CONNECTION, t5)
     # pd(t5)
     # checkrowcount(CONNECTION, t5)
@@ -391,12 +415,23 @@ def __benchmark__(num=1):
 
 
 def __cleanup__(*args):
-    droptabs(CONNECTION, [t1, t2, t3, t4, t5])
+    if CONNECTION is not None:
+        droptabs(CONNECTION, TABLES)
+
+
+def __teardown__():
+    global CONNECTION
+    if CONNECTION is not None:
+        CONNECTION.close()
+        CONNECTION = None
 
 
 if __name__ == "__main__":
-    __setup__()
+    if len(sys.argv) > 1:
+        BATCH_SIZE = int(sys.argv[1])
+    __setup__(BATCH_SIZE)
     print("\nCompare end-to-end times for reading a "
           "CSV file (number, date, string) in chunks and inserting into the Database")
-    __benchmark__(1)
+    __benchmark__(BATCH_SIZE)
     __cleanup__()
+    __teardown__()
diff --git a/mx.graalpython/mx_graalpython_bench_param.py b/mx.graalpython/mx_graalpython_bench_param.py
@@ -306,6 +306,7 @@ def _pickling_benchmarks(module='pickle'):
     'gcbench': ITER_10 + ['10'],
     'c-pydantic-validate': ITER_10 + ['200000'],
     'c-pymupdf-parse': ITER_10 + ['1'],
+    'c-oracledb-load': ITER_5 + ['2000000'],
 }
 
 

Original file line number	Diff line number	Diff line change
`@@ -306,6 +306,7 @@ def _pickling_benchmarks(module='pickle'):`
`306`	`306`	`'gcbench': ITER_10 + ['10'],`
`307`	`307`	`'c-pydantic-validate': ITER_10 + ['200000'],`
`308`	`308`	`'c-pymupdf-parse': ITER_10 + ['1'],`
	`309`	`+ 'c-oracledb-load': ITER_5 + ['2000000'],`
`309`	`310`	`}`
`310`	`311`
`311`	`312`