Skip to content

Commit 44cf997

Browse files
committed
[GR-71176] Add Oracle DB direct path load macro benchmark
PullRequest: graalpython/4551
2 parents 21df2c0 + 82f6417 commit 44cf997

4 files changed

Lines changed: 110 additions & 28 deletions

File tree

ci.jsonnet

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,47 @@
7575
local watchdog = self.watchdog,
7676
local bench_task(bench=null, benchmarks=BENCHMARKS) = super.bench_task(bench=bench, benchmarks=benchmarks),
7777
local bisect_bench_task = self.bisect_bench_task,
78+
local oracledb_free_image = "container-registry.oracle.com/database/free:23.26.0.0",
79+
local oracledb_extra_index_urls = std.join(" ", [
80+
"https://ol-graal.oraclecorp.com/mt_data/graalpy-25.0-repository/",
81+
"https://artifactory.oci.oraclecorp.com/api/pypi/graalpy-wheels-internal-patches-dev-pypi-local/simple",
82+
$.overlay_imports.PIP_EXTRA_INDEX_URL,
83+
]),
84+
local oracledb_bench_env = task_spec({
85+
cacheVenv: false,
86+
capabilities +: ["pinp"],
87+
environment +: {
88+
GRAALPY_ORACLEDB_QUIET_SECONDS: "60",
89+
GRAALPY_ORACLEDB_WAIT_TIMEOUT: "600",
90+
PYO_TEST_ADMIN_PASSWORD: "graalpy",
91+
PYO_TEST_ADMIN_USER: "SYSTEM",
92+
PYO_TEST_CONNECT_STRING: "127.0.0.1:1521/FREEPDB1",
93+
PIP_EXTRA_INDEX_URL: oracledb_extra_index_urls,
94+
PIP_ONLY_BINARY: ":all:",
95+
},
96+
setup: [
97+
[
98+
"podman", "run",
99+
"--detach",
100+
"--replace",
101+
"--name", "graalpy-oracledb",
102+
"-p", "1521:1521",
103+
"-e", "ORACLE_PWD=graalpy",
104+
oracledb_free_image,
105+
],
106+
] + super.setup,
107+
teardown +: [
108+
["podman", "rm", "--force", "graalpy-oracledb"],
109+
],
110+
evaluate_late +:: {
111+
z_oracledb_podman: function(builder) {
112+
docker: {
113+
image: "buildslave_ol8_podman_rootless",
114+
mount_modules: true,
115+
},
116+
},
117+
},
118+
}),
78119

79120
local native_debug_build_env = task_spec({
80121
environment +: {
@@ -310,6 +351,10 @@
310351
"vm_name:pypy" : {"linux:amd64:jdk-latest" : on_demand + t("04:00:00")},
311352
}),
312353
for bench in ["micro", "meso", "macro"]
354+
} + {
355+
"macro_oracledb": bench_task("macro_oracledb") + platform_spec(no_jobs) + bench_variants({
356+
"vm_name:graalpython_enterprise" : {"linux:amd64:jdk-latest" : on_demand + t("02:00:00")},
357+
}) + oracledb_bench_env,
313358
} + {
314359
[bench]: bench_task(bench) + platform_spec(no_jobs) + bench_variants({
315360
"vm_name:graalvm_ee_default" : {"linux:amd64:jdk-latest" : post_merge + t("08:00:00") + need_pgo},

ci/python-bench.libsonnet

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,8 @@
2727
micro: "micro-graalpython:*",
2828
micro_native: "micro-native-graalpython:*",
2929
meso: "meso-graalpython:*",
30-
macro: "macro-graalpython:*",
30+
macro: "macro-graalpython:~c-oracledb-load",
31+
macro_oracledb: "macro-graalpython:c-oracledb-load",
3132
interop: "interop-graalpython:*",
3233
warmup: "python-warmup-graalpython:*",
3334
micro_small: "micro-small-graalpython:*",

graalpython/com.oracle.graal.python.benchmarks/python/macro/c-oracledb-load.py

Lines changed: 62 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -61,39 +61,39 @@
6161
# python blog_load.py 500000
6262
#
6363
# Install:
64-
# python -m pip install oracledb pyarrow sqlalchemy pandas
64+
# python -m pip install oracledb pyarrow
6565
# Requires python-oracledb 3.4+
6666

6767

68-
ensure_packages(oracledb="3.4.1", pandas="2.2.3", pyarrow="20.0.0", sqlalchemy="2.0.45")
68+
ensure_packages(numpy="2.2.6", cryptography="45.0.7", oracledb="3.4.2", pyarrow="20.0.0")
6969

7070
import csv
7171
from datetime import datetime
7272
import getpass
7373
import os
7474
import sys
75+
import tempfile
7576
import time
7677

7778
import pyarrow.csv
78-
from sqlalchemy import create_engine
79-
import pandas
8079

8180
import oracledb
8281

8382
# startup database with
84-
# $ podman run --detach --replace --name oracledb -p 1521:1521 -e ORACLE_PWD=graalpy container-registry.oracle.com/database/free:latest
85-
USERNAME = 'system'
86-
CONNECTSTRING = 'localhost:1521/freepdb1'
87-
PASSWORD = "graalpy"
83+
# $ podman run --detach --replace --name oracledb -p 1521:1521 -e ORACLE_PWD=graalpy \
84+
# container-registry.oracle.com/database/free:23.26.0.0
85+
USERNAME = os.environ.get("PYO_TEST_ADMIN_USER", "system")
86+
CONNECTSTRING = os.environ.get("PYO_TEST_CONNECT_STRING", "127.0.0.1:1521/FREEPDB1")
87+
PASSWORD = os.environ.get("PYO_TEST_ADMIN_PASSWORD", "graalpy")
8888

8989
# -----------------------------------------------------------------------------
9090

91-
FILE_NAME = os.path.join(os.path.dirname(__file__), "sample.csv")
91+
FILE_NAME = os.path.join(tempfile.gettempdir(), "graalpy-c-oracledb-load-sample.csv")
92+
BATCH_SIZE = 2_000_000
93+
TABLES = ["mytabpya", "mytabdpl", "mytabpyaem", "mytabem", "mytabpd"]
9294

93-
if (len(sys.argv) > 1):
94-
BATCH_SIZE = int(sys.argv[1])
95-
else:
96-
BATCH_SIZE = 2_000_000
95+
def __process_args__(batch_size=BATCH_SIZE):
96+
return [int(str(batch_size).replace("_", ""))]
9797

9898
# -----------------------------------------------------------------------------
9999

@@ -148,6 +148,10 @@ def compare(connection, t1, t2):
148148
def pd(tab):
149149
print("\nPandas read_csv() - Pandas to_sql()")
150150

151+
ensure_packages(pandas="2.2.3", sqlalchemy="2.0.45")
152+
import pandas
153+
from sqlalchemy import create_engine
154+
151155
engine = create_engine(
152156
"oracle+oracledb://@",
153157
connect_args={
@@ -326,7 +330,10 @@ def pya(connection, tab):
326330
BLOCK_SIZE = 0
327331
CONNECTION = None
328332

329-
def __setup__(*args):
333+
def __setup__(batch_size=BATCH_SIZE):
334+
global BATCH_SIZE
335+
BATCH_SIZE = batch_size
336+
330337
# blog_create.py
331338
#
332339
# christopher.jones@oracle.com, 2025
@@ -356,32 +363,49 @@ def __setup__(*args):
356363

357364
global BLOCK_SIZE, CONNECTION
358365
BLOCK_SIZE = len(max(open(FILE_NAME, 'r'), key=len)) * BATCH_SIZE
359-
CONNECTION = oracledb.connect(user=USERNAME, password=PASSWORD, dsn=CONNECTSTRING)
360-
361-
362-
def __benchmark__(num=1):
363-
assert num == 1
364-
t1 = "mytabpya"
366+
timeout = float(os.environ.get("GRAALPY_ORACLEDB_WAIT_TIMEOUT", "0"))
367+
deadline = time.monotonic() + timeout
368+
attempt = 0
369+
while True:
370+
try:
371+
CONNECTION = oracledb.connect(user=USERNAME, password=PASSWORD, dsn=CONNECTSTRING)
372+
break
373+
except oracledb.Error:
374+
if time.monotonic() >= deadline:
375+
raise
376+
attempt += 1
377+
print(f"Waiting for Oracle Database at {CONNECTSTRING} (attempt {attempt})")
378+
time.sleep(5)
379+
380+
quiet_seconds = float(os.environ.get("GRAALPY_ORACLEDB_QUIET_SECONDS", "0"))
381+
if quiet_seconds > 0:
382+
print(f"Waiting {quiet_seconds:g} seconds for Oracle Database to settle")
383+
time.sleep(quiet_seconds)
384+
385+
386+
def __benchmark__(batch_size=BATCH_SIZE):
387+
assert batch_size == BATCH_SIZE
388+
t1 = TABLES[0]
365389
createtab(CONNECTION, t1)
366390
pya(CONNECTION, t1)
367391
checkrowcount(CONNECTION, t1)
368392

369-
t2 = "mytabdpl"
393+
t2 = TABLES[1]
370394
# createtab(CONNECTION, t2)
371395
# dpl(CONNECTION, t2)
372396
# checkrowcount(CONNECTION, t2)
373397

374-
t3 = "mytabpyaem"
398+
t3 = TABLES[2]
375399
# createtab(CONNECTION, t3)
376400
# pyaem(CONNECTION, t3)
377401
# checkrowcount(CONNECTION, t3)
378402

379-
t4 = "mytabem"
403+
t4 = TABLES[3]
380404
# createtab(CONNECTION, t4)
381405
# em(CONNECTION, t4)
382406
# checkrowcount(CONNECTION, t4)
383407

384-
t5 = "mytabpd"
408+
t5 = TABLES[4]
385409
# createtab(CONNECTION, t5)
386410
# pd(t5)
387411
# checkrowcount(CONNECTION, t5)
@@ -391,12 +415,23 @@ def __benchmark__(num=1):
391415

392416

393417
def __cleanup__(*args):
394-
droptabs(CONNECTION, [t1, t2, t3, t4, t5])
418+
if CONNECTION is not None:
419+
droptabs(CONNECTION, TABLES)
420+
421+
422+
def __teardown__():
423+
global CONNECTION
424+
if CONNECTION is not None:
425+
CONNECTION.close()
426+
CONNECTION = None
395427

396428

397429
if __name__ == "__main__":
398-
__setup__()
430+
if len(sys.argv) > 1:
431+
BATCH_SIZE = int(sys.argv[1])
432+
__setup__(BATCH_SIZE)
399433
print("\nCompare end-to-end times for reading a "
400434
"CSV file (number, date, string) in chunks and inserting into the Database")
401-
__benchmark__(1)
435+
__benchmark__(BATCH_SIZE)
402436
__cleanup__()
437+
__teardown__()

mx.graalpython/mx_graalpython_bench_param.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -306,6 +306,7 @@ def _pickling_benchmarks(module='pickle'):
306306
'gcbench': ITER_10 + ['10'],
307307
'c-pydantic-validate': ITER_10 + ['200000'],
308308
'c-pymupdf-parse': ITER_10 + ['1'],
309+
'c-oracledb-load': ITER_5 + ['2000000'],
309310
}
310311

311312

0 commit comments

Comments
 (0)