6161# python blog_load.py 500000
6262#
6363# Install:
64- # python -m pip install oracledb pyarrow sqlalchemy pandas
64+ # python -m pip install oracledb pyarrow
6565# Requires python-oracledb 3.4+
6666
6767
68- ensure_packages (oracledb = "3.4.1 " , pandas = "2.2.3 " , pyarrow = "20.0.0 " , sqlalchemy = "2 .0.45 " )
68+ ensure_packages (numpy = "2.2.6 " , cryptography = "45.0.7 " , oracledb = "3.4.2 " , pyarrow = "20 .0.0 " )
6969
7070import csv
7171from datetime import datetime
7272import getpass
7373import os
7474import sys
75+ import tempfile
7576import time
7677
7778import pyarrow .csv
78- from sqlalchemy import create_engine
79- import pandas
8079
8180import oracledb
8281
8382# startup database with
84- # $ podman run --detach --replace --name oracledb -p 1521:1521 -e ORACLE_PWD=graalpy container-registry.oracle.com/database/free:latest
85- USERNAME = 'system'
86- CONNECTSTRING = 'localhost:1521/freepdb1'
87- PASSWORD = "graalpy"
83+ # $ podman run --detach --replace --name oracledb -p 1521:1521 -e ORACLE_PWD=graalpy \
84+ # container-registry.oracle.com/database/free:23.26.0.0
85+ USERNAME = os .environ .get ("PYO_TEST_ADMIN_USER" , "system" )
86+ CONNECTSTRING = os .environ .get ("PYO_TEST_CONNECT_STRING" , "127.0.0.1:1521/FREEPDB1" )
87+ PASSWORD = os .environ .get ("PYO_TEST_ADMIN_PASSWORD" , "graalpy" )
8888
8989# -----------------------------------------------------------------------------
9090
91- FILE_NAME = os .path .join (os .path .dirname (__file__ ), "sample.csv" )
91+ FILE_NAME = os .path .join (tempfile .gettempdir (), "graalpy-c-oracledb-load-sample.csv" )
92+ BATCH_SIZE = 2_000_000
93+ TABLES = ["mytabpya" , "mytabdpl" , "mytabpyaem" , "mytabem" , "mytabpd" ]
9294
93- if (len (sys .argv ) > 1 ):
94- BATCH_SIZE = int (sys .argv [1 ])
95- else :
96- BATCH_SIZE = 2_000_000
def __process_args__(batch_size=BATCH_SIZE):
    """Return *batch_size* as a one-element list holding an int.

    The value is first rendered with ``str()`` and any ``_`` digit
    separators are removed, so both ``2_000_000`` and ``"2_000_000"``
    produce ``[2000000]``.
    """
    digits = str(batch_size).replace("_", "")
    return [int(digits)]
9797
9898# -----------------------------------------------------------------------------
9999
@@ -148,6 +148,10 @@ def compare(connection, t1, t2):
148148def pd (tab ):
149149 print ("\n Pandas read_csv() - Pandas to_sql()" )
150150
151+ ensure_packages (pandas = "2.2.3" , sqlalchemy = "2.0.45" )
152+ import pandas
153+ from sqlalchemy import create_engine
154+
151155 engine = create_engine (
152156 "oracle+oracledb://@" ,
153157 connect_args = {
@@ -326,7 +330,10 @@ def pya(connection, tab):
326330BLOCK_SIZE = 0
327331CONNECTION = None
328332
329- def __setup__ (* args ):
333+ def __setup__ (batch_size = BATCH_SIZE ):
334+ global BATCH_SIZE
335+ BATCH_SIZE = batch_size
336+
330337 # blog_create.py
331338 #
332339 # christopher.jones@oracle.com, 2025
@@ -356,32 +363,49 @@ def __setup__(*args):
356363
357364 global BLOCK_SIZE , CONNECTION
358365 BLOCK_SIZE = len (max (open (FILE_NAME , 'r' ), key = len )) * BATCH_SIZE
359- CONNECTION = oracledb .connect (user = USERNAME , password = PASSWORD , dsn = CONNECTSTRING )
360-
361-
362- def __benchmark__ (num = 1 ):
363- assert num == 1
364- t1 = "mytabpya"
366+ timeout = float (os .environ .get ("GRAALPY_ORACLEDB_WAIT_TIMEOUT" , "0" ))
367+ deadline = time .monotonic () + timeout
368+ attempt = 0
369+ while True :
370+ try :
371+ CONNECTION = oracledb .connect (user = USERNAME , password = PASSWORD , dsn = CONNECTSTRING )
372+ break
373+ except oracledb .Error :
374+ if time .monotonic () >= deadline :
375+ raise
376+ attempt += 1
377+ print (f"Waiting for Oracle Database at { CONNECTSTRING } (attempt { attempt } )" )
378+ time .sleep (5 )
379+
380+ quiet_seconds = float (os .environ .get ("GRAALPY_ORACLEDB_QUIET_SECONDS" , "0" ))
381+ if quiet_seconds > 0 :
382+ print (f"Waiting { quiet_seconds :g} seconds for Oracle Database to settle" )
383+ time .sleep (quiet_seconds )
384+
385+
386+ def __benchmark__ (batch_size = BATCH_SIZE ):
387+ assert batch_size == BATCH_SIZE
388+ t1 = TABLES [0 ]
365389 createtab (CONNECTION , t1 )
366390 pya (CONNECTION , t1 )
367391 checkrowcount (CONNECTION , t1 )
368392
369- t2 = "mytabdpl"
393+ t2 = TABLES [ 1 ]
370394 # createtab(CONNECTION, t2)
371395 # dpl(CONNECTION, t2)
372396 # checkrowcount(CONNECTION, t2)
373397
374- t3 = "mytabpyaem"
398+ t3 = TABLES [ 2 ]
375399 # createtab(CONNECTION, t3)
376400 # pyaem(CONNECTION, t3)
377401 # checkrowcount(CONNECTION, t3)
378402
379- t4 = "mytabem"
403+ t4 = TABLES [ 3 ]
380404 # createtab(CONNECTION, t4)
381405 # em(CONNECTION, t4)
382406 # checkrowcount(CONNECTION, t4)
383407
384- t5 = "mytabpd"
408+ t5 = TABLES [ 4 ]
385409 # createtab(CONNECTION, t5)
386410 # pd(t5)
387411 # checkrowcount(CONNECTION, t5)
@@ -391,12 +415,23 @@ def __benchmark__(num=1):
391415
392416
def __cleanup__(*args):
    """Drop the tables named in TABLES, provided a connection exists.

    Positional arguments are accepted and ignored. When CONNECTION is
    still None the call is a no-op.
    """
    if CONNECTION is None:
        return
    droptabs(CONNECTION, TABLES)
420+
421+
def __teardown__():
    """Close the module-level CONNECTION, if any, and reset it to None.

    The global is cleared even when ``close()`` raises, so a failed close
    does not leave a dead handle published for later calls; the exception
    itself still propagates to the caller.
    """
    global CONNECTION
    if CONNECTION is None:
        return
    try:
        CONNECTION.close()
    finally:
        # Always forget the handle: retrying close() on it is not useful.
        CONNECTION = None
395427
396428
if __name__ == "__main__":
    # Script entry point: an optional first argument overrides the batch size.
    if len(sys.argv) > 1:
        BATCH_SIZE = int(sys.argv[1])
    __setup__(BATCH_SIZE)
    try:
        print("\n Compare end-to-end times for reading a "
              "CSV file (number, date, string) in chunks and inserting into the Database")
        __benchmark__(BATCH_SIZE)
    finally:
        # Drop the tables and close the connection even if the benchmark
        # fails, so a crashed run does not leave state behind in the database.
        try:
            __cleanup__()
        finally:
            __teardown__()
0 commit comments