Skip to content

Commit 8ba192a

Browse files
committed
Implement bulk copy support
Closes #350
1 parent cfe0575 commit 8ba192a

10 files changed

Lines changed: 513 additions & 2 deletions

File tree

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,7 @@ xxx_*
6262
# The Access unit tests copy empty.accdb and empty.mdb to these names and use them.
6363
test.accdb
6464
test.mdb
65+
66+
# Don't commit bcp test files.
67+
test*.bcp
68+
bcp.errors

src/bcp.cpp

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
// Implementation for the Connection.bcp() method.
2+
3+
#include "pyodbc.h"
4+
#include "wrapper.h"
5+
#include "pyodbcmodule.h"
6+
#include "textenc.h"
7+
#include "connection.h"
8+
#include "errors.h"
9+
#include "bcp.h"
10+
11+
#ifdef _MSC_VER
12+
#include <psapi.h>
13+
#define WINAPI_OR_CDECL WINAPI
14+
typedef FARPROC _BCP_FUNC;
15+
#else
16+
#include <dlfcn.h>
17+
#define WINAPI_OR_CDECL /* nothing */
18+
typedef void* _BCP_FUNC;
19+
#endif
20+
21+
#define BCP_DEBUG 0
22+
23+
// ODBC BCP constants.
24+
#define FAIL 0
25+
#define SUCCEED 1
26+
#define DB_IN 1
27+
#define DB_OUT 2
28+
#define BCPMAXERRS 1 // Sets max errors allowed
29+
#define BCPFIRST 2 // Sets first row to be copied out
30+
#define BCPLAST 3 // Sets number of rows to be copied out
31+
32+
// Signatures for BCP calls.
33+
typedef int (WINAPI_OR_CDECL *_BCP_INIT)(HDBC, SQLWCHAR*, SQLWCHAR*, SQLWCHAR*, int);
34+
typedef int (WINAPI_OR_CDECL *_BCP_READFMT)(HDBC, SQLWCHAR*);
35+
typedef int (WINAPI_OR_CDECL *_BCP_EXEC)(HDBC, long*);
36+
typedef int (WINAPI_OR_CDECL *_BCP_CONTROL)(HDBC, int, void*);
37+
38+
// BCP functions
39+
static _BCP_INIT bcp_init = 0;
40+
static _BCP_READFMT bcp_readfmt = 0;
41+
static _BCP_EXEC bcp_exec = 0;
42+
static _BCP_CONTROL bcp_control = 0;
43+
44+
45+
#ifdef __linux__
46+
// Load the driver's library.
47+
static void* _bcplib;
48+
static void* _load_bcplib(HDBC hdbc)
49+
{
50+
// Ask the DM for the file name of the driver's library.
51+
char name[1024];
52+
SQLSMALLINT cch;
53+
SQLRETURN rc = SQLGetInfo(hdbc, SQL_DRIVER_NAME, name, sizeof name, &cch);
54+
if (rc != SQL_SUCCESS)
55+
return NULL;
56+
57+
// If we're lucky, that's all we need.
58+
void *handle = dlopen(name, RTLD_NOLOAD | RTLD_LAZY);
59+
if (handle)
60+
return handle;
61+
62+
// As we really expected, we need the full path for the library.
63+
FILE *fp = fopen("/proc/self/maps", "r");
64+
if (!fp)
65+
return NULL;
66+
char line[1024];
67+
while (fgets(line, sizeof line, fp)) {
68+
char *path = strchr(line, '/');
69+
if (path) {
70+
path[strcspn(path, "\r\n")] = '\0';
71+
char *base = strrchr(path, '/');
72+
base = base ? base + 1 : path;
73+
if (strcmp(base, name) == 0) {
74+
handle = dlopen(path, RTLD_NOLOAD | RTLD_LAZY);
75+
break;
76+
}
77+
}
78+
}
79+
fclose(fp);
80+
return handle;
81+
}
82+
#endif
83+
84+
// Find one of the bcp API functions; different strategies for each platform.
85+
static _BCP_FUNC _find_bcp_function(char* name)
86+
{
87+
#ifdef _WIN32
88+
static size_t count;
89+
static HMODULE mods[256];
90+
static HMODULE odbclib;
91+
if (!count) {
92+
DWORD needed = 0;
93+
if (!EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &needed))
94+
return 0;
95+
count = needed / sizeof(HMODULE);
96+
}
97+
if (odbclib)
98+
return GetProcAddress(odbclib, name);
99+
for (size_t i = 0; i < count; ++i) {
100+
_BCP_FUNC func = GetProcAddress(mods[i], name);
101+
if (func) {
102+
odbclib = mods[i];
103+
return func;
104+
}
105+
}
106+
return 0;
107+
#else
108+
#ifdef __linux__
109+
return _bcplib ? dlsym(_bcplib, name) : NULL;
110+
#else
111+
return dlsym(RTLD_DEFAULT, name);
112+
#endif
113+
#endif
114+
}
115+
116+
// Dynamically locate the bcp API functions we need (returns false if we can't find them).
117+
static bool _load_bcp_functions(HDBC hdbc)
118+
{
119+
if (!hdbc) // only really needed for Linux, but this keeps the compiler happy :)
120+
return false;
121+
#ifdef __linux__
122+
_bcplib = _load_bcplib(hdbc);
123+
if (!_bcplib)
124+
return false;
125+
#endif
126+
bcp_init = (_BCP_INIT) _find_bcp_function("bcp_initW");
127+
bcp_readfmt = (_BCP_READFMT)_find_bcp_function("bcp_readfmtW");
128+
bcp_exec = (_BCP_EXEC) _find_bcp_function("bcp_exec");
129+
bcp_control = (_BCP_CONTROL)_find_bcp_function("bcp_control");
130+
#ifdef __linux__
131+
dlclose(_bcplib); // just releases the handle, doesn't unload
132+
_bcplib = NULL;
133+
#endif
134+
return bcp_init && bcp_readfmt && bcp_exec && bcp_control;
135+
}
136+
137+
// Apply a control option if the user provided a value, returning false on failure.
138+
bool _apply_int_option(Connection* conn, PyObject* value, int option, const char* name)
139+
{
140+
if (value == Py_None)
141+
return true;
142+
if (!PyLong_Check(value)) {
143+
PyErr_Format(ProgrammingError, "%s must be an integer", name);
144+
return false;
145+
}
146+
long intval = PyLong_AsLong(value);
147+
if (bcp_control(conn->hdbc, option, (void*)intval) != SUCCEED) {
148+
PyErr_Format(OperationalError, "failure setting %s", name);
149+
return false;
150+
}
151+
return true;
152+
}
153+
154+
// Prepare and execute a bcp operation.
155+
PyObject* _bcp_impl(PyObject* py_conn, const BCP_OPTS& opts)
156+
{
157+
int bcp_rc = SUCCEED;
158+
long row_count = 0;
159+
160+
#if BCP_DEBUG
161+
// Show the arguments.
162+
printf("action : %ld\n", opts.action);
163+
printf("table : "); PyObject_Print(opts.table, stdout, 0); printf("\n");
164+
printf("datafile : "); PyObject_Print(opts.datafile, stdout, 0); printf("\n");
165+
printf("formatfile : "); PyObject_Print(opts.formatfile, stdout, 0); printf("\n");
166+
printf("errorfile : "); PyObject_Print(opts.errorfile, stdout, 0); printf("\n");
167+
printf("firstrow : "); PyObject_Print(opts.firstrow, stdout, 0); printf("\n");
168+
printf("lastrow : "); PyObject_Print(opts.lastrow, stdout, 0); printf("\n");
169+
printf("maxerrors : "); PyObject_Print(opts.maxerrors, stdout, 0); printf("\n");
170+
#endif
171+
172+
// Make sure we have a valid connection.
173+
if (!py_conn || !Connection_Check(py_conn)) {
174+
PyErr_SetString(ProgrammingError, "first argument must be a valid connection");
175+
return 0;
176+
}
177+
Connection* conn = (Connection*)py_conn;
178+
if (conn->hdbc == SQL_NULL_HANDLE) {
179+
PyErr_SetString(ProgrammingError, "attempt to use a closed connection.");
180+
return 0;
181+
}
182+
183+
// Verify that bcp is enabled for this connection.
184+
if (!conn->bcp_enabled) {
185+
PyErr_SetString(ProgrammingError, "bcp not supported by this driver.");
186+
return 0;
187+
}
188+
189+
// Load the BCP function pointers.
190+
if (!_load_bcp_functions(conn->hdbc)) {
191+
PyErr_SetString(OperationalError, "bcp functions not provided by driver");
192+
return 0;
193+
}
194+
195+
// Get the required arguments. Note that we use less generic names in public-facing API.
196+
if (opts.action != DB_IN && opts.action != DB_OUT) {
197+
PyErr_SetString(ProgrammingError, "action must be pyodbc.BCP_IN or pyodbc.BCP_OUT");
198+
return 0;
199+
}
200+
SQLWChar table(opts.table, ENCSTR_UTF16NE);
201+
if (opts.datafile == Py_None) {
202+
PyErr_SetString(ProgrammingError, "datafile is a required argument");
203+
return 0;
204+
}
205+
PyObject* datafile_path = PyOS_FSPath(opts.datafile);
206+
if (!datafile_path)
207+
return 0; // exception already raised by PyOS_FSPath()
208+
if (!PyUnicode_Check(datafile_path)) {
209+
Py_DECREF(datafile_path);
210+
PyErr_SetString(PyExc_TypeError, "datafile must be a str or pathlib.Path");
211+
return 0;
212+
}
213+
SQLWChar datafile(datafile_path, ENCSTR_UTF16NE);
214+
215+
// The error filename is optional.
216+
SQLWChar errorfile;
217+
PyObject* errorfile_path = 0;
218+
if (opts.errorfile != Py_None) {
219+
errorfile_path = PyOS_FSPath(opts.errorfile);
220+
if (!errorfile_path) {
221+
Py_DECREF(datafile_path);
222+
return 0; // exception set by PyOS_FSPath()
223+
}
224+
if (!PyUnicode_Check(errorfile_path)) {
225+
Py_DECREF(datafile_path);
226+
Py_DECREF(errorfile_path);
227+
PyErr_SetString(PyExc_TypeError, "errorfile must be a str or pathlib.Path");
228+
return 0;
229+
}
230+
errorfile.set(errorfile_path, ENCSTR_UTF16NE);
231+
}
232+
233+
// Initialize the bcp job.
234+
Py_BEGIN_ALLOW_THREADS
235+
bcp_rc = bcp_init(conn->hdbc, (SQLWCHAR*)table, (SQLWCHAR*)datafile, (SQLWCHAR*)errorfile, opts.action);
236+
Py_END_ALLOW_THREADS
237+
Py_DECREF(datafile_path);
238+
Py_XDECREF(errorfile_path);
239+
if (conn->hdbc == SQL_NULL_HANDLE) {
240+
// The connection was closed by another thread.
241+
PyErr_SetString(ProgrammingError, "connection was closed.");
242+
return 0;
243+
}
244+
if (bcp_rc != SUCCEED) {
245+
PyErr_SetString(OperationalError, "bcp_init failure");
246+
return 0;
247+
}
248+
249+
// Read the transfer format file if requested.
250+
if (opts.formatfile != Py_None) {
251+
PyObject* formatfile_path = PyOS_FSPath(opts.formatfile);
252+
if (!formatfile_path)
253+
return 0; // exception is already set
254+
if (!PyUnicode_Check(formatfile_path)) {
255+
PyErr_SetString(ProgrammingError, "formatfile must be a str or pathlib.Path");
256+
return 0;
257+
}
258+
SQLWChar formatfile(formatfile_path, ENCSTR_UTF16NE);
259+
Py_BEGIN_ALLOW_THREADS
260+
bcp_rc = bcp_readfmt(conn->hdbc, (SQLWCHAR*)formatfile);
261+
Py_END_ALLOW_THREADS
262+
Py_DECREF(formatfile_path);
263+
if (conn->hdbc == SQL_NULL_HANDLE) {
264+
PyErr_SetString(ProgrammingError, "connection was closed.");
265+
return 0;
266+
}
267+
if (bcp_rc != SUCCEED) {
268+
PyErr_SetString(OperationalError, "bcp_readfmt failure");
269+
return 0;
270+
}
271+
}
272+
273+
// Apply the rest of the options specified (no need to release the GIL for these).
274+
if (!_apply_int_option(conn, opts.firstrow, BCPFIRST, "firstrow" )) return 0;
275+
if (!_apply_int_option(conn, opts.lastrow, BCPLAST, "lastrow" )) return 0;
276+
if (!_apply_int_option(conn, opts.maxerrors, BCPMAXERRS, "maxerrors")) return 0;
277+
278+
// Perform the transfer.
279+
Py_BEGIN_ALLOW_THREADS
280+
bcp_rc = bcp_exec(conn->hdbc, &row_count);
281+
Py_END_ALLOW_THREADS
282+
if (bcp_rc != SUCCEED) {
283+
PyErr_SetString(OperationalError, "bcp_exec failure");
284+
return 0;
285+
}
286+
287+
// Return the number of rows transferred.
288+
return PyLong_FromLong(row_count);
289+
}

src/bcp.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/**
2+
* Types, constants, and signatures needed for BCP.
3+
*/
4+
#ifndef _BCP_H_
5+
#define _BCP_H_
6+
7+
#include <sqlext.h>
8+
9+
// BCP constants.
10+
#define SQL_BCP_ON 1L
11+
#define SQL_COPT_SS_BCP 1219 // Allow BCP usage on connection
12+
13+
// Arguments for a call to the bcp() method.
14+
struct BCP_OPTS {
15+
// Required positional-only arguments.
16+
long action;
17+
PyObject* table;
18+
PyObject* datafile;
19+
20+
// Optional keyword-only arguments.
21+
PyObject* formatfile;
22+
PyObject* errorfile;
23+
PyObject* firstrow;
24+
PyObject* lastrow;
25+
PyObject* maxerrors;
26+
};
27+
PyObject* _bcp_impl(PyObject* conn, const BCP_OPTS& opts);
28+
29+
#endif // _BCP_H_

0 commit comments

Comments
 (0)