Skip to content

Commit db1737d

Browse files
committed
Add logstorage for python logging
1 parent 50d2b28 commit db1737d

7 files changed

Lines changed: 186 additions & 1 deletion

File tree

.github/workflows/code_quality.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ jobs:
3232
uses: astral-sh/setup-uv@v7
3333
with:
3434
version: "0.9.0"
35-
python-version: "3.12"
35+
# 3.13: the cmake-format pre-commit hook is pinned to python3.13
36+
# (cmakelang crashes under 3.14). Keeping this in sync means the hook
37+
# resolves to the running interpreter instead of hunting PATH.
38+
python-version: "3.13"
3639

3740
- name: pre-commit (cache)
3841
uses: actions/cache@v4

.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ repos:
2929
rev: v0.6.13
3030
hooks:
3131
- id: cmake-format
32+
# cmakelang is unmaintained and crashes under Python 3.14
33+
# ("Cannot use capturing groups in re.Scanner"). Pin this hook's
34+
# environment to 3.13 so it never picks up a 3.14 interpreter. The
35+
# code_quality CI job provisions Python 3.13 to match, so the hook
36+
# resolves to the running interpreter there.
37+
language_version: python3.13
3238

3339
- repo: https://github.com/pre-commit/mirrors-mypy
3440
rev: v1.18.2

src/duckdb_py/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ add_library(
1919
importer.cpp
2020
map.cpp
2121
path_like.cpp
22+
python_log_storage.cpp
2223
pyconnection.cpp
2324
pyexpression.cpp
2425
pyfilesystem.cpp
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===----------------------------------------------------------------------===//
2+
// DuckDB
3+
//
4+
// duckdb_python/python_log_storage.hpp
5+
//
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "duckdb/logging/log_storage.hpp"
12+
#include "duckdb/logging/logging.hpp"
13+
14+
namespace duckdb {
15+
16+
class PythonLogStorage : public LogStorage {
17+
public:
18+
PythonLogStorage() = default;
19+
~PythonLogStorage() override = default;
20+
21+
const string GetStorageName() override {
22+
return "python_log_storage";
23+
}
24+
25+
void WriteLogEntry(timestamp_t timestamp, LogLevel level, const string &log_type, const string &log_message,
26+
const RegisteredLoggingContext &context) override;
27+
void WriteLogEntries(DataChunk &chunk, const RegisteredLoggingContext &context) override;
28+
void FlushAll() override {
29+
}
30+
void Flush(LoggingTargetTable table) override {
31+
}
32+
bool IsEnabled(LoggingTargetTable table) override {
33+
return true;
34+
}
35+
};
36+
37+
} // namespace duckdb

src/duckdb_py/pyconnection.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#include "duckdb_python/numpy/numpy_type.hpp"
3636
#include "duckdb/main/prepared_statement.hpp"
3737
#include "duckdb_python/jupyter_progress_bar_display.hpp"
38+
#include "duckdb_python/python_log_storage.hpp"
39+
#include "duckdb/logging/log_manager.hpp"
3840
#include "duckdb_python/pyfilesystem.hpp"
3941
#include "duckdb/main/client_config.hpp"
4042
#include "duckdb/function/table/read_csv.hpp"
@@ -2283,6 +2285,22 @@ shared_ptr<DuckDBPyConnection> DuckDBPyConnection::Connect(const py::object &dat
22832285
auto res = FetchOrCreateInstance(database, config);
22842286
auto &client_context = *res->con.GetConnection().context;
22852287
SetDefaultConfigArguments(client_context);
2288+
{
2289+
auto &db_instance = *res->con.GetDatabase().instance;
2290+
auto &log_manager = db_instance.GetLogManager();
2291+
auto storage = make_shared_ptr<PythonLogStorage>();
2292+
shared_ptr<LogStorage> storage_base = storage;
2293+
// RegisterLogStorage returns false if the name is already registered on this
2294+
// DatabaseInstance. Instances are cached and shared across connections/cursors, so
2295+
// only configure logging on the first registration. SetLogStorage/SetEnableLogging/
2296+
// SetLogLevel are NOT idempotent — re-running them on every Connect() would silently
2297+
// stomp a user's explicit `SET enable_logging` / `SET logging_level` / storage choice.
2298+
if (log_manager.RegisterLogStorage("python_log_storage", storage_base)) {
2299+
log_manager.SetLogStorage(db_instance, "python_log_storage");
2300+
log_manager.SetEnableLogging(true);
2301+
log_manager.SetLogLevel(LogLevel::LOG_WARNING);
2302+
}
2303+
}
22862304
return res;
22872305
}
22882306

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#include "duckdb_python/python_log_storage.hpp"
2+
#include "duckdb_python/pybind11/pybind_wrapper.hpp"
3+
#include "duckdb/common/types/data_chunk.hpp"
4+
#include "duckdb/common/types/vector.hpp"
5+
#include "duckdb/logging/logging.hpp"
6+
7+
namespace duckdb {
8+
9+
// Translate a DuckDB LogLevel into the numeric level used by Python's `logging` module.
// TRACE is folded into DEBUG, FATAL is reported as CRITICAL, and any value that is not
// one of the listed enumerators is reported as WARNING.
static int LogLevelToPython(LogLevel level) {
	constexpr int PY_DEBUG = 10;    // logging.DEBUG
	constexpr int PY_INFO = 20;     // logging.INFO
	constexpr int PY_WARNING = 30;  // logging.WARNING
	constexpr int PY_ERROR = 40;    // logging.ERROR
	constexpr int PY_CRITICAL = 50; // logging.CRITICAL

	switch (level) {
	case LogLevel::LOG_FATAL:
		return PY_CRITICAL;
	case LogLevel::LOG_ERROR:
		return PY_ERROR;
	case LogLevel::LOG_WARNING:
		return PY_WARNING;
	case LogLevel::LOG_INFO:
		return PY_INFO;
	case LogLevel::LOG_DEBUG:
	case LogLevel::LOG_TRACE:
		return PY_DEBUG;
	default:
		return PY_WARNING;
	}
}
26+
27+
// Translate a textual log level into the numeric level used by Python's `logging` module.
// The match is exact and case-sensitive; "TRACE" shares DEBUG's value, and any string that
// is not in the table (including the empty string) is reported as WARNING (30).
static int LevelStringToPython(const string &level_str) {
	struct LevelName {
		const char *name;
		int python_level;
	};
	static constexpr LevelName KNOWN_LEVELS[] = {
	    {"TRACE", 10}, {"DEBUG", 10}, {"INFO", 20}, {"WARNING", 30}, {"ERROR", 40}, {"FATAL", 50},
	};

	for (const auto &entry : KNOWN_LEVELS) {
		if (level_str == entry.name) {
			return entry.python_level;
		}
	}
	return 30;
}
45+
46+
// Both write methods run on engine worker threads and invoke arbitrary user Python (the
47+
// handlers installed on the "duckdb" logger). The engine calls these directly from query
48+
// binding/execution with NO surrounding try/catch (see LogManager::WriteLogEntry), so an
49+
// exception escaping here would fail the user's query. Logging is a side effect — it must
50+
// never do that. Hence every body swallows all exceptions.
51+
//
52+
// Note also that the engine holds LogManager::lock (a non-recursive mutex) across this call.
53+
// A handler that re-enters DuckDB on the same thread and emits another log entry would
54+
// self-deadlock on that lock — outside our control, but worth knowing.
55+
56+
// Forward one log entry to the Python logger named "duckdb".
// Only `level` and `log_message` are used; timestamp, log type and context are ignored.
// Nothing is emitted when Py_IsInitialized() reports no live interpreter, and every
// exception raised while talking to Python is swallowed so that it cannot propagate
// to the caller.
void PythonLogStorage::WriteLogEntry(timestamp_t, LogLevel level, const string &, const string &log_message,
                                     const RegisteredLoggingContext &) {
	if (Py_IsInitialized()) {
		try {
			py::gil_scoped_acquire gil_guard;
			const int python_level = LogLevelToPython(level);
			py::object duckdb_logger = py::module::import("logging").attr("getLogger")("duckdb");
			duckdb_logger.attr("log")(python_level, log_message);
		} catch (...) {
			// Deliberately ignored: a logging failure must not fail the query.
		}
	}
	// No usable interpreter: taking the GIL would crash, so the entry is dropped.
}
70+
71+
// Forward a batch of log entries to the Python logger named "duckdb".
//
// The chunk is expected in LOG_ENTRIES layout: context_id, timestamp, type, log_level,
// message. Only log_level (column 3) and message (column 4) are read; both are VARCHAR.
//
// Robustness notes:
//  - The chunk is flattened first, so FlatVector access is valid whatever vector type the
//    caller handed over (constant, dictionary, ...), not only for freshly allocated chunks.
//  - Validity masks are honoured: a NULL message is skipped, a NULL level falls back to
//    the same default LevelStringToPython uses for unknown level names.
//  - Each row is delivered in its own try/catch, so one failing row (e.g. a message that
//    cannot be converted to a Python str) does not drop the rest of the batch.
//  - Nothing is emitted when the chunk is empty or Py_IsInitialized() reports no live
//    interpreter. No exception ever escapes this function.
void PythonLogStorage::WriteLogEntries(DataChunk &chunk, const RegisteredLoggingContext &) {
	if (chunk.size() == 0) {
		return; // nothing to deliver — do not pay for the GIL
	}
	if (!Py_IsInitialized()) {
		return; // no live interpreter — acquiring the GIL would crash
	}
	try {
		// Pure engine-side preparation happens before the GIL is taken.
		chunk.Flatten();
		auto &level_vector = chunk.data[3];
		auto &message_vector = chunk.data[4];
		auto level_data = FlatVector::GetData<string_t>(level_vector);
		auto message_data = FlatVector::GetData<string_t>(message_vector);
		auto &level_validity = FlatVector::Validity(level_vector);
		auto &message_validity = FlatVector::Validity(message_vector);
		const idx_t row_count = chunk.size();

		py::gil_scoped_acquire gil;
		py::object logger = py::module::import("logging").attr("getLogger")("duckdb");
		// Resolve the bound method once instead of once per row.
		py::object log_method = logger.attr("log");

		for (idx_t i = 0; i < row_count; i++) {
			if (!message_validity.RowIsValid(i)) {
				continue; // no message to report
			}
			try {
				// An empty level name maps to LevelStringToPython's default level.
				const string level_name = level_validity.RowIsValid(i) ? level_data[i].GetString() : string();
				log_method(LevelStringToPython(level_name), message_data[i].GetString());
			} catch (...) {
				// Drop only this entry; keep delivering the remaining ones.
			}
		}
	} catch (...) {
		// Logging must not disrupt query execution.
	}
}
91+
92+
} // namespace duckdb
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import logging
2+
3+
import duckdb
4+
5+
6+
def test_warning_routed_to_python_logging(caplog):
    """A deprecation warning from the engine must arrive on the 'duckdb' logger at WARNING level."""
    with caplog.at_level(logging.WARNING, logger="duckdb"):
        connection = duckdb.connect()
        # The deprecated arrow (->) lambda form only warns while lambda_syntax is DEFAULT.
        # DEFAULT is the engine default today but is slated to change, so it is set
        # explicitly to keep this test on the warning path across future submodule bumps.
        connection.execute("SET lambda_syntax='DEFAULT'")
        connection.execute("SELECT list_transform([1, 2, 3], x -> x + 1)")

    deprecation_records = []
    for record in caplog.records:
        if "deprecated" in record.message.lower():
            deprecation_records.append(record)

    assert deprecation_records, "expected a deprecation warning routed to the 'duckdb' logger"
    # Every matching record must come from the 'duckdb' logger ...
    assert {record.name for record in deprecation_records} == {"duckdb"}
    # ... and carry exactly the WARNING level.
    assert {record.levelno for record in deprecation_records} == {logging.WARNING}
19+
20+
21+
def test_warning_not_emitted_for_clean_queries(caplog):
    """A query that uses no deprecated syntax must not produce a deprecation record."""
    with caplog.at_level(logging.WARNING, logger="duckdb"):
        connection = duckdb.connect()
        connection.execute("SELECT 1 + 1").fetchone()

    # Only the deprecation warning is checked for, not "no records at all": an unrelated
    # connect-time warning (e.g. the macOS Rosetta notice on some hardware) would
    # otherwise make this test flaky.
    assert not any("deprecated" in record.message.lower() for record in caplog.records)

0 commit comments

Comments
 (0)