|
1 | 1 | #include "duckdb_python/python_log_storage.hpp" |
2 | 2 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" |
| 3 | + |
| 4 | +#include "duckdb/common/allocator.hpp" |
| 5 | +#include "duckdb/common/exception.hpp" |
| 6 | +#include "duckdb/common/types/column/column_data_collection.hpp" |
3 | 7 | #include "duckdb/common/types/data_chunk.hpp" |
4 | 8 | #include "duckdb/common/types/vector.hpp" |
5 | | -#include "duckdb/logging/logging.hpp" |
6 | 9 |
|
7 | 10 | namespace duckdb { |
8 | 11 |
|
9 | | -static int LogLevelToPython(LogLevel level) { |
10 | | - switch (level) { |
11 | | - case LogLevel::LOG_TRACE: |
12 | | - case LogLevel::LOG_DEBUG: |
13 | | - return 10; // logging.DEBUG |
14 | | - case LogLevel::LOG_INFO: |
15 | | - return 20; // logging.INFO |
16 | | - case LogLevel::LOG_WARNING: |
17 | | - return 30; // logging.WARNING |
18 | | - case LogLevel::LOG_ERROR: |
19 | | - return 40; // logging.ERROR |
20 | | - case LogLevel::LOG_FATAL: |
21 | | - return 50; // logging.CRITICAL |
22 | | - default: |
23 | | - return 30; |
24 | | - } |
25 | | -} |
26 | | - |
| 12 | +// Maps the engine's textual log level (stored as VARCHAR in the LOG_ENTRIES chunk) to the |
| 13 | +// integer levels of Python's logging module. |
27 | 14 | static int LevelStringToPython(const string &level_str) { |
28 | 15 | if (level_str == "TRACE" || level_str == "DEBUG") { |
29 | | - return 10; |
| 16 | + return 10; // logging.DEBUG |
30 | 17 | } |
31 | 18 | if (level_str == "INFO") { |
32 | | - return 20; |
| 19 | + return 20; // logging.INFO |
33 | 20 | } |
34 | 21 | if (level_str == "WARNING") { |
35 | | - return 30; |
| 22 | + return 30; // logging.WARNING |
36 | 23 | } |
37 | 24 | if (level_str == "ERROR") { |
38 | | - return 40; |
| 25 | + return 40; // logging.ERROR |
39 | 26 | } |
40 | 27 | if (level_str == "FATAL") { |
41 | | - return 50; |
| 28 | + return 50; // logging.CRITICAL |
42 | 29 | } |
43 | 30 | return 30; |
44 | 31 | } |
45 | 32 |
|
46 | | -// Both write methods run on engine worker threads and invoke arbitrary user Python (the |
47 | | -// handlers installed on the "duckdb" logger). The engine calls these directly from query |
48 | | -// binding/execution with NO surrounding try/catch (see LogManager::WriteLogEntry), so an |
49 | | -// exception escaping here would fail the user's query. Logging is a side effect — it must |
50 | | -// never do that. Hence every body swallows all exceptions. |
51 | | -// |
52 | | -// Note also that the engine holds LogManager::lock (a non-recursive mutex) across this call. |
53 | | -// A handler that re-enters DuckDB on the same thread and emits another log entry would |
54 | | -// self-deadlock on that lock — outside our control, but worth knowing. |
| 33 | +PythonLogStorage::PythonLogStorage(DatabaseInstance &db) : BufferingLogStorage(db, 1, true) { |
| 34 | + log_storage_buffers[LoggingTargetTable::LOG_ENTRIES] = |
| 35 | + make_uniq<ColumnDataCollection>(Allocator::DefaultAllocator(), GetSchema(LoggingTargetTable::LOG_ENTRIES)); |
| 36 | + log_storage_buffers[LoggingTargetTable::LOG_CONTEXTS] = |
| 37 | + make_uniq<ColumnDataCollection>(Allocator::DefaultAllocator(), GetSchema(LoggingTargetTable::LOG_CONTEXTS)); |
| 38 | +} |
55 | 39 |
|
56 | | -void PythonLogStorage::WriteLogEntry(timestamp_t, LogLevel level, const string &, const string &log_message, |
57 | | - const RegisteredLoggingContext &) { |
58 | | - if (!Py_IsInitialized()) { |
59 | | - return; // interpreter is finalizing — acquiring the GIL would crash |
60 | | - } |
61 | | - try { |
62 | | - py::gil_scoped_acquire gil; |
63 | | - auto logging = py::module::import("logging"); |
64 | | - auto logger = logging.attr("getLogger")("duckdb"); |
65 | | - logger.attr("log")(LogLevelToPython(level), log_message); |
66 | | - } catch (...) { |
67 | | - // Logging must not disrupt query execution. |
| 40 | +PythonLogStorage::~PythonLogStorage() { |
| 41 | +} |
| 42 | + |
| 43 | +ColumnDataCollection &PythonLogStorage::GetBuffer(LoggingTargetTable table) const { |
| 44 | + auto res = log_storage_buffers.find(table); |
| 45 | + if (res == log_storage_buffers.end()) { |
| 46 | + throw InternalException("PythonLogStorage: failed to find buffer for logging target table"); |
68 | 47 | } |
| 48 | + return *res->second; |
69 | 49 | } |
70 | 50 |
|
71 | | -void PythonLogStorage::WriteLogEntries(DataChunk &chunk, const RegisteredLoggingContext &) { |
| 51 | +void PythonLogStorage::ForwardEntriesToPython(DataChunk &chunk) { |
| 52 | + // This fires from engine worker threads with the GIL released, and from within both the |
| 53 | + // LogManager lock and this storage's lock. It runs arbitrary user Python (logging |
| 54 | + // handlers) and MUST NOT let an exception escape: the engine calls the write path with no |
| 55 | + // try/catch, directly from query binding/execution, so a raising handler would otherwise |
| 56 | + // fail the user's query. Hence we swallow everything here. |
| 57 | + // |
| 58 | + // Caveat: because a lock is held across this call, a handler that re-enters DuckDB on the |
| 59 | + // same thread and emits another log entry can self-deadlock on the non-recursive lock. |
| 60 | + // That is outside our control (and matches the engine's own contract for log storages). |
72 | 61 | if (!Py_IsInitialized()) { |
73 | 62 | return; // interpreter is finalizing — acquiring the GIL would crash |
74 | 63 | } |
75 | 64 | try { |
76 | 65 | py::gil_scoped_acquire gil; |
77 | 66 | auto logging = py::module::import("logging"); |
78 | 67 | auto logger = logging.attr("getLogger")("duckdb"); |
79 | | - // DataChunk is in LOG_ENTRIES format: context_id, timestamp, type, log_level, message. |
80 | | - // log_level (idx 3) and message (idx 4) are both VARCHAR; the chunk is freshly |
81 | | - // allocated by the engine so the vectors are flat. |
| 68 | + // LOG_ENTRIES schema: context_id, timestamp, type, log_level (idx 3), message (idx 4). |
| 69 | + // log_level and message are both VARCHAR; the buffer chunk is flat. |
82 | 70 | auto level_data = FlatVector::GetData<string_t>(chunk.data[3]); |
83 | 71 | auto message_data = FlatVector::GetData<string_t>(chunk.data[4]); |
84 | 72 | for (idx_t i = 0; i < chunk.size(); i++) { |
85 | 73 | logger.attr("log")(LevelStringToPython(level_data[i].GetString()), message_data[i].GetString()); |
86 | 74 | } |
87 | 75 | } catch (...) { |
88 | | - // Logging must not disrupt query execution. |
| 76 | + // Logging must never disrupt query execution. |
89 | 77 | } |
90 | 78 | } |
91 | 79 |
|
| 80 | +void PythonLogStorage::FlushChunk(LoggingTargetTable table, DataChunk &chunk) { |
| 81 | + D_ASSERT(table == LoggingTargetTable::LOG_ENTRIES || table == LoggingTargetTable::LOG_CONTEXTS); |
| 82 | + // Retain the entry for duckdb_logs FIRST, so a misbehaving Python handler can never cost |
| 83 | + // us a stored row. |
| 84 | + log_storage_buffers[table]->Append(chunk); |
| 85 | + // Forward only real log entries (not context metadata) to Python's logging module. |
| 86 | + if (table == LoggingTargetTable::LOG_ENTRIES) { |
| 87 | + ForwardEntriesToPython(chunk); |
| 88 | + } |
| 89 | +} |
| 90 | + |
| 91 | +void PythonLogStorage::ResetAllBuffers() { |
| 92 | + BufferingLogStorage::ResetAllBuffers(); |
| 93 | + for (const auto &buffer : log_storage_buffers) { |
| 94 | + buffer.second->Reset(); |
| 95 | + } |
| 96 | +} |
| 97 | + |
| 98 | +bool PythonLogStorage::CanScan(LoggingTargetTable table) { |
| 99 | + unique_lock<mutex> lck(lock); |
| 100 | + return IsEnabledInternal(table); |
| 101 | +} |
| 102 | + |
| 103 | +unique_ptr<LogStorageScanState> PythonLogStorage::CreateScanState(LoggingTargetTable table) const { |
| 104 | + return make_uniq<PythonLogStorageScanState>(table); |
| 105 | +} |
| 106 | + |
| 107 | +bool PythonLogStorage::Scan(LogStorageScanState &state, DataChunk &result) const { |
| 108 | + unique_lock<mutex> lck(lock); |
| 109 | + auto &python_scan_state = state.Cast<PythonLogStorageScanState>(); |
| 110 | + return GetBuffer(python_scan_state.table).Scan(python_scan_state.scan_state, result); |
| 111 | +} |
| 112 | + |
| 113 | +void PythonLogStorage::InitializeScan(LogStorageScanState &state) const { |
| 114 | + unique_lock<mutex> lck(lock); |
| 115 | + auto &python_scan_state = state.Cast<PythonLogStorageScanState>(); |
| 116 | + GetBuffer(python_scan_state.table) |
| 117 | + .InitializeScan(python_scan_state.scan_state, ColumnDataScanProperties::DISALLOW_ZERO_COPY); |
| 118 | +} |
| 119 | + |
92 | 120 | } // namespace duckdb |
0 commit comments