Skip to content

Commit e47b30b

Browse files
authored
Improve header isolation and parse time of logger.h (#4445)
As we prepare to surface qpu headers in user code, `cudaq.h` is become significantly slower to parse in a prototyping sandbox. This PR addresses a slowdown introduced by logger.h as can be seen in the profile below. The compiled code is: ```cpp #include <cudaq.h> int main() { return 0; } ``` The compile time is 3.6s. ``` time clang++ cudaq_inc.cpp -std=c++20 -I ~/cudaq/cq2/install/cudaq/include real 0m3.603s ``` The profile shows logger.h takes 441ms of parse time. <img width="689" height="519" alt="logger-profile-1" src="https://github.com/user-attachments/assets/2a677887-0ef9-432c-a07a-6b5f3c8aabed" /> With this patch, the compilation time becomes 2.9s ``` time clang++ cudaq_inc.cpp -std=c++20 -I ~/cudaq/cq2/install/cudaq/include real 0m2.936s ``` The profile show logger.h only taking 7ms. <img width="676" height="465" alt="logger-profile-2" src="https://github.com/user-attachments/assets/9034acc9-829e-4960-97a8-f433c41107de" /> This is achieved by replacing the `std::variant` with a `FormatArgument` which only stores a pointer and an out-of-line appending callback. The callback is instantiated in logger.cpp. --------- Signed-off-by: Renaud Kauffmann <rkauffmann@nvidia.com>
1 parent 345669b commit e47b30b

9 files changed

Lines changed: 675 additions & 156 deletions

File tree

runtime/common/CodeGenConfig.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include "FmtCore.h"
1111
#include "cudaq/runtime/logger/logger.h"
1212
#include <stdexcept>
13+
#include <tuple>
14+
#include <vector>
1315

1416
namespace {
1517

runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
******************************************************************************/
88

99
#include "FermioniqQPU.h"
10+
#include "cudaq/runtime/logger/cudaq_fmt.h"
1011
#include "nlohmann/json.hpp"
1112
#include <memory>
1213

runtime/include/cudaq/runtime/logger/cudaq_fmt.h

Lines changed: 40 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -8,120 +8,75 @@
88

99
#pragma once
1010

11-
#include <chrono>
12-
#include <complex>
11+
#include <array>
1312
#include <cstddef>
14-
#include <cstdint>
15-
#include <functional>
16-
#include <map>
13+
#include <memory>
1714
#include <span>
1815
#include <string>
16+
#include <string_view>
1917
#include <type_traits>
20-
#include <variant>
21-
#include <vector>
18+
#include <utility>
2219

2320
//
24-
// This header provides a wrapper around some of the fmtlib functions, so that
25-
// fmtlib headers, which are not distributed, do not bleed into user code.
21+
// This header provides a wrapper around some of the `fmtlib` functions, so that
22+
// `fmtlib` headers, which are not distributed, do not propagate into user code.
2623
//
2724
// It introduces a cudaq_fmt namespace to make it easy to distinguish between
28-
// what would otherwise be fmt::format and cudaq::fmt::format.
25+
// what would otherwise be `fmt::format` and `cudaq::format`.
2926
//
3027

3128
namespace cudaq_fmt {
32-
struct fmt_arg;
33-
namespace details {
34-
//
35-
// Concepts to check if T is in the variant
36-
//
37-
template <typename T, typename... Types>
38-
concept one_of = ((std::same_as<T, Types> ||
39-
std::same_as<std::reference_wrapper<const T>, Types>) ||
40-
...);
41-
42-
template <typename T, typename Variant>
43-
struct is_variant_member;
44-
45-
template <typename T, typename... Types>
46-
struct is_variant_member<T, std::variant<Types...>>
47-
: std::bool_constant<one_of<T, Types...>> {};
48-
49-
template <typename T, typename Variant>
50-
concept variant_alternative =
51-
is_variant_member<std::decay_t<T>, Variant>::value;
52-
53-
//
54-
// Packed versions of format and print, implemented in logger.cpp
55-
//
56-
std::string format_packed(const std::string_view message,
57-
const std::span<fmt_arg> &arr);
58-
59-
void print_packed(const std::string_view message,
60-
const std::span<fmt_arg> &arr);
61-
} // namespace details
6229

63-
//
64-
// Packed parameter type passing arguments to fmt
65-
// Built-in types need to be passed by value.
66-
// Please store large types (like vectors, strings) as reference_wrappers
67-
//
68-
struct fmt_arg {
69-
using storage_t = std::variant<
70-
bool, char, uint32_t, int32_t, uint64_t, int64_t, float, double,
71-
std::complex<float>, std::complex<double>, std::string_view, const char *,
72-
char *, void *, std::chrono::milliseconds,
73-
std::chrono::system_clock::time_point,
74-
std::reference_wrapper<const std::vector<int32_t>>,
75-
std::reference_wrapper<const std::string>,
76-
std::reference_wrapper<const std::vector<uint32_t>>,
77-
std::reference_wrapper<const std::vector<uint64_t>>,
78-
std::reference_wrapper<const std::vector<float>>,
79-
std::reference_wrapper<const std::vector<double>>,
80-
std::reference_wrapper<const std::vector<std::string>>,
81-
std::reference_wrapper<const std::map<std::string, std::string>>,
82-
std::reference_wrapper<const std::vector<std::complex<float>>>,
83-
std::reference_wrapper<const std::vector<std::complex<double>>>>;
84-
storage_t value;
30+
/// Type-erased reference to a format argument.
31+
///
32+
/// This intentionally stores only a pointer and an out-of-line appending
33+
/// callback. The callback is instantiated in logger.cpp, where heavier headers
34+
/// such as `fmtlib` and `chrono` can be included without forcing
35+
/// every formatting caller to parse them.
36+
struct FormatArgument {
37+
using Appender = void (*)(void *store, const void *value);
8538

8639
template <typename T>
87-
requires details::variant_alternative<T, storage_t>
88-
fmt_arg(const T &v) : value(std::cref(v)) {}
40+
FormatArgument(const T &value)
41+
: value(std::addressof(value)), append(&appendArgument<std::decay_t<T>>) {
42+
}
8943

90-
// On macOS, unsigned long and long are distinct types from uint64_t/int64_t
91-
// (both 64-bit but different type identities). These constructors convert
92-
// to canonical fixed-width types. Constraints ensure they only exist when
93-
// the types are actually distinct (disabled on Linux where they're aliases).
44+
FormatArgument(const char *value) : value(value), append(&appendCString) {}
45+
FormatArgument(char *value) : value(value), append(&appendCString) {}
9446

95-
template <typename T>
96-
requires(std::same_as<T, unsigned long> && !std::same_as<T, uint64_t>)
97-
fmt_arg(T v) : value(static_cast<uint64_t>(v)) {}
98-
99-
template <typename T>
100-
requires(std::same_as<T, long> && !std::same_as<T, int64_t>)
101-
fmt_arg(T v) : value(static_cast<int64_t>(v)) {}
47+
const void *value = nullptr;
48+
Appender append = nullptr;
10249

10350
template <typename T>
104-
requires(std::same_as<T, std::vector<unsigned long>> &&
105-
!std::same_as<unsigned long, uint64_t>)
106-
fmt_arg(const T &v)
107-
: value(std::cref(reinterpret_cast<const std::vector<uint64_t> &>(v))) {}
51+
static void appendArgument(void *store, const void *value);
52+
static void appendCString(void *store, const void *value);
10853
};
10954

55+
namespace details {
56+
std::string format_packed(const std::string_view message,
57+
const std::span<const FormatArgument> &arr);
58+
59+
void print_packed(const std::string_view message,
60+
const std::span<const FormatArgument> &arr);
61+
} // namespace details
62+
11063
//
11164
// Functions substitutes for fmt::format and fmt::print
11265
//
11366
template <typename... Args>
11467
std::string format(const std::string_view message, Args &&...args) {
115-
auto array = std::array<fmt_arg, sizeof...(Args)>{
116-
fmt_arg(std::forward<Args>(args))...};
117-
return details::format_packed(message, array);
68+
auto array = std::array<FormatArgument, sizeof...(Args)>{
69+
FormatArgument(std::forward<Args>(args))...};
70+
return details::format_packed(
71+
message, std::span<const FormatArgument>(array.data(), array.size()));
11872
}
11973

12074
template <typename... Args>
12175
void print(const std::string_view message, Args &&...args) {
122-
auto array = std::array<fmt_arg, sizeof...(Args)>{
123-
fmt_arg(std::forward<Args>(args))...};
124-
return details::print_packed(message, array);
76+
auto array = std::array<FormatArgument, sizeof...(Args)>{
77+
FormatArgument(std::forward<Args>(args))...};
78+
return details::print_packed(
79+
message, std::span<const FormatArgument>(array.data(), array.size()));
12580
}
12681

12782
//

runtime/include/cudaq/runtime/logger/logger.h

Lines changed: 59 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,21 @@
1010

1111
#include "cudaq/runtime/logger/cudaq_fmt.h"
1212
#include "cudaq/runtime/logger/tracer.h"
13+
#include <array>
14+
#include <cstddef>
15+
#include <span>
16+
#include <stdexcept>
17+
#include <string>
18+
#include <string_view>
19+
#include <utility>
1320

1421
namespace cudaq {
1522

1623
/// @brief Returns true if `tag` is enabled. Tags are only enabled/disabled at
1724
/// program startup.
1825
bool isTimingTagEnabled(int tag);
1926

20-
// Keep all spdlog headers hidden in the implementation file
27+
// Keep all spdlog headers hidden in the implementation file.
2128
namespace details {
2229
// This enum must match spdlog::level enums. This is checked via static_assert
2330
// in logger.cpp.
@@ -29,34 +36,54 @@ void debug(const std::string_view msg);
2936
void warn(const std::string_view msg);
3037
void error(const std::string_view msg);
3138
std::string pathToFileName(const std::string_view fullFilePath);
39+
40+
// Test/debug helpers. Production callers configure the level via the
41+
// CUDAQ_LOG_LEVEL environment variable.
42+
void setLogLevel(LogLevel level);
43+
LogLevel getLogLevel();
44+
45+
// Flushes any buffered log output. Useful in tests that need to inspect
46+
// captured stdout immediately after emitting an info/debug message
47+
// (initializeLogger only enables flush_on(warn)).
48+
void flushLogs();
49+
50+
void logMessagePacked(LogLevel logLevel, const std::string_view message,
51+
const std::span<const cudaq_fmt::FormatArgument> &args,
52+
const char *fileName, int lineNo);
53+
54+
void logWithTimestampPacked(
55+
const std::string_view message,
56+
const std::span<const cudaq_fmt::FormatArgument> &args);
57+
58+
template <typename... Args>
59+
std::string formatMessage(const std::string_view message, Args &&...args) {
60+
return cudaq_fmt::format(message, std::forward<Args>(args)...);
61+
}
62+
63+
template <typename... Args>
64+
void logMessage(LogLevel logLevel, const std::string_view message,
65+
const char *fileName, int lineNo, Args &&...args) {
66+
std::array<cudaq_fmt::FormatArgument, sizeof...(Args)> packedArgs{
67+
cudaq_fmt::FormatArgument(std::forward<Args>(args))...};
68+
logMessagePacked(logLevel, message,
69+
std::span<const cudaq_fmt::FormatArgument>(
70+
packedArgs.data(), packedArgs.size()),
71+
fileName, lineNo);
72+
}
3273
} // namespace details
3374

34-
/// This type seeks to enable automated injection of the
35-
/// source location of the `cudaq::info()` or `debug()` call.
36-
/// We do this via a struct of the same name (info), which
37-
/// takes the message and variadic arguments for the message, and
38-
/// finishes with arguments with defaults providing the file name
39-
/// and the source location. We could also add the function name
40-
/// if we want it in the future.
41-
///
42-
/// We then use a template deduction guide to map calls of the
43-
/// templated `cudaq::info()` function to this type.
75+
/// These types seek to enable automated injection of the source location of the
76+
/// `cudaq::info()` or `debug()` call. The actual formatting is out-of-line in
77+
/// logger.cpp so callers do not need to parse `fmt` or `chrono` headers.
4478
#define CUDAQ_LOGGER_DEDUCTION_STRUCT(NAME) \
4579
template <typename... Args> \
4680
struct NAME { \
4781
NAME(const std::string_view message, Args &&...args, \
48-
const char *funcName = __builtin_FUNCTION(), \
4982
const char *fileName = __builtin_FILE(), \
5083
int lineNo = __builtin_LINE()) { \
51-
if (details::should_log(details::LogLevel::NAME)) { \
52-
auto msg = cudaq_fmt::format(message, args...); \
53-
std::string name = funcName; \
54-
auto start = name.find_first_of(" "); \
55-
name = name.substr(start + 1, name.find_first_of("(") - start - 1); \
56-
msg = "[" + details::pathToFileName(fileName) + ":" + \
57-
std::to_string(lineNo) + "] " + msg; \
58-
details::NAME(msg); \
59-
} \
84+
if (details::should_log(details::LogLevel::NAME)) \
85+
details::logMessage(details::LogLevel::NAME, message, fileName, \
86+
lineNo, std::forward<Args>(args)...); \
6087
} \
6188
}; \
6289
template <typename... Args> \
@@ -69,7 +96,7 @@ CUDAQ_LOGGER_DEDUCTION_STRUCT(error);
6996
#ifdef CUDAQ_DEBUG
7097
CUDAQ_LOGGER_DEDUCTION_STRUCT(debug);
7198
#else
72-
// Remove cudaq::debug log messages from Release binaries
99+
// Remove cudaq::debug log messages from Release binaries.
73100
template <typename... Args>
74101
void debug(const std::string_view, Args &&...) {}
75102
#endif
@@ -79,43 +106,17 @@ void debug(const std::string_view, Args &&...) {}
79106
// Note 2: File and line info is not included in the log line.
80107
template <typename... Args>
81108
void log(const std::string_view message, Args &&...args) {
82-
const auto timestamp = std::chrono::system_clock::now();
83-
const auto now_c = std::chrono::system_clock::to_time_t(timestamp);
84-
std::tm now_tm = *std::localtime(&now_c);
85-
cudaq_fmt::print("[{:04}-{:02}-{:02} {:02}:{:02}:{:%S}] {}\n",
86-
now_tm.tm_year + 1900, now_tm.tm_mon + 1, now_tm.tm_mday,
87-
now_tm.tm_hour, now_tm.tm_min,
88-
std::chrono::round<std::chrono::milliseconds>(
89-
timestamp.time_since_epoch()),
90-
cudaq_fmt::format(message, args...));
109+
std::array<cudaq_fmt::FormatArgument, sizeof...(Args)> packedArgs{
110+
cudaq_fmt::FormatArgument(std::forward<Args>(args))...};
111+
details::logWithTimestampPacked(
112+
message, std::span<const cudaq_fmt::FormatArgument>(packedArgs.data(),
113+
packedArgs.size()));
91114
}
92115

93-
/// @brief This type is meant to provided quick tracing
94-
/// of function calls. Instantiate at the beginning
95-
/// of a function and when it goes out of scope at function
96-
/// end, it will call to the trace function and report
97-
/// the function name and the execution time in ms.
98-
//
99-
// Since this traces upon destruction, tracing in tandem
100-
// with ScopeTrace instances within functions called from
101-
// the current one will stack and can be read in reverse order.
102-
// This type keeps track of an integer that prints and indentation
103-
// before the message to demonstrate the call stack.
104-
//
105-
// e.g. for
106-
// void bar() {
107-
// ScopedTrace trace("bar");
108-
// foobar() // <-- also creates a ScopedTrace
109-
// }
110-
// void foo() {
111-
// ScopedTrace trace("foo");
112-
// bar()
113-
// }
114-
//
115-
// will print
116-
// [2022-12-15 18:54:39.346] [trace] -- foobar() executed in 0.026 ms.
117-
// [2022-12-15 18:54:39.347] [trace] - bar executed in 0.604 ms.
118-
// [2022-12-15 18:54:39.347] [trace] foo executed in 2.572 ms.
116+
/// @brief This type is meant to provided quick tracing of function calls.
117+
/// Instantiate at the beginning of a function and when it goes out of scope at
118+
/// function end, it will call to the trace function and report the function
119+
/// name and execution time.
119120
class ScopedTrace {
120121
private:
121122
SpanHandle handle;
@@ -140,7 +141,7 @@ class ScopedTrace {
140141
for (std::size_t i = 0; i < nArgs; i++)
141142
argsMsg += (i != nArgs - 1) ? "{}, " : "{}}})";
142143
}
143-
return cudaq_fmt::format(argsMsg, args...);
144+
return details::formatMessage(argsMsg, std::forward<Args>(args)...);
144145
}
145146
}
146147

0 commit comments

Comments
 (0)