Skip to content

Commit 166df95

Browse files
committed
[formats] add "start-timestamp-field" support
1 parent 88b6602 commit 166df95

15 files changed

Lines changed: 482 additions & 201 deletions

NEWS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,14 @@ Features:
197197
special `__duration__` field name can be used in the
198198
`line-format` to add a humanized version of the
199199
duration to the pretty-printed message.
200+
* Added the `start-timestamp-field` log format property.
201+
When set, the duration of an operation is computed as the
202+
difference between the `timestamp-field` (treated as the
203+
end time) and the `start-timestamp-field`. This is an
204+
alternative to `duration-field` for logs that record
205+
separate start and end timestamps. The Cloudflare log
206+
format has been updated to use this feature with
207+
`EdgeEndTimestamp` and `EdgeStartTimestamp`.
200208
* The TIMELINE view now shows rows for user-defined tags
201209
and partitions. Each tagged log line appears as a
202210
separate entry in the timeline. Tags whose names start

docs/schemas/format-v1.schema.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@
130130
"description": "The name of the timestamp field in the log message pattern",
131131
"type": "string"
132132
},
133+
"start-timestamp-field": {
134+
"title": "/<format_name>/start-timestamp-field",
135+
"description": "The name of the field that contains the start time of the operation. The timestamp-field is treated as the end time and the duration is computed as the difference.",
136+
"type": "string"
137+
},
133138
"subsecond-field": {
134139
"title": "/<format_name>/subsecond-field",
135140
"description": "The path to the property in a JSON-lines log message that contains the sub-second time value",

docs/source/formats.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,13 @@ object with the following fields:
296296
operation's time span will extend from the timestamp to the
297297
timestamp plus the duration.
298298

299+
:start-timestamp-field: The name of a field that contains the start time
300+
of the operation. When set, the :code:`timestamp-field` is treated as
301+
the end time and the duration is computed as the difference between the
302+
two. The :code:`timestamp-divisor` is applied to both fields. This is
303+
an alternative to using :code:`duration-field` for logs that record
304+
separate start and end timestamps.
305+
299306
:ordered-by-time: (v0.8.3+) Indicates that the order of messages in the file
300307
is time-based. Files that are not naturally ordered by time will be sorted
301308
in order to display them in the correct order. Note that this sorting can

src/formats/cloudflare_log.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
"info": "hit",
1313
"warning": "miss"
1414
},
15-
"duration-field": "OriginResponseDurationMs",
16-
"duration-divisor": 1000,
1715
"line-format": [
1816
{
1917
"field": "ClientIP",
@@ -54,8 +52,9 @@
5452
"field": "ClientRequestUserAgent"
5553
}
5654
],
57-
"timestamp-field": "EdgeStartTimestamp",
55+
"timestamp-field": "EdgeEndTimestamp",
5856
"timestamp-divisor": 1000000000,
57+
"start-timestamp-field": "EdgeStartTimestamp",
5958
"opid": {
6059
"description": {
6160
"std": {
@@ -93,6 +92,7 @@
9392
"kind": "integer"
9493
},
9594
"EdgeStartTimestamp": {
95+
"hidden": true,
9696
"kind": "string"
9797
},
9898
"RayID": {

src/log_format.cc

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,8 @@ struct json_log_userdata {
12251225
std::optional<log_format::scan_error> jlu_scan_error;
12261226
hasher jlu_opid_hasher;
12271227
std::optional<std::chrono::microseconds> jlu_duration;
1228+
std::optional<std::chrono::microseconds> jlu_start_time;
1229+
std::optional<std::chrono::microseconds> jlu_end_time;
12281230
exttm jlu_exttm;
12291231
size_t jlu_read_order_index{0};
12301232
subline_options jlu_subline_opts;
@@ -1286,6 +1288,7 @@ read_json_number(yajlpp_parse_context* ypc,
12861288
timeval tv;
12871289
tv.tv_sec = ts_val / divisor;
12881290
tv.tv_usec = fmod(ts_val, divisor) * (1000000.0 / divisor);
1291+
jlu->jlu_end_time = to_us(tv);
12891292
jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec, jlu->jlu_exttm);
12901293
tv.tv_sec = tm2sec(&jlu->jlu_exttm.et_tm);
12911294
jlu->jlu_exttm.et_gmtoff
@@ -1299,6 +1302,18 @@ read_json_number(yajlpp_parse_context* ypc,
12991302
}
13001303
jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000;
13011304
jlu->jlu_base_line->set_time(tv);
1305+
} else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
1306+
&& jlu->jlu_format->lf_start_timestamp_field == field_name)
1307+
{
1308+
long long divisor = jlu->jlu_format->elf_timestamp_divisor;
1309+
auto scan_res = scn::scan_value<double>(number_frag.to_string_view());
1310+
if (!scan_res) {
1311+
log_error("invalid number %.*s", (int) numberLen, numberVal);
1312+
return 0;
1313+
}
1314+
auto ts_val = scan_res.value().value();
1315+
jlu->jlu_start_time = std::chrono::microseconds(
1316+
static_cast<int64_t>(ts_val * 1000000.0 / divisor));
13021317
} else if (jlu->jlu_format->lf_subsecond_field == field_name) {
13031318
auto scan_res = scn::scan_value<double>(number_frag.to_string_view());
13041319
if (!scan_res) {
@@ -1539,6 +1554,7 @@ rewrite_json_int(yajlpp_parse_context* ypc, long long val)
15391554

15401555
tv.tv_sec = val / divisor;
15411556
tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor);
1557+
jlu->jlu_end_time = to_us(tv);
15421558
jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec,
15431559
jlu->jlu_exttm);
15441560
jlu->jlu_exttm.et_gmtoff
@@ -1551,6 +1567,12 @@ rewrite_json_int(yajlpp_parse_context* ypc, long long val)
15511567
jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET;
15521568
}
15531569
jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000;
1570+
} else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
1571+
&& jlu->jlu_format->lf_start_timestamp_field == field_name)
1572+
{
1573+
long long divisor = jlu->jlu_format->elf_timestamp_divisor;
1574+
jlu->jlu_start_time = std::chrono::microseconds(
1575+
static_cast<int64_t>(val * 1000000.0 / divisor));
15541576
} else if (jlu->jlu_format->lf_subsecond_field == field_name) {
15551577
jlu->jlu_exttm.et_flags &= ~(ETF_MICROS_SET | ETF_MILLIS_SET);
15561578
switch (jlu->jlu_format->lf_subsecond_unit.value()) {
@@ -1605,6 +1627,7 @@ rewrite_json_double(yajlpp_parse_context* ypc, double val)
16051627

16061628
tv.tv_sec = val / divisor;
16071629
tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor);
1630+
jlu->jlu_end_time = to_us(tv);
16081631
jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec,
16091632
jlu->jlu_exttm);
16101633
jlu->jlu_exttm.et_gmtoff
@@ -1617,6 +1640,12 @@ rewrite_json_double(yajlpp_parse_context* ypc, double val)
16171640
jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET;
16181641
}
16191642
jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000;
1643+
} else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
1644+
&& jlu->jlu_format->lf_start_timestamp_field == field_name)
1645+
{
1646+
long long divisor = jlu->jlu_format->elf_timestamp_divisor;
1647+
jlu->jlu_start_time = std::chrono::microseconds(
1648+
static_cast<int64_t>(val * 1000000.0 / divisor));
16201649
} else if (jlu->jlu_format->lf_subsecond_field == field_name) {
16211650
jlu->jlu_exttm.et_flags &= ~(ETF_MICROS_SET | ETF_MILLIS_SET);
16221651
switch (jlu->jlu_format->lf_subsecond_unit.value()) {
@@ -1810,6 +1839,13 @@ external_log_format::scan_json(std::vector<logline>& dst,
18101839
ll.get_msg_level());
18111840
}
18121841

1842+
if (jlu.jlu_start_time && jlu.jlu_end_time && !jlu.jlu_duration) {
1843+
if (jlu.jlu_end_time.value() > jlu.jlu_start_time.value()) {
1844+
jlu.jlu_duration
1845+
= jlu.jlu_end_time.value() - jlu.jlu_start_time.value();
1846+
}
1847+
}
1848+
18131849
auto found_opid_desc = false;
18141850
if (this->elf_opid_field.empty()
18151851
&& this->lf_opid_source.value_or(opid_source_t::from_description)
@@ -2126,7 +2162,8 @@ external_log_format::scan(logfile& lf,
21262162
}
21272163

21282164
auto duration_cap = md[fpat->p_duration_field_index];
2129-
if (duration_cap && !opid_cap) {
2165+
auto start_ts_cap = md[fpat->p_start_timestamp_field_index];
2166+
if ((duration_cap || start_ts_cap) && !opid_cap) {
21302167
hasher h;
21312168
h.update(line_sf);
21322169
h.to_string(tmp_opid_buf);
@@ -2144,6 +2181,21 @@ external_log_format::scan(logfile& lf,
21442181
duration = std::chrono::microseconds(
21452182
static_cast<int64_t>(dur_secs * 1000000));
21462183
}
2184+
} else if (start_ts_cap) {
2185+
exttm start_tm;
2186+
timeval start_tv;
2187+
auto dts = this->build_time_scanner();
2188+
if (dts.scan(start_ts_cap->data(),
2189+
start_ts_cap->length(),
2190+
this->get_timestamp_formats(),
2191+
&start_tm,
2192+
start_tv))
2193+
{
2194+
auto start_us = to_us(start_tv);
2195+
if (log_us > start_us) {
2196+
duration = log_us - start_us;
2197+
}
2198+
}
21472199
}
21482200
auto opid_iter
21492201
= sbc.sbc_opids.insert_op(sbc.sbc_allocator,
@@ -2491,6 +2543,27 @@ external_log_format::annotate(logfile* lf,
24912543
values.lvv_duration_value = duration;
24922544
}
24932545
}
2546+
if (!values.lvv_duration_value) {
2547+
auto start_ts_cap = md[pat.p_start_timestamp_field_index];
2548+
if (start_ts_cap) {
2549+
exttm start_tm;
2550+
timeval start_tv;
2551+
auto dts = this->build_time_scanner();
2552+
if (dts.scan(start_ts_cap->data(),
2553+
start_ts_cap->length(),
2554+
this->get_timestamp_formats(),
2555+
&start_tm,
2556+
start_tv))
2557+
{
2558+
auto start_us = to_us(start_tv);
2559+
auto end_us
2560+
= (*lf)[line_number].get_time<std::chrono::microseconds>();
2561+
if (end_us > start_us) {
2562+
values.lvv_duration_value = end_us - start_us;
2563+
}
2564+
}
2565+
}
2566+
}
24942567

24952568
for (size_t lpc = 0; lpc < pat.p_value_by_index.size(); lpc++) {
24962569
const auto& ivd = pat.p_value_by_index[lpc];
@@ -2710,6 +2783,20 @@ read_json_field(yajlpp_parse_context* ypc,
27102783
frag,
27112784
field_name)};
27122785
}
2786+
} else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
2787+
&& jlu->jlu_format->lf_start_timestamp_field == field_name)
2788+
{
2789+
exttm start_tm;
2790+
timeval start_tv;
2791+
const auto* last = jlu->jlu_format->lf_date_time.scan(
2792+
(const char*) str,
2793+
len,
2794+
jlu->jlu_format->get_timestamp_formats(),
2795+
&start_tm,
2796+
start_tv);
2797+
if (last != nullptr) {
2798+
jlu->jlu_start_time = to_us(start_tv);
2799+
}
27132800
} else if (jlu->jlu_format->elf_level_pointer.pp_value != nullptr) {
27142801
if (jlu->jlu_format->elf_level_pointer.pp_value
27152802
->find_in(field_name.to_string_fragment(), PCRE2_NO_UTF_CHECK)
@@ -2871,6 +2958,20 @@ rewrite_json_field(yajlpp_parse_context* ypc,
28712958
jlu->jlu_format->get_value_meta(field_name,
28722959
value_kind_t::VALUE_TEXT),
28732960
std::string{time_buf});
2961+
} else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
2962+
&& jlu->jlu_format->lf_start_timestamp_field == field_name)
2963+
{
2964+
exttm start_tm;
2965+
timeval start_tv;
2966+
const auto* last = jlu->jlu_format->lf_date_time.scan(
2967+
(const char*) str,
2968+
len,
2969+
jlu->jlu_format->get_timestamp_formats(),
2970+
&start_tm,
2971+
start_tv);
2972+
if (last != nullptr) {
2973+
jlu->jlu_start_time = to_us(start_tv);
2974+
}
28742975
} else if (vd != nullptr
28752976
&& vd->vd_meta.lvm_kind == value_kind_t::VALUE_TIMESTAMP)
28762977
{
@@ -3062,6 +3163,14 @@ external_log_format::get_subline(const log_format_file_state& lffs,
30623163
this->jlf_line_values.lvv_allocator);
30633164
}
30643165

3166+
if (jlu.jlu_start_time && jlu.jlu_end_time && !jlu.jlu_duration)
3167+
{
3168+
if (jlu.jlu_end_time.value() > jlu.jlu_start_time.value()) {
3169+
jlu.jlu_duration = jlu.jlu_end_time.value()
3170+
- jlu.jlu_start_time.value();
3171+
}
3172+
}
3173+
30653174
auto use_opid_hasher = false;
30663175
if (this->elf_opid_field.empty()
30673176
&& this->lf_opid_source.value_or(
@@ -4358,6 +4467,9 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
43584467
pat.p_duration_field_index = named_cap.get_index();
43594468
duration_found += 1;
43604469
}
4470+
if (name == this->lf_start_timestamp_field) {
4471+
pat.p_start_timestamp_field_index = named_cap.get_index();
4472+
}
43614473

43624474
auto value_iter = this->elf_value_defs.find(name);
43634475
if (value_iter != this->elf_value_defs.end()) {

src/log_format.hh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ public:
328328
date_time_scanner lf_date_time;
329329
date_time_scanner lf_time_scanner;
330330
intern_string_t lf_timestamp_field{intern_string::lookup("timestamp", -1)};
331+
intern_string_t lf_start_timestamp_field;
331332
intern_string_t lf_subsecond_field;
332333
std::optional<subsecond_unit> lf_subsecond_unit;
333334
intern_string_t lf_time_field;

src/log_format_ext.hh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ public:
121121
int p_src_file_field_index{-1};
122122
int p_src_line_field_index{-1};
123123
int p_duration_field_index{-1};
124+
int p_start_timestamp_field_index{-1};
124125
int p_timestamp_end{-1};
125126
std::vector<int> p_opid_description_field_indexes;
126127
std::set<size_t> p_matched_samples;

src/log_format_loader.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,6 +1081,12 @@ const struct json_path_container format_handlers = {
10811081
.with_description(
10821082
"The name of the timestamp field in the log message pattern")
10831083
.for_field(&log_format::lf_timestamp_field),
1084+
json_path_handler("start-timestamp-field")
1085+
.with_description(
1086+
"The name of the field that contains the start time of the "
1087+
"operation. The timestamp-field is treated as the end time "
1088+
"and the duration is computed as the difference.")
1089+
.for_field(&log_format::lf_start_timestamp_field),
10841090
json_path_handler("subsecond-field")
10851091
.with_description("The path to the property in a JSON-lines log "
10861092
"message that contains the sub-second time value")

test/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,6 @@ distclean-local:
667667
$(RM_V)rm -rf ../installer-test-home
668668

669669
expected:
670-
$(top_srcdir)/update_expected_output.sh $(srcdir) $(builddir)
670+
$(top_srcdir)/update_expected_output.py $(srcdir) $(builddir)
671671

672672
.PHONY: expected

0 commit comments

Comments
 (0)