Skip to content

Commit 0b57edd

Browse files
taloricsharang
authored and committed
fix: when parent or child is App, fix to ms timediff
1 parent cb36d22 commit 0b57edd

1 file changed

Lines changed: 26 additions & 11 deletions

File tree

app/app/application/l7_flow_tracing.py

Lines changed: 26 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3045,14 +3045,27 @@ def calculate_host_clock_correction(self, child: SpanNode,
30453045
start_time_diff = parent.flow['start_time_us'] - child.flow[
30463046
'start_time_us']
30473047
end_time_diff = parent.flow['end_time_us'] - child.flow['end_time_us']
3048-
# XXX: 先忽略这种情况,因为如果 child 覆盖 parent 时延,但覆盖了同一个方向(即同时往左或同时往右调)那也是合理的
3049-
# 不合理的情况出现在【既要往左也要往右】,这会被 tidy_host_clock_correction 修正
3050-
# XXX: temporary ignore it
3051-
# if start_time_diff > 0 and end_time_diff < 0:
3052-
# 说明 child cover parent,时延统计存在误差
3053-
# if child's duration cover parent's duration, that's maybe calculation error
3054-
# return
3055-
# 除此外,如果 start_time_diff < 0 and end_time_diff > 0,说明 parent cover child
3048+
3049+
if child.signal_source != parent.signal_source and (
3050+
child.signal_source == L7_FLOW_SIGNAL_SOURCE_OTEL
3051+
or parent.signal_source == L7_FLOW_SIGNAL_SOURCE_OTEL):
3052+
# 其中一边是 App,不计算
3053+
if child.tap_side == TAP_SIDE_APP or parent.tap_side == TAP_SIDE_APP:
3054+
return
3055+
# App & Sys Span 即使 agent_id 不一样,也可能是来自同一个 Pod(非本机 agent 接收数据),这种情况下时延差异纯粹来自于统计,而不是主机误差
3056+
# 对这种情况不应该计算误差,但仅对 App Span 有此逻辑,其他类型的数据时差一定来自主机误差
3057+
parent_instance = _get_auto_instance(parent)
3058+
if parent_instance != 0 and parent_instance == _get_auto_instance(
3059+
child):
3060+
return
3061+
# 如果其中一边是 App Span,则只计算毫秒级别的误差
3062+
# 正数向下取整,负数向上取整
3063+
start_time_diff = math.floor(start_time_diff / 1000) * 1000 \
3064+
if start_time_diff > 0 else math.ceil(start_time_diff / 1000) * 1000
3065+
end_time_diff = math.floor(end_time_diff / 1000) * 1000 \
3066+
if end_time_diff > 0 else math.ceil(end_time_diff / 1000) * 1000
3067+
3068+
# 如果 start_time_diff < 0 and end_time_diff > 0,说明 parent cover child
30563069
# 但即使是这样,也要写入 host_clock_correction 中,因为 parent 可能与 parent 的 parent 有误差,此时 child 要做重新计算
30573070
self._set_host_clock_correction(parent.agent_id, child.agent_id,
30583071
start_time_diff, end_time_diff)
@@ -3074,8 +3087,10 @@ def _set_host_clock_correction(self, host_parent: int, host_child: int,
30743087
if host_parent in self.host_relations.get(host_child, []):
30753088
parent = host_child
30763089
child = host_parent
3077-
min_allow_correction *= -1
3078-
max_allow_correction *= -1
3090+
start_time_diff *= -1
3091+
end_time_diff *= -1
3092+
min_allow_correction = min(start_time_diff, end_time_diff)
3093+
max_allow_correction = max(start_time_diff, end_time_diff)
30793094
avg_correction *= -1
30803095

30813096
self.host_relations.setdefault(parent, []).append(child)
@@ -3173,7 +3188,7 @@ def tidy_host_clock_correction(self) -> dict:
31733188
host_clock_correction_dict[child] = correction_value
31743189
for child in self.host_relations.get(child, []):
31753190
stack.append((child, correction_value))
3176-
3191+
31773192
# remove 0 value
31783193
non_zero_host_clock_correction = dict()
31793194
for host, correction in host_clock_correction_dict.items():

0 commit comments

Comments
 (0)