Skip to content

Commit 5af8047

Browse files
authored
[clickhouse] replicated and single-node config should match (#10454)
Follow-up to #10443 and #10450. We should keep both configs in sync.
1 parent 15353b1 commit 5af8047

2 files changed

Lines changed: 116 additions & 2 deletions

File tree

clickhouse-admin/types/testutils/replica-server-config.xml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,38 @@
1414
<profiles>
1515
<default>
1616
<load_balancing>random</load_balancing>
17+
<!--
18+
Omit logs for fast queries. As of this writing, the vast
19+
majority of clickhouse queries are INSERTS that succeed in
20+
<5ms, and aren't operationally interesting. From a test rack:
21+
SELECT
22+
roundToExp2(greatest(query_duration_ms, 1)) AS bucket_start,
23+
bucket_start * 2 AS bucket_end,
24+
count() AS count,
25+
round((100 * count()) / (
26+
SELECT count()
27+
FROM system.query_log
28+
), 4) AS pct
29+
FROM system.query_log
30+
GROUP BY bucket_start, bucket_end
31+
ORDER BY bucket_start ASC
32+
┌─bucket_start─┬─bucket_end─┬───count─┬─────pct─┐
33+
1. │ 1 │ 2 │ 3590120 │ 62.6491 │
34+
2. │ 2 │ 4 │ 1206074 │ 21.0465 │
35+
3. │ 4 │ 8 │ 298972 │ 5.2172 │
36+
4. │ 8 │ 16 │ 109739 │ 1.915 │
37+
5. │ 16 │ 32 │ 114881 │ 2.0047 │
38+
6. │ 32 │ 64 │ 121448 │ 2.1193 │
39+
7. │ 64 │ 128 │ 130456 │ 2.2765 │
40+
8. │ 128 │ 256 │ 87336 │ 1.5241 │
41+
9. │ 256 │ 512 │ 57767 │ 1.0081 │
42+
10. │ 512 │ 1024 │ 12327 │ 0.2151 │
43+
11. │ 1024 │ 2048 │ 1341 │ 0.0234 │
44+
12. │ 2048 │ 4096 │ 56 │ 0.001 │
45+
13. │ 4096 │ 8192 │ 1 │ 0 │
46+
└──────────────┴────────────┴─────────┴─────────┘
47+
-->
48+
<log_queries_min_query_duration_ms>5</log_queries_min_query_duration_ms>
1749
</default>
1850

1951
</profiles>
@@ -49,6 +81,31 @@
4981
<flush_interval_milliseconds>10000</flush_interval_milliseconds>
5082
</query_log>
5183

84+
<!--
85+
Mask long queries generated by oximeter.
86+
87+
As of this writing, oximeter builds queries like
88+
89+
SELECT * FROM oximeter.measurements_* WHERE timeseries_key IN (...)
90+
91+
where the IN clause contains thousands of elements. A single oximeter
92+
query can generate multiple queries of this form. We observe that
93+
these queries comprise ~80% of the `system.query_log` table, which is
94+
itself one of the largest tables in terms of compressed disk use.
95+
This block truncates this long query pattern, since it's not
96+
operationally useful and consumes a significant amount of disk.
97+
98+
Note: this rule will become irrelevant if we change the metrics
99+
data model in clickhouse.
100+
-->
101+
<query_masking_rules>
102+
<rule>
103+
<name>truncate large timeseries_key IN clauses</name>
104+
<regexp>(\btimeseries_key\s+IN\s*\()[^)]{120,}\)</regexp>
105+
<replace>\1...)</replace>
106+
</rule>
107+
</query_masking_rules>
108+
52109
<metric_log>
53110
<database>system</database>
54111
<table>metric_log</table>

clickhouse-admin/types/versions/src/impls/config.rs

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,46 @@ impl ReplicaConfig {
7979
let format_schema_path = data_path.clone().join("format_schemas");
8080
let backup_path = data_path.clone().join("backup");
8181
format!(
82-
"<!-- generation:{generation} -->
82+
r#"<!-- generation:{generation} -->
8383
<clickhouse>
8484
{logger}
8585
<path>{data_path}</path>
8686
8787
<profiles>
8888
<default>
8989
<load_balancing>random</load_balancing>
90+
<!--
91+
Omit logs for fast queries. As of this writing, the vast
92+
majority of clickhouse queries are INSERTS that succeed in
93+
<5ms, and aren't operationally interesting. From a test rack:
94+
SELECT
95+
roundToExp2(greatest(query_duration_ms, 1)) AS bucket_start,
96+
bucket_start * 2 AS bucket_end,
97+
count() AS count,
98+
round((100 * count()) / (
99+
SELECT count()
100+
FROM system.query_log
101+
), 4) AS pct
102+
FROM system.query_log
103+
GROUP BY bucket_start, bucket_end
104+
ORDER BY bucket_start ASC
105+
┌─bucket_start─┬─bucket_end─┬───count─┬─────pct─┐
106+
1. │ 1 │ 2 │ 3590120 │ 62.6491 │
107+
2. │ 2 │ 4 │ 1206074 │ 21.0465 │
108+
3. │ 4 │ 8 │ 298972 │ 5.2172 │
109+
4. │ 8 │ 16 │ 109739 │ 1.915 │
110+
5. │ 16 │ 32 │ 114881 │ 2.0047 │
111+
6. │ 32 │ 64 │ 121448 │ 2.1193 │
112+
7. │ 64 │ 128 │ 130456 │ 2.2765 │
113+
8. │ 128 │ 256 │ 87336 │ 1.5241 │
114+
9. │ 256 │ 512 │ 57767 │ 1.0081 │
115+
10. │ 512 │ 1024 │ 12327 │ 0.2151 │
116+
11. │ 1024 │ 2048 │ 1341 │ 0.0234 │
117+
12. │ 2048 │ 4096 │ 56 │ 0.001 │
118+
13. │ 4096 │ 8192 │ 1 │ 0 │
119+
└──────────────┴────────────┴─────────┴─────────┘
120+
-->
121+
<log_queries_min_query_duration_ms>5</log_queries_min_query_duration_ms>
90122
</default>
91123
92124
</profiles>
@@ -122,6 +154,31 @@ impl ReplicaConfig {
122154
<flush_interval_milliseconds>10000</flush_interval_milliseconds>
123155
</query_log>
124156
157+
<!--
158+
Mask long queries generated by oximeter.
159+
160+
As of this writing, oximeter builds queries like
161+
162+
SELECT * FROM oximeter.measurements_* WHERE timeseries_key IN (...)
163+
164+
where the IN clause contains thousands of elements. A single oximeter
165+
query can generate multiple queries of this form. We observe that
166+
these queries comprise ~80% of the `system.query_log` table, which is
167+
itself one of the largest tables in terms of compressed disk use.
168+
This block truncates this long query pattern, since it's not
169+
operationally useful and consumes a significant amount of disk.
170+
171+
Note: this rule will become irrelevant if we change the metrics
172+
data model in clickhouse.
173+
-->
174+
<query_masking_rules>
175+
<rule>
176+
<name>truncate large timeseries_key IN clauses</name>
177+
<regexp>(\btimeseries_key\s+IN\s*\()[^)]{{120,}}\)</regexp>
178+
<replace>\1...)</replace>
179+
</rule>
180+
</query_masking_rules>
181+
125182
<metric_log>
126183
<database>system</database>
127184
<table>metric_log</table>
@@ -199,7 +256,7 @@ impl ReplicaConfig {
199256
{keepers}
200257
201258
</clickhouse>
202-
"
259+
"#
203260
)
204261
}
205262
}

0 commit comments

Comments
 (0)