@@ -79,14 +79,46 @@ impl ReplicaConfig {
7979 let format_schema_path = data_path. clone ( ) . join ( "format_schemas" ) ;
8080 let backup_path = data_path. clone ( ) . join ( "backup" ) ;
8181 format ! (
82- "<!-- generation:{generation} -->
82+ r# "<!-- generation:{generation} -->
8383<clickhouse>
8484{logger}
8585 <path>{data_path}</path>
8686
8787 <profiles>
8888 <default>
8989 <load_balancing>random</load_balancing>
90+ <!--
91+ Omit query log entries for fast queries. As of this writing, the vast
92+ majority of ClickHouse queries are INSERTs that succeed in
93+ <5ms, and aren't operationally interesting. From a test rack:
94+ SELECT
95+ roundToExp2(greatest(query_duration_ms, 1)) AS bucket_start,
96+ bucket_start * 2 AS bucket_end,
97+ count() AS count,
98+ round((100 * count()) / (
99+ SELECT count()
100+ FROM system.query_log
101+ ), 4) AS pct
102+ FROM system.query_log
103+ GROUP BY bucket_start, bucket_end
104+ ORDER BY bucket_start ASC
105+ ┌─bucket_start─┬─bucket_end─┬───count─┬─────pct─┐
106+ 1. │ 1 │ 2 │ 3590120 │ 62.6491 │
107+ 2. │ 2 │ 4 │ 1206074 │ 21.0465 │
108+ 3. │ 4 │ 8 │ 298972 │ 5.2172 │
109+ 4. │ 8 │ 16 │ 109739 │ 1.915 │
110+ 5. │ 16 │ 32 │ 114881 │ 2.0047 │
111+ 6. │ 32 │ 64 │ 121448 │ 2.1193 │
112+ 7. │ 64 │ 128 │ 130456 │ 2.2765 │
113+ 8. │ 128 │ 256 │ 87336 │ 1.5241 │
114+ 9. │ 256 │ 512 │ 57767 │ 1.0081 │
115+ 10. │ 512 │ 1024 │ 12327 │ 0.2151 │
116+ 11. │ 1024 │ 2048 │ 1341 │ 0.0234 │
117+ 12. │ 2048 │ 4096 │ 56 │ 0.001 │
118+ 13. │ 4096 │ 8192 │ 1 │ 0 │
119+ └──────────────┴────────────┴─────────┴─────────┘
120+ -->
121+ <log_queries_min_query_duration_ms>5</log_queries_min_query_duration_ms>
90122 </default>
91123
92124 </profiles>
@@ -122,6 +154,31 @@ impl ReplicaConfig {
122154 <flush_interval_milliseconds>10000</flush_interval_milliseconds>
123155 </query_log>
124156
157+ <!--
158+ Mask long queries generated by oximeter.
159+
160+ As of this writing, oximeter builds queries like
161+
162+ SELECT * FROM oximeter.measurements_* WHERE timeseries_key IN (...)
163+
164+ where the IN clause contains thousands of elements. A single oximeter
165+ query can generate multiple queries of this form. We observe that
166+ these queries comprise ~80% of the `system.query_log` table, which is
167+ itself one of the largest tables in terms of compressed disk use.
168+ This block truncates long IN lists (120+ characters) in the logged query
169+ text, since they're not operationally useful and consume significant disk.
170+
171+ Note: this rule will become irrelevant if we change the metrics
172+ data model in clickhouse.
173+ -->
174+ <query_masking_rules>
175+ <rule>
176+ <name>truncate large timeseries_key IN clauses</name>
177+ <regexp>(\btimeseries_key\s+IN\s*\()[^)]{{120,}}\)</regexp>
178+ <replace>\1...)</replace>
179+ </rule>
180+ </query_masking_rules>
181+
125182 <metric_log>
126183 <database>system</database>
127184 <table>metric_log</table>
@@ -199,7 +256,7 @@ impl ReplicaConfig {
199256{keepers}
200257
201258</clickhouse>
202- "
259+ "#
203260 )
204261 }
205262}
0 commit comments