Skip to content

Commit 911cefa

Browse files
authored
Update materialized view monitoring and system views documentation for introspection state log changes (#596)
1 parent 1d5450a commit 911cefa

2 files changed

Lines changed: 137 additions & 20 deletions

File tree

docs/materialized-view-monitoring.md

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ WHERE type = 'MaterializedView'
3636

3737
## States
3838

39-
The `system.stream_state_log` stream contains detailed state information that can be used for monitoring and alerting.
39+
In Timeplus Enterprise 3.0 and later, the `system.introspection_state_log` stream contains detailed state information for monitoring and alerting. In earlier releases, use `system.stream_state_log` instead.
4040

4141
Supported states include:
4242

@@ -57,15 +57,21 @@ Supported states include:
5757
SELECT
5858
*
5959
FROM
60-
table(system.stream_state_log)
60+
table(system.introspection_state_log)
6161
WHERE
62-
dimension = 'materialized_view'
62+
starts_with(dimension, 'materialized_view')
6363
AND state_name IN ('end_sn', 'processed_sn');
6464
```
6565

6666
## System Built-in Views
6767

68-
Timeplus provides built-in system views to help monitor Materialized Views:
68+
Timeplus provides built-in system views to help monitor and debug Materialized Views:
6969

7070
- `system.v_failed_mat_views`: Tracks Materialized Views that have failed.
71-
- `system.v_mat_view_lags`: Shows Materialized Views with processing lag.
71+
- `system.v_mat_view_lags`: Shows Materialized Views with processing lag.
72+
- `system.v_storages`: Shows stream and checkpoint storage usage, which is useful when a Materialized View is accumulating checkpoint data.
73+
- `system.v_stream_applied_lags`: Shows per-node applied lag for replicated streams, which is useful when a Materialized View appears blocked by upstream replication or storage apply delay.
74+
75+
For the full list of built-in troubleshooting views, see [Views in system namespace](/system-views).
76+
77+
If you are running a 2.x release, replace `system.introspection_state_log` in the examples above with `system.stream_state_log`.

docs/system-views.md

Lines changed: 126 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Views in system namespace
22
Timeplus provides system views that enable effective troubleshooting and monitoring of your streaming data operations.
33

4+
In Timeplus Enterprise 3.0 and later, these built-in views are defined on `system.introspection_state_log`. If you are running an older 2.x release, the equivalent state stream is `system.stream_state_log`.
5+
46
## v_failed_mat_views
57
```sql
68
CREATE OR REPLACE VIEW system.v_failed_mat_views
@@ -9,16 +11,16 @@ WITH running_mvs_in_last_5m AS
911
(
1012
SELECT
1113
database, name
12-
FROM system.stream_state_log
13-
WHERE (_tp_time > (now() - 5m)) AND (dimension = 'materialized_view') AND (state_name = 'status') AND (state_string_value = 'ExecutingPipeline')
14+
FROM system.introspection_state_log
15+
WHERE (_tp_time > (now() - 5m)) AND starts_with(dimension, 'materialized_view') AND (state_name = 'status') AND (state_string_value = 'Executing')
1416
ORDER BY _tp_time DESC -- order here to make sure we have the latest state
1517
SETTINGS query_mode = 'table'
1618
)
1719
SELECT database, name, state_string_value AS state, _tp_time
18-
FROM system.stream_state_log
19-
WHERE (_tp_time > (now() - 5m)) AND (dimension = 'materialized_view') AND (state_name = 'status') AND NOT ((database, name) IN running_mvs_in_last_5m)
20+
FROM system.introspection_state_log
21+
WHERE (_tp_time > (now() - 5m)) AND starts_with(dimension, 'materialized_view') AND (state_name = 'status') AND NOT ((database, name) IN running_mvs_in_last_5m)
2022
SETTINGS query_mode = 'table'
21-
COMMENT 'version 2';
23+
COMMENT 'version 3';
2224
```
2325

2426
## v_mat_view_lags
@@ -28,7 +30,7 @@ AS
2830
WITH last_5m_progressing_status AS
2931
(
3032
SELECT database, name, state_name, dimension, state_value, _tp_time AS ts
31-
FROM system.stream_state_log
33+
FROM system.introspection_state_log
3234
WHERE (_tp_time > (now() - 5m)) AND (state_name IN ('processed_sn', 'ckpt_sn', 'end_sn'))
3335
ORDER BY _tp_time DESC -- order here to make sure we have latest state
3436
SETTINGS query_mode = 'table'
@@ -67,7 +69,7 @@ SELECT
6769
if (ckpt_sn != 0 AND processed_sn != 0, processed_sn - ckpt_sn, 0) AS ckpt_lag,
6870
ts
6971
FROM mv_lagging_aggr_per_mv
70-
COMMENT 'version 2';
72+
COMMENT 'version 3';
7173
```
7274

7375
## v_no_leader_shards
@@ -77,23 +79,23 @@ AS
7779
WITH last_5m_stream_shards AS
7880
(
7981
SELECT database, name, dimension AS shard
80-
FROM system.stream_state_log
82+
FROM system.introspection_state_log
8183
WHERE _tp_time > (now() - 5m) AND state_name = 'committed_sn'
8284
GROUP BY database, name, dimension
8385
SETTINGS query_mode = 'table'
8486
),
8587
last_5m_quorum_status AS
8688
(
8789
SELECT database, name, dimension AS shard
88-
FROM system.stream_state_log
90+
FROM system.introspection_state_log
8991
WHERE _tp_time > (now() - 5m) AND state_name = 'quorum_replication_status'
9092
GROUP BY database, name, dimension
9193
SETTINGS query_mode = 'table'
9294
)
9395
SELECT database, name, shard
9496
FROM last_5m_stream_shards
9597
WHERE (database, name, shard) NOT IN last_5m_quorum_status
96-
COMMENT 'version 1';
98+
COMMENT 'version 2';
9799
```
98100

99101
## v_replication_lags
@@ -111,7 +113,7 @@ WITH recent_replication_statuses AS
111113
array_map(x -> to_uint64(x:node), replica_statuses) AS replica_nodes,
112114
map_cast(array_map(x -> to_uint64(x:node), replica_statuses), replica_statuses) AS replicas_map,
113115
_tp_time AS ts
114-
FROM system.stream_state_log
116+
FROM system.introspection_state_log
115117
WHERE (_tp_time > (now() - 5m)) AND (state_name = 'quorum_replication_status')
116118
ORDER BY _tp_time DESC
117119
SETTINGS query_mode = 'table'
@@ -138,7 +140,7 @@ SELECT
138140
to_int64((replicas_map[leader_node]):next_sn) - to_int64((replicas_map[replica_node]):next_sn) AS lagging,
139141
ts
140142
FROM latest_replication_statuses
141-
COMMENT 'version 2';
143+
COMMENT 'version 3';
142144
```
143145

144146
## v_shard_leaders
@@ -148,15 +150,15 @@ AS
148150
WITH last_5m_quorum_status AS
149151
(
150152
SELECT database, name, dimension AS shard, node_id AS leader, _tp_time AS ts
151-
FROM system.stream_state_log
153+
FROM system.introspection_state_log
152154
WHERE _tp_time > (now() - 5m) AND state_name = 'quorum_replication_status'
153155
ORDER BY _tp_time DESC -- order here to make sure we have latest state
154156
SETTINGS query_mode = 'table'
155157
)
156158
SELECT database, name, shard, earliest(leader) AS leader, earliest(ts) AS ts
157159
FROM last_5m_quorum_status
158160
GROUP BY database, name, shard
159-
COMMENT 'version 1';
161+
COMMENT 'version 2';
160162
```
161163

162164
## v_under_replication_replicas
@@ -174,7 +176,7 @@ WITH recent_replication_statuses AS
174176
array_map(x -> to_uint64(x:node), replica_statuses) AS replica_nodes,
175177
map_from_arrays(array_map(x -> to_uint64(x:node), replica_statuses), replica_statuses) AS replicas_map,
176178
_tp_time AS ts
177-
FROM system.stream_state_log
179+
FROM system.introspection_state_log
178180
WHERE (_tp_time > (now() - 5m)) AND (state_name = 'quorum_replication_status')
179181
ORDER BY _tp_time DESC
180182
SETTINGS query_mode = 'table'
@@ -195,5 +197,114 @@ latest_replication_statuses AS
195197
SELECT database, name, shard, leader_node, replica_node, (replicas_map[replica_node]):state AS state, ts
196198
FROM latest_replication_statuses
197199
WHERE state != 'Replicate'
200+
COMMENT 'version 3';
201+
```
202+
203+
## v_stream_applied_lags
204+
```sql
205+
-- v_stream_applied_lags: for each (database, name, shard, node_id), reports the
-- difference between the sequence number replicated to that node's log
-- (next_sn - 1 from the latest 'quorum_replication_status' state) and the
-- node's latest 'applied_sn' state, using only states from the last 5 minutes.
-- Rows are returned with the largest lag first.
CREATE OR REPLACE VIEW system.v_stream_applied_lags
206+
AS
207+
-- Raw 'applied_sn' samples from the last 5 minutes.
-- query_mode = 'table' presumably makes this a bounded (non-streaming) read -- TODO confirm.
WITH applied_sn_data AS
208+
(
209+
SELECT database, name, node_id, state_value AS applied_sn, _tp_time AS ts
210+
FROM system.introspection_state_log
211+
WHERE (_tp_time > (now() - 5m)) AND (state_name = 'applied_sn')
212+
ORDER BY _tp_time DESC
213+
SETTINGS query_mode = 'table'
214+
),
215+
-- Keeps one row per (database, name, node_id): the applied_sn carried by the newest sample.
latest_applied_sn_per_node AS
216+
(
217+
SELECT database, name, node_id,
218+
arg_max(applied_sn, ts) AS applied_sn,
219+
arg_max(ts, ts) AS latest_ts
220+
FROM applied_sn_data
221+
GROUP BY database, name, node_id
222+
),
223+
-- Raw 'quorum_replication_status' samples from the last 5 minutes. The shard and the
-- per-replica statuses are extracted from state_string_value; replicas_map is keyed by
-- each replica's node value cast to uint64, and the reporting node_id is exposed as leader_node.
quorum_replication_data AS
224+
(
225+
SELECT
226+
database,
227+
name,
228+
state_string_value:shard AS shard,
229+
node_id AS leader_node,
230+
state_string_value:shard_replication_statuses[*] AS replica_statuses,
231+
array_map(x -> to_uint64(x:node), replica_statuses) AS replica_nodes,
232+
map_cast(array_map(x -> to_uint64(x:node), replica_statuses), replica_statuses) AS replicas_map,
233+
_tp_time AS ts
234+
FROM system.introspection_state_log
235+
WHERE (_tp_time > (now() - 5m)) AND (state_name = 'quorum_replication_status')
236+
ORDER BY _tp_time DESC
237+
SETTINGS query_mode = 'table'
238+
),
239+
-- Keeps the newest replication status per (database, name, shard).
latest_replication_statuses AS
240+
(
241+
SELECT
242+
database,
243+
name,
244+
to_int(shard) AS shard,
245+
arg_max(leader_node, ts) AS leader_node,
246+
arg_max(replica_nodes, ts) AS replica_nodes,
247+
arg_max(replicas_map, ts) AS replicas_map,
248+
arg_max(ts, ts) AS latest_ts
249+
FROM quorum_replication_data
250+
GROUP BY database, name, shard
251+
),
252+
-- Pairs each node's applied_sn with the replication status of the same stream and
-- computes lags = (next_sn - 1) - applied_sn for that node.
-- NOTE(review): the join keys are database and name only, while the right side is per
-- shard and the left side is not shard-keyed; for a multi-shard stream each node row
-- would pair with every shard -- confirm this is intended.
combined_sn_data AS
253+
(
254+
SELECT
255+
a.database,
256+
a.name,
257+
a.node_id,
258+
a.applied_sn,
259+
r.shard,
260+
to_int64((r.replicas_map[a.node_id]):next_sn) - 1 AS log_replicated_sn,
261+
log_replicated_sn - a.applied_sn AS lags
262+
FROM latest_applied_sn_per_node a
263+
JOIN latest_replication_statuses r ON a.database = r.database AND a.name = r.name
264+
)
265+
SELECT
266+
database,
267+
name,
268+
shard,
269+
node_id,
270+
log_replicated_sn,
271+
applied_sn AS storage_applied_sn,
272+
lags
273+
FROM combined_sn_data
274+
ORDER BY lags DESC
275+
COMMENT 'version 1';
276+
```
277+
278+
## v_storages
279+
```sql
280+
-- v_storages: reports byte totals per (database, name, uuid) from states logged in the
-- last 5 minutes, combining 'disk_size' states with 'checkpoint_storage_size' states,
-- ordered with the largest total first.
CREATE OR REPLACE VIEW system.v_storages
281+
AS
282+
WITH storage_sizes AS
283+
(
284+
-- Branch 1: 'disk_size' states. dimension is exposed as store_type and
-- state_string_value as stream_type; rows in the 'neutron' database are excluded.
-- NOTE(review): sum(state_value) adds every sample in the 5-minute window; if a state is
-- logged more than once in that window the total counts each sample -- confirm intended.
SELECT
285+
database, name, uuid, dimension AS store_type, any(state_string_value) AS stream_type, sum(state_value) AS total_bytes
286+
FROM
287+
system.introspection_state_log
288+
WHERE
289+
(_tp_time > (now() - 5m)) AND (state_name = 'disk_size') AND (database != 'neutron')
290+
GROUP BY
291+
database, name, uuid, dimension
292+
UNION ALL
293+
-- Branch 2: 'checkpoint_storage_size' states. store_type is the literal 'ckpt_size' and
-- dimension is exposed as stream_type; this branch has no database exclusion.
SELECT
294+
database, name, uuid, 'ckpt_size' AS store_type, dimension AS stream_type, sum(state_value) AS total_bytes
295+
FROM
296+
system.introspection_state_log
297+
WHERE
298+
(_tp_time > (now() - 5m)) AND state_name = 'checkpoint_storage_size'
299+
GROUP BY
300+
database, name, uuid, dimension
301+
)
302+
SELECT
303+
database, name, uuid, store_type, stream_type, total_bytes
304+
FROM
305+
storage_sizes
306+
ORDER BY
307+
total_bytes DESC
308+
-- query_mode = 'table' presumably makes this a bounded (non-streaming) read -- TODO confirm.
SETTINGS query_mode = 'table'
198309
COMMENT 'version 2';
199310
```

0 commit comments

Comments
 (0)