2020 --step 60 # alignment step in seconds for rate metrics (default 60)
2121
2222"""
23+
2324import argparse
2425import csv
2526import os
4748 "commands" : "redis.googleapis.com/commands/calls" ,
4849 "memory_usage" : "redis.googleapis.com/stats/memory/usage" ,
4950 "max_memory" : "redis.googleapis.com/stats/memory/maxmemory" ,
51+ "replication_role" : "redis.googleapis.com/replication/role" ,
5052}
5153# Valkey (Memorystore for Valkey) - use node-level for commands & usage; instance-level for size.
5254VALKEY_METRICS = {
@@ -81,6 +83,14 @@ def _pick(labels: Dict[str, str], keys) -> Optional[str]:
8183 return None
8284
8385
86+ def _point_value (point , default = 0 ):
87+ """Extract a numeric value from a GCP monitoring point, handling both int64 and double types."""
88+ try :
89+ return point .value .int64_value or point .value .double_value
90+ except Exception :
91+ return default
92+
93+
8494def _time_interval (duration_sec : int ) -> monitoring_v3 .TimeInterval :
8595 now = time .time ()
8696 seconds = int (now )
@@ -199,16 +209,7 @@ def _accumulate_commands(results, table, product_name: str, project_id: str):
199209 t = point .interval .start_time .timestamp ()
200210 if t not in entry ["points" ]:
201211 entry ["points" ][t ] = {}
202- # Support both int/double values
203- pv = 0.0
204- try :
205- pv = point .value .double_value
206- except Exception :
207- try :
208- pv = float (point .value .int64_value )
209- except Exception :
210- pv = 0.0
211- entry ["points" ][t ][cmd ] = pv
212+ entry ["points" ][t ][cmd ] = float (_point_value (point , default = 0.0 ))
212213
213214
214215def _apply_processed_categories (table ):
@@ -244,13 +245,7 @@ def _attach_memory_usage(results, table, key_name="BytesUsedForCache"):
244245 # take the max usage observed
245246 maxv = 0
246247 for point in ts .points :
247- try :
248- v = int (point .value .int64_value )
249- except Exception :
250- try :
251- v = int (point .value .double_value )
252- except Exception :
253- v = 0
248+ v = int (_point_value (point ))
254249 if v > maxv :
255250 maxv = v
256251 prev = entry .get (key_name , 0 )
@@ -270,13 +265,7 @@ def _attach_capacity_scalar(results, table, key_name="MaxMemory"):
270265 )
271266 v_max = 0
272267 for point in ts .points :
273- try :
274- v = int (point .value .int64_value )
275- except Exception :
276- try :
277- v = int (point .value .double_value )
278- except Exception :
279- v = 0
268+ v = int (_point_value (point ))
280269 if v > v_max :
281270 v_max = v
282271 if v_max > cap_by_inst [inst_key ]:
@@ -288,6 +277,30 @@ def _attach_capacity_scalar(results, table, key_name="MaxMemory"):
288277 nodes [node_id ][key_name ] = cap_by_inst [inst_key ]
289278
290279
280+ def _attach_node_role (results , table ):
281+ """Set NodeRole using the dedicated replication/role metric.
282+
283+ The 'role' label on commands/calls is metadata — not its purpose to report
284+ node role — and has been observed returning 'replica' for both nodes on
285+ Standard Tier instances (GTI-608, ~93 affected clusters).
286+
287+ replication/role is the GCP-designated metric for this: 1 = primary, 0 = replica.
288+ See: https://cloud.google.com/memorystore/docs/redis/supported-monitoring-metrics
289+ """
290+ for ts in results :
291+ rlabels = dict (ts .resource .labels )
292+ inst_key = rlabels .get ("instance_id" ) or "unknown"
293+ node_id = rlabels .get ("node_id" ) or "unknown"
294+ if inst_key not in table or node_id not in table [inst_key ]:
295+ continue
296+
297+ if not ts .points :
298+ continue
299+
300+ role_val = int (_point_value (ts .points [0 ]))
301+ table [inst_key ][node_id ]["NodeRole" ] = "Master" if role_val == 1 else "Replica"
302+
303+
291304def _flatten_rows (table , project_id : str , instance_type : str ) -> List [Dict [str , Any ]]:
292305 rows = []
293306 for inst_key , nodes in table .items ():
@@ -359,6 +372,16 @@ def collect_for_product(
359372 except Exception :
360373 pass
361374
375+ # Node role (Redis only - uses authoritative replication/role metric)
376+ if "replication_role" in metric_map :
377+ try :
378+ role_results = _list_ts (
379+ client , project_name , metric_map ["replication_role" ], interval
380+ )
381+ _attach_node_role (role_results , table )
382+ except Exception :
383+ pass
384+
362385 # Compute command categories
363386 _apply_processed_categories (table )
364387
0 commit comments