Skip to content

Commit 954cf97

Browse files
committed
Merge branch 'main' of github.com:Redislabs-Solution-Architects/msstats into gti-608-fix-clusterid-collision
# Conflicts:
#	memorystore.py
2 parents 3c694b6 + 76aa806 commit 954cf97

2 files changed

Lines changed: 47 additions & 25 deletions

File tree

memorystore.py

Lines changed: 46 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
"commands": "redis.googleapis.com/commands/calls",
4949
"memory_usage": "redis.googleapis.com/stats/memory/usage",
5050
"max_memory": "redis.googleapis.com/stats/memory/maxmemory",
51+
"replication_role": "redis.googleapis.com/replication/role",
5152
}
5253
# Valkey (Memorystore for Valkey) - use node-level for commands & usage; instance-level for size.
5354
VALKEY_METRICS = {
@@ -102,6 +103,14 @@ def _resolve_inst_key(rlabels: dict, project_id: str = "") -> str:
102103
return f"{project_id}/{raw_id}" if project_id else raw_id
103104

104105

106+
def _point_value(point, default=0):
107+
"""Extract a numeric value from a GCP monitoring point, handling both int64 and double types."""
108+
try:
109+
return point.value.int64_value or point.value.double_value
110+
except Exception:
111+
return default
112+
113+
105114
def _time_interval(duration_sec: int) -> monitoring_v3.TimeInterval:
106115
now = time.time()
107116
seconds = int(now)
@@ -215,16 +224,7 @@ def _accumulate_commands(results, table, product_name: str, project_id: str):
215224
t = point.interval.start_time.timestamp()
216225
if t not in entry["points"]:
217226
entry["points"][t] = {}
218-
# Support both int/double values
219-
pv = 0.0
220-
try:
221-
pv = point.value.double_value
222-
except Exception:
223-
try:
224-
pv = float(point.value.int64_value)
225-
except Exception:
226-
pv = 0.0
227-
entry["points"][t][cmd] = pv
227+
entry["points"][t][cmd] = float(_point_value(point, default=0.0))
228228

229229

230230
def _apply_processed_categories(table):
@@ -255,13 +255,7 @@ def _attach_memory_usage(results, table, project_id="", key_name="BytesUsedForCa
255255
# take the max usage observed
256256
maxv = 0
257257
for point in ts.points:
258-
try:
259-
v = int(point.value.int64_value)
260-
except Exception:
261-
try:
262-
v = int(point.value.double_value)
263-
except Exception:
264-
v = 0
258+
v = int(_point_value(point))
265259
if v > maxv:
266260
maxv = v
267261
prev = entry.get(key_name, 0)
@@ -276,13 +270,7 @@ def _attach_capacity_scalar(results, table, project_id="", key_name="MaxMemory")
276270
inst_key = _resolve_inst_key(rlabels, project_id)
277271
v_max = 0
278272
for point in ts.points:
279-
try:
280-
v = int(point.value.int64_value)
281-
except Exception:
282-
try:
283-
v = int(point.value.double_value)
284-
except Exception:
285-
v = 0
273+
v = int(_point_value(point))
286274
if v > v_max:
287275
v_max = v
288276
if v_max > cap_by_inst[inst_key]:
@@ -294,6 +282,30 @@ def _attach_capacity_scalar(results, table, project_id="", key_name="MaxMemory")
294282
nodes[node_id][key_name] = cap_by_inst[inst_key]
295283

296284

285+
def _attach_node_role(results, table, project_id=""):
286+
"""Set NodeRole using the dedicated replication/role metric.
287+
288+
The 'role' label on commands/calls is metadata — not its purpose to report
289+
node role — and has been observed returning 'replica' for both nodes on
290+
Standard Tier instances (GTI-608, ~93 affected clusters).
291+
292+
replication/role is the GCP-designated metric for this: 1 = primary, 0 = replica.
293+
See: https://cloud.google.com/memorystore/docs/redis/supported-monitoring-metrics
294+
"""
295+
for ts in results:
296+
rlabels = dict(ts.resource.labels)
297+
inst_key = _resolve_inst_key(rlabels, project_id)
298+
node_id = rlabels.get("node_id") or rlabels.get("shard_id") or "unknown"
299+
if inst_key not in table or node_id not in table[inst_key]:
300+
continue
301+
302+
if not ts.points:
303+
continue
304+
305+
role_val = int(_point_value(ts.points[0]))
306+
table[inst_key][node_id]["NodeRole"] = "Master" if role_val == 1 else "Replica"
307+
308+
297309
def _flatten_rows(table, project_id: str, instance_type: str) -> List[Dict[str, Any]]:
298310
rows = []
299311
for inst_key, nodes in table.items():
@@ -367,6 +379,16 @@ def collect_for_product(
367379
except Exception:
368380
pass
369381

382+
# Node role (Redis only - uses authoritative replication/role metric)
383+
if "replication_role" in metric_map:
384+
try:
385+
role_results = _list_ts(
386+
client, project_name, metric_map["replication_role"], interval
387+
)
388+
_attach_node_role(role_results, table, project_id=project_id)
389+
except Exception:
390+
pass
391+
370392
# Compute command categories
371393
_apply_processed_categories(table)
372394

msstats.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -867,7 +867,7 @@ def main():
867867
help="Duration of the metric window in seconds. Default is 604800 (7 days).",
868868
)
869869

870-
(options, _) = parser.parse_args()
870+
options, _ = parser.parse_args()
871871

872872
if not os.path.isdir(options.outDir):
873873
os.makedirs(options.outDir)

0 commit comments

Comments (0)