|
| 1 | +import click |
| 2 | +import sys |
| 3 | + |
| 4 | +from swsscommon import swsscommon |
| 5 | +from tabulate import tabulate |
| 6 | + |
| 7 | + |
# Notification channel on which queries are published to orchagent.
TASK_STATS_QUERY_CHANNEL = "ORCH_TASK_STATS_QUERY"
# Notification channel on which orchagent's reply is awaited.
TASK_STATS_REPLY_CHANNEL = "ORCH_TASK_STATS_REPLY"
# Upper bound, in milliseconds, on the wait for a reply.
REPLY_TIMEOUT_MS = 10_000
| 11 | + |
| 12 | + |
| 13 | +def _ms(ns): |
| 14 | + """Convert ns -> ms as a float (no unit suffix).""" |
| 15 | + return ns / 1_000_000.0 |
| 16 | + |
| 17 | + |
def _fmt_quartet(median_ns, q1_ns, q3_ns, max_ns):
    """Format four nanosecond values as a 'median/q1/q3/max' string.

    Each value is converted to milliseconds with _ms() and printed with
    two decimals; the four pieces are joined with '/'.
    """
    ordered_ns = (median_ns, q1_ns, q3_ns, max_ns)
    return "/".join(f"{_ms(value):.2f}" for value in ordered_ns)
| 24 | + |
| 25 | + |
def _query_orchagent(op):
    """Publish a query to orchagent and wait for its reply.

    The query is sent on TASK_STATS_QUERY_CHANNEL and the reply is read
    from TASK_STATS_REPLY_CHANNEL, both over one APPL_DB connection.

    Returns the (op_ret, data_ret, fvs) triple popped from the reply
    channel when the reply op is "ok".

    Raises RuntimeError when no reply arrives within REPLY_TIMEOUT_MS,
    when select() reports anything other than a ready object, or when
    the reply op is not "ok".
    """
    appl_db = swsscommon.DBConnector("APPL_DB", 0)
    query_producer = swsscommon.NotificationProducer(
        appl_db, TASK_STATS_QUERY_CHANNEL)
    reply_consumer = swsscommon.NotificationConsumer(
        appl_db, TASK_STATS_REPLY_CHANNEL)

    # The reply consumer is created and registered with the selector
    # before the query is sent.
    selector = swsscommon.Select()
    selector.addSelectable(reply_consumer)

    no_fields = swsscommon.FieldValuePairs([])
    query_producer.send(op, "", no_fields)

    status, _selected = selector.select(REPLY_TIMEOUT_MS)
    if status != swsscommon.Select.OBJECT:
        if status == swsscommon.Select.TIMEOUT:
            raise RuntimeError(
                f"Timed out after {REPLY_TIMEOUT_MS} ms waiting for orchagent reply")
        raise RuntimeError(
            f"Select error waiting for orchagent reply: {status}")

    reply_op, reply_data, reply_fvs = reply_consumer.pop()
    if reply_op == "ok":
        return reply_op, reply_data, reply_fvs
    raise RuntimeError(
        f"orchagent returned error: op={reply_op} data={reply_data}")
| 53 | + |
| 54 | + |
| 55 | +def _parse_stats(fvs): |
| 56 | + """Parse the FieldValueTuples returned by orchagent into a list of |
| 57 | + dicts. Each value is 14 pipe-separated fields: |
| 58 | + count | total_run_ns |
| 59 | + | median_run_ns | q1_run_ns | q3_run_ns | max_run_ns |
| 60 | + | high_outliers | low_outliers |
| 61 | + | sched_count | total_sched_ns |
| 62 | + | median_sched_ns | q1_sched_ns | q3_sched_ns | max_sched_ns |
| 63 | + """ |
| 64 | + rows = [] |
| 65 | + for name, blob in fvs: |
| 66 | + parts = blob.split("|") |
| 67 | + if len(parts) != 14: |
| 68 | + # Skip malformed rows rather than failing the whole table. |
| 69 | + continue |
| 70 | + try: |
| 71 | + count = int(parts[0]) |
| 72 | + total_ns = int(parts[1]) |
| 73 | + median_ns = int(parts[2]) |
| 74 | + q1_ns = int(parts[3]) |
| 75 | + q3_ns = int(parts[4]) |
| 76 | + max_ns = int(parts[5]) |
| 77 | + high_outliers = int(parts[6]) |
| 78 | + low_outliers = int(parts[7]) |
| 79 | + sched_count = int(parts[8]) |
| 80 | + total_sched_ns = int(parts[9]) |
| 81 | + sched_median_ns = int(parts[10]) |
| 82 | + sched_q1_ns = int(parts[11]) |
| 83 | + sched_q3_ns = int(parts[12]) |
| 84 | + sched_max_ns = int(parts[13]) |
| 85 | + except ValueError: |
| 86 | + continue |
| 87 | + rows.append({ |
| 88 | + "name": name, |
| 89 | + "count": count, |
| 90 | + "total_ns": total_ns, |
| 91 | + "median_ns": median_ns, |
| 92 | + "q1_ns": q1_ns, |
| 93 | + "q3_ns": q3_ns, |
| 94 | + "max_ns": max_ns, |
| 95 | + "high_outliers": high_outliers, |
| 96 | + "low_outliers": low_outliers, |
| 97 | + "sched_count": sched_count, |
| 98 | + "total_sched_ns": total_sched_ns, |
| 99 | + "sched_median_ns": sched_median_ns, |
| 100 | + "sched_q1_ns": sched_q1_ns, |
| 101 | + "sched_q3_ns": sched_q3_ns, |
| 102 | + "sched_max_ns": sched_max_ns, |
| 103 | + }) |
| 104 | + return rows |
| 105 | + |
| 106 | + |
def _render_table(rows):
    """Render parsed task statistics as a plain-format tabulate table.

    rows is a list of dicts carrying the keys read below ("name",
    "count", "total_ns", the run and sched quartet keys, and the two
    outlier counters). Rows are ordered by total_ns descending, with
    ties broken by name ascending. A side (run or sched) whose sample
    count is zero is shown as "-" instead of numbers.
    """
    run_keys = ("median_ns", "q1_ns", "q3_ns", "max_ns")
    sched_keys = ("sched_median_ns", "sched_q1_ns",
                  "sched_q3_ns", "sched_max_ns")

    def by_total_then_name(row):
        # Negating total_ns gives a descending order on that component.
        return (-row["total_ns"], row["name"])

    def timing_cells(row, count_key, quartet_keys, total_key):
        # Returns (quartet text, total text) for one side of a row.
        if row[count_key] == 0:
            return "-", "-"
        quartet = _fmt_quartet(*(row[key] for key in quartet_keys))
        return quartet, f"{_ms(row[total_key]):.2f}"

    body = []
    for row in sorted(rows, key=by_total_then_name):
        run_quartet, run_total = timing_cells(
            row, "count", run_keys, "total_ns")
        sched_quartet, sched_total = timing_cells(
            row, "sched_count", sched_keys, "total_sched_ns")

        outlier_total = row["high_outliers"] + row["low_outliers"]

        body.append([
            row["name"],
            run_quartet,
            row["count"],
            outlier_total,
            sched_quartet,
            # "<run>/<sched>": time spent inside the task next to the
            # time spent waiting before it was scheduled.
            f"{run_total}/{sched_total}",
        ])

    column_titles = [
        "TASK",
        "RUN TIME\nmedian/q1/q3/max\n(in msec)",
        "RUNS",
        "OUTLIERS",
        "SCHED LATENCY\nmedian/q1/q3/max\n(in msec)",
        "TOTAL\nrun/sched\n(in msec)",
    ]
    return tabulate(body, headers=column_titles, tablefmt="plain")
| 159 | + |
| 160 | + |
# Click command group; sub-commands attach to it via @orchagent.command(...).
# The docstring below is the group's help text.
@click.group()
def orchagent():
    """Show orchagent runtime information"""
| 165 | + |
| 166 | + |
@orchagent.command("tasks")
def tasks():
    """Show per-Executor execution timing in orchagent"""
    # Send the "show" query; a RuntimeError from the query helper is
    # reported on stderr and turned into exit status 1.
    try:
        _op, _data, field_values = _query_orchagent("show")
    except RuntimeError as failure:
        click.echo(f"Error: {failure}", err=True)
        sys.exit(1)
    else:
        stats_table = _render_table(_parse_stats(field_values))
        click.echo(stats_table)
0 commit comments