Skip to content

Commit 801928b

Browse files
OTel/DDog updates 2
Signed-off-by: Lukasz Gryglicki <lgryglicki@cncf.io> Assisted by [OpenAI](https://platform.openai.com/) Assisted by [GitHub Copilot](https://github.com/features/copilot)
1 parent 9232fe1 commit 801928b

5 files changed

Lines changed: 96 additions & 11 deletions

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ cla-backend-go/golang-api.log
276276
utils/otel_dd_go/otel_dd
277277
audit.json
278278
spans*.json
279-
*api_usage.csv
279+
*api_usage*.csv
280280

281281
*.exe
282282
*.exe~

cla-backend-legacy/internal/server/server.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"os"
99
"strings"
1010
"sync"
11+
"time"
1112

1213
"github.com/linuxfoundation/easycla/cla-backend-legacy/internal/api"
1314
"github.com/linuxfoundation/easycla/cla-backend-legacy/internal/logging"
@@ -116,6 +117,13 @@ func wrapHTTPHandlerWithTelemetryBestEffort(next http.Handler) (wrapped http.Han
116117
return telemetry.WrapHTTPHandler(next)
117118
}
118119

120+
func flushTelemetryAfterResponse(next http.Handler) http.Handler {
121+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
122+
next.ServeHTTP(w, r)
123+
telemetry.ForceFlushBestEffort(250 * time.Millisecond)
124+
})
125+
}
126+
119127
// NewHTTPHandler builds the HTTP handler for both Lambda (via adapter) and local runs.
120128
//
121129
// Note: router-level middleware already handles request logging and CORS.
@@ -126,5 +134,5 @@ func NewHTTPHandler() http.Handler {
126134
if !otelEnabled {
127135
return router
128136
}
129-
return wrapHTTPHandlerWithTelemetryBestEffort(router)
137+
return flushTelemetryAfterResponse(wrapHTTPHandlerWithTelemetryBestEffort(router))
130138
}

cla-backend-legacy/internal/telemetry/datadog_otlp.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,32 @@ func InitDatadogOTel(cfg DatadogOTelConfig) (err error) {
138138
return ddInitErr
139139
}
140140

141+
// forceFlusher is the optional capability a tracer provider may offer:
// flushing whatever it has queued, bounded by the supplied context.
// It is declared here, at the consumer, so the provider can be probed with a
// type assertion instead of depending on a concrete provider type.
type forceFlusher interface {
	ForceFlush(ctx context.Context) error
}
144+
145+
// ForceFlushBestEffort asks the global tracer provider to flush queued spans.
146+
// This is intentionally fail-open for Lambda: flushing must never affect API behavior.
147+
func ForceFlushBestEffort(timeout time.Duration) {
148+
defer func() {
149+
if r := recover(); r != nil {
150+
log.Warnf("LG:otel-datadog-forceflush-panic recovered=%v", r)
151+
}
152+
}()
153+
154+
ff, ok := otel.GetTracerProvider().(forceFlusher)
155+
if !ok {
156+
return
157+
}
158+
159+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
160+
defer cancel()
161+
162+
if err := ff.ForceFlush(ctx); err != nil {
163+
log.Debugf("LG:otel-datadog-forceflush-err err=%v", err)
164+
}
165+
}
166+
141167
// NewHTTPClient returns an HTTP client whose outbound requests are instrumented
142168
// and propagate trace context when OTel is enabled.
143169
//

utils/otel_dd/api_usage_stats_ddog.py

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
Example:
2121
./utils/otel_dd/api_usage_stats_ddog.py --from now-60m --to now > api_usage.csv
2222
./utils/otel_dd/api_usage_stats_ddog.py --no-skip-e2e | head
23-
./utils/otel_dd/api_usage_stats_ddog.py --from now-24h --to now > api_usage.csv
23+
./utils/otel_dd/api_usage_stats_ddog.py --from now-24h --to now --env prod > api_usage.csv
2424
./utils/otel_dd/api_usage_stats_ddog.py --verbose | head
2525
"""
2626

@@ -34,6 +34,7 @@
3434
import re
3535
import sys
3636
import urllib.error
37+
import urllib.parse
3738
import urllib.request
3839
from typing import Any, Dict, List, Optional, Tuple
3940

@@ -143,6 +144,24 @@ def is_e2e_true(span: Dict[str, Any]) -> bool:
143144
return str(v).strip().lower() == "true"
144145

145146

147+
def api_path_from_candidate(value: Any) -> Optional[str]:
    """
    Reduce a candidate span attribute to a bare API path, or None.

    Accepts either an absolute http(s) URL or a request target such as
    "/v2/user?x=1". The query string and fragment are dropped in both forms.

    Returns:
      The path if it starts with "/v" (e.g. "/v1/...", "/v2/..."), else None.
      None is also returned for non-string, empty/whitespace-only, or
      unparseable input, so that one bad span never aborts the whole run.
    """
    if not isinstance(value, str):
        return None
    raw = value.strip()
    if not raw:
        return None

    if raw.startswith("http://") or raw.startswith("https://"):
        try:
            parsed = urllib.parse.urlparse(raw)
        except ValueError:
            # urlparse rejects malformed authorities (e.g. an unbalanced "[").
            return None
        path = parsed.path or ""
    else:
        # Mirror the absolute-URL branch: drop the query string and the fragment.
        path = raw.split("?", 1)[0].split("#", 1)[0]

    if not path.startswith("/v"):
        return None
    return path
163+
164+
146165
def extract_route(span: Dict[str, Any], *, sanitize_routes: bool = False) -> Optional[str]:
147166
"""
148167
Prefer templated HTTP route:
@@ -160,17 +179,28 @@ def extract_route(span: Dict[str, Any], *, sanitize_routes: bool = False) -> Opt
160179
value = route.strip()
161180
return sanitize_api_path(value) if sanitize_routes else value
162181

182+
for candidate in (
183+
http.get("target"),
184+
http.get("path"),
185+
(http.get("url_details") or {}).get("path") if isinstance(http.get("url_details"), dict) else None,
186+
http.get("url"),
187+
):
188+
path = api_path_from_candidate(candidate)
189+
if path:
190+
return sanitize_api_path(path) if sanitize_routes else path
191+
163192
resource_name = attrs.get("resource_name")
164193
if isinstance(resource_name, str):
165194
rn = resource_name.strip()
166195
# Often "METHOD /path"
167196
parts = rn.split(None, 1)
168197
if len(parts) == 2 and parts[1].startswith("/"):
169-
value = parts[1].strip()
198+
value = parts[1].strip().split("?", 1)[0]
170199
return sanitize_api_path(value) if sanitize_routes else value
171200
# Sometimes just "/path"
172201
if rn.startswith("/"):
173-
return sanitize_api_path(rn) if sanitize_routes else rn
202+
value = rn.split("?", 1)[0]
203+
return sanitize_api_path(value) if sanitize_routes else value
174204

175205
return None
176206

@@ -320,15 +350,19 @@ def main() -> int:
320350

321351
kept = 0
322352
skipped_e2e = 0
353+
skipped_e2e_routes: Dict[str, int] = {}
323354
skipped_missing_route = 0
324355
skipped_missing_ts = 0
325356

326357
for span in spans:
358+
route = extract_route(span, sanitize_routes=args.sanitize_routes)
359+
327360
if skip_e2e and is_e2e_true(span):
328361
skipped_e2e += 1
362+
if route:
363+
skipped_e2e_routes[route] = skipped_e2e_routes.get(route, 0) + 1
329364
continue
330365

331-
route = extract_route(span, sanitize_routes=args.sanitize_routes)
332366
if not route:
333367
skipped_missing_route += 1
334368
continue
@@ -355,10 +389,16 @@ def main() -> int:
355389
w.writerow([route, cnt, fmt_ts(tmin), fmt_ts(tmax)])
356390

357391
if args.verbose:
392+
eprint(f"[ddog] query: {query}")
393+
eprint(f"[ddog] from/to: {args.time_from} -> {args.time_to}")
358394
eprint(f"[ddog] spans fetched: {len(spans)}")
359395
eprint(f"[ddog] spans kept: {kept}")
360396
if skip_e2e:
361397
eprint(f"[ddog] e2e skipped: {skipped_e2e}")
398+
if skipped_e2e_routes:
399+
eprint("[ddog] top skipped e2e routes:")
400+
for route, cnt in sorted(skipped_e2e_routes.items(), key=lambda x: (-x[1], x[0]))[:25]:
401+
eprint(f"[ddog] {cnt:5d} {route}")
362402
eprint(f"[ddog] no-route: {skipped_missing_route}")
363403
eprint(f"[ddog] no-ts: {skipped_missing_ts}")
364404
eprint(f"[ddog] routes: {len(stats)}")

utils/otel_dd/check_spans_in_ddog.sh

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22
set -euo pipefail
33

44
# Example:
5-
# ./utils/otel_dd/check_spans_in_ddog.sh --env prod --skip-e2e \
6-
# | jq -r '.data[].attributes.custom.http.route' | sort | uniq
5+
# ./utils/otel_dd/check_spans_in_ddog.sh --env prod --skip-e2e | jq -r '.data[].attributes.custom.http.route' | sort | uniq
76

87
usage() {
98
cat <<'EOF' >&2
109
Usage:
11-
check_spans_in_ddog.sh [--env <dev|prod|...>] [--stage <dev|prod|...>] [--skip-e2e|--no-skip-e2e]
10+
check_spans_in_ddog.sh [--env <dev|prod|...>] [--stage <dev|prod|...>] [--from <time>] [--to <time>] [--skip-e2e|--no-skip-e2e]
1211
1312
Env vars:
1413
DD_SITE, DD_API_KEY, DD_APP_KEY (required)
@@ -24,6 +23,8 @@ EOF
2423
SKIP_E2E=0
2524
DD_ENV="${DD_ENV:-${ENV:-${STAGE:-dev}}}"
2625
DD_SERVICE="${DD_SERVICE:-easycla-backend}"
26+
TIME_FROM="now-60m"
27+
TIME_TO="now"
2728

2829
while [[ $# -gt 0 ]]; do
2930
case "$1" in
@@ -34,6 +35,16 @@ while [[ $# -gt 0 ]]; do
3435
DD_ENV="$2"
3536
shift 2
3637
;;
38+
--from)
39+
[[ $# -ge 2 ]] || { echo "ERROR: --from requires a value" >&2; usage; exit 2; }
40+
TIME_FROM="$2"
41+
shift 2
42+
;;
43+
--to)
44+
[[ $# -ge 2 ]] || { echo "ERROR: --to requires a value" >&2; usage; exit 2; }
45+
TIME_TO="$2"
46+
shift 2
47+
;;
3748
--service)
3849
[[ $# -ge 2 ]] || { echo "ERROR: --service requires a value" >&2; usage; exit 2; }
3950
DD_SERVICE="$2"
@@ -51,11 +62,11 @@ done
5162
QUERY="service:${DD_SERVICE} env:${DD_ENV}"
5263

5364
# Build request JSON safely with jq (avoids quoting bugs)
54-
payload="$(jq -n --arg query "$QUERY" '{
65+
payload="$(TIME_FROM="$TIME_FROM" TIME_TO="$TIME_TO" jq -n --arg query "$QUERY" '{
5566
data: {
5667
type: "search_request",
5768
attributes: {
58-
filter: { from: "now-60m", to: "now", query: $query },
69+
filter: { from: env.TIME_FROM, to: env.TIME_TO, query: $query },
5970
sort: "timestamp",
6071
page: { limit: 5000 }
6172
}

0 commit comments

Comments
 (0)