Skip to content

Commit 5548dfd

Browse files
committed
open telemetry PoC - client/server and explict token latency
Small issues and also collect stats from /metrics router remove examples + add allTokenLatency metric Fix description of histograms pipeline latency histogram before token measurments
1 parent 0a3e006 commit 5548dfd

3 files changed

Lines changed: 183 additions & 44 deletions

File tree

microservices-connector/cmd/router/main.go

Lines changed: 115 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,18 @@ import (
3636

3737
mcv1alpha3 "github.com/opea-project/GenAIInfra/microservices-connector/api/v1alpha3"
3838
flag "github.com/spf13/pflag"
39+
40+
// Prometheus and opentelemetry imports
41+
"github.com/prometheus/client_golang/prometheus/promhttp"
42+
43+
"go.opentelemetry.io/otel"
44+
"go.opentelemetry.io/otel/exporters/prometheus"
45+
api "go.opentelemetry.io/otel/metric"
46+
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
47+
48+
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
49+
50+
"go.opentelemetry.io/otel/metric"
3951
)
4052

4153
const (
@@ -60,8 +72,8 @@ var (
6072
TLSHandshakeTimeout: time.Minute,
6173
ExpectContinueTimeout: 30 * time.Second,
6274
}
63-
callClient = &http.Client{
64-
Transport: transport,
75+
callClient = http.Client{
76+
Transport: otelhttp.NewTransport(transport),
6577
Timeout: 30 * time.Second,
6678
}
6779
)
@@ -80,6 +92,69 @@ type ReadCloser struct {
8092
*bytes.Reader
8193
}
8294

95+
var (
96+
firstTokenLatencyMeasure metric.Float64Histogram
97+
nextTokenLatencyMeasure metric.Float64Histogram
98+
allTokenLatencyMeasure metric.Float64Histogram
99+
pipelineLatencyMeasure metric.Float64Histogram
100+
)
101+
102+
func init() {
103+
104+
// The exporter embeds a default OpenTelemetry Reader and
105+
// implements prometheus.Collector, allowing it to be used as
106+
// both a Reader and Collector.
107+
exporter, err := prometheus.New()
108+
if err != nil {
109+
log.Error(err, "metrics: cannot init prometheus collector")
110+
}
111+
provider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(exporter))
112+
otel.SetMeterProvider(provider)
113+
114+
// ppalucki: Own metrics defintion bellow
115+
const meterName = "entrag-telemetry"
116+
meter := provider.Meter(meterName)
117+
118+
firstTokenLatencyMeasure, err = meter.Float64Histogram(
119+
"llm.first.token.latency",
120+
metric.WithUnit("ms"),
121+
metric.WithDescription("Measures the duration of first token generation."),
122+
api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
123+
)
124+
if err != nil {
125+
log.Error(err, "metrics: cannot register first token histogram measure")
126+
}
127+
nextTokenLatencyMeasure, err = meter.Float64Histogram(
128+
"llm.next.token.latency",
129+
metric.WithUnit("ms"),
130+
metric.WithDescription("Measures the duration of generating all but first tokens."),
131+
api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
132+
)
133+
if err != nil {
134+
log.Error(err, "metrics: cannot register next token histogram measure")
135+
}
136+
137+
allTokenLatencyMeasure, err = meter.Float64Histogram(
138+
"llm.all.token.latency",
139+
metric.WithUnit("ms"),
140+
metric.WithDescription("Measures the duration to generate response with all tokens."),
141+
api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
142+
)
143+
if err != nil {
144+
log.Error(err, "metrics: cannot register all token histogram measure")
145+
}
146+
147+
pipelineLatencyMeasure, err = meter.Float64Histogram(
148+
"llm.pipeline.latency",
149+
metric.WithUnit("ms"),
150+
metric.WithDescription("Measures the duration to going through pipeline steps until first token is being generated (including read data time from client)."),
151+
api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
152+
)
153+
if err != nil {
154+
log.Error(err, "metrics: cannot register pipeline histogram measure")
155+
}
156+
}
157+
83158
func (ReadCloser) Close() error {
84159
// Typically, you would release resources here, but for bytes.Reader, there's nothing to do.
85160
return nil
@@ -536,6 +611,7 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
536611
go func() {
537612
defer close(done)
538613

614+
allTokensStartTime := time.Now()
539615
inputBytes, err := io.ReadAll(req.Body)
540616
if err != nil {
541617
log.Error(err, "failed to read request body")
@@ -544,6 +620,9 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
544620
}
545621

546622
responseBody, statusCode, err := routeStep(defaultNodeName, *mcGraph, inputBytes, inputBytes, req.Header)
623+
624+
pipelineLatencyMeasure.Record(ctx, float64(time.Since(allTokensStartTime))/float64(time.Millisecond))
625+
547626
if err != nil {
548627
log.Error(err, "failed to process request")
549628
w.Header().Set("Content-Type", "application/json")
@@ -561,9 +640,22 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
561640
}()
562641

563642
w.Header().Set("Content-Type", "application/json")
643+
firstTokenCollected := false
564644
buffer := make([]byte, BufferSize)
565645
for {
646+
647+
// measure time of reading another portion of response
648+
tokenStartTime := time.Now()
566649
n, err := responseBody.Read(buffer)
650+
elapsedTimeMilisecond := float64(time.Since(tokenStartTime)) / float64(time.Millisecond)
651+
652+
if !firstTokenCollected {
653+
firstTokenCollected = true
654+
firstTokenLatencyMeasure.Record(ctx, elapsedTimeMilisecond)
655+
} else {
656+
nextTokenLatencyMeasure.Record(ctx, elapsedTimeMilisecond)
657+
}
658+
567659
if err != nil && err != io.EOF {
568660
log.Error(err, "failed to read from response body")
569661
http.Error(w, "failed to read from response body", http.StatusInternalServerError)
@@ -586,6 +678,10 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
586678
return
587679
}
588680
}
681+
682+
allTokensElapsedTimeMilisecond := float64(time.Since(allTokensStartTime)) / float64(time.Millisecond)
683+
allTokenLatencyMeasure.Record(ctx, allTokensElapsedTimeMilisecond)
684+
589685
}()
590686

591687
select {
@@ -729,8 +825,23 @@ func handleMultipartError(writer *multipart.Writer, err error) {
729825

730826
func initializeRoutes() *http.ServeMux {
731827
mux := http.NewServeMux()
732-
mux.HandleFunc("/", mcGraphHandler)
733-
mux.HandleFunc("/dataprep", mcDataHandler)
828+
829+
// Wrap connector handlers with otelhttp wrappers
830+
// "http.server.request.size" - Int64Counter - "Measures the size of HTTP request messages" (Incoming request bytes total)
831+
// "http.server.response.size" - Int64Counter - "Measures the size of HTTP response messages" (Incoming response bytes total)
832+
// "http.server.duration" - Float64histogram "Measures the duration of inbound HTTP requests." (Incoming end to end duration, milliseconds)
833+
handleFunc := func(pattern string, handlerFunc func(http.ResponseWriter, *http.Request), operation string) {
834+
handler := otelhttp.NewHandler(otelhttp.WithRouteTag(pattern, http.HandlerFunc(handlerFunc)), operation)
835+
mux.Handle(pattern, handler)
836+
}
837+
838+
handleFunc("/", mcGraphHandler, "mcGraphHandler")
839+
handleFunc("/dataprep", mcDataHandler, "mcDataHandler")
840+
841+
promHandler := promhttp.Handler()
842+
handleFunc("/metrics", promHandler.ServeHTTP, "metrics")
843+
log.Info("Metrics exposed on /metrics.")
844+
734845
return mux
735846
}
736847

microservices-connector/go.mod

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,15 @@ require (
66
github.com/onsi/ginkgo/v2 v2.14.0
77
github.com/onsi/gomega v1.30.0
88
github.com/pkg/errors v0.9.1
9+
github.com/prometheus/client_golang v1.19.1
910
github.com/spf13/pflag v1.0.5
10-
github.com/stretchr/testify v1.8.4
11+
github.com/stretchr/testify v1.9.0
1112
github.com/tidwall/gjson v1.17.1
13+
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0
14+
go.opentelemetry.io/otel v1.28.0
15+
go.opentelemetry.io/otel/exporters/prometheus v0.50.0
16+
go.opentelemetry.io/otel/metric v1.28.0
17+
go.opentelemetry.io/otel/sdk/metric v1.28.0
1218
k8s.io/api v0.29.2
1319
k8s.io/apimachinery v0.29.2
1420
k8s.io/client-go v0.29.2
@@ -18,13 +24,15 @@ require (
1824

1925
require (
2026
github.com/beorn7/perks v1.0.1 // indirect
21-
github.com/cespare/xxhash/v2 v2.2.0 // indirect
27+
github.com/cespare/xxhash/v2 v2.3.0 // indirect
2228
github.com/davecgh/go-spew v1.1.1 // indirect
2329
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
2430
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
2531
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
32+
github.com/felixge/httpsnoop v1.0.4 // indirect
2633
github.com/fsnotify/fsnotify v1.7.0 // indirect
27-
github.com/go-logr/logr v1.4.1 // indirect
34+
github.com/go-logr/logr v1.4.2 // indirect
35+
github.com/go-logr/stdr v1.2.2 // indirect
2836
github.com/go-logr/zapr v1.3.0 // indirect
2937
github.com/go-openapi/jsonpointer v0.19.6 // indirect
3038
github.com/go-openapi/jsonreference v0.20.2 // indirect
@@ -46,24 +54,25 @@ require (
4654
github.com/modern-go/reflect2 v1.0.2 // indirect
4755
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
4856
github.com/pmezard/go-difflib v1.0.0 // indirect
49-
github.com/prometheus/client_golang v1.19.1 // indirect
50-
github.com/prometheus/client_model v0.6.0 // indirect
51-
github.com/prometheus/common v0.53.0 // indirect
52-
github.com/prometheus/procfs v0.12.0 // indirect
57+
github.com/prometheus/client_model v0.6.1 // indirect
58+
github.com/prometheus/common v0.55.0 // indirect
59+
github.com/prometheus/procfs v0.15.1 // indirect
5360
github.com/tidwall/match v1.1.1 // indirect
5461
github.com/tidwall/pretty v1.2.0 // indirect
62+
go.opentelemetry.io/otel/sdk v1.28.0 // indirect
63+
go.opentelemetry.io/otel/trace v1.28.0 // indirect
5564
go.uber.org/multierr v1.11.0 // indirect
5665
go.uber.org/zap v1.27.0 // indirect
5766
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
58-
golang.org/x/net v0.25.0 // indirect
59-
golang.org/x/oauth2 v0.20.0 // indirect
60-
golang.org/x/sys v0.20.0 // indirect
61-
golang.org/x/term v0.20.0 // indirect
62-
golang.org/x/text v0.15.0 // indirect
67+
golang.org/x/net v0.26.0 // indirect
68+
golang.org/x/oauth2 v0.21.0 // indirect
69+
golang.org/x/sys v0.21.0 // indirect
70+
golang.org/x/term v0.21.0 // indirect
71+
golang.org/x/text v0.16.0 // indirect
6372
golang.org/x/time v0.5.0 // indirect
64-
golang.org/x/tools v0.21.0 // indirect
73+
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
6574
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
66-
google.golang.org/protobuf v1.34.1 // indirect
75+
google.golang.org/protobuf v1.34.2 // indirect
6776
gopkg.in/inf.v0 v0.9.1 // indirect
6877
gopkg.in/yaml.v2 v2.4.0 // indirect
6978
gopkg.in/yaml.v3 v3.0.1 // indirect

0 commit comments

Comments
 (0)