Skip to content

Commit b1df2e2

Browse files
committed
feat(metrics provider): add Dynatrace DQL
Closes #1879 Signed-off-by: Mara Furland <mara@fur.land>
1 parent d4cc9bf commit b1df2e2

6 files changed

Lines changed: 1059 additions & 1 deletion

File tree

artifacts/flagger/crd.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,7 @@ spec:
13521352
- newrelic
13531353
- graphite
13541354
- dynatrace
1355+
- dynatraceDQL
13551356
- keptn
13561357
- splunk
13571358
address:

docs/gitbook/usage/metrics.md

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,62 @@ spec:
621621
|> yield(name: "count")
622622
```
623623
624-
## Dynatrace
624+
## Dynatrace DQL
625+
626+
You can create custom metric checks using the Dynatrace DQL provider.
627+
628+
Create a secret with your Dynatrace token:
629+
630+
```yaml
631+
apiVersion: v1
632+
kind: Secret
633+
metadata:
634+
name: dynatrace
635+
namespace: istio-system
636+
data:
637+
dynatrace_token: ZHQwYz...
638+
```
639+
640+
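Dynatrace metric template example. The query must return its value in a field named `r`; Flagger reads `r` from the last record of the result: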
641+
642+
```yaml
643+
apiVersion: flagger.app/v1beta1
644+
kind: MetricTemplate
645+
metadata:
646+
name: response-time-95pct
647+
namespace: istio-system
648+
spec:
649+
provider:
650+
type: dynatraceDQL
651+
address: https://xxxxxxxx.live.dynatrace.com
652+
secretRef:
653+
name: dynatrace
654+
query: |
655+
timeseries reqs = sum(dt.service.request.count),
656+
filter: {
657+
matchesValue(k8s.namespace.name, "{{ namespace }}")
658+
AND matchesValue(k8s.workload.name, "{{ target }}")
659+
}
660+
| fields r = arraySum(reqs)
661+
| fields r = coalesce(r, 0)
662+
663+
```
664+
665+
Reference the template in the canary analysis:
666+
667+
```yaml
668+
analysis:
669+
metrics:
670+
- name: "response-time-95pct"
671+
templateRef:
672+
name: response-time-95pct
673+
namespace: istio-system
674+
thresholdRange:
675+
max: 1000
676+
interval: 1m
677+
```
678+
679+
## Dynatrace Metrics API
625680
626681
You can create custom metric checks using the Dynatrace provider.
627682
Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
/*
2+
Copyright 2020 The Flux authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package providers
18+
19+
import (
20+
"bytes"
21+
"context"
22+
"encoding/json"
23+
"fmt"
24+
"io"
25+
"net/http"
26+
"strings"
27+
"time"
28+
29+
flaggerv1 "github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1"
30+
)
31+
32+
// Dynatrace Grail query service reference:
// https://developer.dynatrace.com/develop/platform-services/services/grail-service/
const (
	// dynatraceDQLAPIPath is appended to the provider address to form the API root.
	dynatraceDQLAPIPath = "/platform/storage/query/v1"

	// Credentials key holding the API token, and the header name and scheme
	// used to send that token with each request.
	dynatraceDQLAPITokenSecretKey       = "dynatrace_token"
	dynatraceDQLAuthorizationHeaderKey  = "Authorization"
	dynatraceDQLAuthorizationHeaderType = "Bearer"

	//dynatraceDeltaMultiplierOnMetricInterval = 10
)
42+
43+
// DynatraceDQLProvider executes DQL queries against the Dynatrace query API.
type DynatraceDQLProvider struct {
	// apiRoot is the provider address with the query API path appended.
	apiRoot string

	// timeout bounds each individual HTTP request.
	timeout time.Duration

	// token is sent as the bearer token on every request.
	token string

	// fromDelta is the non-positive offset added to the current time to
	// obtain the default start of the query timeframe.
	fromDelta time.Duration
}
51+
52+
// NewDynatraceDQLProvider takes a canary spec, a provider spec and the credentials map, and
53+
// returns a Dynatrace client ready to execute queries against the API
54+
func NewDynatraceDQLProvider(metricInterval string,
55+
provider flaggerv1.MetricTemplateProvider,
56+
credentials map[string][]byte) (*DynatraceDQLProvider, error) {
57+
58+
address := provider.Address
59+
if address == "" {
60+
return nil, fmt.Errorf("dynatrace endpoint is not set")
61+
} else if strings.HasSuffix(address, "/") {
62+
address = address[:len(address)-1]
63+
}
64+
65+
dt := DynatraceDQLProvider{
66+
timeout: 5 * time.Second,
67+
apiRoot: address + dynatraceDQLAPIPath,
68+
}
69+
70+
if b, ok := credentials[dynatraceDQLAPITokenSecretKey]; ok {
71+
dt.token = string(b)
72+
} else {
73+
return nil, fmt.Errorf("dynatrace credentials does not contain dynatrace_token")
74+
}
75+
76+
md, err := time.ParseDuration(metricInterval)
77+
if err != nil {
78+
return nil, fmt.Errorf("error parsing metric interval: %w", err)
79+
}
80+
if md > 0 {
81+
dt.fromDelta = md * -1
82+
} else {
83+
dt.fromDelta = md
84+
}
85+
86+
return &dt, nil
87+
}
88+
89+
func (p *DynatraceDQLProvider) _queryPoll(requestToken string) (*QueryPollResponse, []byte, error) {
90+
url := p.apiRoot + "/query:poll"
91+
92+
req, err := http.NewRequest("GET", url, nil)
93+
if err != nil {
94+
return nil, nil, fmt.Errorf("error http.NewRequest: %w", err)
95+
}
96+
97+
q := req.URL.Query()
98+
q.Add("request-token", requestToken)
99+
req.URL.RawQuery = q.Encode()
100+
101+
b, err := p._doRequest(req)
102+
if err != nil {
103+
return nil, nil, err
104+
}
105+
106+
var res QueryPollResponse
107+
err = json.Unmarshal(b, &res)
108+
if err != nil {
109+
return nil, b, fmt.Errorf("error unmarshaling result: %w, '%s'", err, string(b))
110+
}
111+
112+
return &res, b, nil
113+
}
114+
func (p *DynatraceDQLProvider) _queryExecute(query ExecuteRequest) (*QueryStartResponse, []byte, error) {
115+
url := p.apiRoot + "/query:execute"
116+
117+
marshalled, err := json.Marshal(query)
118+
if err != nil {
119+
return nil, nil, fmt.Errorf("failed to marshal request json: %w", err)
120+
}
121+
122+
b, err := p._doJSONPost(marshalled, url)
123+
if err != nil {
124+
return nil, nil, err
125+
}
126+
127+
var res QueryStartResponse
128+
err = json.Unmarshal(b, &res)
129+
if err != nil {
130+
return nil, b, fmt.Errorf("error unmarshaling result: %w, '%s'", err, string(b))
131+
}
132+
133+
return &res, b, nil
134+
}
135+
func (p *DynatraceDQLProvider) _queryVerify(query VerifyRequest) (*VerifyResponse, []byte, error) {
136+
url := p.apiRoot + "/query:verify"
137+
138+
marshalled, err := json.Marshal(query)
139+
if err != nil {
140+
return nil, nil, fmt.Errorf("failed to marshal request json: %w", err)
141+
}
142+
143+
b, err := p._doJSONPost(marshalled, url)
144+
if err != nil {
145+
return nil, nil, err
146+
}
147+
148+
var res VerifyResponse
149+
err = json.Unmarshal(b, &res)
150+
if err != nil {
151+
return nil, b, fmt.Errorf("error unmarshaling result: %w, '%s'", err, string(b))
152+
}
153+
154+
return &res, b, nil
155+
}
156+
func (p *DynatraceDQLProvider) _doJSONPost(body []byte, url string) ([]byte, error) {
157+
req, err := http.NewRequest("POST", url, bytes.NewReader(body))
158+
if err != nil {
159+
return nil, fmt.Errorf("error http.NewRequest: %w", err)
160+
}
161+
162+
req.Header.Set("Content-Type", "application/json")
163+
164+
return p._doRequest(req)
165+
}
166+
167+
func (p *DynatraceDQLProvider) _doRequest(req *http.Request) ([]byte, error) {
168+
req.Header.Set(dynatraceDQLAuthorizationHeaderKey, fmt.Sprintf("%s %s", dynatraceDQLAuthorizationHeaderType, p.token))
169+
req.Header.Set("Accept", "application/json")
170+
171+
ctx, cancel := context.WithTimeout(req.Context(), p.timeout)
172+
defer cancel()
173+
r, err := http.DefaultClient.Do(req.WithContext(ctx))
174+
if err != nil {
175+
return nil, fmt.Errorf("request failed: %w", err)
176+
}
177+
178+
defer r.Body.Close()
179+
b, err := io.ReadAll(r.Body)
180+
if err != nil {
181+
return nil, fmt.Errorf("error reading body: %w", err)
182+
}
183+
184+
if r.StatusCode < 200 || 300 <= r.StatusCode {
185+
return nil, fmt.Errorf("bad status code %d: body: %s: %w", r.StatusCode, string(b), err)
186+
}
187+
return b, nil
188+
}
189+
190+
// RunQuery executes the dynatrace query against DynatraceDQLProvider.dynatraceDQLAPIPath/query:execute and query:poll
191+
// and returns the the first result as float64
192+
func (p *DynatraceDQLProvider) RunQuery(query string) (float64, error) {
193+
// First call query:execute to start the query
194+
// Then call query:poll till it returns a result
195+
// guaranteed to be under a minute
196+
197+
now := time.Now()
198+
start := now.Add(p.fromDelta)
199+
200+
tz := "UTC"
201+
nowStr := now.Format(time.RFC3339)
202+
fromStr := start.Format(time.RFC3339)
203+
executeRequest := ExecuteRequest{
204+
Query: query,
205+
Timezone: &tz,
206+
DefaultTimeframeEnd: &nowStr,
207+
DefaultTimeframeStart: &fromStr,
208+
}
209+
210+
res, b, err := p._queryExecute(executeRequest)
211+
if err != nil {
212+
return 0, fmt.Errorf("error posting query:execute: %w", err)
213+
}
214+
215+
var result *QueryResult
216+
switch res.State {
217+
case CANCELLED:
218+
fallthrough
219+
case RESULTGONE:
220+
fallthrough
221+
case FAILED:
222+
return 0, fmt.Errorf("query:execute failed, in state: %s: %s", res.State, string(b))
223+
case NOTSTARTED:
224+
fallthrough
225+
case RUNNING:
226+
for result == nil {
227+
pollRes, pollB, err := p._queryPoll(*res.RequestToken)
228+
if err != nil {
229+
return 0, fmt.Errorf("error getting query:poll: %w", err)
230+
} else {
231+
switch pollRes.State {
232+
case CANCELLED:
233+
fallthrough
234+
case RESULTGONE:
235+
fallthrough
236+
case FAILED:
237+
return 0, fmt.Errorf("query:poll failed, in state: %s: %s", pollRes.State, string(b))
238+
case SUCCEEDED:
239+
result = pollRes.Result
240+
b = pollB
241+
case NOTSTARTED:
242+
fallthrough
243+
case RUNNING:
244+
time.Sleep(100 * time.Millisecond)
245+
}
246+
}
247+
}
248+
case SUCCEEDED:
249+
result = res.Result
250+
}
251+
252+
if len(result.Records) < 1 {
253+
return 0, fmt.Errorf("invalid response: no results: %s: %w", string(b), ErrNoValuesFound)
254+
}
255+
256+
record := result.Records[len(result.Records)-1]
257+
val, ok := (*record)["r"]
258+
if !ok {
259+
return 0, fmt.Errorf("invalid response data doesn't contain 'r' property: %s: %w", string(b), ErrNoValuesFound)
260+
}
261+
262+
var ret float64
263+
err = json.Unmarshal(*val, &ret)
264+
if err != nil {
265+
return 0, fmt.Errorf("error unmarshaling final data value into float64: %w, '%s'", err, string(*val))
266+
}
267+
268+
return ret, nil
269+
}
270+
271+
// IsOnline calls DynatraceDQLProvider.dynatraceDQLAPIPath/query:verify with
272+
// token and returns an error if the endpoint fails
273+
func (p *DynatraceDQLProvider) IsOnline() (bool, error) {
274+
query := VerifyRequest{
275+
Query: `timeseries{cpu=avg(dt.host.cpu.usage),filter:matchesValue(dt.smartscape.host,"HOST-001109335619D5DD")},from:now()-5m|fields r=arraySum(cpu)`,
276+
}
277+
278+
res, b, err := p._queryVerify(query)
279+
if err != nil {
280+
return false, fmt.Errorf("error posting query:verify: %w", err)
281+
}
282+
283+
if !res.Valid {
284+
return false, fmt.Errorf("query:verify says our valid query is invalid: %s", string(b))
285+
}
286+
287+
return true, nil
288+
}

0 commit comments

Comments
 (0)