Skip to content

Commit 3281cdd

Browse files
authored
Add cli for checking CI test flakiness (#30)
1 parent 8019ba6 commit 3281cdd

11 files changed

Lines changed: 845 additions & 10 deletions

File tree

.github/workflows/go.yml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ jobs:
2929
- name: Compress binaries
3030
uses: svenstaro/upx-action@v2
3131
with:
32-
file: junit2jira
32+
files: |
33+
junit2jira
34+
flakechecker
3335
3436
- name: Test
3537
run: go test -v ./...
@@ -38,7 +40,9 @@ jobs:
3840
uses: actions/upload-artifact@v3
3941
with:
4042
name: junit2jira
41-
path: junit2jira
43+
path: |
44+
junit2jira
45+
flakechecker
4246
4347
release:
4448
if: startsWith(github.ref, 'refs/tags/')
@@ -55,3 +59,4 @@ jobs:
5559
with:
5660
files: |
5761
junit2jira
62+
flakechecker

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/flakechecker
12
/junit2jira
23
.idea
34
# Binaries for programs and plugins

README.md

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# junit2jira
22

3-
Convert test failures to jira issues
3+
Utility tools for handling test failures
44

55
### Build
66
```shell
@@ -14,6 +14,16 @@ go test ./...
1414

1515
### Usage
1616

17+
This repo provides two cli tools:
18+
- junit2jira
19+
- flakechecker
20+
21+
### junit2jira
22+
23+
`junit2jira` converts test failures into Jira issues. It also posts Slack messages for new failures and imports test results into a database.
24+
25+
*Usage*
26+
1727
```shell
1828
Usage of junit2jira:
1929
-base-link string
@@ -51,7 +61,7 @@ Usage of junit2jira:
5161
print version information and exit
5262
```
5363
54-
## Example usage
64+
*Example usage*
5565
```shell
5666
JIRA_TOKEN="..." junit2jira \
5767
-jira-url "https://..." \
@@ -65,3 +75,45 @@ JIRA_TOKEN="..." junit2jira \
6575
-timestamp "$(date --rfc-3339=seconds)" \
6676
-csv-output -
6777
```
78+
79+
### flakechecker
80+
81+
`flakechecker` helps prevent unnecessary CI pipeline failures by suppressing known flaky tests that are within the allowed failure thresholds.
82+
83+
`flakechecker` relies on several components:
84+
- collected test results from `junit2jira`: we generate a table of flaky tests, including their failure ratios for the last 30 executions.
85+
- flaky test configuration: we define and provide a `flakechecker` configuration with allowed failure ratio thresholds for known flaky tests.
86+
- CI pipeline integration script: `flakechecker` is executed as the last step in a CI pipeline, and provided results allow the CI pipeline script to report success or failure.
87+
88+
The `flakechecker` expects at least one failed test. It will return an error if it is executed on test results without any failures.
89+
90+
`flakechecker` decision making:
91+
- it checks if a failed test in a CI pipeline is listed as flaky in the provided configuration.
92+
- if the test is not found in the flaky tests config -> it will cause the CI pipeline to fail. (test not found)
93+
- if the test is found in the configuration, `flakechecker` will fetch information about the fail ratio for that test from the database. If we have fewer than 30 executions for that test -> it will cause the CI pipeline to fail. (insufficient historical test results)
94+
- if the test's failure ratio in the database exceeds the threshold defined in the config -> it will cause the CI pipeline to fail. (flake ratio is above the allowed threshold)
95+
- if a flaky test's failure ratio is below the defined threshold -> it will report the test as a success in the CI pipeline. (test suppression)
96+
97+
The `flakechecker` will apply this logic for each failed test in the CI pipeline.
98+
99+
*Usage*
100+
101+
```
102+
Usage of flakechecker:
103+
-config-file string
104+
Config file with allowed flakes.
105+
-debug
106+
Enable debug log level.
107+
-job-name string
108+
Name of CI job.
109+
-junit-reports-dir string
110+
Directory containing JUnit report XML files.
111+
-v short alias for -version
112+
-version
113+
print version information and exit
114+
```
115+
116+
*Example usage*
117+
```
118+
flakechecker -config-file flake-config.yml -job-name "${JOB_NAME}" -junit-reports-dir "${ARTIFACT_DIR}"
119+
```

cmd/flakechecker/bq_client.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package main
2+
3+
import (
4+
"cloud.google.com/go/bigquery"
5+
"context"
6+
"github.com/pkg/errors"
7+
log "github.com/sirupsen/logrus"
8+
"time"
9+
)
10+
11+
// BigQuery settings used by the flakechecker client.
const (
	// projectID is the Google Cloud project handed to bigquery.NewClient.
	projectID = "acs-san-stackroxci"

	// queryTimeout is the deadline applied to a single failure-ratio query.
	queryTimeout = 1 * time.Minute

	// queryStrGetFailureRatio reads TotalAll and FailRatio for one test, selected
	// by the @jobName, @className and @testName query parameters.
	queryStrGetFailureRatio = `
SELECT
TotalAll,
FailRatio
FROM
` + "`acs-san-stackroxci.ci_metrics.stackrox_tests__recent_flaky_tests`" + `
WHERE
JobName = @jobName
AND Classname = @className
AND Name = @testName
`
)
24+
25+
type recentFlakyTestInfo struct {
26+
TotalAll int
27+
FailRatio int
28+
}
29+
30+
type biqQueryClient interface {
31+
GetRatioForTest(config flakeDetectionPolicyConfig, testName string) (int, int, error)
32+
}
33+
34+
type bigQueryClient struct {
35+
client *bigquery.Client
36+
}
37+
38+
func getNewBigQueryClient() (biqQueryClient, error) {
39+
ctx := context.Background()
40+
41+
client, err := bigquery.NewClient(ctx, projectID)
42+
if err != nil {
43+
return nil, errors.Wrap(err, "creating BigQuery client")
44+
}
45+
46+
return &bigQueryClient{client: client}, nil
47+
}
48+
49+
func (c *bigQueryClient) GetRatioForTest(config flakeDetectionPolicyConfig, testName string) (int, int, error) {
50+
query := c.client.Query(queryStrGetFailureRatio)
51+
query.Parameters = []bigquery.QueryParameter{
52+
{Name: "jobName", Value: config.RatioJobName},
53+
{Name: "className", Value: config.ClassName},
54+
{Name: "testName", Value: testName},
55+
}
56+
57+
ctx, cancelBigQueryRequest := context.WithTimeout(context.Background(), queryTimeout)
58+
defer cancelBigQueryRequest()
59+
60+
resIter, err := query.Read(ctx)
61+
if err != nil {
62+
return 0, 0, errors.Wrap(err, "query data from BigQuery")
63+
}
64+
65+
// We need only first flakyTestInfo. No need to loop over iterator.
66+
var flakyTestInfo recentFlakyTestInfo
67+
if errNext := resIter.Next(&flakyTestInfo); errNext != nil {
68+
return 0, 0, errors.Wrapf(errNext, "read BigQuery result for flaky test for query params: %v - query: %s", query.Parameters, queryStrGetFailureRatio)
69+
}
70+
71+
if resIter.TotalRows > 1 {
72+
log.Warnf("Expected to find one row in DB, but got more for query params: %v - query: %s", query.Parameters, queryStrGetFailureRatio)
73+
}
74+
75+
return flakyTestInfo.TotalAll, flakyTestInfo.FailRatio, nil
76+
}

cmd/flakechecker/flake_config.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"github.com/pkg/errors"
6+
"gopkg.in/yaml.v3"
7+
"io"
8+
"os"
9+
"regexp"
10+
)
11+
12+
// flakeDetectionPolicyConfig represents configuration used by flakechecker to evaluate failed tests.
13+
type flakeDetectionPolicyConfig struct {
14+
// JobNameRegex is a regular expression for the name of the CI job that should be evaluated by flakechecker.
15+
// (e.g. CI jobs for PRs should be evaluated, but not CI jobs for commits already merged to "main" branch)
16+
JobNameRegex string `yaml:"jobNameRegex"`
17+
// ClassName is the class name of the test that should be isolated. Usually class name for Groovy tests,
18+
// package name for golang tests, etc.
19+
ClassName string `yaml:"className"`
20+
// TestNameRegex is a regular expression used to match test names. Some test names contain detailed information
21+
// (e.g. version 4.4.4), but we want to use ratio for all tests in that group (e.g. 4.4.z).
22+
// Using a regex allows us to group tests as needed.
23+
TestNameRegex string `yaml:"testNameRegex"`
24+
// RatioJobName is the CI job name that should be used for ratio calculation.
25+
// e.g. we take CI runs for commits on "main" branch as input for evaluation of flake ratio.
26+
RatioJobName string `yaml:"ratioJobName"`
27+
// RatioThreshold is the maximum failure percentage that is used to distinguish a flaky test from
28+
// a completely broken test. This information is usually fetched from historical executions and data
29+
// collected in DB. If measured flakiness exceeds this threshold, we no longer want to suppress test failure,
30+
// because we suspect it might have regressed above what we consider acceptable.
31+
RatioThreshold int `yaml:"ratioThreshold"`
32+
}
33+
34+
// flakeDetectionPolicy pairs a flakeDetectionPolicyConfig with the compiled forms of its
// JobNameRegex and TestNameRegex patterns, as built by newFlakeDetectionPolicy.
type flakeDetectionPolicy struct {
35+
config flakeDetectionPolicyConfig
36+
compiledJobNameRegex *regexp.Regexp
37+
compiledTestNameRegex *regexp.Regexp
38+
}
39+
40+
func newFlakeDetectionPolicy(config flakeDetectionPolicyConfig) (*flakeDetectionPolicy, error) {
41+
compiledJobNameRegex, err := regexp.Compile(fmt.Sprintf("^%s$", config.JobNameRegex))
42+
if err != nil {
43+
return nil, errors.Wrap(err, fmt.Sprintf("invalid flake config match job regex: %v", config.JobNameRegex))
44+
}
45+
46+
compiledTestNameRegex, err := regexp.Compile(fmt.Sprintf("^%s$", config.TestNameRegex))
47+
if err != nil {
48+
return nil, errors.Wrap(err, fmt.Sprintf("invalid flake config test name regex: %v", config.TestNameRegex))
49+
}
50+
51+
return &flakeDetectionPolicy{
52+
config: config,
53+
compiledJobNameRegex: compiledJobNameRegex,
54+
compiledTestNameRegex: compiledTestNameRegex,
55+
}, nil
56+
}
57+
58+
func (r *flakeDetectionPolicy) matchJobName(jobName string) bool {
59+
return r.compiledJobNameRegex.MatchString(jobName)
60+
}
61+
62+
func (r *flakeDetectionPolicy) matchClassName(classname string) bool {
63+
return classname == r.config.ClassName
64+
}
65+
66+
func (r *flakeDetectionPolicy) matchTestName(testName string) bool {
67+
return r.compiledTestNameRegex.MatchString(testName)
68+
}
69+
70+
func findFlakeConfigForTest(flakeCheckerRecs []*flakeDetectionPolicy, jobName string, className string, testName string) (*flakeDetectionPolicy, error) {
71+
for _, flakeCheckerRec := range flakeCheckerRecs {
72+
if flakeCheckerRec.matchJobName(jobName) && flakeCheckerRec.matchClassName(className) && flakeCheckerRec.matchTestName(testName) {
73+
return flakeCheckerRec, nil
74+
}
75+
}
76+
77+
return nil, errors.Wrap(errors.Errorf("%q / %q / %q", jobName, className, testName), errDescNoMatch)
78+
}
79+
80+
func loadFlakeConfigFile(fileName string) ([]*flakeDetectionPolicy, error) {
81+
ymlConfigFile, err := os.Open(fileName)
82+
if err != nil {
83+
return nil, errors.Wrap(err, fmt.Sprintf("open flake config file: %s", fileName))
84+
}
85+
defer ymlConfigFile.Close()
86+
87+
ymlConfigFileData, err := io.ReadAll(ymlConfigFile)
88+
if err != nil {
89+
return nil, errors.Wrap(err, fmt.Sprintf("read flake config file: %s", fileName))
90+
}
91+
92+
flakeConfigs := make([]flakeDetectionPolicyConfig, 0)
93+
err = yaml.Unmarshal(ymlConfigFileData, &flakeConfigs)
94+
if err != nil {
95+
return nil, errors.Wrap(err, fmt.Sprintf("parse flake config file: %s", fileName))
96+
}
97+
98+
detectionPolicies := make([]*flakeDetectionPolicy, 0, len(flakeConfigs))
99+
for _, flakeConfig := range flakeConfigs {
100+
detectionPolicy, errNewPolicy := newFlakeDetectionPolicy(flakeConfig)
101+
if errNewPolicy != nil {
102+
return nil, errors.Wrap(err, fmt.Sprintf("create flake detection policy from config: %v", flakeConfig))
103+
}
104+
105+
detectionPolicies = append(detectionPolicies, detectionPolicy)
106+
}
107+
108+
return detectionPolicies, nil
109+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package main
2+
3+
import (
4+
"github.com/stretchr/testify/assert"
5+
"testing"
6+
)
7+
8+
func newFlakeDetectionPolicyMust(config flakeDetectionPolicyConfig) *flakeDetectionPolicy {
9+
policy, err := newFlakeDetectionPolicy(config)
10+
if err != nil {
11+
panic(err)
12+
}
13+
14+
return policy
15+
}
16+
17+
func TestLoadFlakeConfigFile(t *testing.T) {
18+
samples := []struct {
19+
name string
20+
fileName string
21+
22+
expectError bool
23+
expectErrorStr string
24+
expectConfig []*flakeDetectionPolicy
25+
}{
26+
{
27+
name: "no config file",
28+
fileName: "no_config.yml",
29+
expectError: true,
30+
expectErrorStr: "open flake config file: no_config.yml: open no_config.yml: no such file or directory",
31+
expectConfig: nil,
32+
},
33+
{
34+
name: "valid config file",
35+
fileName: "testdata/flake-config.yml",
36+
expectError: false,
37+
expectConfig: []*flakeDetectionPolicy{
38+
newFlakeDetectionPolicyMust(flakeDetectionPolicyConfig{
39+
JobNameRegex: "pr-.*",
40+
ClassName: "TestLoadFlakeConfigFile",
41+
TestNameRegex: "TestLoadFlakeConf.*",
42+
RatioJobName: "main-branch-tests",
43+
RatioThreshold: 5,
44+
}),
45+
newFlakeDetectionPolicyMust(flakeDetectionPolicyConfig{
46+
JobNameRegex: "pull-request-tests",
47+
ClassName: "TestLoadFlakeConfigFile",
48+
TestNameRegex: "TestLoadFlakeConfigFile",
49+
RatioJobName: "main-branch-tests",
50+
RatioThreshold: 10,
51+
}),
52+
},
53+
},
54+
}
55+
56+
for _, sample := range samples {
57+
t.Run(sample.name, func(tt *testing.T) {
58+
config, err := loadFlakeConfigFile(sample.fileName)
59+
60+
if sample.expectError {
61+
assert.EqualError(tt, err, sample.expectErrorStr)
62+
} else {
63+
assert.NoError(tt, err)
64+
}
65+
assert.Equal(tt, sample.expectConfig, config)
66+
})
67+
}
68+
}

0 commit comments

Comments
 (0)