Skip to content

Commit e65c372

Browse files
committed
pkg: add pfpstatus package
the pfpstatus package is an extended version of the package originally added in `openshift-kni/scheduler-plugins/pkg-kni` which wants to make it easier to troubleshoot PFP desync issues. It is meant to be consumed by any program which produces or consumes PFPs. Signed-off-by: Francesco Romani <fromani@redhat.com>
1 parent d232724 commit e65c372

25 files changed

Lines changed: 2984 additions & 0 deletions

File tree

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/google/cadvisor v0.52.1
1111
github.com/google/go-cmp v0.6.0
1212
github.com/jaypipes/ghw v0.12.0
13+
github.com/k8stopologyawareschedwg/podfingerprint v0.2.3
1314
github.com/onsi/ginkgo v1.14.0
1415
github.com/onsi/gomega v1.35.1
1516
github.com/safchain/ethtool v0.3.0
@@ -33,6 +34,7 @@ require (
3334
)
3435

3536
require (
37+
github.com/OneOfOne/xxhash v1.2.8 // indirect
3638
github.com/StackExchange/wmi v1.2.1 // indirect
3739
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
3840
github.com/emicklei/go-restful/v3 v3.11.0 // indirect

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8=
2+
github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
13
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
24
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
35
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@@ -73,6 +75,8 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm
7375
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
7476
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
7577
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
78+
github.com/k8stopologyawareschedwg/podfingerprint v0.2.3 h1:nVKmzn6g7CHhDtB+jG85Db/B/ALRY9VLh5ueAfVPEHU=
79+
github.com/k8stopologyawareschedwg/podfingerprint v0.2.3/go.mod h1:E0bjgihZTSwRIJVjNV45Lm0C/j5w+YkGSnfpYXoI8Ws=
7680
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
7781
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
7882
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=

pkg/pfpstatus/README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# PFPStatus
2+
3+
PFPStatus offer facilities to store and expose [podfingerprint](https://github.com/k8stopologyawareschedwg/podfingerprint) status info.
4+
5+
The status info is exposed read-only; external actors cannot alter the PFP status.
6+
The PFP status exposes the namespace/name pairs of the pods whose container have exclusive resources assigned and which are
7+
detected by the system. While the information discolosure is minimal, this may be undesirable, so caution is advised.
8+
9+
## Exposing status info
10+
11+
The status info can be exposed by
12+
- dumping the content as JSON files, one per node, in the filesystem.
13+
We recommend to use a fixed size tmpfs to avoid unbounded storage consumption.
14+
The writer dumps the status content atomically and intentionally ignores write errors because assumes fixed size storage.

pkg/pfpstatus/pfpstatus.go

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/*
2+
* Copyright 2025 Red Hat, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package pfpstatus
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"os"
23+
"sync"
24+
"time"
25+
26+
"github.com/go-logr/logr"
27+
28+
"github.com/k8stopologyawareschedwg/podfingerprint"
29+
30+
"github.com/openshift-kni/debug-tools/pkg/pfpstatus/record"
31+
)
32+
33+
const (
34+
PFPStatusDumpEnvVar string = "PFP_STATUS_DUMP"
35+
)
36+
37+
const (
38+
DefaultDumpDirectory string = "/run/pfpstatus"
39+
)
40+
41+
const (
42+
defaultMaxNodes = 5000
43+
defaultMaxSamplesPerNode = 10
44+
defaultDumpPeriod = 10 * time.Second
45+
)
46+
47+
type StorageParams struct {
48+
Enabled bool
49+
Directory string
50+
Period time.Duration
51+
}
52+
53+
type Params struct {
54+
Storage StorageParams
55+
}
56+
57+
type environ struct {
58+
mu sync.Mutex
59+
rec *record.Recorder
60+
lh logr.Logger
61+
}
62+
63+
func DefaultParams() Params {
64+
return Params{
65+
Storage: StorageParams{
66+
Enabled: false,
67+
Directory: DefaultDumpDirectory,
68+
Period: 10 * time.Second,
69+
},
70+
}
71+
}
72+
73+
func ParamsFromEnv(lh logr.Logger, params *Params) {
74+
dumpDir, ok := os.LookupEnv(PFPStatusDumpEnvVar)
75+
if !ok || dumpDir == "" {
76+
params.Storage.Enabled = false
77+
} else {
78+
params.Storage.Enabled = true
79+
params.Storage.Directory = dumpDir
80+
}
81+
82+
// let's try to keep the amount of code we do in init() at minimum.
83+
// This may happen if the container didn't have the directory mounted
84+
if !existsBaseDirectory(dumpDir) {
85+
lh.Info("base directory not found, will discard everything", "baseDirectory", dumpDir)
86+
params.Storage.Enabled = false
87+
}
88+
}
89+
90+
func Setup(logh logr.Logger, params Params) {
91+
if !params.Storage.Enabled {
92+
logh.Info("no backend enabled, nothing to do")
93+
return
94+
}
95+
96+
logh.Info("Setup in progress", "params", fmt.Sprintf("%+#v", params))
97+
98+
rec, err := record.NewRecorder(defaultMaxNodes, defaultMaxSamplesPerNode, time.Now)
99+
if err != nil {
100+
logh.Error(err, "cannot create a status recorder")
101+
return
102+
}
103+
104+
ctx := context.Background()
105+
env := environ{
106+
rec: rec,
107+
lh: logh,
108+
}
109+
110+
ch := make(chan podfingerprint.Status)
111+
podfingerprint.SetCompletionSink(ch)
112+
go collectLoop(ctx, &env, ch)
113+
if params.Storage.Enabled {
114+
go dumpLoop(ctx, &env, params.Storage)
115+
}
116+
}
117+
118+
func collectLoop(ctx context.Context, env *environ, updates <-chan podfingerprint.Status) {
119+
env.lh.V(4).Info("collect loop started")
120+
defer env.lh.V(4).Info("collect loop finished")
121+
for {
122+
select {
123+
case <-ctx.Done():
124+
return
125+
case st := <-updates:
126+
env.mu.Lock()
127+
_ = env.rec.Push(st) // intentionally ignore error
128+
env.mu.Unlock()
129+
}
130+
}
131+
}

pkg/pfpstatus/pfpstatus_file.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright 2025 Red Hat, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package pfpstatus
18+
19+
import (
20+
"context"
21+
"os"
22+
"sync"
23+
"time"
24+
25+
"github.com/openshift-kni/debug-tools/pkg/pfpstatus/record"
26+
)
27+
28+
func dumpLoop(ctx context.Context, env *environ, params StorageParams) {
29+
env.lh.V(4).Info("dump loop started")
30+
defer env.lh.V(4).Info("dump loop finished")
31+
ticker := time.NewTicker(params.Period)
32+
defer ticker.Stop()
33+
for {
34+
select {
35+
case <-ctx.Done():
36+
return
37+
case <-ticker.C:
38+
env.mu.Lock()
39+
snapshot := env.rec.Content()
40+
env.mu.Unlock()
41+
42+
var wg sync.WaitGroup
43+
for nodeName, statuses := range snapshot {
44+
wg.Add(1)
45+
go func(fileName string, statuses []record.RecordedStatus) {
46+
defer wg.Done()
47+
err := record.DumpToFile(params.Directory, fileName, statuses)
48+
if err != nil {
49+
env.lh.V(6).Error(err, "dumping file", "dir", params.Directory, "file", fileName, "statusCount", len(statuses))
50+
}
51+
}(record.NodeNameToFileName(nodeName), statuses)
52+
}
53+
wg.Wait()
54+
}
55+
}
56+
}
57+
58+
func existsBaseDirectory(baseDir string) bool {
59+
info, err := os.Stat(baseDir)
60+
if err != nil {
61+
return false
62+
}
63+
return info.IsDir()
64+
}

pkg/pfpstatus/pfpstatus_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Copyright 2025 Red Hat, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package pfpstatus
18+
19+
import (
20+
"testing"
21+
22+
"github.com/go-logr/logr/testr"
23+
"github.com/google/go-cmp/cmp"
24+
)
25+
26+
func TestParamsFromEnv(t *testing.T) {
27+
type envvar struct {
28+
name string
29+
value string
30+
}
31+
type testCase struct {
32+
name string
33+
envs []envvar
34+
expected Params
35+
}
36+
37+
testCases := []testCase{
38+
{
39+
name: "no env",
40+
expected: Params{},
41+
},
42+
{
43+
name: "enable using existing var",
44+
envs: []envvar{
45+
{
46+
name: PFPStatusDumpEnvVar,
47+
value: "/tmp", // we know this will always exist
48+
},
49+
},
50+
expected: Params{
51+
Storage: StorageParams{
52+
Enabled: true,
53+
Directory: "/tmp",
54+
},
55+
},
56+
},
57+
}
58+
59+
for _, tcase := range testCases {
60+
t.Run(tcase.name, func(t *testing.T) {
61+
for _, env := range tcase.envs {
62+
t.Setenv(env.name, env.value)
63+
}
64+
var got Params
65+
ParamsFromEnv(testr.New(t), &got)
66+
67+
if diff := cmp.Diff(got, tcase.expected); diff != "" {
68+
t.Errorf("compare failed: %s", diff)
69+
}
70+
})
71+
}
72+
}

pkg/pfpstatus/record/codec.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright 2025 Red Hat, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package record
18+
19+
import (
20+
"encoding/json"
21+
"os"
22+
"path/filepath"
23+
"strings"
24+
)
25+
26+
func NodeNameToFileName(name string) string {
27+
return strings.ReplaceAll(name, ".", "_")
28+
}
29+
30+
func FileNameToNodeName(name string) string {
31+
return strings.ReplaceAll(name, "_", ".")
32+
}
33+
34+
// DumpToFile encodes in JSON and dumps the given `obj` to a file.
35+
// `dir` is the base directory on which the file must be created
36+
// `file` is the name of the file to create in `dir`.
37+
// The old file, if present, is always updated atomically.
38+
func DumpToFile(dir, file string, obj any) error {
39+
data, err := json.Marshal(obj)
40+
if err != nil {
41+
return err
42+
}
43+
44+
dst, err := os.CreateTemp(dir, "__"+file)
45+
if err != nil {
46+
return err
47+
}
48+
defer os.Remove(dst.Name()) // either way, we need to get rid of this
49+
50+
_, err = dst.Write(data)
51+
if err != nil {
52+
return err
53+
}
54+
55+
err = dst.Close()
56+
if err != nil {
57+
return err
58+
}
59+
60+
return os.Rename(dst.Name(), filepath.Join(dir, file))
61+
}
62+
63+
// LoadFromFile decodes the JSON data found in `file` placed in `dir`
64+
// and stores it into `obj`, which must be a pointer.
65+
func LoadFromFile(dir, file string, obj any) error {
66+
data, err := os.ReadFile(filepath.Join(dir, file))
67+
if err != nil {
68+
return err
69+
}
70+
return json.Unmarshal(data, obj)
71+
}

0 commit comments

Comments
 (0)