Skip to content

Commit 5fb653c

Browse files
authored
Merge pull request #1820 from reddevillg/fix_lib_locate
fix: Use system search path when locate driver library
2 parents e0bcfd4 + 9859c30 commit 5fb653c

4 files changed

Lines changed: 100 additions & 53 deletions

File tree

internal/discover/graphics.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ func newGraphicsLibrariesDiscoverer(logger logger.Interface, driver *root.Driver
121121
if err != nil {
122122
return nil, fmt.Errorf("failed to get driver version: %w", err)
123123
}
124-
cudaLibRoot, err := driver.GetDriverLibDirectory()
124+
cudaLibRoots, err := driver.GetDriverLibDirectories()
125125
if err != nil {
126126
return nil, fmt.Errorf("failed to get libcuda.so parent directory: %w", err)
127127
}
@@ -152,7 +152,7 @@ func newGraphicsLibrariesDiscoverer(logger logger.Interface, driver *root.Driver
152152
lookup.NewFileLocator(
153153
lookup.WithLogger(logger),
154154
lookup.WithRoot(driver.Root),
155-
lookup.WithSearchPaths(buildXOrgSearchPaths(cudaLibRoot)...),
155+
lookup.WithSearchPaths(buildXOrgSearchPaths(cudaLibRoots...)...),
156156
lookup.WithCount(1),
157157
),
158158
driver.Root,
@@ -239,8 +239,17 @@ func (d graphicsDriverLibraries) isDriverLibrary(filename string, libraryName st
239239
return match
240240
}
241241

242-
// buildXOrgSearchPaths returns the ordered list of search paths for XOrg files.
243-
func buildXOrgSearchPaths(libRoot string) []string {
242+
// buildXOrgSearchPaths returns the XOrg search paths for each of the specified roots, concatenated in the order the roots are given.
243+
func buildXOrgSearchPaths(roots ...string) []string {
244+
var paths []string
245+
for _, root := range roots {
246+
paths = append(paths, buildXOrgSearchPathsAtRoot(root)...)
247+
}
248+
return paths
249+
}
250+
251+
// buildXOrgSearchPathsAtRoot returns the ordered list of search paths for XOrg files.
252+
func buildXOrgSearchPathsAtRoot(libRoot string) []string {
244253
var paths []string
245254
if libRoot != "" {
246255
paths = append(paths,

internal/lookup/root/cuda_test.go

Lines changed: 44 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -34,69 +34,87 @@ func TestLocate(t *testing.T) {
3434

3535
testCases := []struct {
3636
description string
37-
libcudaPath string
38-
expected string
37+
libPaths []string
38+
expected []string
3939
expectedError error
4040
}{
4141
{
4242
description: "no libcuda does not resolve library",
43-
libcudaPath: "",
44-
expected: "",
43+
libPaths: nil,
44+
expected: nil,
4545
expectedError: lookup.ErrNotFound,
4646
},
4747
{
4848
description: "no-ldcache searches /usr/lib64",
49-
libcudaPath: "/usr/lib64/libcuda.so.123.34",
50-
expected: "/usr/lib64",
49+
libPaths: []string{"/usr/lib64/libcuda.so.123.34"},
50+
expected: []string{"/usr/lib64"},
5151
expectedError: nil,
5252
},
5353
{
5454
description: "no-ldcache searches /usr/lib64 for libnvidia-ml.so.",
55-
libcudaPath: "/usr/lib64/libnvidia-ml.so.123.34",
56-
expected: "/usr/lib64",
55+
libPaths: []string{"/usr/lib64/libnvidia-ml.so.123.34"},
56+
expected: []string{"/usr/lib64"},
57+
expectedError: nil,
58+
},
59+
{
60+
description: "locates two driver library directories",
61+
libPaths: []string{
62+
"/usr/lib64/libcuda.so.123.34",
63+
"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.123.34",
64+
},
65+
expected: []string{
66+
"/usr/lib64",
67+
"/usr/lib/x86_64-linux-gnu",
68+
},
5769
expectedError: nil,
5870
},
5971
}
6072

6173
for _, tc := range testCases {
6274
t.Run(tc.description, func(t *testing.T) {
63-
driverRoot, err := setupDriverRoot(t, tc.libcudaPath)
75+
driverRoot, err := setupDriverRoot(t, tc.libPaths)
6476
require.NoError(t, err)
6577

6678
l := New(
6779
WithLogger(logger),
6880
WithDriverRoot(driverRoot),
6981
)
7082

71-
driverLibraryPath, err := l.GetDriverLibDirectory()
72-
require.ErrorIs(t, err, tc.expectedError)
83+
driverLibraryPaths, err := l.GetDriverLibDirectories()
84+
if tc.expectedError != nil {
85+
require.ErrorIs(t, err, tc.expectedError)
86+
return
87+
}
88+
89+
require.NoError(t, err)
7390

7491
// NOTE: We need to strip `/private` on macOS due to symlink resolution
75-
stripped := strings.TrimPrefix(driverLibraryPath, "/private")
92+
stripped := make([]string, len(driverLibraryPaths))
93+
for i, p := range driverLibraryPaths {
94+
stripped[i] = strings.TrimPrefix(p, "/private")
95+
}
7696

77-
require.Equal(t, tc.expected, stripped)
97+
require.ElementsMatch(t, tc.expected, stripped)
7898
})
7999
}
80100
}
81101

82102
// setupDriverRoot creates a folder that can be used to represent a driver root.
83-
// The path to libcuda can be specified and an empty file is created at this location in the driver root.
84-
func setupDriverRoot(t *testing.T, libCudaPath string) (string, error) {
103+
// Library paths can be specified and empty files are created at these locations in the driver root.
104+
func setupDriverRoot(t *testing.T, libPaths []string) (string, error) {
85105
driverRoot := t.TempDir()
86106

87-
if libCudaPath == "" {
88-
return driverRoot, nil
89-
}
90-
91-
if err := os.MkdirAll(filepath.Join(driverRoot, filepath.Dir(libCudaPath)), 0755); err != nil {
92-
return "", fmt.Errorf("falied to create required driver root folder: %w", err)
93-
}
107+
for _, libPath := range libPaths {
108+
if err := os.MkdirAll(filepath.Join(driverRoot, filepath.Dir(libPath)), 0755); err != nil {
109+
return "", fmt.Errorf("failed to create required driver root folder: %w", err)
110+
}
94111

95-
libCuda, err := os.Create(filepath.Join(driverRoot, libCudaPath))
96-
if err != nil {
97-
return "", fmt.Errorf("failed to create dummy libcuda.so: %w", err)
112+
f, err := os.Create(filepath.Join(driverRoot, libPath))
113+
if err != nil {
114+
return "", fmt.Errorf("failed to create dummy library file: %w", err)
115+
}
116+
f.Close()
98117
}
99-
defer libCuda.Close()
100118

101119
return filepath.EvalSymlinks(driverRoot)
102120
}

internal/lookup/root/root.go

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"os"
2323
"path/filepath"
24+
"slices"
2425
"strings"
2526
"sync"
2627

@@ -43,8 +44,8 @@ type Driver struct {
4344

4445
// version caches the driver version.
4546
version string
46-
// driverLibDirectory caches the path to parent of the driver libraries
47-
driverLibDirectory string
47+
// driverLibDirectories caches the paths to the parent directories of the driver libraries
48+
driverLibDirectories []string
4849
}
4950

5051
// New creates a new Driver root using the specified options.
@@ -70,13 +71,13 @@ func New(opts ...Option) *Driver {
7071
}
7172

7273
d := &Driver{
73-
logger: o.logger,
74-
Root: o.Root,
75-
DevRoot: o.DevRoot,
76-
librarySearchPaths: o.librarySearchPaths,
77-
configSearchPaths: o.configSearchPaths,
78-
version: driverVersion,
79-
driverLibDirectory: "",
74+
logger: o.logger,
75+
Root: o.Root,
76+
DevRoot: o.DevRoot,
77+
librarySearchPaths: o.librarySearchPaths,
78+
configSearchPaths: o.configSearchPaths,
79+
version: driverVersion,
80+
driverLibDirectories: nil,
8081
}
8182

8283
return d
@@ -97,34 +98,36 @@ func (r *Driver) Version() (string, error) {
9798
return r.version, nil
9899
}
99100

100-
// GetDriverLibDirectory returns the cached directory where the driver libs are
101-
// found if possible.
101+
// GetDriverLibDirectories returns the cached directories where the driver libs
102+
// are found if possible.
102103
// If no directories have been cached yet, they are first detected and then returned.
103-
func (r *Driver) GetDriverLibDirectory() (string, error) {
104+
func (r *Driver) GetDriverLibDirectories() ([]string, error) {
104105
r.Lock()
105106
defer r.Unlock()
106107

107-
if r.driverLibDirectory == "" {
108+
if len(r.driverLibDirectories) == 0 {
108109
if err := r.updateInfo(); err != nil {
109-
return "", err
110+
return nil, err
110111
}
111112
}
112113

113-
return r.driverLibDirectory, nil
114+
return r.driverLibDirectories, nil
114115
}
115116

116117
func (r *Driver) DriverLibraryLocator(additionalDirs ...string) (lookup.Locator, error) {
117-
libcudasoParentDirPath, err := r.GetDriverLibDirectory()
118+
libcudasoParentDirPaths, err := r.GetDriverLibDirectories()
118119
if err != nil {
119120
return nil, fmt.Errorf("failed to get libcuda.so parent directory: %w", err)
120121
}
121122

122-
searchPaths := []string{libcudasoParentDirPath}
123+
searchPaths := slices.Clone(libcudasoParentDirPaths)
123124
for _, dir := range additionalDirs {
124125
if strings.HasPrefix(dir, "/") {
125126
searchPaths = append(searchPaths, dir)
126127
} else {
127-
searchPaths = append(searchPaths, filepath.Join(libcudasoParentDirPath, dir))
128+
for _, libcudasoParentDirPath := range libcudasoParentDirPaths {
129+
searchPaths = append(searchPaths, filepath.Join(libcudasoParentDirPath, dir))
130+
}
128131
}
129132
}
130133

@@ -141,16 +144,33 @@ func (r *Driver) DriverLibraryLocator(additionalDirs ...string) (lookup.Locator,
141144
}
142145

143146
func (r *Driver) updateInfo() error {
144-
driverLibPath, version, err := r.inferVersion()
147+
_, version, err := r.inferVersion()
145148
if err != nil {
146149
return err
147150
}
148151
if r.version != "" && r.version != version {
149152
return fmt.Errorf("unexpected version detected: %v != %v", r.version, version)
150153
}
151154

155+
versionedDriverLibPaths, err := r.Libraries().Locate("lib*.so." + version)
156+
if err != nil {
157+
return fmt.Errorf("failed to locate versioned driver libraries: %w", err)
158+
}
159+
160+
var uniqueDirs []string
161+
seen := make(map[string]bool)
162+
163+
for _, path := range versionedDriverLibPaths {
164+
dir := filepath.Dir(path)
165+
if seen[dir] {
166+
continue
167+
}
168+
seen[dir] = true
169+
uniqueDirs = append(uniqueDirs, r.RelativeToRoot(dir))
170+
}
171+
152172
r.version = version
153-
r.driverLibDirectory = r.RelativeToRoot(filepath.Dir(driverLibPath))
173+
r.driverLibDirectories = uniqueDirs
154174

155175
return nil
156176
}
@@ -167,7 +187,7 @@ func (r *Driver) inferVersion() (string, string, error) {
167187
for _, driverLib := range []string{"libcuda.so.", "libnvidia-ml.so."} {
168188
driverLibPaths, err := r.Libraries().Locate(driverLib + versionSuffix)
169189
if err != nil {
170-
errs = errors.Join(errs, fmt.Errorf("failed to locate libcuda.so: %w", err))
190+
errs = errors.Join(errs, fmt.Errorf("failed to locate %q: %w", driverLib, err))
171191
continue
172192
}
173193
driverLibPath := driverLibPaths[0]

pkg/nvcdi/driver-nvml.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,13 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover
111111
disableDeviceNodeModification := l.hookCreator.Create(DisableDeviceNodeModificationHook)
112112
discoverers = append(discoverers, disableDeviceNodeModification)
113113

114-
driverLibDirectory, err := l.driver.GetDriverLibDirectory()
114+
driverLibDirectories, err := l.driver.GetDriverLibDirectories()
115115
if err != nil {
116116
return nil, fmt.Errorf("failed to get libcuda.so parent directory path: %w", err)
117117
}
118118
environmentVariable := &discover.EnvVar{
119119
Name: "NVIDIA_CTK_LIBCUDA_DIR",
120-
Value: driverLibDirectory,
120+
Value: strings.Join(driverLibDirectories, string(filepath.ListSeparator)),
121121
}
122122
discoverers = append(discoverers, environmentVariable)
123123

0 commit comments

Comments
 (0)