Skip to content

Commit 4cac6ec

Browse files
committed
storage/overlay: implement SplitFDStreamDriver
Implement the SplitFDStreamDriver interface for the overlay driver, enabling efficient layer operations with reflink support. Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
1 parent b364dcc commit 4cac6ec

2 files changed

Lines changed: 340 additions & 0 deletions

File tree

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
//go:build linux
2+
3+
package overlay
4+
5+
import (
6+
"archive/tar"
7+
"bytes"
8+
"errors"
9+
"fmt"
10+
"io"
11+
"os"
12+
"path/filepath"
13+
"strings"
14+
15+
"github.com/sirupsen/logrus"
16+
"go.podman.io/storage/pkg/archive"
17+
"go.podman.io/storage/pkg/chrootarchive"
18+
"go.podman.io/storage/pkg/directory"
19+
"go.podman.io/storage/pkg/fileutils"
20+
"go.podman.io/storage/pkg/idtools"
21+
"go.podman.io/storage/pkg/splitfdstream"
22+
"go.podman.io/storage/pkg/unshare"
23+
"golang.org/x/sys/unix"
24+
)
25+
26+
// ErrSplitFDStreamNotSupported is returned when splitfdstream operations
27+
// are not supported for a layer (e.g., composefs layers).
28+
var ErrSplitFDStreamNotSupported = errors.New("splitfdstream not supported for this layer")
29+
30+
// ApplySplitFDStream applies changes from a split FD stream to the specified layer.
31+
// The split FD stream format allows mixing inline data with external file descriptor
32+
// references, enabling efficient streaming of layer content with zero-copy for large files.
33+
// Uses reflink when possible for efficient file copying.
34+
// This API is experimental and can be changed without bumping the major version number.
35+
func (d *Driver) ApplySplitFDStream(options *splitfdstream.ApplySplitFDStreamOpts) (int64, error) {
36+
if err := options.Validate(); err != nil {
37+
return 0, fmt.Errorf("invalid options: %w", err)
38+
}
39+
40+
var diffPath string
41+
42+
if options.StagingDir != "" {
43+
// Extracting to staging directory
44+
diffPath = options.StagingDir
45+
logrus.Debugf("overlay: ApplySplitFDStream applying to staging dir %s", diffPath)
46+
} else {
47+
// Extracting to layer diff directory (options.LayerID is guaranteed non-empty by Validate())
48+
49+
dir := d.dir(options.LayerID)
50+
if err := fileutils.Exists(dir); err != nil {
51+
return 0, fmt.Errorf("layer %s does not exist: %w", options.LayerID, err)
52+
}
53+
54+
// Check if this is a composefs layer - splitfdstream is not supported for composefs
55+
composefsData := d.getComposefsData(options.LayerID)
56+
if err := fileutils.Exists(composefsData); err == nil {
57+
return 0, fmt.Errorf("%w: layer %s uses composefs", ErrSplitFDStreamNotSupported, options.LayerID)
58+
}
59+
60+
var err error
61+
diffPath, err = d.getDiffPath(options.LayerID)
62+
if err != nil {
63+
return 0, fmt.Errorf("failed to get diff path for layer %s: %w", options.LayerID, err)
64+
}
65+
66+
logrus.Debugf("overlay: ApplySplitFDStream applying to layer %s at %s", options.LayerID, diffPath)
67+
}
68+
69+
// Set up ID mappings
70+
idMappings := options.IDMappings
71+
if idMappings == nil {
72+
idMappings = &idtools.IDMappings{}
73+
}
74+
75+
// Use chrootarchive to extract inside a chroot/pivot_root, giving
76+
// splitfdstream the same security boundary as the regular applyDiff path.
77+
err := chrootarchive.UntarSplitFDStream(options.Stream, options.FileDescriptors, diffPath, &archive.TarOptions{
78+
UIDMaps: idMappings.UIDs(),
79+
GIDMaps: idMappings.GIDs(),
80+
IgnoreChownErrors: options.IgnoreChownErrors || d.options.ignoreChownErrors,
81+
WhiteoutFormat: d.getWhiteoutFormat(),
82+
ForceMask: options.ForceMask,
83+
InUserNS: unshare.IsRootless(),
84+
})
85+
if err != nil {
86+
return 0, fmt.Errorf("failed to extract split FD stream: %w", err)
87+
}
88+
89+
return directory.Size(diffPath)
90+
}
91+
92+
// GetSplitFDStream generates a split FD stream from the layer differences.
93+
// The returned ReadCloser contains the splitfdstream-formatted data, and the
94+
// []*os.File slice contains the external file descriptors referenced by the stream.
95+
// Regular files are passed as external file descriptors for reflink-based copying.
96+
// The caller is responsible for closing both the ReadCloser and all file descriptors.
97+
// This API is experimental and can be changed without bumping the major version number.
98+
func (d *Driver) GetSplitFDStream(id, parent string, options *splitfdstream.GetSplitFDStreamOpts) (io.ReadCloser, []*os.File, error) {
99+
if options == nil {
100+
return nil, nil, fmt.Errorf("options cannot be nil")
101+
}
102+
103+
dir := d.dir(id)
104+
if err := fileutils.Exists(dir); err != nil {
105+
return nil, nil, fmt.Errorf("layer %s does not exist: %w", id, err)
106+
}
107+
108+
// Check if this is a composefs layer - splitfdstream is not supported for composefs yet
109+
composefsData := d.getComposefsData(id)
110+
if err := fileutils.Exists(composefsData); err == nil {
111+
return nil, nil, fmt.Errorf("%w: layer %s uses composefs", ErrSplitFDStreamNotSupported, id)
112+
} else if !errors.Is(err, unix.ENOENT) {
113+
return nil, nil, err
114+
}
115+
116+
logrus.Debugf("overlay: GetSplitFDStream for layer %s with parent %s", id, parent)
117+
118+
// Set up ID mappings
119+
idMappings := options.IDMappings
120+
if idMappings == nil {
121+
idMappings = &idtools.IDMappings{}
122+
}
123+
124+
// Get the diff path for file access
125+
diffPath, err := d.getDiffPath(id)
126+
if err != nil {
127+
return nil, nil, fmt.Errorf("failed to get diff path for layer %s: %w", id, err)
128+
}
129+
130+
// Get lower directories for whiteout handling
131+
lowerDirs, err := d.getLowerDiffPaths(id)
132+
if err != nil {
133+
return nil, nil, fmt.Errorf("failed to get lower diff paths: %w", err)
134+
}
135+
136+
// Generate tar stream directly from the diff directory
137+
// This ensures all files in the tar actually exist in diffPath
138+
// (unlike Diff() which may use naive diff and mount the overlay)
139+
tarStream, err := archive.TarWithOptions(diffPath, &archive.TarOptions{
140+
Compression: archive.Uncompressed,
141+
UIDMaps: idMappings.UIDs(),
142+
GIDMaps: idMappings.GIDs(),
143+
WhiteoutFormat: d.getWhiteoutFormat(),
144+
WhiteoutData: lowerDirs,
145+
})
146+
if err != nil {
147+
return nil, nil, fmt.Errorf("failed to generate tar stream from diff directory: %w", err)
148+
}
149+
defer tarStream.Close()
150+
151+
// Buffer the splitfdstream data in memory
152+
var buf bytes.Buffer
153+
var fds []*os.File
154+
writer := splitfdstream.NewWriter(&buf)
155+
156+
// Convert tar stream to splitfdstream
157+
err = d.convertTarToSplitFDStream(tarStream, writer, diffPath, &fds)
158+
if err != nil {
159+
// Close any opened FDs on error
160+
for _, f := range fds {
161+
f.Close()
162+
}
163+
return nil, nil, fmt.Errorf("failed to convert tar to splitfdstream: %w", err)
164+
}
165+
166+
logrus.Debugf("overlay: GetSplitFDStream complete for layer %s: streamSize=%d, numFDs=%d", id, buf.Len(), len(fds))
167+
return io.NopCloser(bytes.NewReader(buf.Bytes())), fds, nil
168+
}
169+
170+
// convertTarToSplitFDStream converts a tar stream to a splitfdstream by parsing
171+
// tar headers and replacing file content with file descriptor references.
172+
func (d *Driver) convertTarToSplitFDStream(tarStream io.ReadCloser, writer *splitfdstream.SplitFDStreamWriter, diffPath string, fds *[]*os.File) error {
173+
tr := tar.NewReader(tarStream)
174+
175+
// Open diff directory for safe file access
176+
diffDirFd, err := unix.Open(diffPath, unix.O_RDONLY|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
177+
if err != nil {
178+
return fmt.Errorf("failed to open diff directory %s: %w", diffPath, err)
179+
}
180+
defer unix.Close(diffDirFd)
181+
182+
for {
183+
header, err := tr.Next()
184+
if err == io.EOF {
185+
break
186+
}
187+
if err != nil {
188+
return fmt.Errorf("failed to read tar header: %w", err)
189+
}
190+
191+
// Write the tar header as inline data
192+
if err := d.writeTarHeaderInline(writer, header); err != nil {
193+
return fmt.Errorf("failed to write tar header for %s: %w", header.Name, err)
194+
}
195+
196+
// Handle file content
197+
if header.Typeflag == tar.TypeReg && header.Size > 0 {
198+
// Try to open file and write FD reference
199+
ok, err := d.tryWriteFileAsFDReference(writer, diffDirFd, header, fds)
200+
if err != nil {
201+
return fmt.Errorf("failed to write FD reference for %s: %w", header.Name, err)
202+
}
203+
if !ok {
204+
return fmt.Errorf("file %q not found in diff directory", header.Name)
205+
}
206+
// Skip the content in the tar stream since we're using FD reference
207+
if _, err := io.CopyN(io.Discard, tr, header.Size); err != nil {
208+
return fmt.Errorf("failed to skip content for %s: %w", header.Name, err)
209+
}
210+
}
211+
// For non-regular files or empty files, there's no content to handle
212+
}
213+
214+
return nil
215+
}
216+
217+
// writeTarHeaderInline writes a tar header as inline data to the splitfdstream.
218+
func (d *Driver) writeTarHeaderInline(writer *splitfdstream.SplitFDStreamWriter, header *tar.Header) error {
219+
var headerBuf bytes.Buffer
220+
tw := tar.NewWriter(&headerBuf)
221+
if err := tw.WriteHeader(header); err != nil {
222+
return fmt.Errorf("failed to serialize tar header: %w", err)
223+
}
224+
225+
headerBytes := headerBuf.Bytes()
226+
if len(headerBytes) > 0 {
227+
if err := writer.WriteInline(headerBytes); err != nil {
228+
return fmt.Errorf("failed to write inline header: %w", err)
229+
}
230+
}
231+
232+
return nil
233+
}
234+
235+
// tryWriteFileAsFDReference tries to open a file and write an FD reference to the splitfdstream.
236+
// Returns (true, nil) if the file was successfully written as FD reference.
237+
// Returns (false, nil) if the file doesn't exist in the diff directory (caller should write inline).
238+
// Returns (false, error) on other errors.
239+
func (d *Driver) tryWriteFileAsFDReference(writer *splitfdstream.SplitFDStreamWriter, diffDirFd int, header *tar.Header, fds *[]*os.File) (bool, error) {
240+
// Clean the file name to prevent path traversal
241+
cleanName := filepath.Clean(header.Name)
242+
if strings.Contains(cleanName, "..") {
243+
return false, fmt.Errorf("invalid file path: %s", header.Name)
244+
}
245+
246+
// Open the file safely using openat2
247+
fd, err := unix.Openat2(diffDirFd, cleanName, &unix.OpenHow{
248+
Flags: unix.O_RDONLY | unix.O_CLOEXEC,
249+
Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_BENEATH,
250+
})
251+
if err != nil {
252+
return false, fmt.Errorf("failed to open file %s: %w", cleanName, err)
253+
}
254+
255+
// Verify it's still a regular file
256+
var fdStat unix.Stat_t
257+
if err := unix.Fstat(fd, &fdStat); err != nil {
258+
unix.Close(fd)
259+
return false, fmt.Errorf("failed to fstat opened file %s: %w", cleanName, err)
260+
}
261+
if fdStat.Mode&unix.S_IFMT != unix.S_IFREG {
262+
unix.Close(fd)
263+
return false, fmt.Errorf("file %s is not a regular file", cleanName)
264+
}
265+
266+
// Create os.File from fd
267+
f := os.NewFile(uintptr(fd), cleanName)
268+
if f == nil {
269+
unix.Close(fd)
270+
return false, fmt.Errorf("failed to create File from fd for %s", cleanName)
271+
}
272+
273+
fdIndex := len(*fds)
274+
*fds = append(*fds, f)
275+
276+
// Write FD reference
277+
if err := writer.WriteExternal(fdIndex); err != nil {
278+
return false, fmt.Errorf("failed to write external FD reference: %w", err)
279+
}
280+
281+
return true, nil
282+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//go:build linux
2+
3+
package overlay
4+
5+
import (
6+
"testing"
7+
8+
"go.podman.io/storage/pkg/splitfdstream"
9+
)
10+
11+
func TestApplySplitFDStreamStub(t *testing.T) {
12+
driver := &Driver{
13+
home: t.TempDir(),
14+
}
15+
16+
// Test with nil options
17+
_, err := driver.ApplySplitFDStream(nil)
18+
if err == nil {
19+
t.Error("Expected error with nil options")
20+
}
21+
22+
// Test with valid options but non-existent layer
23+
opts := &splitfdstream.ApplySplitFDStreamOpts{LayerID: "non-existent-layer"}
24+
_, err = driver.ApplySplitFDStream(opts)
25+
if err == nil {
26+
t.Error("Expected error for non-existent layer")
27+
}
28+
}
29+
30+
func TestGetSplitFDStreamStub(t *testing.T) {
31+
driver := &Driver{
32+
home: t.TempDir(),
33+
}
34+
35+
// Test with nil options
36+
_, _, err := driver.GetSplitFDStream("test-layer", "parent-layer", nil)
37+
if err == nil {
38+
t.Error("Expected error with nil options")
39+
}
40+
41+
// Test with valid options but non-existent layer
42+
opts := &splitfdstream.GetSplitFDStreamOpts{}
43+
_, _, err = driver.GetSplitFDStream("non-existent-layer", "parent-layer", opts)
44+
if err == nil {
45+
t.Error("Expected error for non-existent layer")
46+
}
47+
}
48+
49+
// TestOverlayImplementsSplitFDStreamDriver verifies that the overlay driver
50+
// implements the SplitFDStreamDriver interface via type assertion.
51+
func TestOverlayImplementsSplitFDStreamDriver(t *testing.T) {
52+
driver := &Driver{}
53+
54+
// Verify the driver implements SplitFDStreamDriver
55+
if _, ok := interface{}(driver).(splitfdstream.SplitFDStreamDriver); !ok {
56+
t.Error("Expected overlay driver to implement SplitFDStreamDriver interface")
57+
}
58+
}

0 commit comments

Comments
 (0)