|
| 1 | +package chrootarchive |
| 2 | + |
| 3 | +import ( |
| 4 | + "bytes" |
| 5 | + "flag" |
| 6 | + "fmt" |
| 7 | + "io" |
| 8 | + "net" |
| 9 | + "os" |
| 10 | + "path/filepath" |
| 11 | + "runtime" |
| 12 | + |
| 13 | + "go.podman.io/storage/pkg/archive" |
| 14 | + "go.podman.io/storage/pkg/fileutils" |
| 15 | + "go.podman.io/storage/pkg/idtools" |
| 16 | + "go.podman.io/storage/pkg/reexec" |
| 17 | + "go.podman.io/storage/pkg/splitfdstream" |
| 18 | + "go.podman.io/storage/pkg/system" |
| 19 | + "go.podman.io/storage/pkg/unshare" |
| 20 | + "golang.org/x/sys/unix" |
| 21 | +) |
| 22 | + |
const (
	// splitFDStreamSocketDescriptor is the descriptor number at which the
	// re-exec child finds its end of the Unix socket used to receive file
	// descriptors via SCM_RIGHTS. The parent passes the socket as the third
	// ExtraFiles entry, which is why it shows up as fd 5 in the child.
	splitFDStreamSocketDescriptor = 5
)
| 26 | + |
| 27 | +func init() { |
| 28 | + reexec.Register("storage-untar-splitfdstream", untarSplitFDStream) |
| 29 | +} |
| 30 | + |
| 31 | +// UntarSplitFDStream extracts a splitfdstream into dest inside a chroot. |
| 32 | +// The stream provides tar headers as inline data, with file content |
| 33 | +// delivered via the fds slice (for reflink-based copying). |
| 34 | +// FDs are streamed to the child process one-at-a-time over a Unix socket |
| 35 | +// using SCM_RIGHTS, avoiding EMFILE from inheriting too many FDs at exec. |
| 36 | +func UntarSplitFDStream(stream io.Reader, fds []*os.File, dest string, options *archive.TarOptions) error { |
| 37 | + if stream == nil { |
| 38 | + return fmt.Errorf("empty stream") |
| 39 | + } |
| 40 | + if options == nil { |
| 41 | + options = &archive.TarOptions{} |
| 42 | + options.InUserNS = unshare.IsRootless() |
| 43 | + } |
| 44 | + |
| 45 | + idMappings := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps) |
| 46 | + rootIDs := idMappings.RootPair() |
| 47 | + |
| 48 | + dest = filepath.Clean(dest) |
| 49 | + if err := fileutils.Exists(dest); os.IsNotExist(err) { |
| 50 | + if err := idtools.MkdirAllAndChownNew(dest, 0o755, rootIDs); err != nil { |
| 51 | + return err |
| 52 | + } |
| 53 | + } |
| 54 | + |
| 55 | + destVal, err := newUnpackDestination(dest, dest) |
| 56 | + if err != nil { |
| 57 | + return err |
| 58 | + } |
| 59 | + defer destVal.Close() |
| 60 | + |
| 61 | + return invokeUnpackSplitFDStream(stream, fds, destVal, options) |
| 62 | +} |
| 63 | + |
| 64 | +// untarSplitFDStream is the re-exec entry point for "storage-untar-splitfdstream". |
| 65 | +// It runs inside a chroot and receives FDs lazily via SCM_RIGHTS from a Unix |
| 66 | +// socket, then calls archive.UnpackFromIterator for full extraction logic. |
| 67 | +func untarSplitFDStream() { |
| 68 | + runtime.LockOSThread() |
| 69 | + flag.Parse() |
| 70 | + |
| 71 | + var options archive.TarOptions |
| 72 | + |
| 73 | + // Read TarOptions from fd 3 (same as regular untar) |
| 74 | + if err := json.NewDecoder(os.NewFile(tarOptionsDescriptor, "options")).Decode(&options); err != nil { |
| 75 | + fatal(err) |
| 76 | + } |
| 77 | + |
| 78 | + dst := flag.Arg(0) |
| 79 | + var root string |
| 80 | + if len(flag.Args()) > 1 { |
| 81 | + root = flag.Arg(1) |
| 82 | + } |
| 83 | + |
| 84 | + // Handle the root fd (same pattern as regular untar) |
| 85 | + if root == procPathForFd(rootFileDescriptor) { |
| 86 | + rootFd := os.NewFile(rootFileDescriptor, "tar-root") |
| 87 | + defer rootFd.Close() |
| 88 | + if err := unix.Fchdir(int(rootFd.Fd())); err != nil { |
| 89 | + fatal(err) |
| 90 | + } |
| 91 | + root = "." |
| 92 | + } else if root == "" { |
| 93 | + root = dst |
| 94 | + } |
| 95 | + |
| 96 | + if err := chroot(root); err != nil { |
| 97 | + fatal(err) |
| 98 | + } |
| 99 | + |
| 100 | + // We need to be able to set any perms |
| 101 | + oldMask, err := system.Umask(0) |
| 102 | + if err != nil { |
| 103 | + fatal(err) |
| 104 | + } |
| 105 | + defer func() { |
| 106 | + _, _ = system.Umask(oldMask) |
| 107 | + }() |
| 108 | + |
| 109 | + if unshare.IsRootless() { |
| 110 | + options.InUserNS = true |
| 111 | + } |
| 112 | + |
| 113 | + // Set up FD receiver from the Unix socket (fd 5) |
| 114 | + sockFile := os.NewFile(splitFDStreamSocketDescriptor, "fd-socket") |
| 115 | + sockConn, err := net.FileConn(sockFile) |
| 116 | + sockFile.Close() // FileConn dups the fd |
| 117 | + if err != nil { |
| 118 | + fatal(fmt.Errorf("failed to create net.Conn from fd socket: %w", err)) |
| 119 | + } |
| 120 | + unixConn, ok := sockConn.(*net.UnixConn) |
| 121 | + if !ok { |
| 122 | + sockConn.Close() |
| 123 | + fatal(fmt.Errorf("fd socket is not a Unix connection")) |
| 124 | + } |
| 125 | + defer unixConn.Close() |
| 126 | + |
| 127 | + fdPasser := splitfdstream.NewFDPasser(unixConn) |
| 128 | + |
| 129 | + // Create an iterator that receives FDs lazily via SCM_RIGHTS |
| 130 | + recv := func() (*os.File, error) { |
| 131 | + _, fds, err := fdPasser.ReceiveFileDescriptors(1) |
| 132 | + if err != nil { |
| 133 | + return nil, fmt.Errorf("failed to receive FD via SCM_RIGHTS: %w", err) |
| 134 | + } |
| 135 | + if len(fds) != 1 { |
| 136 | + // Close any unexpected FDs |
| 137 | + for _, f := range fds { |
| 138 | + f.Close() |
| 139 | + } |
| 140 | + return nil, fmt.Errorf("expected 1 FD, got %d", len(fds)) |
| 141 | + } |
| 142 | + return fds[0], nil |
| 143 | + } |
| 144 | + |
| 145 | + iter := splitfdstream.NewIteratorWithFDReceiver(os.Stdin, recv) |
| 146 | + if err := archive.UnpackFromIterator(iter, dst, &options); err != nil { |
| 147 | + fatal(err) |
| 148 | + } |
| 149 | + |
| 150 | + // Fully consume stdin in case it is zero padded |
| 151 | + if _, err := flush(os.Stdin); err != nil { |
| 152 | + fatal(err) |
| 153 | + } |
| 154 | + |
| 155 | + os.Exit(0) |
| 156 | +} |
| 157 | + |
| 158 | +// invokeUnpackSplitFDStream forks a re-exec child process that chroots into |
| 159 | +// dest and unpacks the splitfdstream using archive.UnpackFromIterator. |
| 160 | +// FDs are sent to the child one-at-a-time over a Unix socket using SCM_RIGHTS. |
| 161 | +func invokeUnpackSplitFDStream(stream io.Reader, fds []*os.File, dest *unpackDestination, options *archive.TarOptions) error { |
| 162 | + // Create pipe for TarOptions (fd 3) |
| 163 | + optR, optW, err := os.Pipe() |
| 164 | + if err != nil { |
| 165 | + return fmt.Errorf("splitfdstream options pipe: %w", err) |
| 166 | + } |
| 167 | + |
| 168 | + // Create Unix socketpair for passing FDs via SCM_RIGHTS |
| 169 | + sockFDs, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0) |
| 170 | + if err != nil { |
| 171 | + optR.Close() |
| 172 | + optW.Close() |
| 173 | + return fmt.Errorf("splitfdstream socketpair: %w", err) |
| 174 | + } |
| 175 | + parentSockFile := os.NewFile(uintptr(sockFDs[0]), "fd-socket-parent") |
| 176 | + childSockFile := os.NewFile(uintptr(sockFDs[1]), "fd-socket-child") |
| 177 | + |
| 178 | + cmd := reexec.Command("storage-untar-splitfdstream", dest.dest, procPathForFd(rootFileDescriptor)) |
| 179 | + cmd.Stdin = stream |
| 180 | + |
| 181 | + // ExtraFiles: [optionsPipe(fd3), rootFD(fd4), socketEnd(fd5)] |
| 182 | + cmd.ExtraFiles = append(cmd.ExtraFiles, optR) // fd 3 |
| 183 | + cmd.ExtraFiles = append(cmd.ExtraFiles, dest.root) // fd 4 |
| 184 | + cmd.ExtraFiles = append(cmd.ExtraFiles, childSockFile) // fd 5 |
| 185 | + |
| 186 | + output := bytes.NewBuffer(nil) |
| 187 | + cmd.Stdout = output |
| 188 | + cmd.Stderr = output |
| 189 | + |
| 190 | + if err := cmd.Start(); err != nil { |
| 191 | + optW.Close() |
| 192 | + optR.Close() |
| 193 | + parentSockFile.Close() |
| 194 | + childSockFile.Close() |
| 195 | + return fmt.Errorf("splitfdstream untar error on re-exec cmd: %w", err) |
| 196 | + } |
| 197 | + |
| 198 | + // Close the child's end in the parent |
| 199 | + childSockFile.Close() |
| 200 | + |
| 201 | + // Write TarOptions JSON to the pipe |
| 202 | + if err := json.NewEncoder(optW).Encode(options); err != nil { |
| 203 | + optW.Close() |
| 204 | + parentSockFile.Close() |
| 205 | + return fmt.Errorf("splitfdstream options json encode failed: %w", err) |
| 206 | + } |
| 207 | + optW.Close() |
| 208 | + |
| 209 | + // Send FDs one-at-a-time over the socket using SCM_RIGHTS. |
| 210 | + // The child receives them lazily as it processes external chunks. |
| 211 | + parentConn, err := net.FileConn(parentSockFile) |
| 212 | + parentSockFile.Close() // FileConn dups the fd |
| 213 | + if err != nil { |
| 214 | + return fmt.Errorf("splitfdstream parent socket: %w", err) |
| 215 | + } |
| 216 | + parentUnix, ok := parentConn.(*net.UnixConn) |
| 217 | + if !ok { |
| 218 | + parentConn.Close() |
| 219 | + return fmt.Errorf("splitfdstream parent socket is not Unix") |
| 220 | + } |
| 221 | + |
| 222 | + fdPasser := splitfdstream.NewFDPasser(parentUnix) |
| 223 | + for _, f := range fds { |
| 224 | + // Send one FD with a 1-byte dummy message (required by sendmsg) |
| 225 | + if err := fdPasser.SendFileDescriptors([]*os.File{f}, []byte{0}); err != nil { |
| 226 | + parentUnix.Close() |
| 227 | + return fmt.Errorf("splitfdstream send FD: %w", err) |
| 228 | + } |
| 229 | + } |
| 230 | + parentUnix.Close() // signal EOF to child |
| 231 | + |
| 232 | + if err := cmd.Wait(); err != nil { |
| 233 | + // Exhaust input to avoid blocking the producer |
| 234 | + if _, discardErr := io.Copy(io.Discard, stream); discardErr != nil { |
| 235 | + return fmt.Errorf("splitfdstream unpacking failed (error: %w; output: %s)\nexhausting input failed (error: %w)", err, output, discardErr) |
| 236 | + } |
| 237 | + return fmt.Errorf("splitfdstream unpacking failed (error: %w; output: %s)", err, output) |
| 238 | + } |
| 239 | + return nil |
| 240 | +} |
0 commit comments