Skip to content

Commit cc28c34

Browse files
committed
LinuxDerivationBuilder: Use FD-based mounting API when available
We now use open_tree/move_mount syscalls when available. glibc version support shouldn't be too much of a concern at this point - the wrappers have been added in glibc 2.36. There also was not a whole lot of validation of the sandbox paths, which is now fixed too (`.`, `..` filename are rejected, everything escaping the chroot is too).
1 parent a3f46f2 commit cc28c34

5 files changed

Lines changed: 185 additions & 26 deletions

File tree

src/libstore/linux/build/linux-derivation-builder.cc

Lines changed: 148 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,17 @@
1010
#include "nix/store/filetransfer.hh"
1111
#include "nix/util/cgroup.hh"
1212
#include "nix/util/linux-namespaces.hh"
13+
#include "nix/util/file-system-at.hh"
1314
#include "nix/util/logging.hh"
1415
#include "nix/util/serialise.hh"
1516
#include "linux/fchmodat2-compat.hh"
1617

1718
#include <algorithm>
1819
#include <string_view>
1920
#include <cstdint>
20-
#include <fcntl.h>
21+
#include <atomic>
2122

23+
#include <fcntl.h>
2224
#include <sys/ioctl.h>
2325
#include <net/if.h>
2426
#include <netinet/ip.h>
@@ -205,35 +207,148 @@ static void setupLandlock()
205207

206208
#endif
207209

208-
static void doBind(const std::filesystem::path & source, const std::filesystem::path & target, bool optional = false)
210+
static void doBind(
211+
const std::filesystem::path & source,
212+
Descriptor chrootRootDirFd,
213+
const std::filesystem::path & target,
214+
const std::filesystem::path & chrootRootDirPath,
215+
bool optional = false)
209216
{
210-
debug("bind mounting %1% to %2%", PathFmt(source), PathFmt(target));
217+
/* `target` denotes a relative path inside the chroot directory. All operations happen
218+
relative to chrootRootDirFd. Bail out if the path is not what we expect. */
219+
if (target.empty() || target.is_absolute())
220+
throw Error("invalid path to bind mount in the chroot: %s", PathFmt(target));
221+
222+
/* Sanity check against insane setups. This would never be passed in
223+
sandbox paths for dependencies, but the user might specify it in
224+
extra-sandbox-paths. */
225+
if (const auto filename = target.filename().native(); filename == "." || filename == "..")
226+
throw Error("sandbox path to bind mount in the chroot has an invalid filename: %s", PathFmt(target));
227+
228+
debug("bind mounting %1% to %2%", PathFmt(source), PathFmt(chrootRootDirPath / target));
229+
230+
auto fallbackBindMount = [&](Descriptor sourceFd, Descriptor destFd) {
231+
auto selfProcSourcePath = std::filesystem::path("/proc/self/fd") / std::to_string(sourceFd);
232+
auto selfProcDestPath = std::filesystem::path("/proc/self/fd") / std::to_string(destFd);
233+
234+
if (mount(selfProcSourcePath.c_str(), selfProcDestPath.c_str(), "", MS_BIND | MS_REC, 0) == -1)
235+
throw SysError(
236+
"bind mount from %1% (%3%) to %2% (%4%) failed",
237+
PathFmt(source),
238+
PathFmt(chrootRootDirPath / target),
239+
PathFmt(selfProcSourcePath),
240+
PathFmt(selfProcDestPath));
241+
};
242+
243+
auto bindMount = [&](Descriptor sourceFd, Descriptor destFd) {
244+
static std::atomic_flag openTreeUnsupported{};
245+
if (openTreeUnsupported.test())
246+
return fallbackBindMount(sourceFd, destFd);
247+
248+
AutoCloseFD treeFd =
249+
::open_tree(sourceFd, "", OPEN_TREE_CLONE | AT_RECURSIVE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH);
250+
251+
if (!treeFd) {
252+
if (errno != ENOSYS)
253+
throw SysError("opening bind mount source %1%", PathFmt(source));
211254

212-
auto bindMount = [&]() {
213-
if (mount(source.c_str(), target.c_str(), "", MS_BIND | MS_REC, 0) == -1)
214-
throw SysError("bind mount from %1% to %2% failed", PathFmt(source), PathFmt(target));
255+
/* Cache and try the old fallback via procfs. */
256+
openTreeUnsupported.test_and_set();
257+
return fallbackBindMount(sourceFd, destFd);
258+
}
259+
260+
if (::move_mount(treeFd.get(), "", destFd, "", MOVE_MOUNT_T_EMPTY_PATH | MOVE_MOUNT_F_EMPTY_PATH) == -1)
261+
throw SysError("bind mount from %1% to %2% failed", PathFmt(source), PathFmt(chrootRootDirPath / target));
215262
};
216263

217-
auto maybeSt = maybeLstat(source);
218-
if (!maybeSt) {
219-
if (optional)
264+
/* TODO: Replace the sucky implementation of createDirs and use that instead. */
265+
auto createDirsAndOpen = [&](const std::filesystem::path & path) -> std::pair<AutoCloseFD, Descriptor> {
266+
Descriptor parentFd = chrootRootDirFd;
267+
AutoCloseFD maybeParentFdOwned;
268+
/* How many directories deep we are. */
269+
unsigned nestedDirCount = 0;
270+
271+
for (const auto & p : path) {
272+
/* Trailing `/`, we don't care. It might matter for symlink resolution, so better skip it.
273+
Also skip `.` as a no-op. */
274+
if (p.native().empty() || p.native() == ".")
275+
continue;
276+
277+
/* TODO: Add additional validation in config parsing in case the
278+
user specified a borked `extra-sandbox-paths`. Maybe we should just
279+
reject `..` components completely? */
280+
if (p.native() == "..") {
281+
if (nestedDirCount == 0)
282+
throw Error("sandbox path %s escapes the chroot", PathFmt(path));
283+
--nestedDirCount;
284+
} else {
285+
++nestedDirCount;
286+
}
287+
288+
assert(!p.native().starts_with("/")); /* Already check that the path is relative, can't happen. */
289+
290+
/* No symlinks shall be followed. */
291+
AutoCloseFD nextComponent = ::openat(parentFd, p.c_str(), O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
292+
if (!nextComponent) {
293+
if (errno != ENOENT) /* We might have to create it if it doesn't exist. */
294+
throw SysError("creating directory component %s in the sandbox path %s", PathFmt(p), PathFmt(path));
295+
296+
/* Let the umask restrict the permissions. TODO: Do we want
297+
this? We don't handle EEXIST here because that would mean
298+
somebody is already racing us. Fail closed. */
299+
if (::mkdirat(parentFd, p.c_str(), 0777) == -1)
300+
throw SysError("creating directory component %s in the sandbox path %s", PathFmt(p), PathFmt(path));
301+
302+
/* Try again after creating the directory. Should succeed. */
303+
nextComponent = ::openat(parentFd, p.c_str(), O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
304+
if (!nextComponent) /* Something is terribly wrong, bail out. */
305+
throw SysError("creating directory component %s in the sandbox path %s", PathFmt(p), PathFmt(path));
306+
}
307+
308+
/* Proceed to the next iteration. */
309+
maybeParentFdOwned = std::move(nextComponent);
310+
parentFd = maybeParentFdOwned.get();
311+
}
312+
313+
return {std::move(maybeParentFdOwned), parentFd};
314+
};
315+
316+
AutoCloseFD sourceFd = ::open(source.c_str(), O_PATH | O_NOFOLLOW | O_CLOEXEC);
317+
318+
/* Skip non-existent files source paths if .optional is set. */
319+
if (!sourceFd) {
320+
if (optional && (errno == ENOENT || errno == ENOTDIR))
220321
return;
221322
else
222323
throw SysError("getting attributes of path %1%", PathFmt(source));
223324
}
224-
auto st = *maybeSt;
325+
326+
auto st = nix::fstat(sourceFd.get());
225327

226328
if (S_ISDIR(st.st_mode)) {
227-
createDirs(target);
228-
bindMount();
329+
auto [maybeDirFdOwned, dirFd] = createDirsAndOpen(target);
330+
/* We must always open a fresh directory - i.e. the path must not resolve to chrootRootDir itself. */
331+
if (!maybeDirFdOwned)
332+
throw Error("sandbox path %s resolves to the chroot root directory", PathFmt(chrootRootDirPath / target));
333+
assert(maybeDirFdOwned.get() == dirFd); /* See the comment above. */
334+
bindMount(sourceFd.get(), dirFd);
229335
} else if (S_ISLNK(st.st_mode)) {
230-
// Symlinks can (apparently) not be bind-mounted, so just copy it
231-
createDirs(target.parent_path());
232-
copyFile(source, target, false);
336+
/* Symlinks can't be bind-mounted, so copy the contents.
337+
The kernel implies AT_EMPTY_PATH for readlinkat
338+
since https://github.com/torvalds/linux/commit/65cfc6722361 (v2.6.39).
339+
See also: https://github.com/cyphar/libpathrs/issues/18 */
340+
auto symlinkTarget = readLinkAt(sourceFd.get(), CanonPath::root);
341+
auto [maybeParentFdOwned, parentFd] = createDirsAndOpen(target.parent_path());
342+
if (::symlinkat(symlinkTarget.data(), parentFd, target.filename().c_str()) == -1)
343+
throw SysError("creating symlink %s", PathFmt(chrootRootDirPath / target));
233344
} else {
234-
createDirs(target.parent_path());
235-
writeFile(target, "");
236-
bindMount();
345+
auto [maybeParentFdOwned, parentFd] = createDirsAndOpen(target.parent_path());
346+
/* Strictly speaking, O_NOFOLLOW is redundant because O_CREAT | O_EXCL would never follow links anyway. */
347+
AutoCloseFD destFd =
348+
::openat(parentFd, target.filename().c_str(), O_CREAT | O_EXCL | O_WRONLY | O_NOFOLLOW | O_CLOEXEC, 0666);
349+
if (!destFd)
350+
throw SysError("creating regular file %s", PathFmt(chrootRootDirPath / target));
351+
bindMount(sourceFd.get(), destFd.get());
237352
}
238353
}
239354

@@ -647,6 +762,11 @@ void ChrootLinuxDerivationBuilder::enterChroot()
647762
pathsInChroot.emplace(i, canonicalPath);
648763
}
649764

765+
/* We really do have to reopen the dirfd in the child, otherwise move_mount dies with EINVAL. */
766+
AutoCloseFD chrootRootDirFd = openDirectory(chrootRootDir, FinalSymlink::DontFollow);
767+
if (!chrootRootDirFd)
768+
throw SysError("opening directory %s", PathFmt(chrootRootDir));
769+
650770
/* Bind-mount all the directories from the "host"
651771
filesystem that we want in the chroot
652772
environment. */
@@ -666,7 +786,7 @@ void ChrootLinuxDerivationBuilder::enterChroot()
666786
} else
667787
#endif
668788
{
669-
doBind(i.second.source, chrootRootDir / i.first.relative_path(), i.second.optional);
789+
doBind(i.second.source, chrootRootDirFd.get(), i.first.relative_path(), chrootRootDir, i.second.optional);
670790
}
671791
}
672792

@@ -709,8 +829,8 @@ void ChrootLinuxDerivationBuilder::enterChroot()
709829
} else {
710830
if (errno != EINVAL)
711831
throw SysError("mounting /dev/pts");
712-
doBind("/dev/pts", chrootRootDir / "dev" / "pts");
713-
doBind("/dev/ptmx", chrootRootDir / "dev" / "ptmx");
832+
doBind("/dev/pts", chrootRootDirFd.get(), "dev/pts", chrootRootDir);
833+
doBind("/dev/ptmx", chrootRootDirFd.get(), "dev/pts", chrootRootDir);
714834
}
715835
}
716836

@@ -796,7 +916,8 @@ void ChrootLinuxDerivationBuilder::killSandbox(bool getStats)
796916

797917
void ChrootLinuxDerivationBuilder::addDependencyImpl(const StorePath & path)
798918
{
799-
auto [source, target] = ChrootDerivationBuilder::addDependencyPrep(path);
919+
auto [source, targetRelPath] = ChrootDerivationBuilder::addDependencyPrep(path);
920+
assert(targetRelPath.is_relative()); /* The path is relative to the chroot. */
800921

801922
/* Bind-mount the path into the sandbox. This requires
802923
entering its mount namespace, which is not possible
@@ -809,7 +930,11 @@ void ChrootLinuxDerivationBuilder::addDependencyImpl(const StorePath & path)
809930
if (setns(sandboxMountNamespace.get(), CLONE_NEWNS) == -1)
810931
throw SysError("entering sandbox mount namespace");
811932

812-
doBind(source, target);
933+
/* We really do have to reopen the dirfd in the child, otherwise move_mount dies with EINVAL. */
934+
AutoCloseFD chrootRootDirFd = openDirectory(chrootRootDir, FinalSymlink::DontFollow);
935+
if (!chrootRootDirFd)
936+
throw SysError("opening directory %s", PathFmt(chrootRootDir));
937+
doBind(source, chrootRootDirFd.get(), targetRelPath, chrootRootDir);
813938

814939
_exit(0);
815940
}));

src/libstore/unix/build/chroot-derivation-builder.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,16 @@ ChrootDerivationBuilder::addDependencyPrep(const StorePath & path)
108108
debug("materialising '%s' in the sandbox", store.printStorePath(path));
109109

110110
std::filesystem::path source = store.toRealPath(path);
111-
std::filesystem::path target = chrootRootDir / std::filesystem::path(store.printStorePath(path)).relative_path();
111+
auto targetRelPath = std::filesystem::path(store.printStorePath(path)).relative_path();
112+
std::filesystem::path target = chrootRootDir / targetRelPath;
112113

113114
if (pathExists(target)) {
114115
// There is a similar debug message in doBind, so only run it in this block to not have double messages.
115116
debug("bind-mounting %s -> %s", PathFmt(target), PathFmt(source));
116117
throw Error("store path '%s' already exists in the sandbox", store.printStorePath(path));
117118
}
118119

119-
return {source, target};
120+
return {source, targetRelPath};
120121
}
121122

122123
} // namespace nix

src/libutil/include/nix/util/file-system-at.hh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ PosixStat fstatat(Descriptor dirFd, const std::filesystem::path & path);
7171

7272
/**
7373
* Read a symlink relative to a directory file descriptor.
74+
* On Linux, this also supports reading from O_PATH descriptors with CanonPath::root.
7475
*
7576
* @throws SystemError on any I/O errors.
7677
* @throws Interrupted if interrupted.

src/libutil/unix/file-system-at.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,6 @@ AutoCloseFD openFileEnsureBeneathNoSymlinks(
283283

284284
OsString readLinkAt(Descriptor dirFd, const CanonPath & path)
285285
{
286-
assert(!path.isRoot());
287286
assert(!path.rel().starts_with('/')); /* Just in case the invariant is somehow broken. */
288287
std::vector<char> buf;
289288
for (ssize_t bufSize = PATH_MAX / 4; true; bufSize += bufSize / 2) {

tests/functional/linux-sandbox.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,36 @@ nix-sandbox-build symlink-derivation.nix -A test_sandbox_paths \
9696
--option extra-sandbox-paths "/dir=$TEST_ROOT" \
9797
--option extra-sandbox-paths "/symlinkDir=$symlinkDir" \
9898
--option extra-sandbox-paths "/symlink=$symlinkcert"
99+
100+
invalidLeafPaths=(
101+
"/foo/.."
102+
"/foo/."
103+
"/.."
104+
"/."
105+
)
106+
107+
for badPath in "${invalidLeafPaths[@]}"; do
108+
expectStderr 1 nix-sandbox-build simple-failing.nix --option extra-sandbox-paths "$badPath=$cert" |
109+
grepQuiet "invalid filename"
110+
done
111+
112+
badPaths=(
113+
"/../escape"
114+
"/foo/../../escape"
115+
"/./../escape"
116+
"/a/b/../../../escape"
117+
)
118+
119+
sources=(
120+
"$cert"
121+
"$symlinkcert"
122+
"$symlinkDir"
123+
"$TEST_ROOT"
124+
)
125+
126+
for badPath in "${badPaths[@]}"; do
127+
for source in "${sources[@]}"; do
128+
expectStderr 1 nix-sandbox-build simple-failing.nix --option extra-sandbox-paths "$badPath=$source" |
129+
grepQuiet "escapes the chroot"
130+
done
131+
done

0 commit comments

Comments
 (0)