Fixes (#53)

BenSmith · web-flow · commit 074992e283ac · 2026-05-18T20:45:07.000-07:00
* update samba, improve gpu auto for nvidia vs nouveau

* Add jellyfin workload
diff --git a/workloadctl/containers/jellyfin/README.md b/workloadctl/containers/jellyfin/README.md
@@ -0,0 +1,38 @@
+# jellyfin
+
+Free Software media server with hardware-accelerated video transcoding.
+
+Unlike most workloads under `containers/`, this one **builds no image** — it
+runs the upstream `docker.io/jellyfin/jellyfin` image directly. This directory
+exists only to ship the host setup script.
+
+## Files
+
+- `setup.sh` — host prerequisite script run by `workloadctl enable/disable`.
+  Turns on the `container_use_devices` SELinux boolean so the container can
+  open the GPU render node `/dev/dri/renderD128`.
+
+## Hardware transcoding
+
+The workload passes `/dev/dri/renderD128` into the container and adds the
+`render` group. In the Jellyfin dashboard set:
+
+- Hardware acceleration: **VAAPI**
+- VA-API device: **/dev/dri/renderD128**
+
+Works with AMD (`amdgpu`) and Intel (`i915`) GPUs. AMD Navi 10 / RX 5000-series
+supports H.264 and HEVC encode/decode but not AV1 encode.
+
+For NVIDIA (proprietary driver), use `gpu = "nvidia"` under `[devices]` in
+`jellyfin.toml` in place of the `devices = [...]` entry, and pick NVENC in the
+UI instead of VAAPI.
+
+## Media
+
+By default jellyfin is a **fully independent workload**: the library is its
+own directory, `/var/lib/workloads/jellyfin/media`, mounted read-only at
+`/media` in the container.
+
+For deployment you can point it elsewhere by editing the last `[storage]`
+volume in `jellyfin.toml` — e.g. at the `smb-server` share
+(`/var/lib/workloads/smb-server/exports/media`) so media can be added over
+SMB. The workload user only needs read access to whatever path you choose.
diff --git a/workloadctl/containers/jellyfin/setup.sh b/workloadctl/containers/jellyfin/setup.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Host setup for the jellyfin workload.
+#
+# Usage:
+#   setup.sh enable   — configure host prerequisites
+#   setup.sh disable  — remove host prerequisites
+#
+# Idempotent in both directions. Called by workloadctl enable/disable.
+set -euo pipefail
+
+SEBOOL="container_use_devices"
+
+# enable: switch the SELinux boolean named by $SEBOOL on if it is currently off.
+# When getsebool is not installed the step is skipped with a log line.
+enable() {
+    # Allow confined containers (container_t) to open device nodes such as the
+    # GPU render node /dev/dri/renderD128. Without this, SELinux denies the
+    # VAAPI device open and hardware transcoding silently falls back to CPU.
+    if command -v getsebool >/dev/null 2>&1; then
+        # Only act when getsebool reports the boolean as off. Every other
+        # outcome — already on, or getsebool itself failing — takes the else
+        # branch, which is why that message covers both cases. Because the
+        # pipeline is an `if` condition, a non-zero status does not abort
+        # the script.
+        if getsebool "$SEBOOL" 2>/dev/null | grep -q ' off$'; then
+            echo "  [host] Enabling SELinux boolean ${SEBOOL}..."
+            # -P writes the value to the policy on disk so it survives reboots.
+            setsebool -P "$SEBOOL" on
+        else
+            echo "  [host] SELinux boolean ${SEBOOL} already on (or SELinux disabled)"
+        fi
+    else
+        echo "  [host] SELinux tooling not present — skipping ${SEBOOL}"
+    fi
+    echo "  [host] Setup complete"
+}
+
+# disable: deliberately changes nothing on the host; it only logs that the
+# boolean is left as it is.
+disable() {
+    # Intentionally leave container_use_devices on: it is a system-wide boolean
+    # and other GPU workloads (game streaming, desktops) may depend on it.
+    # Flipping it off here could break them.
+    echo "  [host] Leaving SELinux boolean ${SEBOOL} unchanged (shared by other GPU workloads)"
+}
+
+# Dispatch on the first argument. "${1:-}" expands to an empty string when no
+# argument is given, so `set -u` does not abort before the usage message.
+case "${1:-}" in
+    enable)  enable ;;
+    disable) disable ;;
+    *)
+        # Unknown or missing action: print usage to stderr and fail.
+        echo "Usage: $0 {enable|disable}" >&2
+        exit 1
+        ;;
+esac
diff --git a/workloadctl/generators/workload-generate b/workloadctl/generators/workload-generate
@@ -345,13 +345,27 @@ def generate_setup_service(config, user_name):
 
 
 def resolve_auto_gpu():
-    """Detect the primary GPU vendor from sysfs. Returns 'nvidia', 'amd', 'intel', or 'none'."""
+    """Detect the primary GPU from sysfs.
+
+    Returns 'nvidia', 'nouveau', 'amd', 'intel', or 'none'. For NVIDIA cards
+    the bound driver is checked: the proprietary driver uses the CDI path,
+    but nouveau has no NVIDIA Container Toolkit support and must use the plain
+    DRM render node instead — so it is reported separately.
+
+    Cards are visited in sorted path order and the first one with a
+    recognised vendor ID decides the result. An OSError while reading sysfs
+    yields 'none'.
+    """
     vendor_map = {"0x10de": "nvidia", "0x1002": "amd", "0x8086": "intel"}
     try:
         for vendor_path in sorted(Path("/sys/class/drm").glob("card*/device/vendor")):
             vendor = vendor_path.read_text().strip().lower()
-            if vendor in vendor_map:
-                return vendor_map[vendor]
+            # Skip cards whose vendor ID is not one of the known ones.
+            if vendor not in vendor_map:
+                continue
+            result = vendor_map[vendor]
+            # The vendor ID alone cannot tell the proprietary driver from
+            # nouveau; the basename of the resolved "driver" symlink that sits
+            # next to the vendor file can.
+            if result == "nvidia":
+                try:
+                    if (vendor_path.parent / "driver").resolve().name == "nouveau":
+                        return "nouveau"
+                except OSError:
+                    # Driver link could not be resolved: keep the vendor-only answer.
+                    pass
+            # First recognised card wins; later cards are never inspected.
+            return result
     except OSError:
         pass
     return "none"
@@ -555,7 +569,9 @@ def generate_system_service(config, user_name, uid):
                 "--device=nvidia.com/gpu=all",
                 "--device /dev/dri",
             ])
-    elif gpu_vendor == "intel":
+    elif gpu_vendor in ("intel", "nouveau"):
+        # nouveau (and Intel) expose the GPU through the standard DRM render
+        # node — no CDI / Container Toolkit involved.
         podman_args.append("--device /dev/dri")
 
     # Generic device passthrough
diff --git a/workloadctl/tests/test_generator.py b/workloadctl/tests/test_generator.py
@@ -5,18 +5,32 @@
 No root required — all paths are overridden via env vars and argv.
 """
 
+import importlib.machinery
+import importlib.util
 import os
 import subprocess
 import sys
 import tempfile
 import textwrap
 import unittest
 from pathlib import Path
+from unittest import mock
 
 GENERATOR = os.path.join(os.path.dirname(__file__), '..', 'generators', 'workload-generate')
 LIB_DIR = os.path.join(os.path.dirname(__file__), '..', 'lib')
 
 
+def _load_generator_module():
+    """Import workload-generate as a module (it has a __main__ guard).
+
+    Returns a freshly executed module object named "workload_generate". The
+    module is not registered in sys.modules, so each call loads a new copy.
+    """
+    # NOTE(review): presumably the generator imports helpers from lib/ — confirm.
+    if LIB_DIR not in sys.path:
+        sys.path.insert(0, LIB_DIR)
+    # The script path has no .py suffix, so the source loader is named
+    # explicitly instead of being inferred from the file extension.
+    loader = importlib.machinery.SourceFileLoader("workload_generate", GENERATOR)
+    spec = importlib.util.spec_from_loader("workload_generate", loader)
+    module = importlib.util.module_from_spec(spec)
+    loader.exec_module(module)
+    return module
+
+
 def run_generator(config_dir, services_dir, sysusers_dir):
     """Run the generator and return the CompletedProcess."""
     env = os.environ.copy()
@@ -698,5 +712,74 @@ def test_invalid_toml(self):
             self.assertEqual(result.returncode, 0)
 
 
+class TestResolveAutoGpu(unittest.TestCase):
+    """Unit tests for resolve_auto_gpu() — vendor + NVIDIA driver detection."""
+
+    @classmethod
+    def setUpClass(cls):
+        # Load the generator once; every test reuses the same module object.
+        cls.wg = _load_generator_module()
+
+    def setUp(self):
+        # Fresh fake sysfs tree per test: <tmp>/sys/class/drm
+        self.root = tempfile.mkdtemp()
+        self.drm = Path(self.root) / "sys" / "class" / "drm"
+        self.drm.mkdir(parents=True)
+
+    def tearDown(self):
+        import shutil
+        # Remove the whole temp tree, including any _drivers symlink targets.
+        shutil.rmtree(self.root)
+
+    def _add_card(self, name, vendor_id, driver=None):
+        """Create a fake /sys/class/drm/<name> with a vendor file and
+        optionally a 'driver' symlink whose basename is the driver name."""
+        device = self.drm / name / "device"
+        device.mkdir(parents=True)
+        # Trailing newline mirrors a line-terminated file; the generator strips it.
+        (device / "vendor").write_text(vendor_id + "\n")
+        if driver is not None:
+            # The symlink target only needs the right basename; it lives
+            # outside the fake drm directory under <tmp>/_drivers/<driver>.
+            target = Path(self.root) / "_drivers" / driver
+            target.mkdir(parents=True, exist_ok=True)
+            (device / "driver").symlink_to(target)
+
+    def _resolve(self):
+        """Run resolve_auto_gpu() with /sys/class/drm redirected to the fake tree."""
+        # Keep a handle on the real class before the module attribute is patched.
+        real_path = self.wg.Path
+        drm = self.drm
+
+        def fake_path(arg):
+            # Only the sysfs DRM root is redirected; any other path passes through.
+            if str(arg) == "/sys/class/drm":
+                return real_path(drm)
+            return real_path(arg)
+
+        # The patch is active only for the duration of this call.
+        with mock.patch.object(self.wg, "Path", side_effect=fake_path):
+            return self.wg.resolve_auto_gpu()
+
+    def test_amd(self):
+        self._add_card("card0", "0x1002")
+        self.assertEqual(self._resolve(), "amd")
+
+    def test_intel(self):
+        self._add_card("card0", "0x8086")
+        self.assertEqual(self._resolve(), "intel")
+
+    def test_nvidia_proprietary(self):
+        self._add_card("card0", "0x10de", driver="nvidia")
+        self.assertEqual(self._resolve(), "nvidia")
+
+    def test_nvidia_nouveau(self):
+        self._add_card("card0", "0x10de", driver="nouveau")
+        self.assertEqual(self._resolve(), "nouveau")
+
+    def test_nvidia_no_driver_symlink_falls_back_to_nvidia(self):
+        # No driver bound (e.g. modeset/driver not yet attached) → vendor only.
+        self._add_card("card0", "0x10de")
+        self.assertEqual(self._resolve(), "nvidia")
+
+    def test_no_gpu(self):
+        # Empty drm directory: the glob matches nothing.
+        self.assertEqual(self._resolve(), "none")
+
+    def test_unknown_vendor_skipped(self):
+        # 0xbeef is not in the generator's vendor map.
+        self._add_card("card0", "0xbeef")
+        self.assertEqual(self._resolve(), "none")
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/workloadctl/workloads.d/jellyfin.toml b/workloadctl/workloads.d/jellyfin.toml
@@ -0,0 +1,113 @@
+# Jellyfin - Free Software Media System (stream & transcode video)
+# Web interface: http://<host-ip>:8096
+#
+# Runs the official jellyfin/jellyfin image rootless. Video transcoding is
+# hardware-accelerated through VAAPI on the host GPU render node
+# (/dev/dri/renderD128) — works with AMD (amdgpu) and Intel (i915) GPUs.
+# For NVIDIA, use the `gpu = "nvidia"` convenience flag under [devices]
+# instead and select NVENC in the Jellyfin UI.
+#
+# Note on `gpu = "auto"` and NVIDIA: auto detects both the PCI vendor and the
+# bound driver. With the proprietary driver it uses the NVIDIA Container
+# Toolkit (CDI) path for NVENC; with nouveau it uses plain /dev/dri VAAPI.
+# Both work — but nouveau's hardware encode support is weak, so a proprietary
+# driver with `gpu = "nvidia"` is recommended for real transcoding on NVIDIA.
+#
+# Setup:
+#   1. Enable the workload. This pulls the image, creates the volume
+#      directories, and runs the host setup script (setup.sh) which turns on
+#      the `container_use_devices` SELinux boolean so the container may open
+#      the GPU render node:
+#        sudo workloadctl enable jellyfin
+#
+#   2. Add media. By default the library is the workload's own directory,
+#      mounted read-only at /media inside the container:
+#        /var/lib/workloads/jellyfin/media
+#      Drop video files there (created automatically on enable). To serve
+#      media from elsewhere instead — e.g. the smb-server share so files can
+#      be added over SMB — see the [storage] section below.
+#
+#   3. Configure the firewall (LAN-only — recommended):
+#        sudo firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="192.168.0.0/24" port port="8096" protocol="tcp" accept'
+#        sudo firewall-cmd --reload
+#
+#   4. Open http://<host-ip>:8096 and complete first-run setup. Add a library
+#      pointing at /media. Then under Dashboard > Playback > Transcoding:
+#        - Hardware acceleration: Video Acceleration API (VAAPI)
+#        - VA-API device:        /dev/dri/renderD128
+#        - Enable the HEVC / H.264 hardware decoders & encoders you want.
+#      (AMD Navi 10 / RX 5000-series does H.264 and HEVC but not AV1 encode.)
+#
+# Notes:
+#   - The official image runs as root inside the container. userns
+#     keep-id:uid=0,gid=0 maps that to the unprivileged _wl-jellyfin host
+#     user, so /config and /cache stay writable without granting real root.
+#   - /media is mounted read-only — Jellyfin never modifies your library.
+#   - To serve from the local registry instead of Docker Hub, retag the image
+#     into zot (e.g. zamd:5050/jellyfin/jellyfin) and adjust [container].
+#
+# After editing this file, apply changes with:
+#   sudo workloadctl recreate jellyfin
+
+[workload]
+name = "jellyfin"
+enabled = false
+
+[container]
+image = "docker.io/jellyfin/jellyfin:latest"
+pull = "missing"
+
+[container.health]
+# The official image is Ubuntu-based (bash present) but ships no curl, so
+# probe the HTTP listener socket directly rather than hitting /health.
+cmd = "bash -c 'exec 3<>/dev/tcp/127.0.0.1/8096'"
+interval = "30s"
+start_period = "30s"
+on_failure = "kill"
+
+[container.environment]
+# Optional: the absolute URL clients should use, shown in the dashboard.
+# JELLYFIN_PublishedServerUrl = "http://192.168.0.100:8096"
+
+[storage]
+# By default the library lives in the workload's own home, so jellyfin is a
+# fully independent workload with no dependency on any other.
+#   Default media path: /var/lib/workloads/jellyfin/media
+#
+# Deployment tweak — to serve media from another location, replace the last
+# entry with an absolute host path, e.g. to share the smb-server library:
+#   "/var/lib/workloads/smb-server/exports/media:/media:ro"
+# (the workload user only needs read access to whatever path you choose).
+volumes = [
+    "./config:/config",
+    "./cache:/cache",
+    "./media:/media:ro",
+]
+
+[security]
+# keep-id:uid=0,gid=0 — the image runs as container root; map that to the
+# unprivileged _wl-jellyfin host user so volumes are writable without real
+# root. Not userns=host: no other host UID maps to the workload user.
+userns = "keep-id:uid=0,gid=0"
+# render — access the GPU VAAPI render node /dev/dri/renderD128.
+extra_groups = ["render"]
+
+[devices]
+# Just the render node — enough for VAAPI transcoding. Avoids pulling in
+# /dev/kfd and the card* node that gpu = "amd" would add.
+devices = ["/dev/dri/renderD128"]
+
+[network]
+mode = "pasta"
+ports = ["8096:8096"]
+
+[host]
+# Turns on the container_use_devices SELinux boolean so the container can
+# open the GPU render node. Idempotent; left in place on `disable` since
+# other GPU workloads may rely on it.
+setup = "setup.sh"
+
+[resources]
+shm_size = "256m"
+memory_high = "2G"
+memory_max = "4G"
diff --git a/workloadctl/workloads.d/schema-reference.toml b/workloadctl/workloads.d/schema-reference.toml
@@ -257,13 +257,20 @@ devices = ["/dev/ttyUSB0", "/dev/video0"]
 
 # Optional: GPU type for hardware acceleration
 # Type: string
-# Options: "amd", "nvidia", "none"
+# Options: "amd", "nvidia", "intel", "auto", "none"
 # Default: "none"
 # "amd"    - Expands to: --device /dev/kfd --device /dev/dri
 #            Requires: extra_groups = ["video", "render"]
 # "nvidia" - Expands to: --device=nvidia.com/gpu=all --device /dev/dri
 #            Requires: extra_groups = ["video"]
+# "intel"  - Expands to: --device /dev/dri
+# "auto"   - Detect the GPU vendor (and, for NVIDIA, the bound driver) from
+#            sysfs and use one of the above. Proprietary NVIDIA -> "nvidia"
+#            (CDI path); nouveau -> the /dev/dri path. Both work, but nouveau
+#            hardware encode is weak — for real transcoding on NVIDIA prefer
+#            the proprietary driver with an explicit gpu = "nvidia".
 # "none"   - No GPU access
+#
 # Advanced: For specific NVIDIA GPU selection, use generic devices instead:
 #           devices = ["nvidia.com/gpu=0", "/dev/dri"]
 gpu = "amd"
diff --git a/workloadctl/workloads.d/smb-server.toml b/workloadctl/workloads.d/smb-server.toml
@@ -63,8 +63,10 @@ required_files = [
 image = "localhost/smb-server:latest"
 pull = "never"
 
+# The smb-server image ships only the `samba` package (server side); smbclient
+# is not available in-container. Probe the listening socket instead.
 [container.health]
-cmd = "smbclient -N -L 127.0.0.1 > /dev/null 2>&1 || exit 1"
+cmd = "bash -c 'exec 3<>/dev/tcp/127.0.0.1/445'"
 interval = "30s"
 start_period = "10s"
 on_failure = "kill"