Skip to content

Commit 81f75ae

Browse files
committed
playground: ship uutils findutils (find) alongside grep
Generalize the single-module grep handling into a registry of standalone WASM modules so additional uutils tools can drop in. Add `find`, built from uutils/findutils for the wasm32-wasip1 target in CI, with graceful degradation when the binary is absent. Only `find` is shipped: xargs/locate/updatedb need process spawning or an on-disk database, neither of which works in the browser WASI sandbox. Add an emoji `find . -name '*.md'` example backed by fruit-emoji files in the virtual FS to show off Unicode/multibyte filename matching, plus tests.
1 parent 281bf18 commit 81f75ae

4 files changed

Lines changed: 145 additions & 33 deletions

File tree

.github/workflows/website.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,58 @@ jobs:
227227
echo "grep WASM build: ${grep_short} (${grep_date})"
228228
fi
229229
230+
- name: Build find WASM binary
231+
run: |
232+
# uutils findutils ships `find` as a standalone binary (not part of the
233+
# coreutils multicall) and, like grep, depends on the Oniguruma C
234+
# library (onig_sys), so the WASM build needs a WASI sysroot to compile
235+
# the bundled C sources. Reuse the WASI SDK fetched by the grep step if
236+
# it's still around, otherwise download it.
237+
WASI_SDK_VERSION=25
238+
WASI_SDK_DIR="wasi-sdk-${WASI_SDK_VERSION}.0-x86_64-linux"
239+
if [ ! -d "$WASI_SDK_DIR" ]; then
240+
curl -sL "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VERSION}/${WASI_SDK_DIR}.tar.gz" | tar xz
241+
fi
242+
export WASI_SDK_PATH="$PWD/${WASI_SDK_DIR}"
243+
export CC_wasm32_wasip1="$WASI_SDK_PATH/bin/clang"
244+
export CFLAGS_wasm32_wasip1="--sysroot=$WASI_SDK_PATH/share/wasi-sysroot"
245+
cd findutils
246+
# Only `find` is shipped: xargs/locate/updatedb need process spawning or
247+
# an on-disk database, neither of which works in the browser WASI sandbox.
248+
cargo build --release --target wasm32-wasip1 --bin find
249+
if [ -f target/wasm32-wasip1/release/find.wasm ]; then
250+
mkdir -p ../uutils.github.io/static/wasm
251+
cp target/wasm32-wasip1/release/find.wasm ../uutils.github.io/static/wasm/find.wasm
252+
# Optimize WASM size if wasm-opt is available
253+
if command -v wasm-opt &> /dev/null; then
254+
wasm-opt -Oz ../uutils.github.io/static/wasm/find.wasm -o ../uutils.github.io/static/wasm/find.wasm
255+
fi
256+
echo "find WASM binary size: $(du -h ../uutils.github.io/static/wasm/find.wasm | cut -f1)"
257+
# Advertise find in the playground's command list. find ships as its
258+
# own WASM module, so it isn't picked up by the coreutils feat_wasm
259+
# scan above; append it to the generated list here.
260+
commands_js=../uutils.github.io/static/wasm/commands.js
261+
if [ -f "$commands_js" ]; then
262+
existing=$(sed -n 's/^const WASM_COMMANDS = \[\(.*\)\];$/\1/p' "$commands_js")
263+
echo "const WASM_COMMANDS = [${existing}, \"find\"];" > "$commands_js"
264+
else
265+
echo 'const WASM_COMMANDS = ["find"];' > "$commands_js"
266+
fi
267+
# Record the findutils commit used to build its WASM module so the
268+
# playground can show it alongside the coreutils build.
269+
find_hash=$(git rev-parse HEAD)
270+
find_short=$(git rev-parse --short HEAD)
271+
find_date=$(git show -s --format=%cI HEAD)
272+
{
273+
echo "const UUTILS_FINDUTILS_VERSION = {"
274+
echo " commit: \"${find_hash}\","
275+
echo " short: \"${find_short}\","
276+
echo " date: \"${find_date}\""
277+
echo "};"
278+
} >> ../uutils.github.io/static/wasm/version.js
279+
echo "find WASM build: ${find_short} (${find_date})"
280+
fi
281+
230282
- name: Run Zola
231283
uses: shalzz/zola-deploy-action@v0.22.1
232284
env:

content/playground.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ template = "page.html"
6161
parts.push('grep <a href="' + grepUrl + '"><code>' +
6262
UUTILS_GREP_VERSION.short + '</code></a> (' + grepDate + ')');
6363
}
64+
if (typeof UUTILS_FINDUTILS_VERSION !== "undefined") {
65+
var findDate = UUTILS_FINDUTILS_VERSION.date.split("T")[0];
66+
var findUrl = "https://github.com/uutils/findutils/commit/" + UUTILS_FINDUTILS_VERSION.commit;
67+
parts.push('findutils <a href="' + findUrl + '"><code>' +
68+
UUTILS_FINDUTILS_VERSION.short + '</code></a> (' + findDate + ')');
69+
}
6470
if (typeof SITE_VERSION !== "undefined") {
6571
var siteDate = SITE_VERSION.date.split("T")[0];
6672
var siteUrl = "https://github.com/uutils/uutils.github.io/commit/" + SITE_VERSION.commit;
@@ -95,6 +101,7 @@ Click an example to run it in the terminal:
95101
<button class="playground-example">echo '🍎,🍌,🍒,🥝' | cut -d🍌 -f2</button>
96102
<button class="playground-example">printf '🍒 cherry\n🍎 apple\n🍌 banana\n' | sort -k2</button>
97103
<button class="playground-example">printf '🍎 apple\n🍌 banana\n🍒 cherry\n🥝 kiwi\n' | grep 🍌</button>
104+
<button class="playground-example">find . -name '*.md'</button>
98105
<button class="playground-example">sort -n < numbers.txt | head -3</button>
99106
<button class="playground-example">date</button>
100107
<button class="playground-example">uname -a</button>

static/js/wasm-terminal.js

Lines changed: 58 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,14 @@ if (typeof SharedArrayBuffer === "undefined") {
1414
}
1515

1616
const WASM_URL = "/wasm/uutils.wasm";
17-
// grep ships as its own standalone WASM module (it is not part of the
18-
// coreutils multicall binary), loaded lazily alongside it.
19-
const GREP_WASM_URL = "/wasm/grep.wasm";
17+
// Some utilities ship as their own standalone WASM modules rather than as part
18+
// of the coreutils multicall binary (grep lives in uutils/grep, find in
19+
// uutils/findutils). They are loaded lazily alongside the multicall module and
20+
// each is optional — see loadStandaloneWasm.
21+
const STANDALONE_WASM_URLS = {
22+
grep: "/wasm/grep.wasm",
23+
find: "/wasm/find.wasm",
24+
};
2025
const XTERM_CSS = "https://cdn.jsdelivr.net/npm/@xterm/xterm@5.5.0/css/xterm.min.css";
2126
const XTERM_CSS_INTEGRITY = "sha384-tStR1zLfWgsiXCF3IgfB3lBa8KmBe/lG287CL9WCeKgQYcp1bjb4/+mwN6oti4Co";
2227
const XTERM_JS = "https://cdn.jsdelivr.net/npm/@xterm/xterm@5.5.0/lib/xterm.min.js";
@@ -34,6 +39,11 @@ const SAMPLE_FILES = {
3439
"fruits.txt": "banana\napple\ncherry\ndate\napple\nbanana\ncherry\napple\n",
3540
"csv.txt": "name,age,city\nAlice,30,Paris\nBob,25,London\nCharlie,35,Berlin\nDiana,28,Tokyo\n",
3641
"words.txt": "hello world\nfoo bar baz\nthe quick brown fox\njumps over the lazy dog\n",
42+
// Emoji-named files so `find` has something fun (and Unicode!) to match.
43+
"🍎.md": "# Apple\n",
44+
"🍌.md": "# Banana\n",
45+
"🍒.md": "# Cherry\n",
46+
"🥝.md": "# Kiwi\n",
3747
};
3848

3949
// Commands available in the feat_wasm build.
@@ -51,7 +61,7 @@ const FALLBACK_COMMANDS = [
5161
"sha1sum", "sha224sum", "sha256sum", "sha384sum", "sha512sum",
5262
"shred", "shuf", "sleep", "sum", "tee", "true", "truncate",
5363
"uname", "unexpand", "uniq", "unlink", "vdir", "wc",
54-
"grep",
64+
"grep", "find",
5565
];
5666
const AVAILABLE_COMMANDS =
5767
(typeof WASM_COMMANDS !== "undefined" && Array.isArray(WASM_COMMANDS) && WASM_COMMANDS.length > 0)
@@ -67,7 +77,9 @@ const LOCALE_SHORTCUTS = {
6777
};
6878

6979
let wasmModule = null;
70-
let grepModule = null; // standalone grep WASM module (null if unavailable)
80+
// Compiled standalone modules, keyed by command name (e.g. "grep", "find").
81+
// A key is present only once its module has loaded successfully.
82+
const standaloneModules = {};
7183
let wasiShim = null;
7284
let terminal = null;
7385
let inputBuffer = "";
@@ -155,29 +167,32 @@ async function loadWasm() {
155167
}
156168

157169
/**
158-
* Load the standalone grep WASM module. grep is optional: if the binary isn't
159-
* present (e.g. local dev without a CI build), this resolves to null and grep
160-
* commands report that they're unavailable rather than breaking the terminal.
170+
* Load the standalone WASM modules (grep, find, …). Each is optional:
171+
* if a binary isn't present (e.g. local dev without a CI build), its error is
172+
* swallowed and the corresponding command reports that it's unavailable rather
173+
* than breaking the terminal or blocking the coreutils module.
161174
*/
162-
async function loadGrepWasm() {
163-
if (grepModule) return grepModule;
164-
try {
165-
const { module, size } = await compileWasmModule(GREP_WASM_URL);
166-
grepModule = module;
167-
wasmSize += size;
168-
} catch (e) {
169-
console.warn("grep WASM unavailable:", e.message);
170-
grepModule = null;
171-
}
172-
return grepModule;
175+
async function loadStandaloneWasm() {
176+
await Promise.all(
177+
Object.entries(STANDALONE_WASM_URLS).map(async ([cmd, url]) => {
178+
if (standaloneModules[cmd]) return;
179+
try {
180+
const { module, size } = await compileWasmModule(url);
181+
standaloneModules[cmd] = module;
182+
wasmSize += size;
183+
} catch (e) {
184+
console.warn(`${cmd} WASM unavailable:`, e.message);
185+
}
186+
})
187+
);
173188
}
174189

175190
async function initWasm() {
176191
if (wasmReady) return;
177192
try {
178-
// grep is optional and loadGrepWasm swallows its own errors, so it never
179-
// blocks the coreutils module from becoming ready.
180-
await Promise.all([loadWasiShim(), loadWasm(), loadGrepWasm()]);
193+
// The standalone modules are optional and loadStandaloneWasm swallows its
194+
// own errors, so they never block the coreutils module from becoming ready.
195+
await Promise.all([loadWasiShim(), loadWasm(), loadStandaloneWasm()]);
181196
wasmReady = true;
182197
} catch (e) {
183198
// Will fall back to JS implementations
@@ -493,6 +508,7 @@ async function executeCommandLine(line) {
493508
" wc -l fruits.txt\n" +
494509
" seq 1 10 | factor\n" +
495510
" grep -i alice names.txt\n" +
511+
" find . -name '*.md'\n" +
496512
" basename /usr/local/bin/rustc\n" +
497513
" date\n" +
498514
" uname -a\n"
@@ -566,11 +582,11 @@ async function executeCommandLine(line) {
566582
return `uutils: command not found: ${cmd}\nType 'help' for available commands.\n`;
567583
}
568584

569-
// grep is a separate WASM module rather than part of the coreutils
570-
// multicall binary.
571-
const isGrep = cmd === "grep";
572-
if (isGrep && !grepModule) {
573-
return "grep is not available in this build.\n";
585+
// Some utilities (grep, find) are separate WASM modules rather than part
586+
// of the coreutils multicall binary.
587+
const isStandalone = cmd in STANDALONE_WASM_URLS;
588+
if (isStandalone && !standaloneModules[cmd]) {
589+
return `${cmd} is not available in this build.\n`;
574590
}
575591

576592
try {
@@ -586,10 +602,18 @@ async function executeCommandLine(line) {
586602
if (!hasPathArg && cwd && ["ls", "dir"].includes(cmd)) {
587603
resolvedArgs.push(cwd);
588604
}
589-
// grep is invoked directly (argv[0] = "grep"); coreutils utilities go
590-
// through the multicall dispatcher (argv = ["coreutils", <util>, ...]).
605+
// find takes its starting paths *before* the expression. When the user
606+
// gives none (e.g. `find -type f`), GNU find defaults to "."; mirror that
607+
// but use the virtual cwd so `cd subdir; find` searches the right place.
608+
if (cmd === "find") {
609+
const hasStartPath = resolvedArgs.length > 1 && !resolvedArgs[1].startsWith("-");
610+
if (!hasStartPath) resolvedArgs.splice(1, 0, cwd || ".");
611+
}
612+
// Standalone utilities are invoked directly (argv[0] = the command name);
613+
// coreutils utilities go through the multicall dispatcher
614+
// (argv = ["coreutils", <util>, ...]).
591615
let dispatchArgs = resolvedArgs;
592-
if (isGrep) {
616+
if (cmd === "grep") {
593617
// browser_wasi_shim reports stdout as a TTY, so grep would emit GNU
594618
// match-highlight escape codes by default. That looks fine in the
595619
// terminal but corrupts piped/redirected output (e.g. `grep x | wc`),
@@ -599,8 +623,8 @@ async function executeCommandLine(line) {
599623
? resolvedArgs
600624
: [resolvedArgs[0], "--color=never", ...resolvedArgs.slice(1)];
601625
}
602-
const wasmArgs = isGrep ? dispatchArgs : ["coreutils", ...resolvedArgs];
603-
const result = await runCommand(wasmArgs, stdinData, isGrep ? grepModule : wasmModule);
626+
const wasmArgs = isStandalone ? dispatchArgs : ["coreutils", ...resolvedArgs];
627+
const result = await runCommand(wasmArgs, stdinData, isStandalone ? standaloneModules[cmd] : wasmModule);
604628
if (result.stderr) {
605629
return result.stderr + result.stdout;
606630
}
@@ -924,7 +948,8 @@ window._uutilsTestInternals = {
924948
get locale() { return currentLocale; },
925949
set locale(v) { currentLocale = v; },
926950
get wasmReady() { return wasmReady; },
927-
get grepReady() { return grepModule !== null; },
951+
get grepReady() { return !!standaloneModules.grep; },
952+
get findReady() { return !!standaloneModules.find; },
928953
initWasm,
929954
LOCALE_SHORTCUTS,
930955
SAMPLE_FILES,

static/js/wasm-terminal.test.html

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,34 @@ <h1>wasm-terminal unit tests</h1>
472472
section("grep WASM module (SKIPPED - grep.wasm not loaded)");
473473
}
474474

475+
// ===== find (standalone WASM module) =====
476+
if (T.findReady) {
477+
section("find WASM module");
478+
479+
assert("find in AVAILABLE_COMMANDS",
480+
AVAILABLE_COMMANDS.includes("find"), true);
481+
482+
await assertAsync("find by exact name",
483+
executeCommandLine("find . -name names.txt"),
484+
"./names.txt\n");
485+
486+
await assertAsync("find with no matches",
487+
executeCommandLine("find . -name '*.csv'"),
488+
"");
489+
490+
// Exercises directory traversal plus a pipe into a coreutils utility.
491+
await assertAsync("find piped into sort",
492+
executeCommandLine("find . -type f -name '*.txt' | sort"),
493+
"./csv.txt\n./fruits.txt\n./names.txt\n./numbers.txt\n./words.txt\n");
494+
495+
// find matches (and returns) emoji / multibyte filenames.
496+
await assertAsync("find emoji-named files",
497+
executeCommandLine("find . -name '*.md' | sort"),
498+
"./🍌.md\n./🍎.md\n./🍒.md\n./🥝.md\n");
499+
} else {
500+
section("find WASM module (SKIPPED - find.wasm not loaded)");
501+
}
502+
475503
} else {
476504
section("l10n WASM integration (SKIPPED - WASM not loaded)");
477505
}

0 commit comments

Comments
 (0)