|
| 1 | +<!doctype html> |
| 2 | +<html lang="en"> |
| 3 | + <head> |
| 4 | + <meta charset="UTF-8" /> |
| 5 | + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| 6 | + <title>tsb — str.get_dummies: multi-label string encoding</title> |
| 7 | + <style> |
| 8 | + * { box-sizing: border-box; margin: 0; padding: 0; } |
| 9 | + body { font-family: system-ui, sans-serif; background: #0d1117; color: #c9d1d9; line-height: 1.6; padding: 2rem; } |
| 10 | + h1 { color: #58a6ff; font-size: 1.8rem; margin-bottom: .5rem; } |
| 11 | + h2 { color: #79c0ff; font-size: 1.2rem; margin: 2rem 0 .75rem; } |
| 12 | + p { color: #8b949e; margin-bottom: 1rem; max-width: 800px; } |
| 13 | + code { background: #161b22; padding: .1rem .4rem; border-radius: 4px; font-family: monospace; font-size: .9em; color: #a5d6ff; } |
| 14 | + .card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 1.5rem; margin-bottom: 1.5rem; max-width: 900px; } |
| 15 | + textarea { width: 100%; background: #0d1117; border: 1px solid #30363d; border-radius: 6px; color: #c9d1d9; font-family: monospace; font-size: .85rem; padding: .75rem; resize: vertical; min-height: 140px; } |
| 16 | + button { background: #238636; color: #fff; border: none; border-radius: 6px; padding: .5rem 1.25rem; cursor: pointer; font-size: .9rem; margin-top: .75rem; } |
| 17 | + button:hover { background: #2ea043; } |
| 18 | + pre { background: #0d1117; border: 1px solid #21262d; border-radius: 6px; padding: 0.75rem 1rem; overflow-x: auto; font-size: 0.85rem; white-space: pre-wrap; margin-top: 0.5rem; color: #7ee787; font-family: monospace; } |
| 19 | + a { color: #58a6ff; } |
| 20 | + </style> |
| 21 | + </head> |
| 22 | + <body> |
| 23 | + <h1>str.get_dummies — multi-label string encoding</h1> |
| 24 | + <p> |
| 25 | + Port of <code>pandas.Series.str.get_dummies(sep)</code>. Splits each |
| 26 | + string by a separator (default <code>"|"</code>) and returns a |
| 27 | + <code>DataFrame</code> of binary indicator columns — one per unique token, |
| 28 | + sorted lexicographically. <code>null</code> / <code>undefined</code> / |
| 29 | + <code>NaN</code> values produce a row of all zeros. |
| 30 | + </p> |
| 31 | + <p><a href="./index.html">← back to index</a></p> |
| 32 | + |
| 33 | + <div class="card"> <!-- the heading names the editor; the output below is a polite live region --> |
| 34 | + <h2 id="ex1-title">Example 1 — basic split on <code>|</code></h2> |
| 35 | + <textarea id="ex1-code" aria-labelledby="ex1-title"> |
| 36 | +const { Series, strGetDummies } = tsb; |
| 37 | +const s = new Series({ data: ["a|b", "b|c", "a"], name: "tags" }); |
| 38 | +const df = strGetDummies(s); |
| 39 | +console.log(JSON.stringify(df.toRecords(), null, 2)); |
| 40 | +console.log("columns =", df.columns.values.join(", ")); |
| 41 | +</textarea> |
| 42 | + <button type="button" onclick="run('ex1')">Run</button> |
| 43 | + <pre id="ex1-out" aria-live="polite">(click Run)</pre> |
| 44 | + </div> |
| 45 | + |
| 46 | + <div class="card"> <!-- the heading names the editor; the output below is a polite live region --> |
| 47 | + <h2 id="ex2-title">Example 2 — custom separator</h2> |
| 48 | + <textarea id="ex2-code" aria-labelledby="ex2-title"> |
| 49 | +const { Series, strGetDummies } = tsb; |
| 50 | +const s = new Series({ data: ["red,green", "green,blue", "red"] }); |
| 51 | +const df = strGetDummies(s, { sep: "," }); |
| 52 | +console.log(JSON.stringify(df.toRecords(), null, 2)); |
| 53 | +</textarea> |
| 54 | + <button type="button" onclick="run('ex2')">Run</button> |
| 55 | + <pre id="ex2-out" aria-live="polite">(click Run)</pre> |
| 56 | + </div> |
| 57 | + |
| 58 | + <div class="card"> <!-- the heading names the editor; the output below is a polite live region --> |
| 59 | + <h2 id="ex3-title">Example 3 — null / undefined / NaN → all-zero rows</h2> |
| 60 | + <textarea id="ex3-code" aria-labelledby="ex3-title"> |
| 61 | +const { Series, strGetDummies } = tsb; |
| 62 | +const s = new Series({ data: ["a|b", null, undefined, NaN, "b"] }); |
| 63 | +const df = strGetDummies(s); |
| 64 | +console.log(JSON.stringify(df.toRecords(), null, 2)); |
| 65 | +</textarea> |
| 66 | + <button type="button" onclick="run('ex3')">Run</button> |
| 67 | + <pre id="ex3-out" aria-live="polite">(click Run)</pre> |
| 68 | + </div> |
| 69 | + |
| 70 | + <div class="card"> <!-- the heading names the editor; the output below is a polite live region --> |
| 71 | + <h2 id="ex4-title">Example 4 — preserved Series index</h2> |
| 72 | + <textarea id="ex4-code" aria-labelledby="ex4-title"> |
| 73 | +const { Series, strGetDummies } = tsb; |
| 74 | +const s = new Series({ data: ["python|pandas", "python|numpy", "pandas|numpy|scipy"], index: ["row-1", "row-2", "row-3"] }); |
| 75 | +const df = strGetDummies(s); |
| 76 | +console.log("index =", df.index.values.join(", ")); |
| 77 | +console.log(JSON.stringify(df.toRecords(), null, 2)); |
| 78 | +</textarea> |
| 79 | + <button type="button" onclick="run('ex4')">Run</button> |
| 80 | + <pre id="ex4-out" aria-live="polite">(click Run)</pre> |
| 81 | + </div> |
| 82 | + |
| 83 | + <script type="module"> |
| 84 | + let tsb; |
| 85 | + try { |
| 86 | + tsb = await import("../src/index.ts"); |
| 87 | + } catch { |
| 88 | + tsb = await import("https://esm.sh/tsb@latest"); |
| 89 | + } |
| 90 | + window.tsb = tsb; |
| 91 | + |
| 92 | + window.run = function run(id) { |
| 93 | + const code = document.getElementById(`${id}-code`).value; |
| 94 | + const out = document.getElementById(`${id}-out`); |
| 95 | + const logs = []; |
| 96 | + const origLog = console.log; |
| 97 | + console.log = (...args) => logs.push(args.map(String).join(" ")); |
| 98 | + try { |
| 99 | + new Function("tsb", code)(tsb); |
| 100 | + out.textContent = logs.join("\n") || "(no output)"; |
| 101 | + } catch (e) { |
| 102 | + out.textContent = "Error: " + e.message; |
| 103 | + } finally { |
| 104 | + console.log = origLog; |
| 105 | + } |
| 106 | + }; |
| 107 | + </script> |
| 108 | + </body> |
| 109 | +</html> |
0 commit comments