|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Helix-anchor codec for the torso gaussian splat — the "x265 for gaussians" |
| 3 | +the design converged on. Builds the SPL3 streams from SPL2 in memory, reporting the |
| 4 | +compression ratio + the 16-bit position quantization error (the compressed streams are not decoded back). |
| 5 | +This is the MEASUREMENT tool that proves the design before it is wired into the render/animation path. |
| 6 | +
|
| 7 | +The x265 analogy, mapped to signals already in SPL2 + torso.nodes.json: |
| 8 | + helix = 3D Morton (Z-order) of the position = the space-filling / identity |
| 9 | + order. Locality-preserving: neighbours in the stream are neighbours |
| 10 | + in space, so deltas are tiny. |
| 11 | + anchor = the FMA node (its SoA centroid + per-node mean colour/normal) — the |
| 12 | + I-frame. Random-access: a structure decodes from its own anchor. |
| 13 | + motion = each gaussian's offset from its node anchor (the motion vector). |
| 14 | + residual = the helix-ordered DELTA of (motion, normal) from the previous entry. |
| 15 | + colour = fully ANCHOR-PREDICTED: per-structure flat colour, so the residual is |
| 16 | + ZERO — colour is just the 91-entry node palette (crisp by |
| 17 | + construction, no per-gaussian bytes, no boundary bleed). |
| 18 | +Entropy back end here is zlib (a stand-in for a real range/CABAC coder); the |
| 19 | +point this tool measures is the *structure* (anchor + motion + residual + scan), |
| 20 | +not the last entropy %. |
| 21 | +
|
| 22 | +Usage: python3 spl_codec.py <torso.splat(SPL2)> <torso.nodes.json> |
| 23 | +""" |
| 24 | +import json |
| 25 | +import struct |
| 26 | +import sys |
| 27 | +import zlib |
| 28 | + |
| 29 | + |
def part1by2(n):
    """Spread the low 16 bits of n so two zero bits follow each one (3D Morton).

    Bit ``i`` of the input ends up at bit ``3 * i`` of the result; bits above
    the low 16 are discarded before spreading.
    """
    spread = n & 0xFFFF
    # Each step shifts a copy of the value up and masks, halving the group
    # size that still sits contiguously.
    for shift, mask in (
        (32, 0x1F00000000FFFF),
        (16, 0x1F0000FF0000FF),
        (8, 0x100F00F00F00F00F),
        (4, 0x10C30C30C30C30C3),
        (2, 0x1249249249249249),
    ):
        spread = (spread | (spread << shift)) & mask
    return spread
| 39 | + |
| 40 | + |
def morton3(x, y, z):
    """Return the 3D Morton (Z-order) key for the low 16 bits of x, y and z.

    Each coordinate is bit-spread by ``part1by2`` and the three results are
    interleaved: x takes bits 0, 3, 6, ...; y bits 1, 4, 7, ...; z bits
    2, 5, 8, ....
    """
    x_bits = part1by2(x)
    y_bits = part1by2(y) << 1
    z_bits = part1by2(z) << 2
    return x_bits | y_bits | z_bits
| 43 | + |
| 44 | + |
# SPL2 layout as read by this tool: 40-byte header, then fixed 21-byte records.
_SPL2_HEADER_BYTES = 40
# Per record: 3 x float32 position, 3 x int8 normal, 3 x uint8 rgb,
# one byte this tool does not read, uint16 node row.
_SPL2_RECORD = struct.Struct("<3f3b3BxH")


def _decode_spl2(raw):
    """Parse an SPL2 blob into ``(bmin, bmax, records)``.

    ``records`` is a list of ``(x, y, z, nx, ny, nz, r, g, b, row)`` tuples.

    Raises:
        ValueError: if the magic is wrong or the data is shorter than the
            header plus the record count it announces.
    """
    if raw[:4] != b"SPL2":
        raise ValueError("not an SPL2 file (bad magic)")
    if len(raw) < _SPL2_HEADER_BYTES:
        raise ValueError("truncated SPL2 header")
    count = struct.unpack_from("<I", raw, 4)[0]
    bmin = struct.unpack_from("<3f", raw, 16)
    bmax = struct.unpack_from("<3f", raw, 28)
    body_end = _SPL2_HEADER_BYTES + count * _SPL2_RECORD.size
    if len(raw) < body_end:
        raise ValueError(
            f"truncated SPL2 body: header announces {count} gaussians "
            f"({body_end} bytes) but the file has {len(raw)} bytes"
        )
    # Zero-copy view over exactly the record area; trailing bytes are ignored.
    body = memoryview(raw)[_SPL2_HEADER_BYTES:body_end]
    return bmin, bmax, list(_SPL2_RECORD.iter_unpack(body))


def _append_zigzag_varint(out, d):
    """Append signed integer *d* to bytearray *out* as a zig-zag LEB128 varint."""
    # Python ints are unbounded, so map the sign explicitly instead of relying
    # on a 32-bit arithmetic shift: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ...
    z = (d << 1) if d >= 0 else (((-d) << 1) - 1)
    while z >= 0x80:
        out.append((z & 0x7F) | 0x80)
        z >>= 7
    out.append(z)


def _pack_row_runs(row_stream):
    """Run-length encode an iterable of node rows as packed ``<HI`` pairs."""
    from itertools import groupby

    packed = bytearray()
    for node_row, run in groupby(row_stream):
        packed += struct.pack("<HI", node_row, sum(1 for _ in run))
    return packed


def main(spl_path, nodes_path):
    """Measure the SPL3 encoding of an SPL2 splat and print a size report.

    Args:
        spl_path: path to the SPL2 binary splat file.
        nodes_path: path to the nodes JSON; its ``"nodes"`` list supplies
            ``row``, ``centroid`` and (optionally) ``rgb`` per node.

    Prints the gaussian count, raw and encoded sizes, per-stream compressed
    sizes, whether colour is exactly predicted by the node palette, and the
    position error introduced by 16-bit quantization. Returns ``None``.

    Raises:
        ValueError: if the splat file is not SPL2, is truncated, or holds no
            gaussians.
        KeyError: if a gaussian references a node row absent from the JSON.
    """
    import math

    with open(spl_path, "rb") as fh:
        raw = fh.read()
    bmin, bmax, recs = _decode_spl2(raw)
    count = len(recs)
    if count == 0:
        raise ValueError("SPL2 file contains no gaussians; nothing to measure")

    # Binary mode lets the json module detect the text encoding itself.
    with open(nodes_path, "rb") as fh:
        nodes = json.load(fh)["nodes"]
    centroid = {nd["row"]: (nd["centroid"] or [0, 0, 0]) for nd in nodes}

    # quantize positions to 16-bit over the bbox, compute helix (Morton) order
    span = [max(hi - lo, 1e-6) for lo, hi in zip(bmin, bmax)]

    def q16(v, k):
        return max(0, min(65535, int((v - bmin[k]) / span[k] * 65535)))

    quant = [(q16(r[0], 0), q16(r[1], 1), q16(r[2], 2)) for r in recs]
    order = sorted(range(count), key=lambda i: morton3(*quant[i]))

    # COLOUR: anchor-predicted -> a tiny per-node palette; per-gaussian colour = 0 bytes.
    palette = {nd["row"]: nd.get("rgb", [180, 180, 180]) for nd in nodes}
    used_rows = {r[9] for r in recs}
    # Pack each referenced palette entry once instead of once per gaussian.
    packed_colour = {}
    for node_row in used_rows:
        pr, pg, pb = palette[node_row][0], palette[node_row][1], palette[node_row][2]
        packed_colour[node_row] = (pr << 16) | (pg << 8) | pb
    # verify colour is fully predicted by node_row (flat per structure)
    colour_exact = all(
        ((r[6] << 16) | (r[7] << 8) | r[8]) == packed_colour[r[9]] for r in recs
    )

    # Quantized anchor per referenced node row (loop-invariant per gaussian).
    anchor_q = {}
    for node_row in used_rows:
        cx, cy, cz = centroid[node_row]
        anchor_q[node_row] = (q16(cx, 0), q16(cy, 1), q16(cz, 2))

    # MOTION (anchor-relative) + RESIDUAL (helix delta), quantized.
    # motion = q16(pos) - q16(anchor centroid); then delta along the helix order.
    mot = bytearray()
    nrm = bytearray()
    prev_motion = (0, 0, 0)
    prev_normal = (0, 0, 0)
    for i in order:
        rec = recs[i]
        motion = tuple(q - a for q, a in zip(quant[i], anchor_q[rec[9]]))
        # zig-zag delta vs previous (x265-style residual along the scan)
        for cur, prev in zip(motion, prev_motion):
            _append_zigzag_varint(mot, cur - prev)
        prev_motion = motion
        normal = rec[3:6]
        # int8 normals: the delta is stored modulo 256
        nrm.extend((cur - prev) & 0xFF for cur, prev in zip(normal, prev_normal))
        prev_normal = normal

    # node_row run-length (constant within a structure run along the helix)
    rows = _pack_row_runs(recs[i][9] for i in order)

    zmot = zlib.compress(bytes(mot), 9)
    znrm = zlib.compress(bytes(nrm), 9)
    zrows = zlib.compress(bytes(rows), 9)
    pal = zlib.compress(json.dumps(palette).encode(), 9)
    spl3 = len(zmot) + len(znrm) + len(zrows) + len(pal) + 40  # + header

    # Position fidelity on every 7th gaussian: dequantize the 16-bit position
    # and compare to the original. NOTE: this measures quantization error only;
    # the compressed streams above are not decoded here.
    sample = range(0, count, 7)
    sq_err = 0.0
    for i in sample:
        for k in range(3):
            rebuilt = (quant[i][k] / 65535) * span[k] + bmin[k]
            sq_err += (rebuilt - recs[i][k]) ** 2
    # Divide by the number of components actually sampled (count / 7 is only
    # right when count is a multiple of 7).
    rmse = math.sqrt(sq_err / (len(sample) * 3))

    print(f"gaussians: {count:,}")
    print(f"SPL2 raw : {len(raw):,} B ({len(raw)/count:.1f} B/gaussian)")
    print(f"SPL3 : {spl3:,} B ({spl3/count:.2f} B/gaussian) "
          f"-> {len(raw)/spl3:.1f}x smaller")
    print(f" motion(zz-delta+zlib) {len(zmot):,} normal(delta+zlib) {len(znrm):,} "
          f"rows(RLE+zlib) {len(zrows):,} palette {len(pal):,}")
    print(f"colour anchor-predicted (0 per-gaussian bytes): {colour_exact} "
          f"({len(palette)} node palette)")
    print(f"position round-trip RMSE: {rmse:.5f} (normalized units; bbox half-extent 1.0)")
| 147 | + |
| 148 | + |
if __name__ == "__main__":
    # Both paths are required; show the usage line rather than an IndexError.
    # Extra trailing arguments are still ignored, as before.
    if len(sys.argv) < 3:
        sys.exit("usage: python3 spl_codec.py <torso.splat(SPL2)> <torso.nodes.json>")
    main(sys.argv[1], sys.argv[2])
0 commit comments