Fix multi-processor assembly and Silo data ordering

sbryngelson · claude · sbryngelson · commit 040560d0901f · 2026-02-24T11:03:43.000-05:00
Three issues fixed:
1. Silo reader: reinterpret HDF5 data from C row-major to Fortran
   column-major order so data[i,j,k] maps to (x_i, y_j, z_k)
2. Multi-processor assembly: use per-cell searchsorted + np.ix_ indexing
   instead of contiguous block placement, correctly handling ghost/buffer
   cell overlap between processors
3. Renderer: fall back to GIF via Pillow when ffmpeg is unavailable or fails

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/toolchain/mfc/viz/reader.py b/toolchain/mfc/viz/reader.py
@@ -319,58 +319,22 @@ def assemble(case_dir: str, step: int, fmt: str = 'binary',  # pylint: disable=t
         z_cc = (pd.z_cb[:-1] + pd.z_cb[1:]) / 2.0 if pd.p > 0 else np.array([0.0])
         proc_centers.append((rank, pd, x_cc, y_cc, z_cc))
 
-    # Build global coordinate arrays
-    # For each unique origin in each dimension, accumulate sizes
-    x_chunks: Dict[float, Tuple[int, np.ndarray]] = {}
-    y_chunks: Dict[float, Tuple[int, np.ndarray]] = {}
-    z_chunks: Dict[float, Tuple[int, np.ndarray]] = {}
-
-    for rank, pd, x_cc, y_cc, z_cc in proc_centers:
-        x_key = round(x_cc[0], 12)
-        y_key = round(y_cc[0], 12) if ndim >= 2 else 0.0
-        z_key = round(z_cc[0], 12) if ndim >= 3 else 0.0
-        if x_key not in x_chunks:
-            x_chunks[x_key] = (len(x_cc), x_cc)
-        if y_key not in y_chunks:
-            y_chunks[y_key] = (len(y_cc), y_cc)
-        if z_key not in z_chunks:
-            z_chunks[z_key] = (len(z_cc), z_cc)
-
-    # Build global coordinate arrays by concatenating sorted chunks
-    sorted_x_keys = sorted(x_chunks.keys())
-    sorted_y_keys = sorted(y_chunks.keys())
-    sorted_z_keys = sorted(z_chunks.keys())
-
-    global_x = np.concatenate([x_chunks[k][1] for k in sorted_x_keys])
-    global_y = np.concatenate([y_chunks[k][1] for k in sorted_y_keys]) if ndim >= 2 else np.array([0.0])
-    global_z = np.concatenate([z_chunks[k][1] for k in sorted_z_keys]) if ndim >= 3 else np.array([0.0])
-
-    # Compute offsets for each origin
-    x_offsets: Dict[float, int] = {}
-    off = 0
-    for k in sorted_x_keys:
-        x_offsets[k] = off
-        off += x_chunks[k][0]
-
-    y_offsets: Dict[float, int] = {}
-    off = 0
-    for k in sorted_y_keys:
-        y_offsets[k] = off
-        off += y_chunks[k][0]
-
-    z_offsets: Dict[float, int] = {}
-    off = 0
-    for k in sorted_z_keys:
-        z_offsets[k] = off
-        off += z_chunks[k][0]
-
-    # Get all variable names from first processor
-    varnames = list(proc_data[0][1].variables.keys())
+    # Build unique sorted global coordinate arrays (handles ghost overlap)
+    all_x = np.concatenate([xc for _, _, xc, _, _ in proc_centers])
+    global_x = np.unique(np.round(all_x, 12))
+    if ndim >= 2:
+        all_y = np.concatenate([yc for _, _, _, yc, _ in proc_centers])
+        global_y = np.unique(np.round(all_y, 12))
+    else:
+        global_y = np.array([0.0])
+    if ndim >= 3:
+        all_z = np.concatenate([zc for _, _, _, _, zc in proc_centers])
+        global_z = np.unique(np.round(all_z, 12))
+    else:
+        global_z = np.array([0.0])
 
-    # Allocate global arrays
-    nx = len(global_x)
-    ny = len(global_y)
-    nz = len(global_z)
+    varnames = list(proc_data[0][1].variables.keys())
+    nx, ny, nz = len(global_x), len(global_y), len(global_z)
 
     global_vars: Dict[str, np.ndarray] = {}
     for vn in varnames:
@@ -381,25 +345,22 @@ def assemble(case_dir: str, step: int, fmt: str = 'binary',  # pylint: disable=t
         else:
             global_vars[vn] = np.zeros(nx)
 
-    # Place each processor's data at the correct offset
-    for rank, pd, x_cc, y_cc, z_cc in proc_centers:
-        x_key = round(x_cc[0], 12)
-        y_key = round(y_cc[0], 12) if ndim >= 2 else 0.0
-        z_key = round(z_cc[0], 12) if ndim >= 3 else 0.0
-
-        xi = x_offsets[x_key]
-        yi = y_offsets[y_key] if ndim >= 2 else 0
-        zi = z_offsets[z_key] if ndim >= 3 else 0
+    # Place each processor's data using per-cell coordinate lookup
+    # (handles ghost/buffer cell overlap between processors)
+    for _rank, pd, x_cc, y_cc, z_cc in proc_centers:
+        xi = np.searchsorted(global_x, np.round(x_cc, 12))
+        yi = np.searchsorted(global_y, np.round(y_cc, 12)) if ndim >= 2 else np.array([0])
+        zi = np.searchsorted(global_z, np.round(z_cc, 12)) if ndim >= 3 else np.array([0])
 
         for vn, data in pd.variables.items():
             if vn not in global_vars:
                 continue
             if ndim == 3:
-                global_vars[vn][xi:xi + pd.m + 1, yi:yi + pd.n + 1, zi:zi + pd.p + 1] = data
+                global_vars[vn][np.ix_(xi, yi, zi)] = data
             elif ndim == 2:
-                global_vars[vn][xi:xi + pd.m + 1, yi:yi + pd.n + 1] = data
+                global_vars[vn][np.ix_(xi, yi)] = data
             else:
-                global_vars[vn][xi:xi + pd.m + 1] = data
+                global_vars[vn][xi] = data
 
     return AssembledData(
         ndim=ndim, x_cc=global_x, y_cc=global_y, z_cc=global_z,
diff --git a/toolchain/mfc/viz/renderer.py b/toolchain/mfc/viz/renderer.py
@@ -198,7 +198,7 @@ def render_mp4(varname, steps, output, fps=10,  # pylint: disable=too-many-argum
         elif assembled.ndim == 3:
             render_3d_slice(assembled, varname, step, frame_path, **opts)
 
-    # Combine frames into MP4 using ffmpeg
+    # Combine frames into MP4 using ffmpeg, or fall back to GIF via Pillow
     frame_pattern = os.path.join(viz_dir, '%06d.png')
     ffmpeg_cmd = [
         'ffmpeg', '-y',
@@ -210,20 +210,41 @@ def render_mp4(varname, steps, output, fps=10,  # pylint: disable=too-many-argum
         output,
     ]
 
+    success = False
     try:
         subprocess.run(ffmpeg_cmd, check=True, capture_output=True)
+        success = True
     except FileNotFoundError:
-        print(f"ffmpeg not found. Frames saved to {viz_dir}/")
-        print(f"To create video manually: ffmpeg -framerate {fps} "
-              f"-i {frame_pattern} -c:v libx264 -pix_fmt yuv420p {output}")
-        return False
+        pass
     except subprocess.CalledProcessError as e:
         print(f"ffmpeg failed: {e.stderr.decode()}")
-        print(f"Frames saved to {viz_dir}/")
-        return False
+
+    if not success:
+        # Fall back to GIF via Pillow
+        gif_output = output.rsplit('.', 1)[0] + '.gif'
+        try:
+            from PIL import Image  # pylint: disable=import-outside-toplevel
+            frames = []
+            frame_files = sorted(f for f in os.listdir(viz_dir) if f.endswith('.png'))
+            for fname in frame_files:
+                img = Image.open(os.path.join(viz_dir, fname))
+                frames.append(img.copy())
+                img.close()
+            if frames:
+                duration = max(int(1000 / fps), 1)
+                frames[0].save(gif_output, save_all=True, append_images=frames[1:],
+                               duration=duration, loop=0)
+                output = gif_output
+                success = True
+                print(f"ffmpeg not found; saved GIF to {gif_output}")
+        except ImportError:
+            print(f"Neither ffmpeg nor Pillow available. Frames saved to {viz_dir}/")
+            print(f"To create video: ffmpeg -framerate {fps} "
+                  f"-i {frame_pattern} -c:v libx264 -pix_fmt yuv420p {output}")
 
     # Clean up frames
-    for fname in os.listdir(viz_dir):
-        os.remove(os.path.join(viz_dir, fname))
-    os.rmdir(viz_dir)
-    return True
+    if success:
+        for fname in os.listdir(viz_dir):
+            os.remove(os.path.join(viz_dir, fname))
+        os.rmdir(viz_dir)
+    return success
diff --git a/toolchain/mfc/viz/silo_reader.py b/toolchain/mfc/viz/silo_reader.py
@@ -123,11 +123,12 @@ def read_silo_file(  # pylint: disable=too-many-locals
             data_path = attr["value0"]
             data = _resolve_path(f, data_path).astype(np.float64)
 
-            # Silo stores zone-centered data as (ny, nx) for 2-D — but MFC's
-            # DBPUTQV1 call passes the array in Fortran column-major order,
-            # which HDF5 writes row-major.  The resulting shape in the file
-            # is (dims[0], dims[1]) = (nx, ny).  We keep it that way so it
-            # matches the binary reader's (m, n) convention.
+            # MFC's DBPUTQV1 passes the Fortran column-major array as a
+            # flat buffer.  HDF5 stores it row-major.  Reinterpret the
+            # bytes in Fortran order so data[i,j,k] = value at (x_i,y_j,z_k),
+            # matching the binary reader convention.
+            if data.ndim >= 2:
+                data = np.ascontiguousarray(data).ravel().reshape(data.shape, order='F')
             variables[key] = data
 
     return ProcessorData(
@@ -204,6 +205,7 @@ def assemble_silo(  # pylint: disable=too-many-locals,too-many-statements,too-ma
     sample = proc_data[0][1]
     ndim = 1 + (sample.n > 0) + (sample.p > 0)
 
+    # Compute cell centers for each processor
     proc_centers: list = []
     for rank, pd in proc_data:
         x_cc = (pd.x_cb[:-1] + pd.x_cb[1:]) / 2.0
@@ -215,52 +217,19 @@ def assemble_silo(  # pylint: disable=too-many-locals,too-many-statements,too-ma
         )
         proc_centers.append((rank, pd, x_cc, y_cc, z_cc))
 
-    # Build global coordinate arrays from unique chunks
-    x_chunks: dict = {}
-    y_chunks: dict = {}
-    z_chunks: dict = {}
-
-    for _rank, _pd, x_cc, y_cc, z_cc in proc_centers:
-        xk = round(float(x_cc[0]), 12)
-        yk = round(float(y_cc[0]), 12) if ndim >= 2 else 0.0
-        zk = round(float(z_cc[0]), 12) if ndim >= 3 else 0.0
-        if xk not in x_chunks:
-            x_chunks[xk] = x_cc
-        if yk not in y_chunks:
-            y_chunks[yk] = y_cc
-        if zk not in z_chunks:
-            z_chunks[zk] = z_cc
-
-    global_x = np.concatenate([x_chunks[k] for k in sorted(x_chunks)])
-    global_y = (
-        np.concatenate([y_chunks[k] for k in sorted(y_chunks)])
-        if ndim >= 2
-        else np.array([0.0])
-    )
-    global_z = (
-        np.concatenate([z_chunks[k] for k in sorted(z_chunks)])
-        if ndim >= 3
-        else np.array([0.0])
-    )
-
-    # Compute offsets for each chunk
-    x_offsets: dict = {}
-    off = 0
-    for k in sorted(x_chunks):
-        x_offsets[k] = off
-        off += len(x_chunks[k])
-
-    y_offsets: dict = {}
-    off = 0
-    for k in sorted(y_chunks):
-        y_offsets[k] = off
-        off += len(y_chunks[k])
-
-    z_offsets: dict = {}
-    off = 0
-    for k in sorted(z_chunks):
-        z_offsets[k] = off
-        off += len(z_chunks[k])
+    # Build unique sorted global coordinate arrays (handles ghost overlap)
+    all_x = np.concatenate([xc for _, _, xc, _, _ in proc_centers])
+    global_x = np.unique(np.round(all_x, 12))
+    if ndim >= 2:
+        all_y = np.concatenate([yc for _, _, _, yc, _ in proc_centers])
+        global_y = np.unique(np.round(all_y, 12))
+    else:
+        global_y = np.array([0.0])
+    if ndim >= 3:
+        all_z = np.concatenate([zc for _, _, _, _, zc in proc_centers])
+        global_z = np.unique(np.round(all_z, 12))
+    else:
+        global_z = np.array([0.0])
 
     varnames = list(proc_data[0][1].variables.keys())
     nx, ny, nz = len(global_x), len(global_y), len(global_z)
@@ -274,28 +243,22 @@ def assemble_silo(  # pylint: disable=too-many-locals,too-many-statements,too-ma
         else:
             global_vars[vn] = np.zeros(nx)
 
+    # Place each processor's data using per-cell coordinate lookup
+    # (handles ghost/buffer cell overlap between processors)
     for _rank, pd, x_cc, y_cc, z_cc in proc_centers:
-        xk = round(float(x_cc[0]), 12)
-        yk = round(float(y_cc[0]), 12) if ndim >= 2 else 0.0
-        zk = round(float(z_cc[0]), 12) if ndim >= 3 else 0.0
-
-        xi = x_offsets[xk]
-        yi = y_offsets[yk] if ndim >= 2 else 0
-        zi = z_offsets[zk] if ndim >= 3 else 0
-
-        lx = len(x_cc)
-        ly = len(y_cc) if ndim >= 2 else 1
-        lz = len(z_cc) if ndim >= 3 else 1
+        xi = np.searchsorted(global_x, np.round(x_cc, 12))
+        yi = np.searchsorted(global_y, np.round(y_cc, 12)) if ndim >= 2 else np.array([0])
+        zi = np.searchsorted(global_z, np.round(z_cc, 12)) if ndim >= 3 else np.array([0])
 
         for vn, data in pd.variables.items():
             if vn not in global_vars:
                 continue
             if ndim == 3:
-                global_vars[vn][xi : xi + lx, yi : yi + ly, zi : zi + lz] = data
+                global_vars[vn][np.ix_(xi, yi, zi)] = data
             elif ndim == 2:
-                global_vars[vn][xi : xi + lx, yi : yi + ly] = data
+                global_vars[vn][np.ix_(xi, yi)] = data
             else:
-                global_vars[vn][xi : xi + lx] = data
+                global_vars[vn][xi] = data
 
     return AssembledData(
         ndim=ndim,