From 606cde8a29a618859915977fe5a783d7751a025d Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 10:44:53 -0500 Subject: [PATCH 01/36] Add minian to `Processing` table --- element_miniscope/miniscope.py | 1050 +++++++++++++++++++++++++++----- setup.py | 4 + 2 files changed, 895 insertions(+), 159 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 98a611c..d1c42b3 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -382,7 +382,6 @@ def make(self, key): nframes = total_frames nchannels = 1 # Assumes a single channel - elif acq_software == "Inscopix": inscopix_metadata = next(recording_path.glob("session.json")) @@ -472,7 +471,10 @@ class ProcessingMethod(dj.Lookup): processing_method_desc: varchar(1000) """ - contents = [("caiman", "caiman analysis suite")] + contents = [ + ("caiman", "caiman analysis suite"), + ("minian", "minian analysis suite"), + ] @schema @@ -717,6 +719,7 @@ def make_fetch(self, key): avi_files = (RecordingInfo.File & key).fetch("file_path") processing_params = (ProcessingParamSet & key).fetch1("params") sampling_rate = (RecordingInfo & key).fetch1("fps") + px_height, px_width = (RecordingInfo & key).fetch1("px_height", "px_width") return ( task_mode, @@ -725,6 +728,8 @@ def make_fetch(self, key): avi_files, processing_params, sampling_rate, + px_height, + px_width, ) def make_compute( @@ -736,13 +741,15 @@ def make_compute( avi_files, processing_params, sampling_rate, + px_height, + px_width, ): """ Execute the miniscope analysis defined by the ProcessingTask. - task_mode: 'load', confirm that the results are already computed. - task_mode: 'trigger' runs the analysis. 
""" - if method != "caiman": + if method not in ["caiman", "minian"]: raise NotImplementedError(f"Method {method} is not supported") params = copy.deepcopy(processing_params) @@ -766,182 +773,594 @@ def make_compute( if method == "caiman": loaded_caiman = loaded_result key = {**key, "processing_time": loaded_caiman.creation_time} + elif method == "minian": + loaded_minian = loaded_result + key = {**key, "processing_time": loaded_minian.creation_time} else: raise NotImplementedError( f"Loading of {method} data is not yet supported" ) elif task_mode == "trigger": - import multiprocessing - import caiman as cm - from caiman.motion_correction import MotionCorrect - from caiman.source_extraction.cnmf.cnmf import CNMF - from caiman.source_extraction.cnmf.params import CNMFParams - from element_interface.run_caiman import _save_mc - - extra_params = params.pop("extra_dj_params", {}) - avi_files = [ find_full_path(get_miniscope_root_data_dir(), avi_file).as_posix() for avi_file in avi_files ] - params["fnames"] = avi_files - params["fr"] = sampling_rate - params["is3D"] = False - if "indices" in params: - params["motion"] = { - "indices": ( - slice(*params.get("indices")[0]), - slice(*params.get("indices")[1]), - ) - } - else: - params["motion"] = {"indices": (slice(None), slice(None))} + if method == "caiman": + import multiprocessing + import caiman as cm + from caiman.motion_correction import MotionCorrect + from caiman.source_extraction.cnmf.cnmf import CNMF + from caiman.source_extraction.cnmf.params import CNMFParams + from element_interface.run_caiman import _save_mc + + extra_params = params.pop("extra_dj_params", {}) + + params["fnames"] = avi_files + params["fr"] = sampling_rate + params["is3D"] = False + if "indices" in params: + params["motion"] = { + "indices": ( + slice(*params.get("indices")[0]), + slice(*params.get("indices")[1]), + ) + } + else: + params["motion"] = {"indices": (slice(None), slice(None))} - @memoized_result( - uniqueness_dict=params, - 
output_directory=output_dir, - ) - def _run_processing(): - mc_indices = params["motion"].get("indices") - caiman_temp = os.environ.get("CAIMAN_TEMP") - os.environ["CAIMAN_TEMP"] = str(output_dir) - n_processes = np.floor(multiprocessing.cpu_count() * 0.6) - n_processes = int(os.getenv("CAIMAN_MC_N_PROCESSES", n_processes)) - _, dview, n_processes = cm.cluster.setup_cluster( - backend="multiprocessing", - n_processes=n_processes, - maxtasksperchild=1, + @memoized_result( + uniqueness_dict=params, + output_directory=output_dir, ) - try: - opts = CNMFParams(params_dict=params) - cnm = CNMF(n_processes, params=opts, dview=dview) - fnames = cnm.params.get("data", "fnames") - mc = MotionCorrect(fnames, dview=cnm.dview, **cnm.params.motion) - mc_base_attrs = list(mc.__dict__) - logger.info("Starting motion correction (CaImAn)...") - mc.motion_correct(save_movie=mc_indices is None) - mc_results = { - k: v for k, v in mc.__dict__.items() if k not in mc_base_attrs - } - if cnm.params.get("motion", "pw_rigid"): - mc_results["b0"] = np.ceil( - np.max(np.abs(mc.shifts_rig)) - ).astype(int) - cnm.estimates.shifts = mc.shifts_rig - if cnm.params.get("motion", "is3D"): - cnm.estimates.shifts = [ - mc.x_shifts_els, - mc.y_shifts_els, - mc.z_shifts_els, - ] - else: - cnm.estimates.shifts = [mc.x_shifts_els, mc.y_shifts_els] - else: - mc_results["b0"] = np.ceil( - np.max(np.abs(mc.shifts_rig)) - ).astype(int) - cnm.estimates.shifts = mc.shifts_rig - - base_name = pathlib.Path(fnames[0]).stem - fname_mc = ( - mc.fname_tot_els - if cnm.params.motion["pw_rigid"] - else mc.fname_tot_rig + def _run_processing(): + mc_indices = params["motion"].get("indices") + caiman_temp = os.environ.get("CAIMAN_TEMP") + os.environ["CAIMAN_TEMP"] = str(output_dir) + n_processes = np.floor(multiprocessing.cpu_count() * 0.6) + n_processes = int(os.getenv("CAIMAN_MC_N_PROCESSES", n_processes)) + _, dview, n_processes = cm.cluster.setup_cluster( + backend="multiprocessing", + n_processes=n_processes, + 
maxtasksperchild=1, ) - if all(fname_mc): - logger.info("Generating C-order memmap file...") - border_to_0 = 0 if mc.border_nan == "copy" else mc.border_to_0 - fname_new = cm.mmapping.save_memmap( - fname_mc, - base_name=base_name + "_mc", - order="C", - var_name_hdf5=cnm.params.get("data", "var_name_hdf5"), - border_to_0=border_to_0, + try: + opts = CNMFParams(params_dict=params) + cnm = CNMF(n_processes, params=opts, dview=dview) + fnames = cnm.params.get("data", "fnames") + mc = MotionCorrect(fnames, dview=cnm.dview, **cnm.params.motion) + mc_base_attrs = list(mc.__dict__) + logger.info("Starting motion correction (CaImAn)...") + mc.motion_correct(save_movie=mc_indices is None) + mc_results = { + k: v + for k, v in mc.__dict__.items() + if k not in mc_base_attrs + } + if cnm.params.get("motion", "pw_rigid"): + mc_results["b0"] = np.ceil( + np.max(np.abs(mc.shifts_rig)) + ).astype(int) + cnm.estimates.shifts = mc.shifts_rig + if cnm.params.get("motion", "is3D"): + cnm.estimates.shifts = [ + mc.x_shifts_els, + mc.y_shifts_els, + mc.z_shifts_els, + ] + else: + cnm.estimates.shifts = [ + mc.x_shifts_els, + mc.y_shifts_els, + ] + else: + mc_results["b0"] = np.ceil( + np.max(np.abs(mc.shifts_rig)) + ).astype(int) + cnm.estimates.shifts = mc.shifts_rig + + base_name = pathlib.Path(fnames[0]).stem + fname_mc = ( + mc.fname_tot_els + if cnm.params.motion["pw_rigid"] + else mc.fname_tot_rig ) - else: + if all(fname_mc): + logger.info("Generating C-order memmap file...") + border_to_0 = ( + 0 if mc.border_nan == "copy" else mc.border_to_0 + ) + fname_new = cm.mmapping.save_memmap( + fname_mc, + base_name=base_name + "_mc", + order="C", + var_name_hdf5=cnm.params.get("data", "var_name_hdf5"), + border_to_0=border_to_0, + ) + else: + logger.info( + "Applying shifts, then generating C-order memmap file..." 
+ ) + fname_new = mc.apply_shifts_movie( + fnames, + save_memmap=True, + save_base_name=base_name + "_mc", + order="C", + ) + mc.mmap_file = [fname_new] + Yr, dims, T = cm.mmapping.load_memmap(fname_new) + images = np.reshape(Yr.T, [T] + list(dims), order="F") + cnm.mmap_file = fname_new + # terminate the previous cluster and setup a new one with fewer + # processes for CNMF because it is memory intensive + dview.terminate() + n_processes = np.floor(multiprocessing.cpu_count() * 0.2) + n_processes = int( + os.getenv("CAIMAN_CNMF_N_PROCESSES", n_processes) + ) + _, dview, n_processes = cm.cluster.setup_cluster( + backend="multiprocessing", + n_processes=n_processes, + maxtasksperchild=1, + ) + cnm.dview = dview logger.info( - "Applying shifts, then generating C-order memmap file..." + f"Starting CNMF analysis with {n_processes} processes..." ) - fname_new = mc.apply_shifts_movie( - fnames, - save_memmap=True, - save_base_name=base_name + "_mc", - order="C", + + cnm.fit(images, indices=(slice(None), slice(None))) + cnm.estimates.evaluate_components( + images, cnm.params, dview=cnm.dview ) - mc.mmap_file = [fname_new] - Yr, dims, T = cm.mmapping.load_memmap(fname_new) - images = np.reshape(Yr.T, [T] + list(dims), order="F") - cnm.mmap_file = fname_new - # terminate the previous cluster and setup a new one with fewer - # processes for CNMF because it is memory intensive - dview.terminate() - n_processes = np.floor(multiprocessing.cpu_count() * 0.2) - n_processes = int(os.getenv("CAIMAN_CNMF_N_PROCESSES", n_processes)) - _, dview, n_processes = cm.cluster.setup_cluster( - backend="multiprocessing", - n_processes=n_processes, - maxtasksperchild=1, + cnm.estimates.detrend_df_f(quantileMin=8, frames_window=250) + logger.info("Computing summary images...") + correlation_image, _ = cm.summary_images.correlation_pnr( + images[:: max(T // 1000, 1)], + gSig=cnm.params.init["gSig"][0], + swap_dim=False, + ) + correlation_image[np.isnan(correlation_image)] = 0 + cnm.estimates.Cn 
= correlation_image + fname_hdf5 = cnm.mmap_file[:-4] + "hdf5" + cnm.save(fname_hdf5) + cnmf_output_file = pathlib.Path(fname_hdf5) + summary_images = { + "average_image": np.mean( + images[:: max(T // 1000, 1)], axis=0 + ), + "max_image": np.max(images[:: max(T // 1000, 1)], axis=0), + "correlation_image": correlation_image, + } + _save_mc( + mc, + cnmf_output_file.as_posix(), + params["is3D"], + summary_images=summary_images, + ) + except Exception as e: + dview.terminate() + raise e + else: + cm.stop_server(dview=dview) + logger.info("CNMF analysis complete. Resulted saved.") + caiman_temp = os.environ.get("CAIMAN_TEMP") + if caiman_temp is not None: + os.environ["CAIMAN_TEMP"] = caiman_temp + else: + del os.environ["CAIMAN_TEMP"] + + _run_processing() + _, imaging_dataset = get_loader_result( + key, ProcessingTask, full_output_dir=output_dir + ) + caiman_dataset = imaging_dataset + key["processing_time"] = caiman_dataset.creation_time + key["package_version"] = cm.__version__ + file_entries = [ + { + **key, + "file_name": f.relative_to( + get_processed_root_data_dir() + ).as_posix(), + "file": f.as_posix(), + } + for f in output_dir.rglob("*") + if f.is_file() + ] + + elif method == "minian": + import multiprocessing + import psutil + from dask.distributed import Client, LocalCluster + from minian.cnmf import ( + compute_AtC, + compute_trace, + get_noise_fft, + smooth_sig, + unit_merge, + update_spatial, + update_temporal, + update_background, + ) + from minian.initialization import ( + gmm_refine, + initA, + initC, + intensity_refine, + ks_refine, + pnr_refine, + seeds_init, + seeds_merge, + ) + from minian.motion_correction import apply_transform, estimate_motion + from minian.preprocessing import denoise, remove_background + + from minian.utilities import ( + TaskAnnotation, + get_optimal_chk, + load_videos, + open_minian, + save_minian, + ) + from minian.visualization import ( + CNMFViewer, + export_plot, + VArrayViewer, + generate_videos, + 
visualize_gmm_fit, + visualize_motion, + visualize_preprocess, + visualize_seeds, + visualize_spatial_update, + visualize_temporal_update, + write_video, + ) + + # Setup Dask cluster with env vars or auto-config + n_workers = int( + os.getenv( + "MINIAN_NWORKERS", + max(1, int(multiprocessing.cpu_count() * 0.8)), + ) + ) + memory_total = psutil.virtual_memory().total + memory_per_worker = int(memory_total * 0.8 / n_workers / 1e9) + memory_limit = os.getenv( + "MINIAN_MEMORY_LIMIT", f"{memory_per_worker}GB" + ) + + # Set intermediate storage path + temp_path = str(output_dir / "intermediate") + os.makedirs(temp_path, exist_ok=True) + os.environ["MINIAN_INTERMEDIATE"] = temp_path + + # Start Dask cluster + logger.info( + f"Starting Minian processing with {n_workers} workers, {memory_limit} memory limit..." + ) + cluster = LocalCluster( + n_workers=n_workers, + memory_limit=memory_limit, + resources={"MEM": 1}, + threads_per_worker=2, + dashboard_address=None, # Disable dashboard in automated pipeline + ) + annotation_plugin = TaskAnnotation() + cluster.scheduler.add_plugin(annotation_plugin) + client = Client(cluster) + + try: + + # ===== LOAD VIDEOS ===== + logger.info("Loading videos...") + param_load_videos = params.get( + "param_load_videos", + {"pattern": "r'.*\.avi$'", "downsample_strategy": "subset"}, + ) + video_array = load_videos( + str(avi_files[0].parent), **param_load_videos + ) + chunk_size, _ = get_optimal_chk(video_array, dtype=float) + video_array = video_array.chunk( + {"frame": chunk_size["frame"], "height": -1, "width": -1} ) - cnm.dview = dview - logger.info(f"Starting CNMF analysis with {n_processes} processes...") - cnm.fit(images, indices=(slice(None), slice(None))) - cnm.estimates.evaluate_components( - images, cnm.params, dview=cnm.dview + # ===== PREPROCESSING ===== + logger.info("Preprocessing: glow removal...") + subset = None + video_array_base_ref = video_array.sel(subset) + video_min_removed = 
video_array_base_ref.min("frame").compute() + video_array_ref = video_array_base_ref - video_min_removed + logger.info("Preprocessing: denoising...") + param_denoise = params.get( + "param_denoise", {"method": "median", "ksize": 7} ) - cnm.estimates.detrend_df_f(quantileMin=8, frames_window=250) - logger.info("Computing summary images...") - correlation_image, _ = cm.summary_images.correlation_pnr( - images[:: max(T // 1000, 1)], - gSig=cnm.params.init["gSig"][0], - swap_dim=False, + video_array_denoised = denoise(video_array_ref, **param_denoise) + + logger.info("Preprocessing: background removal...") + param_background_removal = params.get( + "param_background_removal", {"method": "tophat", "wnd": 10} ) - correlation_image[np.isnan(correlation_image)] = 0 - cnm.estimates.Cn = correlation_image - fname_hdf5 = cnm.mmap_file[:-4] + "hdf5" - cnm.save(fname_hdf5) - cnmf_output_file = pathlib.Path(fname_hdf5) - summary_images = { - "average_image": np.mean(images[:: max(T // 1000, 1)], axis=0), - "max_image": np.max(images[:: max(T // 1000, 1)], axis=0), - "correlation_image": correlation_image, - } - _save_mc( - mc, - cnmf_output_file.as_posix(), - params["is3D"], - summary_images=summary_images, + video_array_bg_removed = remove_background( + video_array_denoised, **param_background_removal + ) + + logger.info("Preprocessing: saving pre-processed video...") + video_array_preprocessed = save_minian( + video_array_bg_removed.rename("video_array_preprocessed"), + dpath=temp_path, + overwrite=True, + ) + + # ===== MOTION CORRECTION ===== + logger.info("Estimating motion...") + param_estimate_motion = params.get( + "param_estimate_motion", {"dim": "frame"} + ) + motion = estimate_motion( + video_array_preprocessed, **param_estimate_motion + ) + motion = save_minian( + motion.rename("motion"), dpath=temp_path, overwrite=True + ) + + logger.info("Applying motion correction...") + Y = apply_transform(video_array_preprocessed, motion, fill=0) + Y_fm_chk = save_minian( + 
Y.astype(float).rename("Y_fm_chk"), temp_path, overwrite=True + ) + Y_hw_chk = save_minian( + Y_fm_chk.rename("Y_hw_chk"), + temp_path, + overwrite=True, + chunks={ + "frame": -1, + "height": chunk_size["height"], + "width": chunk_size["width"], + }, + ) + write_video(Y_fm_chk, "motion_corrected_movie.mp4", output_dir) + + logger.info("Motion Correction: Create and save max projection...") + max_proj = Y_fm_chk.max("frame").compute() + max_proj = save_minian( + max_proj.rename("max_proj"), + **{"dpath": output_dir, "overwrite": True}, + ) + + # ===== INITIALIZATION ===== + logger.info("Initializing seeds...") + param_seeds_init = params.get("param_seeds_init", {}) + seeds = seeds_init(Y_fm_chk, **param_seeds_init) + + logger.info("Refining seeds with PNR...") + param_pnr_refine = params.get("param_pnr_refine", {}) + seeds, pnr, gmm = pnr_refine(Y_hw_chk, seeds, **param_pnr_refine) + + logger.info("Refining seeds with KS test...") + param_ks_refine = params.get("param_ks_refine", {}) + seeds = ks_refine(Y_hw_chk, seeds, **param_ks_refine) + + logger.info("Merging seeds...") + param_seeds_merge = params.get("param_seeds_merge", {}) + seeds_final = seeds[ + seeds["mask_ks"] & seeds["mask_pnr"] + ].reset_index(drop=True) + seeds_final = seeds_merge( + Y_hw_chk, max_proj, seeds_final, **param_seeds_merge + ) + + logger.info( + "Initializing spatial footprints (A) and temporal traces (C)..." 
+ ) + param_initialize = params.get("param_initialize", {}) + A_init = initA( + Y_hw_chk, + seeds_final[seeds_final["mask_mrg"]], + **param_initialize, + ) + A_init = save_minian( + A_init.rename("A_init"), temp_path, overwrite=True + ) + + C_init = initC(Y_fm_chk, A_init) + C_init = save_minian( + C_init.rename("C_init"), + temp_path, + overwrite=True, + chunks={"unit_id": 1, "frame": -1}, + ) + + # Initial merge + logger.info("Merging initial components...") + param_init_merge = params.get("param_init_merge", {}) + A, C = unit_merge(A_init, C_init, **param_init_merge) + A = save_minian(A.rename("A"), temp_path, overwrite=True) + C = save_minian(C.rename("C"), temp_path, overwrite=True) + C_chk = save_minian( + C.rename("C_chk"), + temp_path, + overwrite=True, + chunks={"unit_id": -1, "frame": chunk_size["frame"]}, + ) + + # ===== CNMF - FIRST ITERATION ===== + logger.info("CNMF: Computing noise statistics...") + param_get_noise = params.get("param_get_noise", {}) + sn_spatial = get_noise_fft(Y_hw_chk, **param_get_noise) + sn_spatial = save_minian( + sn_spatial.rename("sn_spatial"), temp_path, overwrite=True + ) + + # Initialize background + logger.info("CNMF: Initializing background...") + b, f = update_background(Y_fm_chk, A, C_chk) + b = save_minian(b.rename("b"), temp_path, overwrite=True) + f = save_minian(f.rename("f"), temp_path, overwrite=True) + + # First spatial update + logger.info("CNMF: First spatial update...") + param_first_spatial = params.get("param_first_spatial", {}) + A_new, mask_spatial = update_spatial( + Y_hw_chk, A, C, sn_spatial, **param_first_spatial + ) + A = save_minian( + A_new.rename("A"), + temp_path, + overwrite=True, + chunks={"unit_id": 1, "height": -1, "width": -1}, + ) + + # First temporal update + logger.info("CNMF: First temporal update...") + param_first_temporal = params.get("param_first_temporal", {}) + YrA = compute_trace(Y_fm_chk, A, b, C_chk, f) + YrA = save_minian( + YrA.rename("YrA"), + temp_path, + overwrite=True, + 
chunks={"unit_id": 1, "frame": -1}, + ) + C_new, S_new, b0_new, c0_new, g, mask_temporal = update_temporal( + A, C, YrA=YrA, **param_first_temporal + ) + C = save_minian( + C_new.rename("C").chunk({"unit_id": 1, "frame": -1}), + temp_path, + overwrite=True, + ) + C_chk = save_minian( + C.rename("C_chk"), + temp_path, + overwrite=True, + chunks={"unit_id": -1, "frame": chunk_size["frame"]}, + ) + S = save_minian( + S_new.rename("S").chunk({"unit_id": 1, "frame": -1}), + temp_path, + overwrite=True, + ) + b0 = save_minian( + b0_new.rename("b0").chunk({"unit_id": 1, "frame": -1}), + temp_path, + overwrite=True, + ) + c0 = save_minian( + c0_new.rename("c0").chunk({"unit_id": 1, "frame": -1}), + temp_path, + overwrite=True, + ) + + # First merge + logger.info("CNMF: Merging after first iteration...") + param_first_merge = params.get("param_first_merge", {}) + A, C = unit_merge(A, C, **param_first_merge) + A = save_minian(A.rename("A"), temp_path, overwrite=True) + C = save_minian(C.rename("C"), temp_path, overwrite=True) + C_chk = save_minian( + C.rename("C_chk"), + temp_path, + overwrite=True, + chunks={"unit_id": -1, "frame": chunk_size["frame"]}, + ) + + # ===== CNMF - SECOND ITERATION ===== + # Second spatial update + logger.info("CNMF: Second spatial update...") + param_second_spatial = params.get("param_second_spatial", {}) + A_new, mask_spatial = update_spatial( + Y_hw_chk, A, C, sn_spatial, **param_second_spatial + ) + A = save_minian( + A_new.rename("A"), + temp_path, + overwrite=True, + chunks={"unit_id": 1, "height": -1, "width": -1}, + ) + b_new, f_new = update_background(Y_fm_chk, A, C_chk) + b = save_minian(b_new.rename("b"), temp_path, overwrite=True) + f = save_minian( + f_new.chunk({"frame": chunk_size["frame"]}).rename("f"), + temp_path, + overwrite=True, + ) + + # Second temporal update + logger.info("CNMF: Second temporal update...") + param_second_temporal = params.get("param_second_temporal", {}) + YrA = compute_trace(Y_fm_chk, A, b, C_chk, f) + 
YrA = save_minian( + YrA.rename("YrA"), + temp_path, + overwrite=True, + chunks={"unit_id": 1, "frame": -1}, + ) + C_new, S_new, b0_new, c0_new, g, mask_temporal = update_temporal( + A, C, YrA=YrA, **param_second_temporal + ) + + # Apply temporal mask to keep only valid units + A = A.sel(unit_id=C_new.coords["unit_id"].values) + + # ===== SAVE FINAL RESULTS ===== + logger.info("Saving final results...") + A = save_minian( + A.rename("A"), **{"dpath": output_dir, "overwrite": True} + ) + C = save_minian( + C_new.rename("C"), **{"dpath": output_dir, "overwrite": True} + ) + S = save_minian( + S_new.rename("S"), **{"dpath": output_dir, "overwrite": True} + ) + c0 = save_minian( + c0_new.rename("c0"), **{"dpath": output_dir, "overwrite": True} + ) + b0 = save_minian( + b0_new.rename("b0"), **{"dpath": output_dir, "overwrite": True} + ) + b = save_minian( + b_new.rename("b"), **{"dpath": output_dir, "overwrite": True} + ) + f = save_minian( + f_new.rename("f"), **{"dpath": output_dir, "overwrite": True} + ) + motion = save_minian( + motion.rename("motion"), + **{"dpath": output_dir, "overwrite": True}, + ) + logger.info( + f"Minian processing complete. {A.sizes['unit_id']} units detected." ) except Exception as e: - dview.terminate() + client.close() + cluster.close() + logger.error(f"Minian processing failed: {e}") raise e - else: - cm.stop_server(dview=dview) - logger.info("CNMF analysis complete. 
Resulted saved.") - caiman_temp = os.environ.get("CAIMAN_TEMP") - if caiman_temp is not None: - os.environ["CAIMAN_TEMP"] = caiman_temp - else: - del os.environ["CAIMAN_TEMP"] + finally: + client.close() + cluster.close() + # Load results and prepare for insertion + minian_loader = MinianLoader(output_dir) + key["processing_time"] = minian_loader.creation_time + + # Get minian version if available + try: + import minian + + key["package_version"] = getattr(minian, "__version__", "") + except (ImportError, AttributeError): + key["package_version"] = "" + + file_entries = [ + { + **key, + "file_name": f.name, + "file": f.as_posix(), + } + for f in output_dir.rglob("*") + if f.is_file() + ] - _run_processing() - _, imaging_dataset = get_loader_result( - key, ProcessingTask, full_output_dir=output_dir - ) - caiman_dataset = imaging_dataset - key["processing_time"] = caiman_dataset.creation_time - key["package_version"] = cm.__version__ - file_entries = [ - { - **key, - "file_name": f.relative_to( - get_processed_root_data_dir() - ).as_posix(), - "file": f.as_posix(), - } - for f in output_dir.rglob("*") - if f.is_file() - ] else: raise ValueError(f"Unknown task mode: {task_mode}") return (file_entries, output_dir) @@ -1131,6 +1550,38 @@ def make(self, key): } self.Summary.insert1(summary_images) + elif method == "minian": + minian_dataset = loaded_result + + self.insert1( + {**key, "motion_correct_channel": minian_dataset.alignment_channel} + ) + + # Minian uses rigid motion correction + rigid_correction = minian_dataset.extract_rigid_mc() + if rigid_correction is not None: + rigid_correction.update(**key) + self.RigidMotionCorrection.insert1(rigid_correction) + + # -- summary images -- + ref_image = minian_dataset.ref_image + mean_image = minian_dataset.mean_image + max_proj_image = minian_dataset.max_proj_image + correlation_image = minian_dataset.correlation_map + + summary_images = { + **key, + "ref_image": ( + ref_image if ref_image is not None else np.zeros((1, 
1, 1)) + ), + "average_image": ( + mean_image if mean_image is not None else np.zeros((1, 1, 1)) + ), + "correlation_image": correlation_image, + "max_proj_image": max_proj_image, + } + self.Summary.insert1(summary_images) + else: raise NotImplementedError("Unknown/unimplemented method: {}".format(method)) @@ -1238,6 +1689,57 @@ def make(self, key): cells, ignore_extra_fields=True, allow_direct_insert=True ) + elif method == "minian": + minian_dataset = loaded_result + + # infer "segmentation_channel" - from params if available, else from minian loader + params = (ProcessingParamSet * ProcessingTask & key).fetch1("params") + segmentation_channel = params.get( + "segmentation_channel", minian_dataset.segmentation_channel + ) + + masks, cells = [], [] + for mask in minian_dataset.masks: + masks.append( + { + **key, + "segmentation_channel": segmentation_channel, + "mask": mask["mask_id"], + "mask_npix": mask["mask_npix"], + "mask_center_x": mask["mask_center_x"], + "mask_center_y": mask["mask_center_y"], + "mask_center_z": mask["mask_center_z"], + "mask_xpix": mask["mask_xpix"], + "mask_ypix": mask["mask_ypix"], + "mask_zpix": mask["mask_zpix"], + "mask_weights": mask["mask_weights"], + } + ) + if mask["accepted"]: + cells.append( + { + **key, + "mask_classification_method": "minian_default_classifier", + "mask": mask["mask_id"], + "mask_type": "soma", + } + ) + + self.insert1(key) + self.Mask.insert(masks, ignore_extra_fields=True) + + if cells: + MaskClassification.insert1( + { + **key, + "mask_classification_method": "minian_default_classifier", + }, + allow_direct_insert=True, + ) + MaskClassification.MaskType.insert( + cells, ignore_extra_fields=True, allow_direct_insert=True + ) + else: raise NotImplementedError(f"Unknown/unimplemented method: {method}") @@ -1255,7 +1757,7 @@ class MaskClassificationMethod(dj.Lookup): mask_classification_method: varchar(48) """ - contents = zip(["caiman_default_classifier"]) + contents = zip(["caiman_default_classifier", 
"minian_default_classifier"]) @schema @@ -1360,6 +1862,29 @@ def make(self, key): self.insert1(key) self.Trace.insert(fluo_traces) + elif method == "minian": + minian_dataset = loaded_result + + # infer "segmentation_channel" - from params if available, else from minian loader + params = (ProcessingParamSet * ProcessingTask & key).fetch1("params") + segmentation_channel = params.get( + "segmentation_channel", minian_dataset.segmentation_channel + ) + + fluo_traces = [] + for mask in minian_dataset.masks: + fluo_traces.append( + { + **key, + "mask": mask["mask_id"], + "fluorescence_channel": segmentation_channel, + "fluorescence": mask["inferred_trace"], + } + ) + + self.insert1(key) + self.Trace.insert(fluo_traces) + else: raise NotImplementedError("Unknown/unimplemented method: {}".format(method)) @@ -1369,14 +1894,14 @@ class ActivityExtractionMethod(dj.Lookup): """Lookup table for activity extraction methods. Attributes: - extraction_method (foreign key, varchar(32) ): Extraction method from CaImAn. + extraction_method (foreign key, varchar(32) ): Extraction method from CaImAn or Minian. 
""" definition = """ extraction_method: varchar(32) """ - contents = zip(["caiman_deconvolution", "caiman_dff"]) + contents = zip(["caiman_deconvolution", "caiman_dff", "minian_deconvolution"]) @schema @@ -1421,7 +1946,15 @@ def key_source(self): & 'extraction_method LIKE "caiman%"' ) - return caiman_key_source.proj() + minian_key_source = ( + Fluorescence + * ActivityExtractionMethod + * ProcessingParamSet.proj("processing_method") + & 'processing_method = "minian"' + & 'extraction_method LIKE "minian%"' + ) + + return caiman_key_source.proj() + minian_key_source.proj() def make(self, key): """Populates table with activity trace data.""" @@ -1456,6 +1989,28 @@ def make(self, key): for mask in caiman_dataset.masks ) + elif method == "minian": + minian_dataset = loaded_result + + if key["extraction_method"] == "minian_deconvolution": + # infer "segmentation_channel" - from params if available, else from minian loader + params = (ProcessingParamSet * ProcessingTask & key).fetch1("params") + segmentation_channel = params.get( + "segmentation_channel", minian_dataset.segmentation_channel + ) + + self.insert1(key) + self.Trace.insert( + dict( + key, + mask=mask["mask_id"], + fluorescence_channel=segmentation_channel, + activity_trace=mask["spikes"], + ) + for mask in minian_dataset.masks + if "spikes" in mask + ) + else: raise NotImplementedError("Unknown/unimplemented method: {}".format(method)) @@ -1526,6 +2081,181 @@ def make(self, key): # Helper Functions --------------------------------------------------------------------- +class MinianLoader: + """Loader class for Minian analysis results. + + Provides a consistent interface for accessing Minian outputs similar to CaImAn loader. + """ + + def __init__(self, output_dir): + """Initialize the MinianLoader. + + Args: + output_dir: Path to the directory containing Minian zarr outputs. 
+ """ + from minian.utilities import open_minian + + self.output_dir = pathlib.Path(output_dir) + self._minian_ds = open_minian(str(self.output_dir)) + + # Load core arrays + self._A = self._minian_ds.get( + "A" + ) # Spatial footprints (unit_id, height, width) + self._C = self._minian_ds.get("C") # Temporal traces (unit_id, frame) + self._S = self._minian_ds.get( + "S" + ) # Deconvolved activity/spikes (unit_id, frame) + self._b = self._minian_ds.get("b") # Background spatial (height, width) + self._f = self._minian_ds.get("f") # Background temporal (frame) + self._b0 = self._minian_ds.get("b0") # Baseline (unit_id, frame) + self._c0 = self._minian_ds.get("c0") # Initial calcium (unit_id, frame) + + # Try to load motion correction data + self._motion = self._minian_ds.get("motion") + + # Try to load reference/max projection images + self._max_proj = self._minian_ds.get("max_proj") + self._varr_ref = self._minian_ds.get("varr_ref") + + @property + def minian_dataset(self): + """Return the raw Minian xarray Dataset.""" + return self._minian_ds + + @property + def creation_time(self): + """Get the creation time of the Minian output.""" + # Use the modification time of the output directory + return datetime.fromtimestamp(self.output_dir.stat().st_mtime, tz=timezone.utc) + + @property + def alignment_channel(self): + """Channel used for motion correction (default 0 for miniscope).""" + return 0 + + @property + def segmentation_channel(self): + """Channel used for segmentation (default 0 for miniscope).""" + return 0 + + @property + def is_pw_rigid(self): + """Minian uses rigid motion correction by default.""" + return False + + @property + def motion_shifts(self): + """Return motion correction shifts as dict with 'x' and 'y' keys.""" + if self._motion is not None: + motion_data = self._motion.compute() + return { + "x": motion_data.sel(shift_dim="width").values, + "y": motion_data.sel(shift_dim="height").values, + } + return None + + def extract_rigid_mc(self): + 
"""Extract rigid motion correction data in format compatible with MotionCorrection table.""" + shifts = self.motion_shifts + if shifts is None: + return None + + return { + "x_shifts": shifts["x"], + "y_shifts": shifts["y"], + "x_std": np.std(shifts["x"]), + "y_std": np.std(shifts["y"]), + } + + @property + def ref_image(self): + """Return reference image used for motion correction.""" + if self._varr_ref is not None: + # Take mean across frames for reference + return self._varr_ref.mean(dim="frame").compute().values[np.newaxis, :, :] + return None + + @property + def mean_image(self): + """Return mean image (average across frames).""" + if self._varr_ref is not None: + return self._varr_ref.mean(dim="frame").compute().values[np.newaxis, :, :] + return None + + @property + def max_proj_image(self): + """Return maximum projection image.""" + if self._max_proj is not None: + return self._max_proj.compute().values[np.newaxis, :, :] + elif self._varr_ref is not None: + return self._varr_ref.max(dim="frame").compute().values[np.newaxis, :, :] + return None + + @property + def correlation_map(self): + """Return correlation image (computed during initialization).""" + # Minian doesn't store correlation image by default + # Return None or compute if needed + return None + + @property + def masks(self): + """Extract mask information in format compatible with Segmentation table. + + Yields dict for each unit with mask properties. 
+ """ + if self._A is None: + return [] + + A_data = self._A.compute() + C_data = self._C.compute() if self._C is not None else None + S_data = self._S.compute() if self._S is not None else None + + masks = [] + for unit_idx, unit_id in enumerate(A_data.coords["unit_id"].values): + footprint = A_data.sel(unit_id=unit_id).values + + # Find non-zero pixels + mask_indices = np.where(footprint > 0) + if len(mask_indices[0]) == 0: + continue + + y_pix = mask_indices[0] + x_pix = mask_indices[1] + weights = footprint[y_pix, x_pix] + + mask_dict = { + "mask_id": int(unit_id), + "mask_npix": len(x_pix), + "mask_center_x": int(np.mean(x_pix)), + "mask_center_y": int(np.mean(y_pix)), + "mask_center_z": None, + "mask_xpix": x_pix, + "mask_ypix": y_pix, + "mask_zpix": None, + "mask_weights": weights, + "accepted": True, # All units accepted (no manual curation) + } + + # Add trace data if available + if C_data is not None: + mask_dict["inferred_trace"] = C_data.sel(unit_id=unit_id).values + if S_data is not None: + mask_dict["spikes"] = S_data.sel(unit_id=unit_id).values + + masks.append(mask_dict) + + return masks + + @property + def num_units(self): + """Return number of detected units.""" + if self._A is not None: + return len(self._A.coords["unit_id"]) + return 0 + + def get_loader_result(key, table, full_output_dir=None) -> tuple: """Retrieve the loaded processed imaging results from the loader (e.g. caiman, etc.) 
@@ -1548,6 +2278,8 @@ def get_loader_result(key, table, full_output_dir=None) -> tuple: from element_interface import caiman_loader loaded_output = caiman_loader.CaImAn(output_dir) + elif method == "minian": + loaded_output = MinianLoader(output_dir) else: raise NotImplementedError("Unknown/unimplemented method: {}".format(method)) diff --git a/setup.py b/setup.py index ddfc8bf..56f6b2c 100644 --- a/setup.py +++ b/setup.py @@ -41,5 +41,9 @@ "element-session @ git+https://github.com/datajoint/element-session.git", ], "tests": ["pytest", "pytest-cov", "shutils"], + "minian": [ + "dask", + "minian @ git+https://github.com/kushalbakshi/minian.git", + ] }, ) \ No newline at end of file From 1b6263910a371956eabe04a6c0bed3a4354dc191 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 12:37:04 -0500 Subject: [PATCH 02/36] version pin xarray --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 56f6b2c..649e0ee 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ "tests": ["pytest", "pytest-cov", "shutils"], "minian": [ "dask", + "xarray==2022.3.0", "minian @ git+https://github.com/kushalbakshi/minian.git", ] }, From 20a7370c8d752833f98689fb4d4bfb69ac9dfc43 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 12:39:45 -0500 Subject: [PATCH 03/36] version pin dask --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 649e0ee..342dbf7 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ ], "tests": ["pytest", "pytest-cov", "shutils"], "minian": [ - "dask", + "dask==2022.5.0", "xarray==2022.3.0", "minian @ git+https://github.com/kushalbakshi/minian.git", ] From 569deec859a7d35533c7ac00021099b010a825bc Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 12:41:39 -0500 Subject: [PATCH 04/36] downgrade pandas --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 342dbf7..aad3b79 100644 --- a/setup.py +++ 
b/setup.py @@ -44,6 +44,7 @@ "minian": [ "dask==2022.5.0", "xarray==2022.3.0", + "pandas==1.5.3", "minian @ git+https://github.com/kushalbakshi/minian.git", ] }, From c4adef98858b4a225a3137992c2f3529f51eda61 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 12:49:20 -0500 Subject: [PATCH 05/36] Remove unused imports --- element_miniscope/miniscope.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index d1c42b3..bd97c89 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -972,20 +972,16 @@ def _run_processing(): import psutil from dask.distributed import Client, LocalCluster from minian.cnmf import ( - compute_AtC, compute_trace, get_noise_fft, - smooth_sig, unit_merge, update_spatial, update_temporal, update_background, ) from minian.initialization import ( - gmm_refine, initA, initC, - intensity_refine, ks_refine, pnr_refine, seeds_init, @@ -998,22 +994,9 @@ def _run_processing(): TaskAnnotation, get_optimal_chk, load_videos, - open_minian, save_minian, ) - from minian.visualization import ( - CNMFViewer, - export_plot, - VArrayViewer, - generate_videos, - visualize_gmm_fit, - visualize_motion, - visualize_preprocess, - visualize_seeds, - visualize_spatial_update, - visualize_temporal_update, - write_video, - ) + from minian.visualization import write_video # Setup Dask cluster with env vars or auto-config n_workers = int( From 1a692a8e0504c94eac3e108d207b99114a28534b Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 12:54:28 -0500 Subject: [PATCH 06/36] Update video pattern --- element_miniscope/miniscope.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index bd97c89..32d3261 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1037,10 +1037,10 @@ def _run_processing(): 
logger.info("Loading videos...") param_load_videos = params.get( "param_load_videos", - {"pattern": "r'.*\.avi$'", "downsample_strategy": "subset"}, + {"pattern": "r'Miniscope_video.avi$'", "downsample_strategy": "subset"}, ) video_array = load_videos( - str(avi_files[0].parent), **param_load_videos + str(pathlib.Path(avi_files[0]).parent), **param_load_videos ) chunk_size, _ = get_optimal_chk(video_array, dtype=float) video_array = video_array.chunk( From 20ca3e67be505e2a4c525353ac943b1144796711 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 12:58:02 -0500 Subject: [PATCH 07/36] hardcode video pattern --- element_miniscope/miniscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 32d3261..52dcf3f 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1037,7 +1037,7 @@ def _run_processing(): logger.info("Loading videos...") param_load_videos = params.get( "param_load_videos", - {"pattern": "r'Miniscope_video.avi$'", "downsample_strategy": "subset"}, + {"pattern": "Miniscope_video.avi", "downsample_strategy": "subset"}, ) video_array = load_videos( str(pathlib.Path(avi_files[0]).parent), **param_load_videos From 8cde0ac21ca7f58613c74218deadeb539420b64d Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 13:07:13 -0500 Subject: [PATCH 08/36] set more conversative memory and worker count --- element_miniscope/miniscope.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 52dcf3f..acd54b8 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1002,11 +1002,11 @@ def _run_processing(): n_workers = int( os.getenv( "MINIAN_NWORKERS", - max(1, int(multiprocessing.cpu_count() * 0.8)), + max(1, int(multiprocessing.cpu_count() * 0.4)), ) ) memory_total = psutil.virtual_memory().total - memory_per_worker = 
int(memory_total * 0.8 / n_workers / 1e9) + memory_per_worker = int(memory_total * 0.4 / n_workers / 1e9) memory_limit = os.getenv( "MINIAN_MEMORY_LIMIT", f"{memory_per_worker}GB" ) From 1d8a00d0cd9c4a480e2409645d34ce46a61890d9 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 13:49:41 -0500 Subject: [PATCH 09/36] version pin distributed --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index aad3b79..4741da3 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "dask==2022.5.0", "xarray==2022.3.0", "pandas==1.5.3", + "distributed<2024.1.0" "minian @ git+https://github.com/kushalbakshi/minian.git", ] }, From 0b1f989ed46bc847660bc9e031d2a379a9547aa3 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 13:50:23 -0500 Subject: [PATCH 10/36] fix typo --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4741da3..45347cd 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "dask==2022.5.0", "xarray==2022.3.0", "pandas==1.5.3", - "distributed<2024.1.0" + "distributed<2024.1.0", "minian @ git+https://github.com/kushalbakshi/minian.git", ] }, From 35b1a43d99688cd0f31400c8a71993bfc1f23ddc Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 13:58:38 -0500 Subject: [PATCH 11/36] Version pin distributed --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 45347cd..4a054cb 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "dask==2022.5.0", "xarray==2022.3.0", "pandas==1.5.3", - "distributed<2024.1.0", + "distributed=2023.12.1", "minian @ git+https://github.com/kushalbakshi/minian.git", ] }, From 8d0fe23a8e72b5d9c87f719fce50760f810e9ddc Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 13:59:48 -0500 Subject: [PATCH 12/36] version pin distributed to an even older version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 
4a054cb..a036b3b 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "dask==2022.5.0", "xarray==2022.3.0", "pandas==1.5.3", - "distributed=2023.12.1", + "distributed==2022.5.0", "minian @ git+https://github.com/kushalbakshi/minian.git", ] }, From 4ce9f96aecdcf4b06fe8fd60781b35b400f86875 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 14:52:28 -0500 Subject: [PATCH 13/36] Fix reading from environment variables --- element_miniscope/miniscope.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index acd54b8..a068a3a 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1007,9 +1007,13 @@ def _run_processing(): ) memory_total = psutil.virtual_memory().total memory_per_worker = int(memory_total * 0.4 / n_workers / 1e9) - memory_limit = os.getenv( - "MINIAN_MEMORY_LIMIT", f"{memory_per_worker}GB" - ) + + memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT") + if memory_limit_env: + # Add GB suffix if user provided just a number + memory_limit = memory_limit_env if any(c.isalpha() for c in memory_limit_env) else f"{memory_limit_env}GB" + else: + memory_limit = f"{memory_per_worker}GB" # Set intermediate storage path temp_path = str(output_dir / "intermediate") From a770a02e95149033515fa11709371f19d6d71f07 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 18:06:46 -0500 Subject: [PATCH 14/36] fix loading videos --- element_miniscope/miniscope.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index a068a3a..2c84b23 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1039,10 +1039,12 @@ def _run_processing(): # ===== LOAD VIDEOS ===== logger.info("Loading videos...") - param_load_videos = params.get( - "param_load_videos", - {"pattern": "Miniscope_video.avi", "downsample_strategy": "subset"}, 
- ) + default_load_params = { + "pattern": "Miniscope_video.avi", + "dtype": "uint8", + "downsample_strategy": "subset" + } + param_load_videos = {**default_load_params, **params.get("param_load_videos", {})} video_array = load_videos( str(pathlib.Path(avi_files[0]).parent), **param_load_videos ) From 6bf56182c34fb478dcd55e6ca72b6251a888661b Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 4 Feb 2026 18:32:30 -0500 Subject: [PATCH 15/36] General fixes to minian implementation --- element_miniscope/miniscope.py | 395 ++++++++++++++++++++++----------- 1 file changed, 269 insertions(+), 126 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 2c84b23..12d7ba9 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -989,7 +989,6 @@ def _run_processing(): ) from minian.motion_correction import apply_transform, estimate_motion from minian.preprocessing import denoise, remove_background - from minian.utilities import ( TaskAnnotation, get_optimal_chk, @@ -997,23 +996,51 @@ def _run_processing(): save_minian, ) from minian.visualization import write_video + - # Setup Dask cluster with env vars or auto-config + # ===== CONTAINER-AWARE MEMORY DETECTION ===== + def get_container_memory_limit(): + """Get memory limit respecting container cgroups (v1 and v2).""" + # Try cgroup v2 first (modern Docker/Kubernetes) + try: + with open("/sys/fs/cgroup/memory.max") as f: + limit = f.read().strip() + if limit != "max": + return int(limit) + except (FileNotFoundError, PermissionError): + pass + # Try cgroup v1 (older Docker) + try: + with open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f: + limit = int(f.read().strip()) + # cgroup v1 returns a very large number if unlimited + if limit < 9223372036854771712: + return limit + except (FileNotFoundError, PermissionError): + pass + # Fall back to psutil (bare metal or unrestricted container) + return psutil.virtual_memory().total + + # ===== DASK CLUSTER 
CONFIGURATION ===== + memory_total = get_container_memory_limit() n_workers = int( os.getenv( "MINIAN_NWORKERS", - max(1, int(multiprocessing.cpu_count() * 0.4)), + max(1, min(int(multiprocessing.cpu_count() * 0.4), 8)), # Cap at 8 workers ) ) - memory_total = psutil.virtual_memory().total - memory_per_worker = int(memory_total * 0.4 / n_workers / 1e9) + # Reserve 20% for system overhead, distribute rest among workers + memory_per_worker = int(memory_total * 0.8 / n_workers) memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT") if memory_limit_env: - # Add GB suffix if user provided just a number - memory_limit = memory_limit_env if any(c.isalpha() for c in memory_limit_env) else f"{memory_limit_env}GB" + memory_limit = ( + memory_limit_env + if any(c.isalpha() for c in memory_limit_env) + else f"{memory_limit_env}GB" + ) else: - memory_limit = f"{memory_per_worker}GB" + memory_limit = f"{memory_per_worker // (1024**3)}GB" # Set intermediate storage path temp_path = str(output_dir / "intermediate") @@ -1022,7 +1049,8 @@ def _run_processing(): # Start Dask cluster logger.info( - f"Starting Minian processing with {n_workers} workers, {memory_limit} memory limit..." + f"Starting Minian processing with {n_workers} workers, " + f"{memory_limit} memory limit per worker..." 
) cluster = LocalCluster( n_workers=n_workers, @@ -1036,46 +1064,54 @@ def _run_processing(): client = Client(cluster) try: - # ===== LOAD VIDEOS ===== logger.info("Loading videos...") default_load_params = { - "pattern": "Miniscope_video.avi", - "dtype": "uint8", - "downsample_strategy": "subset" + "pattern": r".*\.avi$", + "dtype": np.uint8, + "downsample": dict(frame=1, height=1, width=1), + "downsample_strategy": "subset", + } + param_load_videos = { + **default_load_params, + **params.get("param_load_videos", {}), } - param_load_videos = {**default_load_params, **params.get("param_load_videos", {})} video_array = load_videos( str(pathlib.Path(avi_files[0]).parent), **param_load_videos ) chunk_size, _ = get_optimal_chk(video_array, dtype=float) - video_array = video_array.chunk( - {"frame": chunk_size["frame"], "height": -1, "width": -1} + video_array = save_minian( + video_array.chunk( + {"frame": chunk_size["frame"], "height": -1, "width": -1} + ).rename("video_array"), + temp_path, + overwrite=True, + ) + logger.info( + f"Loaded video: {video_array.sizes['frame']} frames, " + f"{video_array.sizes['height']}x{video_array.sizes['width']} pixels" ) # ===== PREPROCESSING ===== - logger.info("Preprocessing: glow removal...") - subset = None - video_array_base_ref = video_array.sel(subset) - video_min_removed = video_array_base_ref.min("frame").compute() - video_array_ref = video_array_base_ref - video_min_removed + logger.info("Preprocessing: glow removal (subtracting minimum)...") + video_min = video_array.min("frame").compute() + video_array_ref = video_array - video_min + logger.info("Preprocessing: denoising...") param_denoise = params.get( "param_denoise", {"method": "median", "ksize": 7} ) - video_array_denoised = denoise(video_array_ref, **param_denoise) + video_array_ref = denoise(video_array_ref, **param_denoise) logger.info("Preprocessing: background removal...") param_background_removal = params.get( "param_background_removal", {"method": "tophat", 
"wnd": 10} ) - video_array_bg_removed = remove_background( - video_array_denoised, **param_background_removal - ) + video_array_ref = remove_background(video_array_ref, **param_background_removal) - logger.info("Preprocessing: saving pre-processed video...") - video_array_preprocessed = save_minian( - video_array_bg_removed.rename("video_array_preprocessed"), + logger.info("Preprocessing: saving preprocessed video...") + video_array_ref = save_minian( + video_array_ref.rename("video_array_ref"), dpath=temp_path, overwrite=True, ) @@ -1085,17 +1121,21 @@ def _run_processing(): param_estimate_motion = params.get( "param_estimate_motion", {"dim": "frame"} ) - motion = estimate_motion( - video_array_preprocessed, **param_estimate_motion - ) + motion = estimate_motion(video_array_ref, **param_estimate_motion) motion = save_minian( - motion.rename("motion"), dpath=temp_path, overwrite=True + motion.rename("motion").chunk({"frame": chunk_size["frame"]}), + dpath=temp_path, + overwrite=True, ) logger.info("Applying motion correction...") - Y = apply_transform(video_array_preprocessed, motion, fill=0) + Y = apply_transform(video_array_ref, motion, fill=0) + + # Save two versions with different chunking strategies Y_fm_chk = save_minian( - Y.astype(float).rename("Y_fm_chk"), temp_path, overwrite=True + Y.astype(float).rename("Y_fm_chk"), + temp_path, + overwrite=True, ) Y_hw_chk = save_minian( Y_fm_chk.rename("Y_hw_chk"), @@ -1107,50 +1147,75 @@ def _run_processing(): "width": chunk_size["width"], }, ) - write_video(Y_fm_chk, "motion_corrected_movie.mp4", output_dir) - logger.info("Motion Correction: Create and save max projection...") - max_proj = Y_fm_chk.max("frame").compute() + # Save motion corrected video + logger.info("Saving motion corrected video...") + write_video(Y_fm_chk, "motion_corrected.mp4", str(output_dir)) + + # Create and save max projection + logger.info("Creating max projection...") max_proj = save_minian( - max_proj.rename("max_proj"), - **{"dpath": 
output_dir, "overwrite": True}, - ) + Y_fm_chk.max("frame").rename("max_proj"), + dpath=str(output_dir), + overwrite=True, + ).compute() - # ===== INITIALIZATION ===== + # ===== SEED INITIALIZATION ===== logger.info("Initializing seeds...") - param_seeds_init = params.get("param_seeds_init", {}) + param_seeds_init = params.get( + "param_seeds_init", + { + "wnd_size": 1000, + "method": "rolling", + "stp_size": 500, + "max_wnd": 15, + "diff_thres": 3, + }, + ) seeds = seeds_init(Y_fm_chk, **param_seeds_init) + logger.info(f"Initial seeds: {len(seeds)}") logger.info("Refining seeds with PNR...") - param_pnr_refine = params.get("param_pnr_refine", {}) + param_pnr_refine = params.get( + "param_pnr_refine", {"noise_freq": 0.06, "thres": 1} + ) seeds, pnr, gmm = pnr_refine(Y_hw_chk, seeds, **param_pnr_refine) logger.info("Refining seeds with KS test...") - param_ks_refine = params.get("param_ks_refine", {}) + param_ks_refine = params.get("param_ks_refine", {"sig": 0.05}) seeds = ks_refine(Y_hw_chk, seeds, **param_ks_refine) logger.info("Merging seeds...") - param_seeds_merge = params.get("param_seeds_merge", {}) - seeds_final = seeds[ - seeds["mask_ks"] & seeds["mask_pnr"] - ].reset_index(drop=True) + param_seeds_merge = params.get( + "param_seeds_merge", + {"thres_dist": 10, "thres_corr": 0.8, "noise_freq": 0.06}, + ) + seeds_final = seeds[seeds["mask_ks"] & seeds["mask_pnr"]].reset_index( + drop=True + ) seeds_final = seeds_merge( Y_hw_chk, max_proj, seeds_final, **param_seeds_merge ) + n_seeds = seeds_final["mask_mrg"].sum() + logger.info(f"Seeds after filtering and merging: {n_seeds}") - logger.info( - "Initializing spatial footprints (A) and temporal traces (C)..." + if n_seeds == 0: + raise ValueError( + "No seeds remaining after refinement. " + "Consider adjusting param_seeds_init or param_pnr_refine thresholds." 
+ ) + + # ===== INITIALIZE A AND C ===== + logger.info("Initializing spatial footprints (A)...") + param_initialize = params.get( + "param_initialize", {"thres_corr": 0.8, "wnd": 10, "noise_freq": 0.06} ) - param_initialize = params.get("param_initialize", {}) A_init = initA( - Y_hw_chk, - seeds_final[seeds_final["mask_mrg"]], - **param_initialize, - ) - A_init = save_minian( - A_init.rename("A_init"), temp_path, overwrite=True + Y_hw_chk, seeds_final[seeds_final["mask_mrg"]], **param_initialize ) + A_init = save_minian(A_init.rename("A_init"), temp_path, overwrite=True) + logger.info("Initializing temporal traces (C)...") C_init = initC(Y_fm_chk, A_init) C_init = save_minian( C_init.rename("C_init"), @@ -1159,9 +1224,9 @@ def _run_processing(): chunks={"unit_id": 1, "frame": -1}, ) - # Initial merge - logger.info("Merging initial components...") - param_init_merge = params.get("param_init_merge", {}) + # Initial unit merge + logger.info("Initial unit merge...") + param_init_merge = params.get("param_init_merge", {"thres_corr": 0.8}) A, C = unit_merge(A_init, C_init, **param_init_merge) A = save_minian(A.rename("A"), temp_path, overwrite=True) C = save_minian(C.rename("C"), temp_path, overwrite=True) @@ -1171,27 +1236,49 @@ def _run_processing(): overwrite=True, chunks={"unit_id": -1, "frame": chunk_size["frame"]}, ) + logger.info(f"Units after initial merge: {A.sizes['unit_id']}") - # ===== CNMF - FIRST ITERATION ===== - logger.info("CNMF: Computing noise statistics...") - param_get_noise = params.get("param_get_noise", {}) + # ===== INITIALIZE BACKGROUND ===== + logger.info("Initializing background terms...") + b, f = update_background(Y_fm_chk, A, C_chk) + b = save_minian(b.rename("b"), temp_path, overwrite=True) + f = save_minian(f.rename("f"), temp_path, overwrite=True) + + # ===== COMPUTE NOISE STATISTICS ===== + logger.info("Computing noise statistics...") + param_get_noise = params.get("param_get_noise", {"noise_range": (0.06, 0.5)}) sn_spatial = 
get_noise_fft(Y_hw_chk, **param_get_noise) sn_spatial = save_minian( sn_spatial.rename("sn_spatial"), temp_path, overwrite=True ) - # Initialize background - logger.info("CNMF: Initializing background...") - b, f = update_background(Y_fm_chk, A, C_chk) - b = save_minian(b.rename("b"), temp_path, overwrite=True) - f = save_minian(f.rename("f"), temp_path, overwrite=True) + # ======================================== + # CNMF ITERATION 1 + # ======================================== - # First spatial update - logger.info("CNMF: First spatial update...") - param_first_spatial = params.get("param_first_spatial", {}) - A_new, mask_spatial = update_spatial( + # ----- First Spatial Update ----- + logger.info("CNMF Iteration 1: Spatial update...") + param_first_spatial = params.get( + "param_first_spatial", + {"dl_wnd": 5, "sparse_penal": 0.01, "size_thres": (25, None)}, + ) + A_new, mask, norm_fac = update_spatial( Y_hw_chk, A, C, sn_spatial, **param_first_spatial ) + + # Apply mask and normalization to C (CRITICAL) + C_new = save_minian( + (C.sel(unit_id=mask) * norm_fac).rename("C_new"), + temp_path, + overwrite=True, + ) + C_chk_new = save_minian( + (C_chk.sel(unit_id=mask) * norm_fac).rename("C_chk_new"), + temp_path, + overwrite=True, + ) + + # Save updated A A = save_minian( A_new.rename("A"), temp_path, @@ -1199,19 +1286,43 @@ def _run_processing(): chunks={"unit_id": 1, "height": -1, "width": -1}, ) - # First temporal update - logger.info("CNMF: First temporal update...") - param_first_temporal = params.get("param_first_temporal", {}) - YrA = compute_trace(Y_fm_chk, A, b, C_chk, f) + # Update background after spatial update + b_new, f_new = update_background(Y_fm_chk, A, C_chk_new) + b = save_minian(b_new.rename("b"), temp_path, overwrite=True) + f = save_minian( + f_new.chunk({"frame": chunk_size["frame"]}).rename("f"), + temp_path, + overwrite=True, + ) + C = save_minian(C_new.rename("C"), temp_path, overwrite=True) + C_chk = save_minian(C_chk_new.rename("C_chk"), 
temp_path, overwrite=True) + + logger.info(f"Units after first spatial update: {A.sizes['unit_id']}") + + # ----- First Temporal Update ----- + logger.info("CNMF Iteration 1: Temporal update...") YrA = save_minian( - YrA.rename("YrA"), + compute_trace(Y_fm_chk, A, b, C_chk, f).rename("YrA"), temp_path, overwrite=True, chunks={"unit_id": 1, "frame": -1}, ) - C_new, S_new, b0_new, c0_new, g, mask_temporal = update_temporal( + + param_first_temporal = params.get( + "param_first_temporal", + { + "noise_freq": 0.06, + "sparse_penal": 0.5, + "p": 1, + "add_lag": 20, + "jac_thres": 0.2, + }, + ) + C_new, S_new, b0_new, c0_new, g, mask = update_temporal( A, C, YrA=YrA, **param_first_temporal ) + + # Save temporal results C = save_minian( C_new.rename("C").chunk({"unit_id": 1, "frame": -1}), temp_path, @@ -1239,12 +1350,20 @@ def _run_processing(): overwrite=True, ) - # First merge - logger.info("CNMF: Merging after first iteration...") - param_first_merge = params.get("param_first_merge", {}) - A, C = unit_merge(A, C, **param_first_merge) - A = save_minian(A.rename("A"), temp_path, overwrite=True) - C = save_minian(C.rename("C"), temp_path, overwrite=True) + # Sync A with C after temporal update drops units + A = A.sel(unit_id=C.coords["unit_id"].values) + + logger.info(f"Units after first temporal update: {A.sizes['unit_id']}") + + # ----- First Merge ----- + logger.info("CNMF Iteration 1: Merging units...") + param_first_merge = params.get("param_first_merge", {"thres_corr": 0.8}) + A_mrg, C_mrg, [sig_mrg] = unit_merge( + A, C, [C + b0 + c0], **param_first_merge + ) + + A = save_minian(A_mrg.rename("A"), temp_path, overwrite=True) + C = save_minian(C_mrg.rename("C"), temp_path, overwrite=True) C_chk = save_minian( C.rename("C_chk"), temp_path, @@ -1252,82 +1371,107 @@ def _run_processing(): chunks={"unit_id": -1, "frame": chunk_size["frame"]}, ) - # ===== CNMF - SECOND ITERATION ===== - # Second spatial update - logger.info("CNMF: Second spatial update...") - 
param_second_spatial = params.get("param_second_spatial", {}) - A_new, mask_spatial = update_spatial( + logger.info(f"Units after first merge: {A.sizes['unit_id']}") + + # ======================================== + # CNMF ITERATION 2 + # ======================================== + + # ----- Second Spatial Update ----- + logger.info("CNMF Iteration 2: Spatial update...") + param_second_spatial = params.get( + "param_second_spatial", + {"dl_wnd": 5, "sparse_penal": 0.01, "size_thres": (25, None)}, + ) + A_new, mask, norm_fac = update_spatial( Y_hw_chk, A, C, sn_spatial, **param_second_spatial ) + + # Apply mask and normalization to C + C_new = save_minian( + (C.sel(unit_id=mask) * norm_fac).rename("C_new"), + temp_path, + overwrite=True, + ) + C_chk_new = save_minian( + (C_chk.sel(unit_id=mask) * norm_fac).rename("C_chk_new"), + temp_path, + overwrite=True, + ) + + # Save updated A A = save_minian( A_new.rename("A"), temp_path, overwrite=True, chunks={"unit_id": 1, "height": -1, "width": -1}, ) - b_new, f_new = update_background(Y_fm_chk, A, C_chk) + + # Update background + b_new, f_new = update_background(Y_fm_chk, A, C_chk_new) b = save_minian(b_new.rename("b"), temp_path, overwrite=True) f = save_minian( f_new.chunk({"frame": chunk_size["frame"]}).rename("f"), temp_path, overwrite=True, ) + C = save_minian(C_new.rename("C"), temp_path, overwrite=True) + C_chk = save_minian(C_chk_new.rename("C_chk"), temp_path, overwrite=True) + + logger.info(f"Units after second spatial update: {A.sizes['unit_id']}") - # Second temporal update - logger.info("CNMF: Second temporal update...") - param_second_temporal = params.get("param_second_temporal", {}) - YrA = compute_trace(Y_fm_chk, A, b, C_chk, f) + # ----- Second Temporal Update ----- + logger.info("CNMF Iteration 2: Temporal update...") YrA = save_minian( - YrA.rename("YrA"), + compute_trace(Y_fm_chk, A, b, C_chk, f).rename("YrA"), temp_path, overwrite=True, chunks={"unit_id": 1, "frame": -1}, ) - C_new, S_new, b0_new, 
c0_new, g, mask_temporal = update_temporal( + + param_second_temporal = params.get( + "param_second_temporal", + { + "noise_freq": 0.06, + "sparse_penal": 0.5, + "p": 1, + "add_lag": 20, + "jac_thres": 0.2, + }, + ) + C_new, S_new, b0_new, c0_new, g, mask = update_temporal( A, C, YrA=YrA, **param_second_temporal ) - # Apply temporal mask to keep only valid units + # Sync A with final C A = A.sel(unit_id=C_new.coords["unit_id"].values) + logger.info(f"Final units after second temporal update: {A.sizes['unit_id']}") + # ===== SAVE FINAL RESULTS ===== - logger.info("Saving final results...") - A = save_minian( - A.rename("A"), **{"dpath": output_dir, "overwrite": True} - ) - C = save_minian( - C_new.rename("C"), **{"dpath": output_dir, "overwrite": True} - ) - S = save_minian( - S_new.rename("S"), **{"dpath": output_dir, "overwrite": True} - ) - c0 = save_minian( - c0_new.rename("c0"), **{"dpath": output_dir, "overwrite": True} - ) - b0 = save_minian( - b0_new.rename("b0"), **{"dpath": output_dir, "overwrite": True} - ) - b = save_minian( - b_new.rename("b"), **{"dpath": output_dir, "overwrite": True} - ) - f = save_minian( - f_new.rename("f"), **{"dpath": output_dir, "overwrite": True} - ) - motion = save_minian( - motion.rename("motion"), - **{"dpath": output_dir, "overwrite": True}, - ) + logger.info("Saving final results to output directory...") + final_save_params = {"dpath": str(output_dir), "overwrite": True} + + A = save_minian(A.rename("A"), **final_save_params) + C = save_minian(C_new.rename("C"), **final_save_params) + S = save_minian(S_new.rename("S"), **final_save_params) + c0 = save_minian(c0_new.rename("c0"), **final_save_params) + b0 = save_minian(b0_new.rename("b0"), **final_save_params) + b = save_minian(b_new.rename("b"), **final_save_params) + f = save_minian(f_new.rename("f"), **final_save_params) + motion = save_minian(motion.rename("motion"), **final_save_params) + logger.info( f"Minian processing complete. {A.sizes['unit_id']} units detected." 
) + except Exception as e: - client.close() - cluster.close() logger.error(f"Minian processing failed: {e}") raise e finally: client.close() cluster.close() + # Load results and prepare for insertion minian_loader = MinianLoader(output_dir) key["processing_time"] = minian_loader.creation_time @@ -1335,7 +1479,6 @@ def _run_processing(): # Get minian version if available try: import minian - key["package_version"] = getattr(minian, "__version__", "") except (ImportError, AttributeError): key["package_version"] = "" From 92ce84d087f30f555f77708de6840e81f42489ca Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Thu, 5 Feb 2026 18:46:23 -0500 Subject: [PATCH 16/36] Updates to minian routine after testing --- element_miniscope/miniscope.py | 364 +++++++++++++++------------------ 1 file changed, 165 insertions(+), 199 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 12d7ba9..dca8836 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -970,9 +970,75 @@ def _run_processing(): elif method == "minian": import multiprocessing import psutil + import shutil from dask.distributed import Client, LocalCluster + + # ===== APPLY COMPATIBILITY PATCHES ===== + # Fix for NetworkX 3.0+ API changes + import networkx as nx + import scipy.sparse + from scipy.sparse import issparse + import minian.cnmf as cnmf_module + + def label_connected_fixed(adj, only_connected=False): + """Fixed label_connected for NetworkX 3.0+ compatibility.""" + if issparse(adj): + adj = adj.toarray() + adj = adj.copy() + np.fill_diagonal(adj, 0) + adj = np.triu(adj) + g = nx.from_numpy_array(adj) + labels = np.zeros(adj.shape[0], dtype=int) + for icomp, comp in enumerate(nx.connected_components(g)): + for node in comp: + labels[node] = icomp + if only_connected: + iso_mask = np.array([len(c) == 1 for c in nx.connected_components(g)]) + labels[np.isin(labels, np.where(iso_mask)[0])] = -1 + return labels + + cnmf_module.label_connected = 
label_connected_fixed + + # Fix for sparse array auto-densification + import sparse.numba_backend._sparse_array as sparse_mod + sparse_mod.AUTO_DENSIFY = True + + # Patch update_temporal to handle sparse.COO arrays + _original_update_temporal = cnmf_module.update_temporal + + def patched_update_temporal(A, C, b=None, f=None, Y=None, YrA=None, + noise_freq=0.25, p=2, add_lag="p", jac_thres=0.1, + sparse_penal=1, bseg=None, med_wd=None, + zero_thres=1e-8, max_iters=200, use_smooth=True, + normalize=True, warm_start=False, post_scal=False, + scs_fallback=False, concurrent_update=False): + """Patched update_temporal that handles sparse.COO arrays properly.""" + _original_csc_matrix = scipy.sparse.csc_matrix + + class PatchedCSCMatrix(scipy.sparse.csc_matrix): + def __new__(cls, arg1, shape=None, dtype=None, copy=False): + if hasattr(arg1, 'todense'): + arg1 = arg1.todense() + return _original_csc_matrix(arg1, shape=shape, dtype=dtype, copy=copy) + + scipy.sparse.csc_matrix = PatchedCSCMatrix + try: + result = _original_update_temporal( + A, C, b=b, f=f, Y=Y, YrA=YrA, noise_freq=noise_freq, + p=p, add_lag=add_lag, jac_thres=jac_thres, sparse_penal=sparse_penal, + bseg=bseg, med_wd=med_wd, zero_thres=zero_thres, max_iters=max_iters, + use_smooth=use_smooth, normalize=normalize, warm_start=warm_start, + post_scal=post_scal, scs_fallback=scs_fallback, + concurrent_update=concurrent_update + ) + finally: + scipy.sparse.csc_matrix = _original_csc_matrix + return result + + cnmf_module.update_temporal = patched_update_temporal + + # Now import minian modules (after patches are applied) from minian.cnmf import ( - compute_trace, get_noise_fft, unit_merge, update_spatial, @@ -996,12 +1062,10 @@ def _run_processing(): save_minian, ) from minian.visualization import write_video - # ===== CONTAINER-AWARE MEMORY DETECTION ===== def get_container_memory_limit(): """Get memory limit respecting container cgroups (v1 and v2).""" - # Try cgroup v2 first (modern Docker/Kubernetes) try: 
with open("/sys/fs/cgroup/memory.max") as f: limit = f.read().strip() @@ -1009,16 +1073,13 @@ def get_container_memory_limit(): return int(limit) except (FileNotFoundError, PermissionError): pass - # Try cgroup v1 (older Docker) try: with open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f: limit = int(f.read().strip()) - # cgroup v1 returns a very large number if unlimited if limit < 9223372036854771712: return limit except (FileNotFoundError, PermissionError): pass - # Fall back to psutil (bare metal or unrestricted container) return psutil.virtual_memory().total # ===== DASK CLUSTER CONFIGURATION ===== @@ -1026,10 +1087,9 @@ def get_container_memory_limit(): n_workers = int( os.getenv( "MINIAN_NWORKERS", - max(1, min(int(multiprocessing.cpu_count() * 0.4), 8)), # Cap at 8 workers + max(1, min(int(multiprocessing.cpu_count() * 0.4), 8)), ) ) - # Reserve 20% for system overhead, distribute rest among workers memory_per_worker = int(memory_total * 0.8 / n_workers) memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT") @@ -1047,6 +1107,14 @@ def get_container_memory_limit(): os.makedirs(temp_path, exist_ok=True) os.environ["MINIAN_INTERMEDIATE"] = temp_path + # Helper function to clean intermediate files + def clean_intermediate_files(file_names): + """Remove intermediate zarr files to avoid conflicts.""" + for fname in file_names: + path = os.path.join(temp_path, f"{fname}.zarr") + if os.path.exists(path): + shutil.rmtree(path) + # Start Dask cluster logger.info( f"Starting Minian processing with {n_workers} workers, " @@ -1057,7 +1125,7 @@ def get_container_memory_limit(): memory_limit=memory_limit, resources={"MEM": 1}, threads_per_worker=2, - dashboard_address=None, # Disable dashboard in automated pipeline + dashboard_address=None, ) annotation_plugin = TaskAnnotation() cluster.scheduler.add_plugin(annotation_plugin) @@ -1076,89 +1144,83 @@ def get_container_memory_limit(): **default_load_params, **params.get("param_load_videos", {}), } - video_array = 
load_videos( + varr = load_videos( str(pathlib.Path(avi_files[0]).parent), **param_load_videos ) - chunk_size, _ = get_optimal_chk(video_array, dtype=float) - video_array = save_minian( - video_array.chunk( - {"frame": chunk_size["frame"], "height": -1, "width": -1} - ).rename("video_array"), + chk, _ = get_optimal_chk(varr, dtype=float) + + # Save raw video to zarr + logger.info("Saving raw video to zarr...") + varr = save_minian( + varr.chunk({"frame": chk["frame"], "height": -1, "width": -1}).rename("varr"), temp_path, overwrite=True, ) logger.info( - f"Loaded video: {video_array.sizes['frame']} frames, " - f"{video_array.sizes['height']}x{video_array.sizes['width']} pixels" + f"Loaded video: {varr.sizes['frame']} frames, " + f"{varr.sizes['height']}x{varr.sizes['width']} pixels" ) # ===== PREPROCESSING ===== - logger.info("Preprocessing: glow removal (subtracting minimum)...") - video_min = video_array.min("frame").compute() - video_array_ref = video_array - video_min + logger.info("Preprocessing: glow removal...") + varr_min = varr.min("frame").compute() + varr_ref = varr - varr_min + varr_ref = varr_ref.clip(min=0) logger.info("Preprocessing: denoising...") param_denoise = params.get( "param_denoise", {"method": "median", "ksize": 7} ) - video_array_ref = denoise(video_array_ref, **param_denoise) + varr_ref = denoise(varr_ref, **param_denoise) logger.info("Preprocessing: background removal...") param_background_removal = params.get( "param_background_removal", {"method": "tophat", "wnd": 10} ) - video_array_ref = remove_background(video_array_ref, **param_background_removal) + varr_ref = remove_background(varr_ref, **param_background_removal) - logger.info("Preprocessing: saving preprocessed video...") - video_array_ref = save_minian( - video_array_ref.rename("video_array_ref"), - dpath=temp_path, - overwrite=True, - ) + # Keep it chunked + varr_ref = varr_ref.chunk({"frame": chk["frame"], "height": -1, "width": -1}) # ===== MOTION CORRECTION ===== 
logger.info("Estimating motion...") param_estimate_motion = params.get( "param_estimate_motion", {"dim": "frame"} ) - motion = estimate_motion(video_array_ref, **param_estimate_motion) + motion = estimate_motion(varr_ref, **param_estimate_motion) motion = save_minian( - motion.rename("motion").chunk({"frame": chunk_size["frame"]}), - dpath=temp_path, + motion.rename("motion").chunk({"frame": chk["frame"]}), + temp_path, overwrite=True, ) logger.info("Applying motion correction...") - Y = apply_transform(video_array_ref, motion, fill=0) + Y = apply_transform(varr_ref, motion, fill=0) - # Save two versions with different chunking strategies + # Save two versions with different chunking + logger.info("Saving motion-corrected video (frame-chunked)...") Y_fm_chk = save_minian( Y.astype(float).rename("Y_fm_chk"), temp_path, overwrite=True, ) + + logger.info("Saving motion-corrected video (spatial-chunked)...") Y_hw_chk = save_minian( Y_fm_chk.rename("Y_hw_chk"), temp_path, overwrite=True, - chunks={ - "frame": -1, - "height": chunk_size["height"], - "width": chunk_size["width"], - }, + chunks={"frame": -1, "height": chk["height"], "width": chk["width"]}, ) - # Save motion corrected video - logger.info("Saving motion corrected video...") + # Save motion corrected video as mp4 + logger.info("Writing motion corrected video...") write_video(Y_fm_chk, "motion_corrected.mp4", str(output_dir)) # Create and save max projection - logger.info("Creating max projection...") - max_proj = save_minian( - Y_fm_chk.max("frame").rename("max_proj"), - dpath=str(output_dir), - overwrite=True, - ).compute() + logger.info("Computing max projection...") + max_proj = Y_fm_chk.max("frame").compute() + max_proj = save_minian(max_proj.rename("max_proj"), temp_path, overwrite=True) # ===== SEED INITIALIZATION ===== logger.info("Initializing seeds...") @@ -1180,24 +1242,22 @@ def get_container_memory_limit(): "param_pnr_refine", {"noise_freq": 0.06, "thres": 1} ) seeds, pnr, gmm = 
pnr_refine(Y_hw_chk, seeds, **param_pnr_refine) + logger.info(f"Seeds after PNR refine: {seeds['mask_pnr'].sum()} / {len(seeds)}") logger.info("Refining seeds with KS test...") param_ks_refine = params.get("param_ks_refine", {"sig": 0.05}) seeds = ks_refine(Y_hw_chk, seeds, **param_ks_refine) + logger.info(f"Seeds after KS refine: {seeds['mask_ks'].sum()} / {len(seeds)}") logger.info("Merging seeds...") param_seeds_merge = params.get( "param_seeds_merge", {"thres_dist": 10, "thres_corr": 0.8, "noise_freq": 0.06}, ) - seeds_final = seeds[seeds["mask_ks"] & seeds["mask_pnr"]].reset_index( - drop=True - ) - seeds_final = seeds_merge( - Y_hw_chk, max_proj, seeds_final, **param_seeds_merge - ) + seeds_final = seeds[seeds["mask_ks"] & seeds["mask_pnr"]].reset_index(drop=True) + seeds_final = seeds_merge(Y_hw_chk, max_proj, seeds_final, **param_seeds_merge) n_seeds = seeds_final["mask_mrg"].sum() - logger.info(f"Seeds after filtering and merging: {n_seeds}") + logger.info(f"Seeds after merge: {n_seeds} / {len(seeds_final)}") if n_seeds == 0: raise ValueError( @@ -1210,10 +1270,9 @@ def get_container_memory_limit(): param_initialize = params.get( "param_initialize", {"thres_corr": 0.8, "wnd": 10, "noise_freq": 0.06} ) - A_init = initA( - Y_hw_chk, seeds_final[seeds_final["mask_mrg"]], **param_initialize - ) + A_init = initA(Y_hw_chk, seeds_final[seeds_final["mask_mrg"]], **param_initialize) A_init = save_minian(A_init.rename("A_init"), temp_path, overwrite=True) + logger.info(f"A_init shape: {A_init.shape}") logger.info("Initializing temporal traces (C)...") C_init = initC(Y_fm_chk, A_init) @@ -1223,6 +1282,7 @@ def get_container_memory_limit(): overwrite=True, chunks={"unit_id": 1, "frame": -1}, ) + logger.info(f"C_init shape: {C_init.shape}") # Initial unit merge logger.info("Initial unit merge...") @@ -1234,39 +1294,39 @@ def get_container_memory_limit(): C.rename("C_chk"), temp_path, overwrite=True, - chunks={"unit_id": -1, "frame": chunk_size["frame"]}, + 
chunks={"unit_id": -1, "frame": chk["frame"]}, ) logger.info(f"Units after initial merge: {A.sizes['unit_id']}") # ===== INITIALIZE BACKGROUND ===== logger.info("Initializing background terms...") b, f = update_background(Y_fm_chk, A, C_chk) - b = save_minian(b.rename("b"), temp_path, overwrite=True) f = save_minian(f.rename("f"), temp_path, overwrite=True) + b = save_minian(b.rename("b"), temp_path, overwrite=True) + logger.info(f"Background initialized - b: {b.shape}, f: {f.shape}") # ===== COMPUTE NOISE STATISTICS ===== logger.info("Computing noise statistics...") param_get_noise = params.get("param_get_noise", {"noise_range": (0.06, 0.5)}) sn_spatial = get_noise_fft(Y_hw_chk, **param_get_noise) - sn_spatial = save_minian( - sn_spatial.rename("sn_spatial"), temp_path, overwrite=True - ) + sn_spatial = save_minian(sn_spatial.rename("sn_spatial"), temp_path, overwrite=True) # ======================================== # CNMF ITERATION 1 # ======================================== + # Clean intermediate files that might conflict + clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", "g", "YrA"]) + # ----- First Spatial Update ----- logger.info("CNMF Iteration 1: Spatial update...") param_first_spatial = params.get( "param_first_spatial", - {"dl_wnd": 5, "sparse_penal": 0.01, "size_thres": (25, None)}, + {"dl_wnd": 10, "sparse_penal": 0.01, "size_thres": (25, None)}, ) A_new, mask, norm_fac = update_spatial( Y_hw_chk, A, C, sn_spatial, **param_first_spatial ) - - # Apply mask and normalization to C (CRITICAL) C_new = save_minian( (C.sel(unit_id=mask) * norm_fac).rename("C_new"), temp_path, @@ -1277,192 +1337,98 @@ def get_container_memory_limit(): temp_path, overwrite=True, ) + logger.info(f"Units after first spatial update: {A_new.sizes['unit_id']}") - # Save updated A - A = save_minian( - A_new.rename("A"), - temp_path, - overwrite=True, - chunks={"unit_id": 1, "height": -1, "width": -1}, - ) - - # Update background after spatial update - b_new, f_new = 
update_background(Y_fm_chk, A, C_chk_new) - b = save_minian(b_new.rename("b"), temp_path, overwrite=True) - f = save_minian( - f_new.chunk({"frame": chunk_size["frame"]}).rename("f"), - temp_path, - overwrite=True, - ) - C = save_minian(C_new.rename("C"), temp_path, overwrite=True) - C_chk = save_minian(C_chk_new.rename("C_chk"), temp_path, overwrite=True) - - logger.info(f"Units after first spatial update: {A.sizes['unit_id']}") + # Update background after first spatial + logger.info("Updating background...") + b_new, f_new = update_background(Y_fm_chk, A_new, C_chk_new) # ----- First Temporal Update ----- logger.info("CNMF Iteration 1: Temporal update...") - YrA = save_minian( - compute_trace(Y_fm_chk, A, b, C_chk, f).rename("YrA"), - temp_path, - overwrite=True, - chunks={"unit_id": 1, "frame": -1}, - ) - param_first_temporal = params.get( "param_first_temporal", { "noise_freq": 0.06, - "sparse_penal": 0.5, + "sparse_penal": 1, "p": 1, "add_lag": 20, "jac_thres": 0.2, }, ) C_new, S_new, b0_new, c0_new, g, mask = update_temporal( - A, C, YrA=YrA, **param_first_temporal + A_new, C_new, + Y=Y_fm_chk, + b=b_new, f=f_new, + **param_first_temporal ) - - # Save temporal results - C = save_minian( - C_new.rename("C").chunk({"unit_id": 1, "frame": -1}), - temp_path, - overwrite=True, - ) - C_chk = save_minian( - C.rename("C_chk"), - temp_path, - overwrite=True, - chunks={"unit_id": -1, "frame": chunk_size["frame"]}, - ) - S = save_minian( - S_new.rename("S").chunk({"unit_id": 1, "frame": -1}), - temp_path, - overwrite=True, - ) - b0 = save_minian( - b0_new.rename("b0").chunk({"unit_id": 1, "frame": -1}), - temp_path, - overwrite=True, - ) - c0 = save_minian( - c0_new.rename("c0").chunk({"unit_id": 1, "frame": -1}), - temp_path, - overwrite=True, - ) - - # Sync A with C after temporal update drops units - A = A.sel(unit_id=C.coords["unit_id"].values) - - logger.info(f"Units after first temporal update: {A.sizes['unit_id']}") + logger.info(f"Units after first temporal 
update: {C_new.sizes['unit_id']}") # ----- First Merge ----- logger.info("CNMF Iteration 1: Merging units...") param_first_merge = params.get("param_first_merge", {"thres_corr": 0.8}) - A_mrg, C_mrg, [sig_mrg] = unit_merge( - A, C, [C + b0 + c0], **param_first_merge + A_mrg, C_mrg, [S_mrg] = unit_merge( + A_new.sel(unit_id=mask), C_new, [S_new], **param_first_merge ) - - A = save_minian(A_mrg.rename("A"), temp_path, overwrite=True) - C = save_minian(C_mrg.rename("C"), temp_path, overwrite=True) - C_chk = save_minian( - C.rename("C_chk"), - temp_path, - overwrite=True, - chunks={"unit_id": -1, "frame": chunk_size["frame"]}, - ) - - logger.info(f"Units after first merge: {A.sizes['unit_id']}") + logger.info(f"Units after first merge: {A_mrg.sizes['unit_id']}") # ======================================== # CNMF ITERATION 2 # ======================================== + # Clean intermediate files before second iteration + clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", "g", "YrA"]) + # ----- Second Spatial Update ----- logger.info("CNMF Iteration 2: Spatial update...") param_second_spatial = params.get( "param_second_spatial", - {"dl_wnd": 5, "sparse_penal": 0.01, "size_thres": (25, None)}, + {"dl_wnd": 10, "sparse_penal": 0.01, "size_thres": (25, None)}, ) - A_new, mask, norm_fac = update_spatial( - Y_hw_chk, A, C, sn_spatial, **param_second_spatial + A_new2, mask2, norm_fac2 = update_spatial( + Y_hw_chk, A_mrg, C_mrg, sn_spatial, **param_second_spatial ) + C_new2 = C_mrg.sel(unit_id=mask2) * norm_fac2 + logger.info(f"Units after second spatial update: {A_new2.sizes['unit_id']}") - # Apply mask and normalization to C - C_new = save_minian( - (C.sel(unit_id=mask) * norm_fac).rename("C_new"), - temp_path, - overwrite=True, - ) - C_chk_new = save_minian( - (C_chk.sel(unit_id=mask) * norm_fac).rename("C_chk_new"), - temp_path, - overwrite=True, - ) - - # Save updated A - A = save_minian( - A_new.rename("A"), - temp_path, - overwrite=True, - chunks={"unit_id": 
1, "height": -1, "width": -1}, - ) - - # Update background - b_new, f_new = update_background(Y_fm_chk, A, C_chk_new) - b = save_minian(b_new.rename("b"), temp_path, overwrite=True) - f = save_minian( - f_new.chunk({"frame": chunk_size["frame"]}).rename("f"), - temp_path, - overwrite=True, - ) - C = save_minian(C_new.rename("C"), temp_path, overwrite=True) - C_chk = save_minian(C_chk_new.rename("C_chk"), temp_path, overwrite=True) - - logger.info(f"Units after second spatial update: {A.sizes['unit_id']}") + # Update background after second spatial + logger.info("Updating background...") + b_new2, f_new2 = update_background(Y_fm_chk, A_new2, C_new2) # ----- Second Temporal Update ----- logger.info("CNMF Iteration 2: Temporal update...") - YrA = save_minian( - compute_trace(Y_fm_chk, A, b, C_chk, f).rename("YrA"), - temp_path, - overwrite=True, - chunks={"unit_id": 1, "frame": -1}, - ) - param_second_temporal = params.get( "param_second_temporal", { "noise_freq": 0.06, - "sparse_penal": 0.5, + "sparse_penal": 1, "p": 1, "add_lag": 20, - "jac_thres": 0.2, + "jac_thres": 0.4, }, ) - C_new, S_new, b0_new, c0_new, g, mask = update_temporal( - A, C, YrA=YrA, **param_second_temporal + C_final, S_final, b0_final, c0_final, g_final, mask_final = update_temporal( + A_new2, C_new2, + Y=Y_fm_chk, + b=b_new2, f=f_new2, + **param_second_temporal ) - - # Sync A with final C - A = A.sel(unit_id=C_new.coords["unit_id"].values) - - logger.info(f"Final units after second temporal update: {A.sizes['unit_id']}") + A_final = A_new2.sel(unit_id=mask_final) + logger.info(f"Final units: {A_final.sizes['unit_id']}") # ===== SAVE FINAL RESULTS ===== logger.info("Saving final results to output directory...") final_save_params = {"dpath": str(output_dir), "overwrite": True} - A = save_minian(A.rename("A"), **final_save_params) - C = save_minian(C_new.rename("C"), **final_save_params) - S = save_minian(S_new.rename("S"), **final_save_params) - c0 = save_minian(c0_new.rename("c0"), 
**final_save_params) - b0 = save_minian(b0_new.rename("b0"), **final_save_params) - b = save_minian(b_new.rename("b"), **final_save_params) - f = save_minian(f_new.rename("f"), **final_save_params) - motion = save_minian(motion.rename("motion"), **final_save_params) + A_final = save_minian(A_final.rename("A"), **final_save_params) + C_final = save_minian(C_final.rename("C"), **final_save_params) + S_final = save_minian(S_final.rename("S"), **final_save_params) + b_final = save_minian(b_new2.rename("b"), **final_save_params) + f_final = save_minian(f_new2.rename("f"), **final_save_params) + motion_final = save_minian(motion.rename("motion"), **final_save_params) + max_proj_final = save_minian(max_proj.rename("max_proj"), **final_save_params) logger.info( - f"Minian processing complete. {A.sizes['unit_id']} units detected." + f"Minian processing complete. {A_final.sizes['unit_id']} units detected." ) except Exception as e: From 443d0d40d851d603d3d85cbb5d0353f7d4376d77 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Thu, 5 Feb 2026 18:47:24 -0500 Subject: [PATCH 17/36] Remove file cleanup for now --- element_miniscope/miniscope.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index dca8836..335fd6f 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1315,9 +1315,6 @@ def clean_intermediate_files(file_names): # CNMF ITERATION 1 # ======================================== - # Clean intermediate files that might conflict - clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", "g", "YrA"]) - # ----- First Spatial Update ----- logger.info("CNMF Iteration 1: Spatial update...") param_first_spatial = params.get( @@ -1375,9 +1372,6 @@ def clean_intermediate_files(file_names): # CNMF ITERATION 2 # ======================================== - # Clean intermediate files before second iteration - clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", "g", 
"YrA"]) - # ----- Second Spatial Update ----- logger.info("CNMF Iteration 2: Spatial update...") param_second_spatial = params.get( From 1620e0559a086bfce7d3f31a8935616ff204b359 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Thu, 5 Feb 2026 22:40:11 -0500 Subject: [PATCH 18/36] patch compability for mixed array types --- element_miniscope/miniscope.py | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 335fd6f..9dcb8b0 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -971,9 +971,11 @@ def _run_processing(): import multiprocessing import psutil import shutil + import dask.array as darr from dask.distributed import Client, LocalCluster # ===== APPLY COMPATIBILITY PATCHES ===== + # Fix for NetworkX 3.0+ API changes import networkx as nx import scipy.sparse @@ -1000,9 +1002,50 @@ def label_connected_fixed(adj, only_connected=False): cnmf_module.label_connected = label_connected_fixed # Fix for sparse array auto-densification + import sparse import sparse.numba_backend._sparse_array as sparse_mod sparse_mod.AUTO_DENSIFY = True + # Fix for darr.block with mixed sparse array types + _original_darr_block = darr.block + + def patched_darr_block(arrays, allow_unknown_chunksizes=False): + """Patched darr.block that ensures sparse array type consistency.""" + def convert_to_coo(arr): + if arr is None: + return arr + if isinstance(arr, sparse.COO): + return arr + if isinstance(arr, sparse.SparseArray): + return sparse.COO(arr) + if isinstance(arr, np.ndarray): + return sparse.COO.from_numpy(arr) + if hasattr(arr, 'todense'): + return sparse.COO.from_numpy(np.asarray(arr.todense())) + return arr + + def recursive_convert(obj): + if isinstance(obj, list): + return [recursive_convert(item) for item in obj] + elif isinstance(obj, np.ndarray) and obj.dtype == object: + result = np.empty_like(obj) + for idx in np.ndindex(obj.shape): + 
result[idx] = convert_to_coo(obj[idx]) + return result + else: + return convert_to_coo(obj) + + try: + return _original_darr_block(arrays, allow_unknown_chunksizes=allow_unknown_chunksizes) + except ValueError as e: + if "All arrays must be instances of SparseArray" in str(e): + converted = recursive_convert(arrays) + return _original_darr_block(converted, allow_unknown_chunksizes=allow_unknown_chunksizes) + raise + + darr.block = patched_darr_block + darr.core.block = patched_darr_block + # Patch update_temporal to handle sparse.COO arrays _original_update_temporal = cnmf_module.update_temporal @@ -1037,6 +1080,8 @@ def __new__(cls, arg1, shape=None, dtype=None, copy=False): cnmf_module.update_temporal = patched_update_temporal + logger.info("Applied minian compatibility patches (NetworkX, sparse arrays, dask.block)") + # Now import minian modules (after patches are applied) from minian.cnmf import ( get_noise_fft, From f821da825fb9b77c055dd0009c283047a002ef97 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Thu, 5 Feb 2026 22:53:32 -0500 Subject: [PATCH 19/36] patch sparse array error pt2 --- element_miniscope/miniscope.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 9dcb8b0..8262c58 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1046,6 +1046,30 @@ def recursive_convert(obj): darr.block = patched_darr_block darr.core.block = patched_darr_block + # ===== PATCH: sparse/dense concatenation compatibility ===== + import sparse + import sparse.numba_backend._coo.common as _sparse_coo_common + import numpy as np + + _original_sparse_concat = _sparse_coo_common.concatenate + + def _patched_sparse_concat(arrays, axis=0): + """Convert any dense arrays to sparse.COO before concatenation.""" + converted = [] + for arr in arrays: + if isinstance(arr, sparse.SparseArray): + converted.append(arr) + elif isinstance(arr, np.ndarray): + 
converted.append(sparse.COO.from_numpy(arr)) + else: + try: + converted.append(sparse.COO.from_numpy(np.asarray(arr))) + except Exception: + converted.append(arr) + return _original_sparse_concat(converted, axis=axis) + + _sparse_coo_common.concatenate = _patched_sparse_concat + # Patch update_temporal to handle sparse.COO arrays _original_update_temporal = cnmf_module.update_temporal From 07cd10492e337abd16a98d6a251ce511c984c662 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Thu, 5 Feb 2026 23:15:31 -0500 Subject: [PATCH 20/36] patch sparse array issue pt3 --- element_miniscope/miniscope.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 8262c58..742aa5e 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1070,6 +1070,21 @@ def _patched_sparse_concat(arrays, axis=0): _sparse_coo_common.concatenate = _patched_sparse_concat + # ===== PATCH: sparse/dense concatenation compatibility ===== + import sparse + import numpy as np + import sparse.numba_backend._coo.common as _sparse_coo_common + + _original_check = _sparse_coo_common.check_consistent_fill_value + + def _patched_check(arrays): + """Auto-convert any dense arrays to sparse.COO before validation.""" + for i, arr in enumerate(arrays): + if not isinstance(arr, sparse.SparseArray): + arrays[i] = sparse.COO.from_numpy(np.asarray(arr)) + return _original_check(arrays) + + _sparse_coo_common.check_consistent_fill_value = _patched_check # Patch update_temporal to handle sparse.COO arrays _original_update_temporal = cnmf_module.update_temporal From 90f19a8ab30066e46d64487e2053e323c40f7aea Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Fri, 6 Feb 2026 12:07:13 -0500 Subject: [PATCH 21/36] Create new intermediate dirs for cnmf and update env --- element_miniscope/miniscope.py | 52 ++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git 
a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 742aa5e..e19f351 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1186,16 +1186,20 @@ def get_container_memory_limit(): else: memory_limit = f"{memory_per_worker // (1024**3)}GB" - # Set intermediate storage path - temp_path = str(output_dir / "intermediate") - os.makedirs(temp_path, exist_ok=True) - os.environ["MINIAN_INTERMEDIATE"] = temp_path + # Set minian intermediate storage paths + pre_cnmf_path = str(output_dir / "pre_cnmf") + first_cnmf_path = str(output_dir / "first_cnmf") + second_cnmf_path = str(output_dir / "second_cnmf") + os.makedirs(pre_cnmf_path, exist_ok=True) + os.makedirs(first_cnmf_path, exist_ok=True) + os.makedirs(second_cnmf_path, exist_ok=True) + os.environ["MINIAN_INTERMEDIATE"] = pre_cnmf_path # Helper function to clean intermediate files - def clean_intermediate_files(file_names): + def clean_intermediate_files(directory_path, file_names): """Remove intermediate zarr files to avoid conflicts.""" for fname in file_names: - path = os.path.join(temp_path, f"{fname}.zarr") + path = os.path.join(directory_path, f"{fname}.zarr") if os.path.exists(path): shutil.rmtree(path) @@ -1237,7 +1241,7 @@ def clean_intermediate_files(file_names): logger.info("Saving raw video to zarr...") varr = save_minian( varr.chunk({"frame": chk["frame"], "height": -1, "width": -1}).rename("varr"), - temp_path, + pre_cnmf_path, overwrite=True, ) logger.info( @@ -1274,7 +1278,7 @@ def clean_intermediate_files(file_names): motion = estimate_motion(varr_ref, **param_estimate_motion) motion = save_minian( motion.rename("motion").chunk({"frame": chk["frame"]}), - temp_path, + pre_cnmf_path, overwrite=True, ) @@ -1285,14 +1289,14 @@ def clean_intermediate_files(file_names): logger.info("Saving motion-corrected video (frame-chunked)...") Y_fm_chk = save_minian( Y.astype(float).rename("Y_fm_chk"), - temp_path, + pre_cnmf_path, overwrite=True, ) logger.info("Saving 
motion-corrected video (spatial-chunked)...") Y_hw_chk = save_minian( Y_fm_chk.rename("Y_hw_chk"), - temp_path, + pre_cnmf_path, overwrite=True, chunks={"frame": -1, "height": chk["height"], "width": chk["width"]}, ) @@ -1304,7 +1308,7 @@ def clean_intermediate_files(file_names): # Create and save max projection logger.info("Computing max projection...") max_proj = Y_fm_chk.max("frame").compute() - max_proj = save_minian(max_proj.rename("max_proj"), temp_path, overwrite=True) + max_proj = save_minian(max_proj.rename("max_proj"), pre_cnmf_path, overwrite=True) # ===== SEED INITIALIZATION ===== logger.info("Initializing seeds...") @@ -1355,14 +1359,14 @@ def clean_intermediate_files(file_names): "param_initialize", {"thres_corr": 0.8, "wnd": 10, "noise_freq": 0.06} ) A_init = initA(Y_hw_chk, seeds_final[seeds_final["mask_mrg"]], **param_initialize) - A_init = save_minian(A_init.rename("A_init"), temp_path, overwrite=True) + A_init = save_minian(A_init.rename("A_init"), pre_cnmf_path, overwrite=True) logger.info(f"A_init shape: {A_init.shape}") logger.info("Initializing temporal traces (C)...") C_init = initC(Y_fm_chk, A_init) C_init = save_minian( C_init.rename("C_init"), - temp_path, + pre_cnmf_path, overwrite=True, chunks={"unit_id": 1, "frame": -1}, ) @@ -1372,11 +1376,11 @@ def clean_intermediate_files(file_names): logger.info("Initial unit merge...") param_init_merge = params.get("param_init_merge", {"thres_corr": 0.8}) A, C = unit_merge(A_init, C_init, **param_init_merge) - A = save_minian(A.rename("A"), temp_path, overwrite=True) - C = save_minian(C.rename("C"), temp_path, overwrite=True) + A = save_minian(A.rename("A"), pre_cnmf_path, overwrite=True) + C = save_minian(C.rename("C"), pre_cnmf_path, overwrite=True) C_chk = save_minian( C.rename("C_chk"), - temp_path, + pre_cnmf_path, overwrite=True, chunks={"unit_id": -1, "frame": chk["frame"]}, ) @@ -1385,19 +1389,23 @@ def clean_intermediate_files(file_names): # ===== INITIALIZE BACKGROUND ===== 
logger.info("Initializing background terms...") b, f = update_background(Y_fm_chk, A, C_chk) - f = save_minian(f.rename("f"), temp_path, overwrite=True) - b = save_minian(b.rename("b"), temp_path, overwrite=True) + f = save_minian(f.rename("f"), pre_cnmf_path, overwrite=True) + b = save_minian(b.rename("b"), pre_cnmf_path, overwrite=True) logger.info(f"Background initialized - b: {b.shape}, f: {f.shape}") # ===== COMPUTE NOISE STATISTICS ===== logger.info("Computing noise statistics...") param_get_noise = params.get("param_get_noise", {"noise_range": (0.06, 0.5)}) sn_spatial = get_noise_fft(Y_hw_chk, **param_get_noise) - sn_spatial = save_minian(sn_spatial.rename("sn_spatial"), temp_path, overwrite=True) + sn_spatial = save_minian(sn_spatial.rename("sn_spatial"), pre_cnmf_path, overwrite=True) # ======================================== # CNMF ITERATION 1 # ======================================== + # clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", + # "g", "YrA"]) + + os.environ["MINIAN_INTERMEDIATE"] = first_cnmf_path # ----- First Spatial Update ----- logger.info("CNMF Iteration 1: Spatial update...") @@ -1410,12 +1418,12 @@ def clean_intermediate_files(file_names): ) C_new = save_minian( (C.sel(unit_id=mask) * norm_fac).rename("C_new"), - temp_path, + first_cnmf_path, overwrite=True, ) C_chk_new = save_minian( (C_chk.sel(unit_id=mask) * norm_fac).rename("C_chk_new"), - temp_path, + first_cnmf_path, overwrite=True, ) logger.info(f"Units after first spatial update: {A_new.sizes['unit_id']}") @@ -1456,6 +1464,8 @@ def clean_intermediate_files(file_names): # CNMF ITERATION 2 # ======================================== + os.environ["MINIAN_INTERMEDIATE"] = second_cnmf_path + # ----- Second Spatial Update ----- logger.info("CNMF Iteration 2: Spatial update...") param_second_spatial = params.get( From f88560564f88819f4e22560887f1b68e91c3bd5b Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Fri, 6 Feb 2026 12:18:37 -0500 Subject: [PATCH 22/36] Fix 
minian intermediate --- element_miniscope/miniscope.py | 44 ++++++++++++++-------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index e19f351..5ec3139 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1187,13 +1187,9 @@ def get_container_memory_limit(): memory_limit = f"{memory_per_worker // (1024**3)}GB" # Set minian intermediate storage paths - pre_cnmf_path = str(output_dir / "pre_cnmf") - first_cnmf_path = str(output_dir / "first_cnmf") - second_cnmf_path = str(output_dir / "second_cnmf") - os.makedirs(pre_cnmf_path, exist_ok=True) - os.makedirs(first_cnmf_path, exist_ok=True) - os.makedirs(second_cnmf_path, exist_ok=True) - os.environ["MINIAN_INTERMEDIATE"] = pre_cnmf_path + minian_data_path = str(output_dir / "minian_data") + os.makedirs(minian_data_path, exist_ok=True) + os.environ["MINIAN_INTERMEDIATE"] = minian_data_path # Helper function to clean intermediate files def clean_intermediate_files(directory_path, file_names): @@ -1241,7 +1237,7 @@ def clean_intermediate_files(directory_path, file_names): logger.info("Saving raw video to zarr...") varr = save_minian( varr.chunk({"frame": chk["frame"], "height": -1, "width": -1}).rename("varr"), - pre_cnmf_path, + minian_data_path, overwrite=True, ) logger.info( @@ -1278,7 +1274,7 @@ def clean_intermediate_files(directory_path, file_names): motion = estimate_motion(varr_ref, **param_estimate_motion) motion = save_minian( motion.rename("motion").chunk({"frame": chk["frame"]}), - pre_cnmf_path, + minian_data_path, overwrite=True, ) @@ -1289,14 +1285,14 @@ def clean_intermediate_files(directory_path, file_names): logger.info("Saving motion-corrected video (frame-chunked)...") Y_fm_chk = save_minian( Y.astype(float).rename("Y_fm_chk"), - pre_cnmf_path, + minian_data_path, overwrite=True, ) logger.info("Saving motion-corrected video (spatial-chunked)...") Y_hw_chk = save_minian( 
Y_fm_chk.rename("Y_hw_chk"), - pre_cnmf_path, + minian_data_path, overwrite=True, chunks={"frame": -1, "height": chk["height"], "width": chk["width"]}, ) @@ -1308,7 +1304,7 @@ def clean_intermediate_files(directory_path, file_names): # Create and save max projection logger.info("Computing max projection...") max_proj = Y_fm_chk.max("frame").compute() - max_proj = save_minian(max_proj.rename("max_proj"), pre_cnmf_path, overwrite=True) + max_proj = save_minian(max_proj.rename("max_proj"), minian_data_path, overwrite=True) # ===== SEED INITIALIZATION ===== logger.info("Initializing seeds...") @@ -1359,14 +1355,14 @@ def clean_intermediate_files(directory_path, file_names): "param_initialize", {"thres_corr": 0.8, "wnd": 10, "noise_freq": 0.06} ) A_init = initA(Y_hw_chk, seeds_final[seeds_final["mask_mrg"]], **param_initialize) - A_init = save_minian(A_init.rename("A_init"), pre_cnmf_path, overwrite=True) + A_init = save_minian(A_init.rename("A_init"), minian_data_path, overwrite=True) logger.info(f"A_init shape: {A_init.shape}") logger.info("Initializing temporal traces (C)...") C_init = initC(Y_fm_chk, A_init) C_init = save_minian( C_init.rename("C_init"), - pre_cnmf_path, + minian_data_path, overwrite=True, chunks={"unit_id": 1, "frame": -1}, ) @@ -1376,11 +1372,11 @@ def clean_intermediate_files(directory_path, file_names): logger.info("Initial unit merge...") param_init_merge = params.get("param_init_merge", {"thres_corr": 0.8}) A, C = unit_merge(A_init, C_init, **param_init_merge) - A = save_minian(A.rename("A"), pre_cnmf_path, overwrite=True) - C = save_minian(C.rename("C"), pre_cnmf_path, overwrite=True) + A = save_minian(A.rename("A"), minian_data_path, overwrite=True) + C = save_minian(C.rename("C"), minian_data_path, overwrite=True) C_chk = save_minian( C.rename("C_chk"), - pre_cnmf_path, + minian_data_path, overwrite=True, chunks={"unit_id": -1, "frame": chk["frame"]}, ) @@ -1389,15 +1385,15 @@ def clean_intermediate_files(directory_path, file_names): # 
===== INITIALIZE BACKGROUND ===== logger.info("Initializing background terms...") b, f = update_background(Y_fm_chk, A, C_chk) - f = save_minian(f.rename("f"), pre_cnmf_path, overwrite=True) - b = save_minian(b.rename("b"), pre_cnmf_path, overwrite=True) + f = save_minian(f.rename("f"), minian_data_path, overwrite=True) + b = save_minian(b.rename("b"), minian_data_path, overwrite=True) logger.info(f"Background initialized - b: {b.shape}, f: {f.shape}") # ===== COMPUTE NOISE STATISTICS ===== logger.info("Computing noise statistics...") param_get_noise = params.get("param_get_noise", {"noise_range": (0.06, 0.5)}) sn_spatial = get_noise_fft(Y_hw_chk, **param_get_noise) - sn_spatial = save_minian(sn_spatial.rename("sn_spatial"), pre_cnmf_path, overwrite=True) + sn_spatial = save_minian(sn_spatial.rename("sn_spatial"), minian_data_path, overwrite=True) # ======================================== # CNMF ITERATION 1 @@ -1405,8 +1401,6 @@ def clean_intermediate_files(directory_path, file_names): # clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", # "g", "YrA"]) - os.environ["MINIAN_INTERMEDIATE"] = first_cnmf_path - # ----- First Spatial Update ----- logger.info("CNMF Iteration 1: Spatial update...") param_first_spatial = params.get( @@ -1418,12 +1412,12 @@ def clean_intermediate_files(directory_path, file_names): ) C_new = save_minian( (C.sel(unit_id=mask) * norm_fac).rename("C_new"), - first_cnmf_path, + minian_data_path, overwrite=True, ) C_chk_new = save_minian( (C_chk.sel(unit_id=mask) * norm_fac).rename("C_chk_new"), - first_cnmf_path, + minian_data_path, overwrite=True, ) logger.info(f"Units after first spatial update: {A_new.sizes['unit_id']}") @@ -1463,8 +1457,6 @@ def clean_intermediate_files(directory_path, file_names): # ======================================== # CNMF ITERATION 2 # ======================================== - - os.environ["MINIAN_INTERMEDIATE"] = second_cnmf_path # ----- Second Spatial Update ----- logger.info("CNMF Iteration 2: 
Spatial update...") From c601e1cc3e88d30bd73c2a585cd10aabcbd06155 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 11 Feb 2026 17:45:00 -0500 Subject: [PATCH 23/36] Update minian data path --- element_miniscope/miniscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 5ec3139..3c0b2ed 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1519,7 +1519,7 @@ def clean_intermediate_files(directory_path, file_names): cluster.close() # Load results and prepare for insertion - minian_loader = MinianLoader(output_dir) + minian_loader = MinianLoader(minian_data_path) key["processing_time"] = minian_loader.creation_time # Get minian version if available From 36db0f14c22165ddea3e6558d2a10b06414f1e61 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Thu, 12 Feb 2026 09:36:56 -0500 Subject: [PATCH 24/36] Fix mismatched units --- element_miniscope/miniscope.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 3c0b2ed..e3d9ce5 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1398,9 +1398,10 @@ def clean_intermediate_files(directory_path, file_names): # ======================================== # CNMF ITERATION 1 # ======================================== - # clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", - # "g", "YrA"]) - + + # Clean intermediate files that might conflict + clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", "g", "YrA"]) + # ----- First Spatial Update ----- logger.info("CNMF Iteration 1: Spatial update...") param_first_spatial = params.get( @@ -1449,15 +1450,29 @@ def clean_intermediate_files(directory_path, file_names): # ----- First Merge ----- logger.info("CNMF Iteration 1: Merging units...") param_first_merge = params.get("param_first_merge", 
{"thres_corr": 0.8}) + + # <<< PATCHED: unit_merge coordinate fix + # update_temporal returns C_new/S_new already filtered by mask internally. + # Sync A with C_new's actual coordinates instead of using the boolean mask, + # which can cause alignment mismatches on large datasets. + A_filtered = A_new.sel(unit_id=C_new.coords["unit_id"].values) + logger.info( + f"Unit sync check — A: {A_filtered.sizes['unit_id']}, " + f"C: {C_new.sizes['unit_id']}, S: {S_new.sizes['unit_id']}" + ) A_mrg, C_mrg, [S_mrg] = unit_merge( - A_new.sel(unit_id=mask), C_new, [S_new], **param_first_merge + A_filtered, C_new, [S_new], **param_first_merge ) + # >>> END PATCH logger.info(f"Units after first merge: {A_mrg.sizes['unit_id']}") # ======================================== # CNMF ITERATION 2 # ======================================== - + + # Clean intermediate files before second iteration + clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", "g", "YrA"]) + # ----- Second Spatial Update ----- logger.info("CNMF Iteration 2: Spatial update...") param_second_spatial = params.get( @@ -1492,7 +1507,11 @@ def clean_intermediate_files(directory_path, file_names): b=b_new2, f=f_new2, **param_second_temporal ) - A_final = A_new2.sel(unit_id=mask_final) + + # <<< PATCHED: unit_merge coordinate fix (same pattern as iteration 1) + # Sync A with C_final's actual coordinates + A_final = A_new2.sel(unit_id=C_final.coords["unit_id"].values) + # >>> END PATCH logger.info(f"Final units: {A_final.sizes['unit_id']}") # ===== SAVE FINAL RESULTS ===== From 0299a55235daade59e2ab139eeb321c66c24dc91 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Thu, 12 Feb 2026 10:13:51 -0500 Subject: [PATCH 25/36] Remove intermediate file clean up --- element_miniscope/miniscope.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index e3d9ce5..f66fb40 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ 
-1399,9 +1399,6 @@ def clean_intermediate_files(directory_path, file_names): # CNMF ITERATION 1 # ======================================== - # Clean intermediate files that might conflict - clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", "g", "YrA"]) - # ----- First Spatial Update ----- logger.info("CNMF Iteration 1: Spatial update...") param_first_spatial = params.get( @@ -1470,9 +1467,6 @@ def clean_intermediate_files(directory_path, file_names): # CNMF ITERATION 2 # ======================================== - # Clean intermediate files before second iteration - clean_intermediate_files(["C_new", "S_new", "b0_new", "c0_new", "g", "YrA"]) - # ----- Second Spatial Update ----- logger.info("CNMF Iteration 2: Spatial update...") param_second_spatial = params.get( From 568e5f2d44188c75a10b99f9018cedeeef03224d Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Thu, 12 Feb 2026 20:52:58 -0500 Subject: [PATCH 26/36] sanitize arrays to avoid nans and add failure flag --- element_miniscope/miniscope.py | 61 ++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index f66fb40..768d94d 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -971,9 +971,19 @@ def _run_processing(): import multiprocessing import psutil import shutil + import dask import dask.array as darr + import scipy.linalg  # <<< NEW: for solve_triangular patch + import xarray as xr  # <<< NEW: for sanitize_array from dask.distributed import Client, LocalCluster + # <<< NEW: Prevent infinite retry loops on NaN/Inf errors >>> + # If a Dask task fails (e.g. solve_triangular ValueError), don't + # keep retrying until OOM — fail fast after 3 attempts.
+ dask.config.set({ + "distributed.scheduler.allowed-failures": 3, + }) + # ===== APPLY COMPATIBILITY PATCHES ===== # Fix for NetworkX 3.0+ API changes @@ -1119,7 +1129,20 @@ def __new__(cls, arg1, shape=None, dtype=None, copy=False): cnmf_module.update_temporal = patched_update_temporal - logger.info("Applied minian compatibility patches (NetworkX, sparse arrays, dask.block)") + # <<< NEW PATCH: scipy.linalg.solve_triangular NaN/Inf guard >>> + # Prevents ValueError('array must not contain infs or NaNs') from + # crashing Dask tasks and triggering infinite retry → OOM loops. + _original_scipy_solve_tri = scipy.linalg.solve_triangular + + def patched_solve_triangular(a, b, **kwargs): + """Sanitize NaN/Inf before calling solve_triangular.""" + a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0) + b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0) + return _original_scipy_solve_tri(a, b, **kwargs) + + scipy.linalg.solve_triangular = patched_solve_triangular + + logger.info("Applied minian compatibility patches (NetworkX, sparse arrays, dask.block, solve_triangular NaN guard)") # Now import minian modules (after patches are applied) from minian.cnmf import ( @@ -1199,6 +1222,29 @@ def clean_intermediate_files(directory_path, file_names): if os.path.exists(path): shutil.rmtree(path) + def sanitize_array(arr, name="array"): + """Replace NaN/Inf in an xarray DataArray with 0. 
+ Works with both lazy (dask-backed) and in-memory arrays.""" + logger.info(f"Sanitizing {name} (replacing NaN/Inf with 0)...") + if hasattr(arr.data, "dask"): + sanitized = xr.apply_ufunc( + lambda x: np.nan_to_num( + x, nan=0.0, posinf=0.0, neginf=0.0 + ), + arr, + dask="parallelized", + output_dtypes=[arr.dtype], + ) + else: + sanitized = xr.DataArray( + np.nan_to_num( + arr.values, nan=0.0, posinf=0.0, neginf=0.0 + ), + coords=arr.coords, + dims=arr.dims, + ) + return sanitized + # Start Dask cluster logger.info( f"Starting Minian processing with {n_workers} workers, " @@ -1281,6 +1327,14 @@ def clean_intermediate_files(directory_path, file_names): logger.info("Applying motion correction...") Y = apply_transform(varr_ref, motion, fill=0) + # <<< NEW: Sanitize after motion correction >>> + # apply_transform can introduce NaN at frame borders where the + # shift goes beyond the image boundary. The fill=0 param should + # handle this, but edge cases slip through on large datasets + # with float64 precision. This prevents downstream + # solve_triangular crashes in CNMF. 
+ Y = sanitize_array(Y, "Y_post_motion_correction") + # Save two versions with different chunking logger.info("Saving motion-corrected video (frame-chunked)...") Y_fm_chk = save_minian( @@ -1443,7 +1497,8 @@ def clean_intermediate_files(directory_path, file_names): **param_first_temporal ) logger.info(f"Units after first temporal update: {C_new.sizes['unit_id']}") - + C_new = sanitize_array(C_new, "C_new_pre_merge") + S_new = sanitize_array(S_new, "S_new_pre_merge") # ----- First Merge ----- logger.info("CNMF Iteration 1: Merging units...") param_first_merge = params.get("param_first_merge", {"thres_corr": 0.8}) @@ -1501,6 +1556,8 @@ def clean_intermediate_files(directory_path, file_names): b=b_new2, f=f_new2, **param_second_temporal ) + C_final = sanitize_array(C_final, "C_final_post_temporal_2") + S_final = sanitize_array(S_final, "S_final_post_temporal_2") # <<< PATCHED: unit_merge coordinate fix (same pattern as iteration 1) # Sync A with C_final's actual coordinates From 6e7984ab4342cedd36e25ead244d5c1c3e1bc805 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Fri, 13 Feb 2026 08:53:37 -0500 Subject: [PATCH 27/36] Fixes for unexpected NaNs in minian output --- element_miniscope/miniscope.py | 80 ++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 9 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 768d94d..77c1954 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1,6 +1,7 @@ import csv import copy import cv2 +import gc import importlib import inspect import json @@ -1132,15 +1133,24 @@ def __new__(cls, arg1, shape=None, dtype=None, copy=False): # <<< NEW PATCH: scipy.linalg.solve_triangular NaN/Inf guard >>> # Prevents ValueError('array must not contain infs or NaNs') from # crashing Dask tasks and triggering infinite retry → OOM loops. 
- _original_scipy_solve_tri = scipy.linalg.solve_triangular - - def patched_solve_triangular(a, b, **kwargs): - """Sanitize NaN/Inf before calling solve_triangular.""" - a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0) - b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0) - return _original_scipy_solve_tri(a, b, **kwargs) - - scipy.linalg.solve_triangular = patched_solve_triangular + from distributed import WorkerPlugin + + class SolveTriangularNaNGuard(WorkerPlugin): + """Patch scipy.linalg.solve_triangular on each Dask worker process + to sanitize NaN/Inf inputs instead of crashing.""" + + def setup(self, worker): + import scipy.linalg + import numpy as np + + _original = scipy.linalg.solve_triangular + + def patched_solve_triangular(a, b, **kwargs): + a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0) + b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0) + return _original(a, b, **kwargs) + + scipy.linalg.solve_triangular = patched_solve_triangular logger.info("Applied minian compatibility patches (NetworkX, sparse arrays, dask.block, solve_triangular NaN guard)") @@ -1260,6 +1270,7 @@ def sanitize_array(arr, name="array"): annotation_plugin = TaskAnnotation() cluster.scheduler.add_plugin(annotation_plugin) client = Client(cluster) + client.register_plugin(SolveTriangularNaNGuard()) try: # ===== LOAD VIDEOS ===== @@ -1517,6 +1528,57 @@ def sanitize_array(arr, name="array"): ) # >>> END PATCH logger.info(f"Units after first merge: {A_mrg.sizes['unit_id']}") + A_mrg = save_minian(A_mrg.rename("A_mrg"), minian_data_path, overwrite=True) + C_mrg = save_minian(C_mrg.rename("C_mrg"), minian_data_path, overwrite=True) + S_mrg = save_minian(S_mrg.rename("S_mrg"), minian_data_path, overwrite=True) + gc.collect() + logger.info("Saved merged results to zarr for iteration 2") + # Scale down workers for memory-intensive CNMF spatial updates. 
+ # Preprocessing/motion correction can use 8 workers fine, but + # update_spatial with 71k+ frames needs fewer workers with more + # memory each to avoid OOM on intermediate QR/tensordot products. + n_frames = varr.sizes["frame"] + if n_frames > 5000 and n_workers > 2: + logger.info( + f"Large dataset ({n_frames} frames) — scaling to 2 workers " + f"for CNMF iteration 2 to avoid memory pressure" + ) + client.close() + cluster.close() + gc.collect() + + cnmf_n_workers = 2 + cnmf_mem_per_worker = f"{int(memory_total * 0.8 / cnmf_n_workers / (1024**3))}GB" + cluster = LocalCluster( + n_workers=cnmf_n_workers, + memory_limit=cnmf_mem_per_worker, + resources={"MEM": 1}, + threads_per_worker=2, + dashboard_address=None, + ) + annotation_plugin = TaskAnnotation() + cluster.scheduler.add_plugin(annotation_plugin) + client = Client(cluster) + # Re-register the solve_triangular guard on new workers + client.register_plugin(SolveTriangularNaNGuard()) + logger.info( + f"Restarted cluster: {cnmf_n_workers} workers × " + f"{cnmf_mem_per_worker} each" + ) + + # Reload zarr-backed arrays for the new cluster + Y_fm_chk = save_minian( + xr.open_zarr(minian_data_path + "/Y_fm_chk.zarr")["Y_fm_chk"].rename("Y_fm_chk"), + minian_data_path, overwrite=True, + ) + Y_hw_chk = save_minian( + xr.open_zarr(minian_data_path + "/Y_hw_chk.zarr")["Y_hw_chk"].rename("Y_hw_chk"), + minian_data_path, overwrite=True, + ) + sn_spatial = save_minian( + xr.open_zarr(minian_data_path + "/sn_spatial.zarr")["sn_spatial"].rename("sn_spatial"), + minian_data_path, overwrite=True, + ) # ======================================== # CNMF ITERATION 2 From 65ba4a23aa9305894a1af9d2254ddbc2e1d9b122 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Fri, 13 Feb 2026 08:58:38 -0500 Subject: [PATCH 28/36] fix worker plugin syntax attempt 1 --- element_miniscope/miniscope.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 
77c1954..4aa191e 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1270,7 +1270,7 @@ def sanitize_array(arr, name="array"): annotation_plugin = TaskAnnotation() cluster.scheduler.add_plugin(annotation_plugin) client = Client(cluster) - client.register_plugin(SolveTriangularNaNGuard()) + client.register_worker_plugin(SolveTriangularNaNGuard()) try: # ===== LOAD VIDEOS ===== @@ -1560,7 +1560,7 @@ def sanitize_array(arr, name="array"): cluster.scheduler.add_plugin(annotation_plugin) client = Client(cluster) # Re-register the solve_triangular guard on new workers - client.register_plugin(SolveTriangularNaNGuard()) + client.register_worker_plugin(SolveTriangularNaNGuard()) logger.info( f"Restarted cluster: {cnmf_n_workers} workers × " f"{cnmf_mem_per_worker} each" From ea45ff0caae79fcd9cfd5f6f40617d643767123d Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Fri, 13 Feb 2026 09:19:38 -0500 Subject: [PATCH 29/36] Remove scaling down workers for now --- element_miniscope/miniscope.py | 46 ---------------------------------- 1 file changed, 46 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 4aa191e..19af1e4 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1533,52 +1533,6 @@ def sanitize_array(arr, name="array"): S_mrg = save_minian(S_mrg.rename("S_mrg"), minian_data_path, overwrite=True) gc.collect() logger.info("Saved merged results to zarr for iteration 2") - # Scale down workers for memory-intensive CNMF spatial updates. - # Preprocessing/motion correction can use 8 workers fine, but - # update_spatial with 71k+ frames needs fewer workers with more - # memory each to avoid OOM on intermediate QR/tensordot products. 
- n_frames = varr.sizes["frame"] - if n_frames > 5000 and n_workers > 2: - logger.info( - f"Large dataset ({n_frames} frames) — scaling to 2 workers " - f"for CNMF iteration 2 to avoid memory pressure" - ) - client.close() - cluster.close() - gc.collect() - - cnmf_n_workers = 2 - cnmf_mem_per_worker = f"{int(memory_total * 0.8 / cnmf_n_workers / (1024**3))}GB" - cluster = LocalCluster( - n_workers=cnmf_n_workers, - memory_limit=cnmf_mem_per_worker, - resources={"MEM": 1}, - threads_per_worker=2, - dashboard_address=None, - ) - annotation_plugin = TaskAnnotation() - cluster.scheduler.add_plugin(annotation_plugin) - client = Client(cluster) - # Re-register the solve_triangular guard on new workers - client.register_worker_plugin(SolveTriangularNaNGuard()) - logger.info( - f"Restarted cluster: {cnmf_n_workers} workers × " - f"{cnmf_mem_per_worker} each" - ) - - # Reload zarr-backed arrays for the new cluster - Y_fm_chk = save_minian( - xr.open_zarr(minian_data_path + "/Y_fm_chk.zarr")["Y_fm_chk"].rename("Y_fm_chk"), - minian_data_path, overwrite=True, - ) - Y_hw_chk = save_minian( - xr.open_zarr(minian_data_path + "/Y_hw_chk.zarr")["Y_hw_chk"].rename("Y_hw_chk"), - minian_data_path, overwrite=True, - ) - sn_spatial = save_minian( - xr.open_zarr(minian_data_path + "/sn_spatial.zarr")["sn_spatial"].rename("sn_spatial"), - minian_data_path, overwrite=True, - ) # ======================================== # CNMF ITERATION 2 From 5476a45707df6e4e40f562be763bbcbff02926c7 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Mon, 16 Feb 2026 15:45:05 -0500 Subject: [PATCH 30/36] Add dask timeouts --- element_miniscope/miniscope.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 19af1e4..5256b5f 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -983,6 +983,10 @@ def _run_processing(): # keep retrying until OOM — fail fast after 3 attempts. 
dask.config.set({ "distributed.scheduler.allowed-failures": 3, + "distributed.comm.timeouts.connect": "300s", + "distributed.comm.timeouts.tcp": "7200s", # 2 hours + "distributed.worker.lifetime.stale": "7200s", # don't mark workers stale quickly + "distributed.scheduler.work-stealing": False, }) # ===== APPLY COMPATIBILITY PATCHES ===== @@ -1349,7 +1353,7 @@ def sanitize_array(arr, name="array"): # Save two versions with different chunking logger.info("Saving motion-corrected video (frame-chunked)...") Y_fm_chk = save_minian( - Y.astype(float).rename("Y_fm_chk"), + Y.astype(np.float32).rename("Y_fm_chk"), minian_data_path, overwrite=True, ) From 1c89ba1f05849cc64776c963545101ee69aa2304 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Tue, 17 Feb 2026 09:59:13 -0500 Subject: [PATCH 31/36] Increase worker timeout and do not save movie for now --- element_miniscope/miniscope.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 5256b5f..d2f3602 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -984,9 +984,9 @@ def _run_processing(): dask.config.set({ "distributed.scheduler.allowed-failures": 3, "distributed.comm.timeouts.connect": "300s", - "distributed.comm.timeouts.tcp": "7200s", # 2 hours - "distributed.worker.lifetime.stale": "7200s", # don't mark workers stale quickly + "distributed.comm.timeouts.tcp": "7200s", "distributed.scheduler.work-stealing": False, + "distributed.scheduler.worker-ttl": "7200s", # ← ADD THIS: 1 hour before declaring worker dead }) # ===== APPLY COMPATIBILITY PATCHES ===== @@ -1366,9 +1366,9 @@ def sanitize_array(arr, name="array"): chunks={"frame": -1, "height": chk["height"], "width": chk["width"]}, ) - # Save motion corrected video as mp4 - logger.info("Writing motion corrected video...") - write_video(Y_fm_chk, "motion_corrected.mp4", str(output_dir)) + # # Save motion corrected video as mp4 + # 
logger.info("Writing motion corrected video...") + # write_video(Y_fm_chk, "motion_corrected.mp4", str(output_dir)) # Create and save max projection logger.info("Computing max projection...") From 81ad94c0e7387e72841eceac664b489025f90b7c Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Mon, 23 Feb 2026 13:08:53 -0500 Subject: [PATCH 32/36] Add Docker container execution path for original minian (py38) When extra_dj_params.docker_image is set in ProcessingParamSet, the make_compute() method spawns a sibling Python 3.8 container running the original denisecailab/minian instead of the inline modernized path. This ensures result parity with the customer team's original pipeline. Co-Authored-By: Claude Opus 4.6 --- element_miniscope/miniscope.py | 1373 +++++++++++++++++--------------- 1 file changed, 742 insertions(+), 631 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index d2f3602..69fc3b1 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -969,665 +969,776 @@ def _run_processing(): ] elif method == "minian": - import multiprocessing - import psutil - import shutil - import dask - import dask.array as darr - import scipy.linalg # <<< NEW: for solve_triangular patch - import xarray as xr # <<< NEW: for sanitize_array - from dask.distributed import Client, LocalCluster - - # <<< NEW: Prevent infinite retry loops on NaN/Inf errors >>> - # If a Dask task fails (e.g. solve_triangular ValueError), don't - # keep retrying until OOM — fail fast after 3 attempts. 
- dask.config.set({ - "distributed.scheduler.allowed-failures": 3, - "distributed.comm.timeouts.connect": "300s", - "distributed.comm.timeouts.tcp": "7200s", - "distributed.scheduler.work-stealing": False, - "distributed.scheduler.worker-ttl": "7200s", # ← ADD THIS: 1 hour before declaring worker dead - }) - - # ===== APPLY COMPATIBILITY PATCHES ===== - - # Fix for NetworkX 3.0+ API changes - import networkx as nx - import scipy.sparse - from scipy.sparse import issparse - import minian.cnmf as cnmf_module - - def label_connected_fixed(adj, only_connected=False): - """Fixed label_connected for NetworkX 3.0+ compatibility.""" - if issparse(adj): - adj = adj.toarray() - adj = adj.copy() - np.fill_diagonal(adj, 0) - adj = np.triu(adj) - g = nx.from_numpy_array(adj) - labels = np.zeros(adj.shape[0], dtype=int) - for icomp, comp in enumerate(nx.connected_components(g)): - for node in comp: - labels[node] = icomp - if only_connected: - iso_mask = np.array([len(c) == 1 for c in nx.connected_components(g)]) - labels[np.isin(labels, np.where(iso_mask)[0])] = -1 - return labels - - cnmf_module.label_connected = label_connected_fixed - - # Fix for sparse array auto-densification - import sparse - import sparse.numba_backend._sparse_array as sparse_mod - sparse_mod.AUTO_DENSIFY = True - - # Fix for darr.block with mixed sparse array types - _original_darr_block = darr.block - - def patched_darr_block(arrays, allow_unknown_chunksizes=False): - """Patched darr.block that ensures sparse array type consistency.""" - def convert_to_coo(arr): - if arr is None: - return arr - if isinstance(arr, sparse.COO): - return arr - if isinstance(arr, sparse.SparseArray): - return sparse.COO(arr) - if isinstance(arr, np.ndarray): - return sparse.COO.from_numpy(arr) - if hasattr(arr, 'todense'): - return sparse.COO.from_numpy(np.asarray(arr.todense())) - return arr - - def recursive_convert(obj): - if isinstance(obj, list): - return [recursive_convert(item) for item in obj] - elif 
isinstance(obj, np.ndarray) and obj.dtype == object: - result = np.empty_like(obj) - for idx in np.ndindex(obj.shape): - result[idx] = convert_to_coo(obj[idx]) - return result - else: - return convert_to_coo(obj) - - try: - return _original_darr_block(arrays, allow_unknown_chunksizes=allow_unknown_chunksizes) - except ValueError as e: - if "All arrays must be instances of SparseArray" in str(e): - converted = recursive_convert(arrays) - return _original_darr_block(converted, allow_unknown_chunksizes=allow_unknown_chunksizes) - raise - - darr.block = patched_darr_block - darr.core.block = patched_darr_block - - # ===== PATCH: sparse/dense concatenation compatibility ===== - import sparse - import sparse.numba_backend._coo.common as _sparse_coo_common - import numpy as np - - _original_sparse_concat = _sparse_coo_common.concatenate - - def _patched_sparse_concat(arrays, axis=0): - """Convert any dense arrays to sparse.COO before concatenation.""" - converted = [] - for arr in arrays: - if isinstance(arr, sparse.SparseArray): - converted.append(arr) - elif isinstance(arr, np.ndarray): - converted.append(sparse.COO.from_numpy(arr)) - else: - try: - converted.append(sparse.COO.from_numpy(np.asarray(arr))) - except Exception: - converted.append(arr) - return _original_sparse_concat(converted, axis=axis) - - _sparse_coo_common.concatenate = _patched_sparse_concat - - # ===== PATCH: sparse/dense concatenation compatibility ===== - import sparse - import numpy as np - import sparse.numba_backend._coo.common as _sparse_coo_common - - _original_check = _sparse_coo_common.check_consistent_fill_value - - def _patched_check(arrays): - """Auto-convert any dense arrays to sparse.COO before validation.""" - for i, arr in enumerate(arrays): - if not isinstance(arr, sparse.SparseArray): - arrays[i] = sparse.COO.from_numpy(np.asarray(arr)) - return _original_check(arrays) - - _sparse_coo_common.check_consistent_fill_value = _patched_check - # Patch update_temporal to handle sparse.COO 
arrays - _original_update_temporal = cnmf_module.update_temporal - - def patched_update_temporal(A, C, b=None, f=None, Y=None, YrA=None, - noise_freq=0.25, p=2, add_lag="p", jac_thres=0.1, - sparse_penal=1, bseg=None, med_wd=None, - zero_thres=1e-8, max_iters=200, use_smooth=True, - normalize=True, warm_start=False, post_scal=False, - scs_fallback=False, concurrent_update=False): - """Patched update_temporal that handles sparse.COO arrays properly.""" - _original_csc_matrix = scipy.sparse.csc_matrix - - class PatchedCSCMatrix(scipy.sparse.csc_matrix): - def __new__(cls, arg1, shape=None, dtype=None, copy=False): - if hasattr(arg1, 'todense'): - arg1 = arg1.todense() - return _original_csc_matrix(arg1, shape=shape, dtype=dtype, copy=copy) - - scipy.sparse.csc_matrix = PatchedCSCMatrix - try: - result = _original_update_temporal( - A, C, b=b, f=f, Y=Y, YrA=YrA, noise_freq=noise_freq, - p=p, add_lag=add_lag, jac_thres=jac_thres, sparse_penal=sparse_penal, - bseg=bseg, med_wd=med_wd, zero_thres=zero_thres, max_iters=max_iters, - use_smooth=use_smooth, normalize=normalize, warm_start=warm_start, - post_scal=post_scal, scs_fallback=scs_fallback, - concurrent_update=concurrent_update - ) - finally: - scipy.sparse.csc_matrix = _original_csc_matrix - return result - - cnmf_module.update_temporal = patched_update_temporal - - # <<< NEW PATCH: scipy.linalg.solve_triangular NaN/Inf guard >>> - # Prevents ValueError('array must not contain infs or NaNs') from - # crashing Dask tasks and triggering infinite retry → OOM loops. 
- from distributed import WorkerPlugin + extra_params = params.pop("extra_dj_params", {}) + docker_image = extra_params.get("docker_image", None) - class SolveTriangularNaNGuard(WorkerPlugin): - """Patch scipy.linalg.solve_triangular on each Dask worker process - to sanitize NaN/Inf inputs instead of crashing.""" - - def setup(self, worker): - import scipy.linalg - import numpy as np - - _original = scipy.linalg.solve_triangular - - def patched_solve_triangular(a, b, **kwargs): - a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0) - b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0) - return _original(a, b, **kwargs) - - scipy.linalg.solve_triangular = patched_solve_triangular + if docker_image: + # === DOCKER CONTAINER EXECUTION PATH === + # Runs the original minian (Python 3.8) in a sibling container + import docker as docker_sdk + import json as json_mod - logger.info("Applied minian compatibility patches (NetworkX, sparse arrays, dask.block, solve_triangular NaN guard)") + logger.info( + f"Running minian via Docker container: {docker_image}" + ) - # Now import minian modules (after patches are applied) - from minian.cnmf import ( - get_noise_fft, - unit_merge, - update_spatial, - update_temporal, - update_background, - ) - from minian.initialization import ( - initA, - initC, - ks_refine, - pnr_refine, - seeds_init, - seeds_merge, - ) - from minian.motion_correction import apply_transform, estimate_motion - from minian.preprocessing import denoise, remove_background - from minian.utilities import ( - TaskAnnotation, - get_optimal_chk, - load_videos, - save_minian, - ) - from minian.visualization import write_video + # Resolve host-level paths for sibling container volume mounts + host_s3_root = os.environ["HOST_S3_ROOT"] + host_outbox = os.environ["HOST_OUTBOX"] + container_raw_root = os.environ.get( + "RAW_ROOT_DATA_DIR", "/home/jovyan/s3/inbox" + ) + container_processed_root = os.environ.get( + "PROCESSED_ROOT_DATA_DIR", "/home/jovyan/efs/outbox" + ) - # 
===== CONTAINER-AWARE MEMORY DETECTION ===== - def get_container_memory_limit(): - """Get memory limit respecting container cgroups (v1 and v2).""" - try: - with open("/sys/fs/cgroup/memory.max") as f: - limit = f.read().strip() - if limit != "max": - return int(limit) - except (FileNotFoundError, PermissionError): - pass - try: - with open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f: - limit = int(f.read().strip()) - if limit < 9223372036854771712: - return limit - except (FileNotFoundError, PermissionError): - pass - return psutil.virtual_memory().total - - # ===== DASK CLUSTER CONFIGURATION ===== - memory_total = get_container_memory_limit() - n_workers = int( - os.getenv( - "MINIAN_NWORKERS", - max(1, min(int(multiprocessing.cpu_count() * 0.4), 8)), + # Map container paths -> host paths for sibling container mounts + input_dir = str(pathlib.Path(avi_files[0]).parent) + input_dir_host = input_dir.replace( + container_raw_root, host_s3_root + "/inbox", 1 + ) + output_dir_host = str(output_dir).replace( + container_processed_root, host_outbox, 1 ) - ) - memory_per_worker = int(memory_total * 0.8 / n_workers) - memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT") - if memory_limit_env: + # Write config JSON to shared filesystem + n_workers = int(os.getenv("MINIAN_NWORKERS", 2)) + memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT", "4") memory_limit = ( memory_limit_env if any(c.isalpha() for c in memory_limit_env) else f"{memory_limit_env}GB" ) - else: - memory_limit = f"{memory_per_worker // (1024**3)}GB" - - # Set minian intermediate storage paths - minian_data_path = str(output_dir / "minian_data") - os.makedirs(minian_data_path, exist_ok=True) - os.environ["MINIAN_INTERMEDIATE"] = minian_data_path - - # Helper function to clean intermediate files - def clean_intermediate_files(directory_path, file_names): - """Remove intermediate zarr files to avoid conflicts.""" - for fname in file_names: - path = os.path.join(directory_path, f"{fname}.zarr") - if 
os.path.exists(path): - shutil.rmtree(path) - - def sanitize_array(arr, name="array"): - """Replace NaN/Inf in an xarray DataArray with 0. - Works with both lazy (dask-backed) and in-memory arrays.""" - logger.info(f"Sanitizing {name} (replacing NaN/Inf with 0)...") - if hasattr(arr.data, "dask"): - sanitized = xr.apply_ufunc( - lambda x: np.nan_to_num( - x, nan=0.0, posinf=0.0, neginf=0.0 - ), - arr, - dask="parallelized", - output_dtypes=[arr.dtype], - ) - else: - sanitized = xr.DataArray( - np.nan_to_num( - arr.values, nan=0.0, posinf=0.0, neginf=0.0 - ), - coords=arr.coords, - dims=arr.dims, - ) - return sanitized - - # Start Dask cluster - logger.info( - f"Starting Minian processing with {n_workers} workers, " - f"{memory_limit} memory limit per worker..." - ) - cluster = LocalCluster( - n_workers=n_workers, - memory_limit=memory_limit, - resources={"MEM": 1}, - threads_per_worker=2, - dashboard_address=None, - ) - annotation_plugin = TaskAnnotation() - cluster.scheduler.add_plugin(annotation_plugin) - client = Client(cluster) - client.register_worker_plugin(SolveTriangularNaNGuard()) - - try: - # ===== LOAD VIDEOS ===== - logger.info("Loading videos...") - default_load_params = { - "pattern": r".*\.avi$", - "dtype": np.uint8, - "downsample": dict(frame=1, height=1, width=1), - "downsample_strategy": "subset", - } - param_load_videos = { - **default_load_params, - **params.get("param_load_videos", {}), + config = { + "input_dir": "/data/input", + "output_dir": "/data/output", + "intermediate_dir": "/data/output/minian_data", + "params": params, + "n_workers": n_workers, + "memory_limit": memory_limit, } - varr = load_videos( - str(pathlib.Path(avi_files[0]).parent), **param_load_videos - ) - chk, _ = get_optimal_chk(varr, dtype=float) - - # Save raw video to zarr - logger.info("Saving raw video to zarr...") - varr = save_minian( - varr.chunk({"frame": chk["frame"], "height": -1, "width": -1}).rename("varr"), - minian_data_path, - overwrite=True, + config_path 
= output_dir / "minian_config.json" + with open(config_path, "w") as f: + json_mod.dump(config, f, indent=2, default=str) + + # Spawn sibling container + client = docker_sdk.from_env() + container_result = client.containers.run( + image=docker_image, + command=[ + "python", + "/opt/run_minian.py", + "/data/output/minian_config.json", + ], + volumes={ + input_dir_host: {"bind": "/data/input", "mode": "ro"}, + output_dir_host: {"bind": "/data/output", "mode": "rw"}, + }, + mem_limit=extra_params.get("container_mem_limit", "24g"), + environment={ + "MINIAN_NWORKERS": str(n_workers), + "MINIAN_MEMORY_LIMIT": memory_limit_env, + "MKL_NUM_THREADS": "1", + "OPENBLAS_NUM_THREADS": "1", + "OMP_NUM_THREADS": "1", + }, + remove=True, + detach=False, + stdout=True, + stderr=True, ) logger.info( - f"Loaded video: {varr.sizes['frame']} frames, " - f"{varr.sizes['height']}x{varr.sizes['width']} pixels" - ) - - # ===== PREPROCESSING ===== - logger.info("Preprocessing: glow removal...") - varr_min = varr.min("frame").compute() - varr_ref = varr - varr_min - varr_ref = varr_ref.clip(min=0) - - logger.info("Preprocessing: denoising...") - param_denoise = params.get( - "param_denoise", {"method": "median", "ksize": 7} - ) - varr_ref = denoise(varr_ref, **param_denoise) - - logger.info("Preprocessing: background removal...") - param_background_removal = params.get( - "param_background_removal", {"method": "tophat", "wnd": 10} + f"Container output:\n{container_result.decode()}" ) - varr_ref = remove_background(varr_ref, **param_background_removal) - # Keep it chunked - varr_ref = varr_ref.chunk({"frame": chk["frame"], "height": -1, "width": -1}) - - # ===== MOTION CORRECTION ===== - logger.info("Estimating motion...") - param_estimate_motion = params.get( - "param_estimate_motion", {"dim": "frame"} - ) - motion = estimate_motion(varr_ref, **param_estimate_motion) - motion = save_minian( - motion.rename("motion").chunk({"frame": chk["frame"]}), - minian_data_path, - overwrite=True, - ) - 
- logger.info("Applying motion correction...") - Y = apply_transform(varr_ref, motion, fill=0) - - # <<< NEW: Sanitize after motion correction >>> - # apply_transform can introduce NaN at frame borders where the - # shift goes beyond the image boundary. The fill=0 param should - # handle this, but edge cases slip through on large datasets - # with float64 precision. This prevents downstream - # solve_triangular crashes in CNMF. - Y = sanitize_array(Y, "Y_post_motion_correction") - - # Save two versions with different chunking - logger.info("Saving motion-corrected video (frame-chunked)...") - Y_fm_chk = save_minian( - Y.astype(np.float32).rename("Y_fm_chk"), - minian_data_path, - overwrite=True, - ) - - logger.info("Saving motion-corrected video (spatial-chunked)...") - Y_hw_chk = save_minian( - Y_fm_chk.rename("Y_hw_chk"), - minian_data_path, - overwrite=True, - chunks={"frame": -1, "height": chk["height"], "width": chk["width"]}, - ) - - # # Save motion corrected video as mp4 - # logger.info("Writing motion corrected video...") - # write_video(Y_fm_chk, "motion_corrected.mp4", str(output_dir)) + # Verify completion marker + if (output_dir / ".minian_error").exists(): + with open(output_dir / ".minian_error") as f: + err = json_mod.load(f) + raise RuntimeError( + f"Minian container error: {err.get('error')}" + ) + if not (output_dir / ".minian_complete").exists(): + raise RuntimeError( + "Minian container exited without completion marker" + ) - # Create and save max projection - logger.info("Computing max projection...") - max_proj = Y_fm_chk.max("frame").compute() - max_proj = save_minian(max_proj.rename("max_proj"), minian_data_path, overwrite=True) + # Load results (reuses existing MinianLoader) + minian_loader = MinianLoader(str(output_dir)) + key["processing_time"] = minian_loader.creation_time + key["package_version"] = "1.2.1-py38-docker" - # ===== SEED INITIALIZATION ===== - logger.info("Initializing seeds...") - param_seeds_init = params.get( - 
"param_seeds_init", + file_entries = [ { - "wnd_size": 1000, - "method": "rolling", - "stp_size": 500, - "max_wnd": 15, - "diff_thres": 3, - }, - ) - seeds = seeds_init(Y_fm_chk, **param_seeds_init) - logger.info(f"Initial seeds: {len(seeds)}") - - logger.info("Refining seeds with PNR...") - param_pnr_refine = params.get( - "param_pnr_refine", {"noise_freq": 0.06, "thres": 1} - ) - seeds, pnr, gmm = pnr_refine(Y_hw_chk, seeds, **param_pnr_refine) - logger.info(f"Seeds after PNR refine: {seeds['mask_pnr'].sum()} / {len(seeds)}") - - logger.info("Refining seeds with KS test...") - param_ks_refine = params.get("param_ks_refine", {"sig": 0.05}) - seeds = ks_refine(Y_hw_chk, seeds, **param_ks_refine) - logger.info(f"Seeds after KS refine: {seeds['mask_ks'].sum()} / {len(seeds)}") - - logger.info("Merging seeds...") - param_seeds_merge = params.get( - "param_seeds_merge", - {"thres_dist": 10, "thres_corr": 0.8, "noise_freq": 0.06}, - ) - seeds_final = seeds[seeds["mask_ks"] & seeds["mask_pnr"]].reset_index(drop=True) - seeds_final = seeds_merge(Y_hw_chk, max_proj, seeds_final, **param_seeds_merge) - n_seeds = seeds_final["mask_mrg"].sum() - logger.info(f"Seeds after merge: {n_seeds} / {len(seeds_final)}") - - if n_seeds == 0: - raise ValueError( - "No seeds remaining after refinement. " - "Consider adjusting param_seeds_init or param_pnr_refine thresholds." 
- ) + **key, + "file_name": f.name, + "file": f.as_posix(), + } + for f in output_dir.rglob("*") + if f.is_file() + ] - # ===== INITIALIZE A AND C ===== - logger.info("Initializing spatial footprints (A)...") - param_initialize = params.get( - "param_initialize", {"thres_corr": 0.8, "wnd": 10, "noise_freq": 0.06} - ) - A_init = initA(Y_hw_chk, seeds_final[seeds_final["mask_mrg"]], **param_initialize) - A_init = save_minian(A_init.rename("A_init"), minian_data_path, overwrite=True) - logger.info(f"A_init shape: {A_init.shape}") - - logger.info("Initializing temporal traces (C)...") - C_init = initC(Y_fm_chk, A_init) - C_init = save_minian( - C_init.rename("C_init"), - minian_data_path, - overwrite=True, - chunks={"unit_id": 1, "frame": -1}, - ) - logger.info(f"C_init shape: {C_init.shape}") - - # Initial unit merge - logger.info("Initial unit merge...") - param_init_merge = params.get("param_init_merge", {"thres_corr": 0.8}) - A, C = unit_merge(A_init, C_init, **param_init_merge) - A = save_minian(A.rename("A"), minian_data_path, overwrite=True) - C = save_minian(C.rename("C"), minian_data_path, overwrite=True) - C_chk = save_minian( - C.rename("C_chk"), - minian_data_path, - overwrite=True, - chunks={"unit_id": -1, "frame": chk["frame"]}, - ) - logger.info(f"Units after initial merge: {A.sizes['unit_id']}") - - # ===== INITIALIZE BACKGROUND ===== - logger.info("Initializing background terms...") - b, f = update_background(Y_fm_chk, A, C_chk) - f = save_minian(f.rename("f"), minian_data_path, overwrite=True) - b = save_minian(b.rename("b"), minian_data_path, overwrite=True) - logger.info(f"Background initialized - b: {b.shape}, f: {f.shape}") - - # ===== COMPUTE NOISE STATISTICS ===== - logger.info("Computing noise statistics...") - param_get_noise = params.get("param_get_noise", {"noise_range": (0.06, 0.5)}) - sn_spatial = get_noise_fft(Y_hw_chk, **param_get_noise) - sn_spatial = save_minian(sn_spatial.rename("sn_spatial"), minian_data_path, overwrite=True) - - # 
======================================== - # CNMF ITERATION 1 - # ======================================== - - # ----- First Spatial Update ----- - logger.info("CNMF Iteration 1: Spatial update...") - param_first_spatial = params.get( - "param_first_spatial", - {"dl_wnd": 10, "sparse_penal": 0.01, "size_thres": (25, None)}, - ) - A_new, mask, norm_fac = update_spatial( - Y_hw_chk, A, C, sn_spatial, **param_first_spatial - ) - C_new = save_minian( - (C.sel(unit_id=mask) * norm_fac).rename("C_new"), - minian_data_path, - overwrite=True, + else: + # === EXISTING INLINE EXECUTION PATH === + import multiprocessing + import psutil + import shutil + import dask + import dask.array as darr + import scipy.linalg # <<< NEW: for solve_triangular patch + import xarray as xr # <<< NEW: for sanitize_array + from dask.distributed import Client, LocalCluster + + # <<< NEW: Prevent infinite retry loops on NaN/Inf errors >>> + # If a Dask task fails (e.g. solve_triangular ValueError), don't + # keep retrying until OOM — fail fast after 3 attempts. 
+ dask.config.set({ + "distributed.scheduler.allowed-failures": 3, + "distributed.comm.timeouts.connect": "300s", + "distributed.comm.timeouts.tcp": "7200s", + "distributed.scheduler.work-stealing": False, + "distributed.scheduler.worker-ttl": "7200s", # ← ADD THIS: 1 hour before declaring worker dead + }) + + # ===== APPLY COMPATIBILITY PATCHES ===== + + # Fix for NetworkX 3.0+ API changes + import networkx as nx + import scipy.sparse + from scipy.sparse import issparse + import minian.cnmf as cnmf_module + + def label_connected_fixed(adj, only_connected=False): + """Fixed label_connected for NetworkX 3.0+ compatibility.""" + if issparse(adj): + adj = adj.toarray() + adj = adj.copy() + np.fill_diagonal(adj, 0) + adj = np.triu(adj) + g = nx.from_numpy_array(adj) + labels = np.zeros(adj.shape[0], dtype=int) + for icomp, comp in enumerate(nx.connected_components(g)): + for node in comp: + labels[node] = icomp + if only_connected: + iso_mask = np.array([len(c) == 1 for c in nx.connected_components(g)]) + labels[np.isin(labels, np.where(iso_mask)[0])] = -1 + return labels + + cnmf_module.label_connected = label_connected_fixed + + # Fix for sparse array auto-densification + import sparse + import sparse.numba_backend._sparse_array as sparse_mod + sparse_mod.AUTO_DENSIFY = True + + # Fix for darr.block with mixed sparse array types + _original_darr_block = darr.block + + def patched_darr_block(arrays, allow_unknown_chunksizes=False): + """Patched darr.block that ensures sparse array type consistency.""" + def convert_to_coo(arr): + if arr is None: + return arr + if isinstance(arr, sparse.COO): + return arr + if isinstance(arr, sparse.SparseArray): + return sparse.COO(arr) + if isinstance(arr, np.ndarray): + return sparse.COO.from_numpy(arr) + if hasattr(arr, 'todense'): + return sparse.COO.from_numpy(np.asarray(arr.todense())) + return arr + + def recursive_convert(obj): + if isinstance(obj, list): + return [recursive_convert(item) for item in obj] + elif 
isinstance(obj, np.ndarray) and obj.dtype == object: + result = np.empty_like(obj) + for idx in np.ndindex(obj.shape): + result[idx] = convert_to_coo(obj[idx]) + return result + else: + return convert_to_coo(obj) + + try: + return _original_darr_block(arrays, allow_unknown_chunksizes=allow_unknown_chunksizes) + except ValueError as e: + if "All arrays must be instances of SparseArray" in str(e): + converted = recursive_convert(arrays) + return _original_darr_block(converted, allow_unknown_chunksizes=allow_unknown_chunksizes) + raise + + darr.block = patched_darr_block + darr.core.block = patched_darr_block + + # ===== PATCH: sparse/dense concatenation compatibility ===== + import sparse + import sparse.numba_backend._coo.common as _sparse_coo_common + import numpy as np + + _original_sparse_concat = _sparse_coo_common.concatenate + + def _patched_sparse_concat(arrays, axis=0): + """Convert any dense arrays to sparse.COO before concatenation.""" + converted = [] + for arr in arrays: + if isinstance(arr, sparse.SparseArray): + converted.append(arr) + elif isinstance(arr, np.ndarray): + converted.append(sparse.COO.from_numpy(arr)) + else: + try: + converted.append(sparse.COO.from_numpy(np.asarray(arr))) + except Exception: + converted.append(arr) + return _original_sparse_concat(converted, axis=axis) + + _sparse_coo_common.concatenate = _patched_sparse_concat + + # ===== PATCH: sparse/dense concatenation compatibility ===== + import sparse + import numpy as np + import sparse.numba_backend._coo.common as _sparse_coo_common + + _original_check = _sparse_coo_common.check_consistent_fill_value + + def _patched_check(arrays): + """Auto-convert any dense arrays to sparse.COO before validation.""" + for i, arr in enumerate(arrays): + if not isinstance(arr, sparse.SparseArray): + arrays[i] = sparse.COO.from_numpy(np.asarray(arr)) + return _original_check(arrays) + + _sparse_coo_common.check_consistent_fill_value = _patched_check + # Patch update_temporal to handle sparse.COO 
arrays + _original_update_temporal = cnmf_module.update_temporal + + def patched_update_temporal(A, C, b=None, f=None, Y=None, YrA=None, + noise_freq=0.25, p=2, add_lag="p", jac_thres=0.1, + sparse_penal=1, bseg=None, med_wd=None, + zero_thres=1e-8, max_iters=200, use_smooth=True, + normalize=True, warm_start=False, post_scal=False, + scs_fallback=False, concurrent_update=False): + """Patched update_temporal that handles sparse.COO arrays properly.""" + _original_csc_matrix = scipy.sparse.csc_matrix + + class PatchedCSCMatrix(scipy.sparse.csc_matrix): + def __new__(cls, arg1, shape=None, dtype=None, copy=False): + if hasattr(arg1, 'todense'): + arg1 = arg1.todense() + return _original_csc_matrix(arg1, shape=shape, dtype=dtype, copy=copy) + + scipy.sparse.csc_matrix = PatchedCSCMatrix + try: + result = _original_update_temporal( + A, C, b=b, f=f, Y=Y, YrA=YrA, noise_freq=noise_freq, + p=p, add_lag=add_lag, jac_thres=jac_thres, sparse_penal=sparse_penal, + bseg=bseg, med_wd=med_wd, zero_thres=zero_thres, max_iters=max_iters, + use_smooth=use_smooth, normalize=normalize, warm_start=warm_start, + post_scal=post_scal, scs_fallback=scs_fallback, + concurrent_update=concurrent_update + ) + finally: + scipy.sparse.csc_matrix = _original_csc_matrix + return result + + cnmf_module.update_temporal = patched_update_temporal + + # <<< NEW PATCH: scipy.linalg.solve_triangular NaN/Inf guard >>> + # Prevents ValueError('array must not contain infs or NaNs') from + # crashing Dask tasks and triggering infinite retry → OOM loops. 
+ from distributed import WorkerPlugin + + class SolveTriangularNaNGuard(WorkerPlugin): + """Patch scipy.linalg.solve_triangular on each Dask worker process + to sanitize NaN/Inf inputs instead of crashing.""" + + def setup(self, worker): + import scipy.linalg + import numpy as np + + _original = scipy.linalg.solve_triangular + + def patched_solve_triangular(a, b, **kwargs): + a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0) + b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0) + return _original(a, b, **kwargs) + + scipy.linalg.solve_triangular = patched_solve_triangular + + logger.info("Applied minian compatibility patches (NetworkX, sparse arrays, dask.block, solve_triangular NaN guard)") + + # Now import minian modules (after patches are applied) + from minian.cnmf import ( + get_noise_fft, + unit_merge, + update_spatial, + update_temporal, + update_background, ) - C_chk_new = save_minian( - (C_chk.sel(unit_id=mask) * norm_fac).rename("C_chk_new"), - minian_data_path, - overwrite=True, + from minian.initialization import ( + initA, + initC, + ks_refine, + pnr_refine, + seeds_init, + seeds_merge, ) - logger.info(f"Units after first spatial update: {A_new.sizes['unit_id']}") - - # Update background after first spatial - logger.info("Updating background...") - b_new, f_new = update_background(Y_fm_chk, A_new, C_chk_new) - - # ----- First Temporal Update ----- - logger.info("CNMF Iteration 1: Temporal update...") - param_first_temporal = params.get( - "param_first_temporal", - { - "noise_freq": 0.06, - "sparse_penal": 1, - "p": 1, - "add_lag": 20, - "jac_thres": 0.2, - }, + from minian.motion_correction import apply_transform, estimate_motion + from minian.preprocessing import denoise, remove_background + from minian.utilities import ( + TaskAnnotation, + get_optimal_chk, + load_videos, + save_minian, ) - C_new, S_new, b0_new, c0_new, g, mask = update_temporal( - A_new, C_new, - Y=Y_fm_chk, - b=b_new, f=f_new, - **param_first_temporal + from 
minian.visualization import write_video + + # ===== CONTAINER-AWARE MEMORY DETECTION ===== + def get_container_memory_limit(): + """Get memory limit respecting container cgroups (v1 and v2).""" + try: + with open("/sys/fs/cgroup/memory.max") as f: + limit = f.read().strip() + if limit != "max": + return int(limit) + except (FileNotFoundError, PermissionError): + pass + try: + with open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f: + limit = int(f.read().strip()) + if limit < 9223372036854771712: + return limit + except (FileNotFoundError, PermissionError): + pass + return psutil.virtual_memory().total + + # ===== DASK CLUSTER CONFIGURATION ===== + memory_total = get_container_memory_limit() + n_workers = int( + os.getenv( + "MINIAN_NWORKERS", + max(1, min(int(multiprocessing.cpu_count() * 0.4), 8)), + ) ) - logger.info(f"Units after first temporal update: {C_new.sizes['unit_id']}") - C_new = sanitize_array(C_new, "C_new_pre_merge") - S_new = sanitize_array(S_new, "S_new_pre_merge") - # ----- First Merge ----- - logger.info("CNMF Iteration 1: Merging units...") - param_first_merge = params.get("param_first_merge", {"thres_corr": 0.8}) - - # <<< PATCHED: unit_merge coordinate fix - # update_temporal returns C_new/S_new already filtered by mask internally. - # Sync A with C_new's actual coordinates instead of using the boolean mask, - # which can cause alignment mismatches on large datasets. 
- A_filtered = A_new.sel(unit_id=C_new.coords["unit_id"].values) + memory_per_worker = int(memory_total * 0.8 / n_workers) + + memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT") + if memory_limit_env: + memory_limit = ( + memory_limit_env + if any(c.isalpha() for c in memory_limit_env) + else f"{memory_limit_env}GB" + ) + else: + memory_limit = f"{memory_per_worker // (1024**3)}GB" + + # Set minian intermediate storage paths + minian_data_path = str(output_dir / "minian_data") + os.makedirs(minian_data_path, exist_ok=True) + os.environ["MINIAN_INTERMEDIATE"] = minian_data_path + + # Helper function to clean intermediate files + def clean_intermediate_files(directory_path, file_names): + """Remove intermediate zarr files to avoid conflicts.""" + for fname in file_names: + path = os.path.join(directory_path, f"{fname}.zarr") + if os.path.exists(path): + shutil.rmtree(path) + + def sanitize_array(arr, name="array"): + """Replace NaN/Inf in an xarray DataArray with 0. + Works with both lazy (dask-backed) and in-memory arrays.""" + logger.info(f"Sanitizing {name} (replacing NaN/Inf with 0)...") + if hasattr(arr.data, "dask"): + sanitized = xr.apply_ufunc( + lambda x: np.nan_to_num( + x, nan=0.0, posinf=0.0, neginf=0.0 + ), + arr, + dask="parallelized", + output_dtypes=[arr.dtype], + ) + else: + sanitized = xr.DataArray( + np.nan_to_num( + arr.values, nan=0.0, posinf=0.0, neginf=0.0 + ), + coords=arr.coords, + dims=arr.dims, + ) + return sanitized + + # Start Dask cluster logger.info( - f"Unit sync check — A: {A_filtered.sizes['unit_id']}, " - f"C: {C_new.sizes['unit_id']}, S: {S_new.sizes['unit_id']}" - ) - A_mrg, C_mrg, [S_mrg] = unit_merge( - A_filtered, C_new, [S_new], **param_first_merge - ) - # >>> END PATCH - logger.info(f"Units after first merge: {A_mrg.sizes['unit_id']}") - A_mrg = save_minian(A_mrg.rename("A_mrg"), minian_data_path, overwrite=True) - C_mrg = save_minian(C_mrg.rename("C_mrg"), minian_data_path, overwrite=True) - S_mrg = 
save_minian(S_mrg.rename("S_mrg"), minian_data_path, overwrite=True) - gc.collect() - logger.info("Saved merged results to zarr for iteration 2") - - # ======================================== - # CNMF ITERATION 2 - # ======================================== - - # ----- Second Spatial Update ----- - logger.info("CNMF Iteration 2: Spatial update...") - param_second_spatial = params.get( - "param_second_spatial", - {"dl_wnd": 10, "sparse_penal": 0.01, "size_thres": (25, None)}, + f"Starting Minian processing with {n_workers} workers, " + f"{memory_limit} memory limit per worker..." ) - A_new2, mask2, norm_fac2 = update_spatial( - Y_hw_chk, A_mrg, C_mrg, sn_spatial, **param_second_spatial + cluster = LocalCluster( + n_workers=n_workers, + memory_limit=memory_limit, + resources={"MEM": 1}, + threads_per_worker=2, + dashboard_address=None, ) - C_new2 = C_mrg.sel(unit_id=mask2) * norm_fac2 - logger.info(f"Units after second spatial update: {A_new2.sizes['unit_id']}") - - # Update background after second spatial - logger.info("Updating background...") - b_new2, f_new2 = update_background(Y_fm_chk, A_new2, C_new2) - - # ----- Second Temporal Update ----- - logger.info("CNMF Iteration 2: Temporal update...") - param_second_temporal = params.get( - "param_second_temporal", + annotation_plugin = TaskAnnotation() + cluster.scheduler.add_plugin(annotation_plugin) + client = Client(cluster) + client.register_worker_plugin(SolveTriangularNaNGuard()) + + try: + # ===== LOAD VIDEOS ===== + logger.info("Loading videos...") + default_load_params = { + "pattern": r".*\.avi$", + "dtype": np.uint8, + "downsample": dict(frame=1, height=1, width=1), + "downsample_strategy": "subset", + } + param_load_videos = { + **default_load_params, + **params.get("param_load_videos", {}), + } + varr = load_videos( + str(pathlib.Path(avi_files[0]).parent), **param_load_videos + ) + chk, _ = get_optimal_chk(varr, dtype=float) + + # Save raw video to zarr + logger.info("Saving raw video to zarr...") + 
varr = save_minian( + varr.chunk({"frame": chk["frame"], "height": -1, "width": -1}).rename("varr"), + minian_data_path, + overwrite=True, + ) + logger.info( + f"Loaded video: {varr.sizes['frame']} frames, " + f"{varr.sizes['height']}x{varr.sizes['width']} pixels" + ) + + # ===== PREPROCESSING ===== + logger.info("Preprocessing: glow removal...") + varr_min = varr.min("frame").compute() + varr_ref = varr - varr_min + varr_ref = varr_ref.clip(min=0) + + logger.info("Preprocessing: denoising...") + param_denoise = params.get( + "param_denoise", {"method": "median", "ksize": 7} + ) + varr_ref = denoise(varr_ref, **param_denoise) + + logger.info("Preprocessing: background removal...") + param_background_removal = params.get( + "param_background_removal", {"method": "tophat", "wnd": 10} + ) + varr_ref = remove_background(varr_ref, **param_background_removal) + + # Keep it chunked + varr_ref = varr_ref.chunk({"frame": chk["frame"], "height": -1, "width": -1}) + + # ===== MOTION CORRECTION ===== + logger.info("Estimating motion...") + param_estimate_motion = params.get( + "param_estimate_motion", {"dim": "frame"} + ) + motion = estimate_motion(varr_ref, **param_estimate_motion) + motion = save_minian( + motion.rename("motion").chunk({"frame": chk["frame"]}), + minian_data_path, + overwrite=True, + ) + + logger.info("Applying motion correction...") + Y = apply_transform(varr_ref, motion, fill=0) + + # <<< NEW: Sanitize after motion correction >>> + # apply_transform can introduce NaN at frame borders where the + # shift goes beyond the image boundary. The fill=0 param should + # handle this, but edge cases slip through on large datasets + # with float64 precision. This prevents downstream + # solve_triangular crashes in CNMF. 
+ Y = sanitize_array(Y, "Y_post_motion_correction") + + # Save two versions with different chunking + logger.info("Saving motion-corrected video (frame-chunked)...") + Y_fm_chk = save_minian( + Y.astype(np.float32).rename("Y_fm_chk"), + minian_data_path, + overwrite=True, + ) + + logger.info("Saving motion-corrected video (spatial-chunked)...") + Y_hw_chk = save_minian( + Y_fm_chk.rename("Y_hw_chk"), + minian_data_path, + overwrite=True, + chunks={"frame": -1, "height": chk["height"], "width": chk["width"]}, + ) + + # # Save motion corrected video as mp4 + # logger.info("Writing motion corrected video...") + # write_video(Y_fm_chk, "motion_corrected.mp4", str(output_dir)) + + # Create and save max projection + logger.info("Computing max projection...") + max_proj = Y_fm_chk.max("frame").compute() + max_proj = save_minian(max_proj.rename("max_proj"), minian_data_path, overwrite=True) + + # ===== SEED INITIALIZATION ===== + logger.info("Initializing seeds...") + param_seeds_init = params.get( + "param_seeds_init", + { + "wnd_size": 1000, + "method": "rolling", + "stp_size": 500, + "max_wnd": 15, + "diff_thres": 3, + }, + ) + seeds = seeds_init(Y_fm_chk, **param_seeds_init) + logger.info(f"Initial seeds: {len(seeds)}") + + logger.info("Refining seeds with PNR...") + param_pnr_refine = params.get( + "param_pnr_refine", {"noise_freq": 0.06, "thres": 1} + ) + seeds, pnr, gmm = pnr_refine(Y_hw_chk, seeds, **param_pnr_refine) + logger.info(f"Seeds after PNR refine: {seeds['mask_pnr'].sum()} / {len(seeds)}") + + logger.info("Refining seeds with KS test...") + param_ks_refine = params.get("param_ks_refine", {"sig": 0.05}) + seeds = ks_refine(Y_hw_chk, seeds, **param_ks_refine) + logger.info(f"Seeds after KS refine: {seeds['mask_ks'].sum()} / {len(seeds)}") + + logger.info("Merging seeds...") + param_seeds_merge = params.get( + "param_seeds_merge", + {"thres_dist": 10, "thres_corr": 0.8, "noise_freq": 0.06}, + ) + seeds_final = seeds[seeds["mask_ks"] & 
seeds["mask_pnr"]].reset_index(drop=True) + seeds_final = seeds_merge(Y_hw_chk, max_proj, seeds_final, **param_seeds_merge) + n_seeds = seeds_final["mask_mrg"].sum() + logger.info(f"Seeds after merge: {n_seeds} / {len(seeds_final)}") + + if n_seeds == 0: + raise ValueError( + "No seeds remaining after refinement. " + "Consider adjusting param_seeds_init or param_pnr_refine thresholds." + ) + + # ===== INITIALIZE A AND C ===== + logger.info("Initializing spatial footprints (A)...") + param_initialize = params.get( + "param_initialize", {"thres_corr": 0.8, "wnd": 10, "noise_freq": 0.06} + ) + A_init = initA(Y_hw_chk, seeds_final[seeds_final["mask_mrg"]], **param_initialize) + A_init = save_minian(A_init.rename("A_init"), minian_data_path, overwrite=True) + logger.info(f"A_init shape: {A_init.shape}") + + logger.info("Initializing temporal traces (C)...") + C_init = initC(Y_fm_chk, A_init) + C_init = save_minian( + C_init.rename("C_init"), + minian_data_path, + overwrite=True, + chunks={"unit_id": 1, "frame": -1}, + ) + logger.info(f"C_init shape: {C_init.shape}") + + # Initial unit merge + logger.info("Initial unit merge...") + param_init_merge = params.get("param_init_merge", {"thres_corr": 0.8}) + A, C = unit_merge(A_init, C_init, **param_init_merge) + A = save_minian(A.rename("A"), minian_data_path, overwrite=True) + C = save_minian(C.rename("C"), minian_data_path, overwrite=True) + C_chk = save_minian( + C.rename("C_chk"), + minian_data_path, + overwrite=True, + chunks={"unit_id": -1, "frame": chk["frame"]}, + ) + logger.info(f"Units after initial merge: {A.sizes['unit_id']}") + + # ===== INITIALIZE BACKGROUND ===== + logger.info("Initializing background terms...") + b, f = update_background(Y_fm_chk, A, C_chk) + f = save_minian(f.rename("f"), minian_data_path, overwrite=True) + b = save_minian(b.rename("b"), minian_data_path, overwrite=True) + logger.info(f"Background initialized - b: {b.shape}, f: {f.shape}") + + # ===== COMPUTE NOISE STATISTICS ===== + 
logger.info("Computing noise statistics...") + param_get_noise = params.get("param_get_noise", {"noise_range": (0.06, 0.5)}) + sn_spatial = get_noise_fft(Y_hw_chk, **param_get_noise) + sn_spatial = save_minian(sn_spatial.rename("sn_spatial"), minian_data_path, overwrite=True) + + # ======================================== + # CNMF ITERATION 1 + # ======================================== + + # ----- First Spatial Update ----- + logger.info("CNMF Iteration 1: Spatial update...") + param_first_spatial = params.get( + "param_first_spatial", + {"dl_wnd": 10, "sparse_penal": 0.01, "size_thres": (25, None)}, + ) + A_new, mask, norm_fac = update_spatial( + Y_hw_chk, A, C, sn_spatial, **param_first_spatial + ) + C_new = save_minian( + (C.sel(unit_id=mask) * norm_fac).rename("C_new"), + minian_data_path, + overwrite=True, + ) + C_chk_new = save_minian( + (C_chk.sel(unit_id=mask) * norm_fac).rename("C_chk_new"), + minian_data_path, + overwrite=True, + ) + logger.info(f"Units after first spatial update: {A_new.sizes['unit_id']}") + + # Update background after first spatial + logger.info("Updating background...") + b_new, f_new = update_background(Y_fm_chk, A_new, C_chk_new) + + # ----- First Temporal Update ----- + logger.info("CNMF Iteration 1: Temporal update...") + param_first_temporal = params.get( + "param_first_temporal", + { + "noise_freq": 0.06, + "sparse_penal": 1, + "p": 1, + "add_lag": 20, + "jac_thres": 0.2, + }, + ) + C_new, S_new, b0_new, c0_new, g, mask = update_temporal( + A_new, C_new, + Y=Y_fm_chk, + b=b_new, f=f_new, + **param_first_temporal + ) + logger.info(f"Units after first temporal update: {C_new.sizes['unit_id']}") + C_new = sanitize_array(C_new, "C_new_pre_merge") + S_new = sanitize_array(S_new, "S_new_pre_merge") + # ----- First Merge ----- + logger.info("CNMF Iteration 1: Merging units...") + param_first_merge = params.get("param_first_merge", {"thres_corr": 0.8}) + + # <<< PATCHED: unit_merge coordinate fix + # update_temporal returns C_new/S_new 
already filtered by mask internally. + # Sync A with C_new's actual coordinates instead of using the boolean mask, + # which can cause alignment mismatches on large datasets. + A_filtered = A_new.sel(unit_id=C_new.coords["unit_id"].values) + logger.info( + f"Unit sync check — A: {A_filtered.sizes['unit_id']}, " + f"C: {C_new.sizes['unit_id']}, S: {S_new.sizes['unit_id']}" + ) + A_mrg, C_mrg, [S_mrg] = unit_merge( + A_filtered, C_new, [S_new], **param_first_merge + ) + # >>> END PATCH + logger.info(f"Units after first merge: {A_mrg.sizes['unit_id']}") + A_mrg = save_minian(A_mrg.rename("A_mrg"), minian_data_path, overwrite=True) + C_mrg = save_minian(C_mrg.rename("C_mrg"), minian_data_path, overwrite=True) + S_mrg = save_minian(S_mrg.rename("S_mrg"), minian_data_path, overwrite=True) + gc.collect() + logger.info("Saved merged results to zarr for iteration 2") + + # ======================================== + # CNMF ITERATION 2 + # ======================================== + + # ----- Second Spatial Update ----- + logger.info("CNMF Iteration 2: Spatial update...") + param_second_spatial = params.get( + "param_second_spatial", + {"dl_wnd": 10, "sparse_penal": 0.01, "size_thres": (25, None)}, + ) + A_new2, mask2, norm_fac2 = update_spatial( + Y_hw_chk, A_mrg, C_mrg, sn_spatial, **param_second_spatial + ) + C_new2 = C_mrg.sel(unit_id=mask2) * norm_fac2 + logger.info(f"Units after second spatial update: {A_new2.sizes['unit_id']}") + + # Update background after second spatial + logger.info("Updating background...") + b_new2, f_new2 = update_background(Y_fm_chk, A_new2, C_new2) + + # ----- Second Temporal Update ----- + logger.info("CNMF Iteration 2: Temporal update...") + param_second_temporal = params.get( + "param_second_temporal", + { + "noise_freq": 0.06, + "sparse_penal": 1, + "p": 1, + "add_lag": 20, + "jac_thres": 0.4, + }, + ) + C_final, S_final, b0_final, c0_final, g_final, mask_final = update_temporal( + A_new2, C_new2, + Y=Y_fm_chk, + b=b_new2, f=f_new2, + 
**param_second_temporal + ) + C_final = sanitize_array(C_final, "C_final_post_temporal_2") + S_final = sanitize_array(S_final, "S_final_post_temporal_2") + + # <<< PATCHED: unit_merge coordinate fix (same pattern as iteration 1) + # Sync A with C_final's actual coordinates + A_final = A_new2.sel(unit_id=C_final.coords["unit_id"].values) + # >>> END PATCH + logger.info(f"Final units: {A_final.sizes['unit_id']}") + + # ===== SAVE FINAL RESULTS ===== + logger.info("Saving final results to output directory...") + final_save_params = {"dpath": str(output_dir), "overwrite": True} + + A_final = save_minian(A_final.rename("A"), **final_save_params) + C_final = save_minian(C_final.rename("C"), **final_save_params) + S_final = save_minian(S_final.rename("S"), **final_save_params) + b_final = save_minian(b_new2.rename("b"), **final_save_params) + f_final = save_minian(f_new2.rename("f"), **final_save_params) + motion_final = save_minian(motion.rename("motion"), **final_save_params) + max_proj_final = save_minian(max_proj.rename("max_proj"), **final_save_params) + + logger.info( + f"Minian processing complete. {A_final.sizes['unit_id']} units detected." 
+ ) + + except Exception as e: + logger.error(f"Minian processing failed: {e}") + raise e + finally: + client.close() + cluster.close() + + # Load results and prepare for insertion + minian_loader = MinianLoader(minian_data_path) + key["processing_time"] = minian_loader.creation_time + + # Get minian version if available + try: + import minian + key["package_version"] = getattr(minian, "__version__", "") + except (ImportError, AttributeError): + key["package_version"] = "" + + file_entries = [ { - "noise_freq": 0.06, - "sparse_penal": 1, - "p": 1, - "add_lag": 20, - "jac_thres": 0.4, - }, - ) - C_final, S_final, b0_final, c0_final, g_final, mask_final = update_temporal( - A_new2, C_new2, - Y=Y_fm_chk, - b=b_new2, f=f_new2, - **param_second_temporal - ) - C_final = sanitize_array(C_final, "C_final_post_temporal_2") - S_final = sanitize_array(S_final, "S_final_post_temporal_2") - - # <<< PATCHED: unit_merge coordinate fix (same pattern as iteration 1) - # Sync A with C_final's actual coordinates - A_final = A_new2.sel(unit_id=C_final.coords["unit_id"].values) - # >>> END PATCH - logger.info(f"Final units: {A_final.sizes['unit_id']}") - - # ===== SAVE FINAL RESULTS ===== - logger.info("Saving final results to output directory...") - final_save_params = {"dpath": str(output_dir), "overwrite": True} - - A_final = save_minian(A_final.rename("A"), **final_save_params) - C_final = save_minian(C_final.rename("C"), **final_save_params) - S_final = save_minian(S_final.rename("S"), **final_save_params) - b_final = save_minian(b_new2.rename("b"), **final_save_params) - f_final = save_minian(f_new2.rename("f"), **final_save_params) - motion_final = save_minian(motion.rename("motion"), **final_save_params) - max_proj_final = save_minian(max_proj.rename("max_proj"), **final_save_params) - - logger.info( - f"Minian processing complete. {A_final.sizes['unit_id']} units detected." 
- ) - - except Exception as e: - logger.error(f"Minian processing failed: {e}") - raise e - finally: - client.close() - cluster.close() - - # Load results and prepare for insertion - minian_loader = MinianLoader(minian_data_path) - key["processing_time"] = minian_loader.creation_time - - # Get minian version if available - try: - import minian - key["package_version"] = getattr(minian, "__version__", "") - except (ImportError, AttributeError): - key["package_version"] = "" - - file_entries = [ - { - **key, - "file_name": f.name, - "file": f.as_posix(), - } - for f in output_dir.rglob("*") - if f.is_file() - ] + **key, + "file_name": f.name, + "file": f.as_posix(), + } + for f in output_dir.rglob("*") + if f.is_file() + ] else: raise ValueError(f"Unknown task mode: {task_mode}") From 001822c3799ca100c8619e7faa980f3870959ad8 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Mon, 23 Feb 2026 13:20:22 -0500 Subject: [PATCH 33/36] Replace inline minian with Docker-only execution path Remove the ~650 lines of inline minian processing + compatibility patches. The minian method now always runs via the py38 sibling container. Docker image configurable via MINIAN_DOCKER_IMAGE env var. 
Co-Authored-By: Claude Opus 4.6 --- element_miniscope/miniscope.py | 857 ++++----------------------------- 1 file changed, 95 insertions(+), 762 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 69fc3b1..1c6e96e 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -969,776 +969,109 @@ def _run_processing(): ] elif method == "minian": - extra_params = params.pop("extra_dj_params", {}) - docker_image = extra_params.get("docker_image", None) - - if docker_image: - # === DOCKER CONTAINER EXECUTION PATH === - # Runs the original minian (Python 3.8) in a sibling container - import docker as docker_sdk - import json as json_mod + import docker as docker_sdk + import json as json_mod - logger.info( - f"Running minian via Docker container: {docker_image}" - ) + docker_image = os.environ.get( + "MINIAN_DOCKER_IMAGE", "datajoint/minian-py38:latest" + ) + logger.info(f"Running minian via Docker container: {docker_image}") - # Resolve host-level paths for sibling container volume mounts - host_s3_root = os.environ["HOST_S3_ROOT"] - host_outbox = os.environ["HOST_OUTBOX"] - container_raw_root = os.environ.get( - "RAW_ROOT_DATA_DIR", "/home/jovyan/s3/inbox" - ) - container_processed_root = os.environ.get( - "PROCESSED_ROOT_DATA_DIR", "/home/jovyan/efs/outbox" - ) + # Resolve host-level paths for sibling container volume mounts + host_s3_root = os.environ["HOST_S3_ROOT"] + host_outbox = os.environ["HOST_OUTBOX"] + container_raw_root = os.environ.get( + "RAW_ROOT_DATA_DIR", "/home/jovyan/s3/inbox" + ) + container_processed_root = os.environ.get( + "PROCESSED_ROOT_DATA_DIR", "/home/jovyan/efs/outbox" + ) - # Map container paths -> host paths for sibling container mounts - input_dir = str(pathlib.Path(avi_files[0]).parent) - input_dir_host = input_dir.replace( - container_raw_root, host_s3_root + "/inbox", 1 - ) - output_dir_host = str(output_dir).replace( - container_processed_root, host_outbox, 1 - ) + # 
Map container paths -> host paths for sibling container mounts + input_dir = str(pathlib.Path(avi_files[0]).parent) + input_dir_host = input_dir.replace( + container_raw_root, host_s3_root + "/inbox", 1 + ) + output_dir_host = str(output_dir).replace( + container_processed_root, host_outbox, 1 + ) - # Write config JSON to shared filesystem - n_workers = int(os.getenv("MINIAN_NWORKERS", 2)) - memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT", "4") - memory_limit = ( - memory_limit_env - if any(c.isalpha() for c in memory_limit_env) - else f"{memory_limit_env}GB" - ) - config = { - "input_dir": "/data/input", - "output_dir": "/data/output", - "intermediate_dir": "/data/output/minian_data", - "params": params, - "n_workers": n_workers, - "memory_limit": memory_limit, - } - config_path = output_dir / "minian_config.json" - with open(config_path, "w") as f: - json_mod.dump(config, f, indent=2, default=str) - - # Spawn sibling container - client = docker_sdk.from_env() - container_result = client.containers.run( - image=docker_image, - command=[ - "python", - "/opt/run_minian.py", - "/data/output/minian_config.json", - ], - volumes={ - input_dir_host: {"bind": "/data/input", "mode": "ro"}, - output_dir_host: {"bind": "/data/output", "mode": "rw"}, - }, - mem_limit=extra_params.get("container_mem_limit", "24g"), - environment={ - "MINIAN_NWORKERS": str(n_workers), - "MINIAN_MEMORY_LIMIT": memory_limit_env, - "MKL_NUM_THREADS": "1", - "OPENBLAS_NUM_THREADS": "1", - "OMP_NUM_THREADS": "1", - }, - remove=True, - detach=False, - stdout=True, - stderr=True, + # Write config JSON to shared filesystem + n_workers = int(os.getenv("MINIAN_NWORKERS", 2)) + memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT", "4") + memory_limit = ( + memory_limit_env + if any(c.isalpha() for c in memory_limit_env) + else f"{memory_limit_env}GB" + ) + container_mem_limit = os.getenv("MINIAN_CONTAINER_MEM_LIMIT", "24g") + + config = { + "input_dir": "/data/input", + "output_dir": "/data/output", + 
"intermediate_dir": "/data/output/minian_data", + "params": params, + "n_workers": n_workers, + "memory_limit": memory_limit, + } + config_path = output_dir / "minian_config.json" + with open(config_path, "w") as f: + json_mod.dump(config, f, indent=2, default=str) + + # Spawn sibling container + docker_client = docker_sdk.from_env() + container_result = docker_client.containers.run( + image=docker_image, + command=[ + "python", + "/opt/run_minian.py", + "/data/output/minian_config.json", + ], + volumes={ + input_dir_host: {"bind": "/data/input", "mode": "ro"}, + output_dir_host: {"bind": "/data/output", "mode": "rw"}, + }, + mem_limit=container_mem_limit, + environment={ + "MINIAN_NWORKERS": str(n_workers), + "MINIAN_MEMORY_LIMIT": memory_limit_env, + "MKL_NUM_THREADS": "1", + "OPENBLAS_NUM_THREADS": "1", + "OMP_NUM_THREADS": "1", + }, + remove=True, + detach=False, + stdout=True, + stderr=True, + ) + logger.info(f"Container output:\n{container_result.decode()}") + + # Verify completion marker + if (output_dir / ".minian_error").exists(): + with open(output_dir / ".minian_error") as f: + err = json_mod.load(f) + raise RuntimeError( + f"Minian container error: {err.get('error')}" ) - logger.info( - f"Container output:\n{container_result.decode()}" + if not (output_dir / ".minian_complete").exists(): + raise RuntimeError( + "Minian container exited without completion marker" ) - # Verify completion marker - if (output_dir / ".minian_error").exists(): - with open(output_dir / ".minian_error") as f: - err = json_mod.load(f) - raise RuntimeError( - f"Minian container error: {err.get('error')}" - ) - if not (output_dir / ".minian_complete").exists(): - raise RuntimeError( - "Minian container exited without completion marker" - ) - - # Load results (reuses existing MinianLoader) - minian_loader = MinianLoader(str(output_dir)) - key["processing_time"] = minian_loader.creation_time - key["package_version"] = "1.2.1-py38-docker" - - file_entries = [ - { - **key, - 
"file_name": f.name, - "file": f.as_posix(), - } - for f in output_dir.rglob("*") - if f.is_file() - ] + # Load results + minian_loader = MinianLoader(str(output_dir)) + key["processing_time"] = minian_loader.creation_time + key["package_version"] = "1.2.1-py38-docker" - else: - # === EXISTING INLINE EXECUTION PATH === - import multiprocessing - import psutil - import shutil - import dask - import dask.array as darr - import scipy.linalg # <<< NEW: for solve_triangular patch - import xarray as xr # <<< NEW: for sanitize_array - from dask.distributed import Client, LocalCluster - - # <<< NEW: Prevent infinite retry loops on NaN/Inf errors >>> - # If a Dask task fails (e.g. solve_triangular ValueError), don't - # keep retrying until OOM — fail fast after 3 attempts. - dask.config.set({ - "distributed.scheduler.allowed-failures": 3, - "distributed.comm.timeouts.connect": "300s", - "distributed.comm.timeouts.tcp": "7200s", - "distributed.scheduler.work-stealing": False, - "distributed.scheduler.worker-ttl": "7200s", # ← ADD THIS: 1 hour before declaring worker dead - }) - - # ===== APPLY COMPATIBILITY PATCHES ===== - - # Fix for NetworkX 3.0+ API changes - import networkx as nx - import scipy.sparse - from scipy.sparse import issparse - import minian.cnmf as cnmf_module - - def label_connected_fixed(adj, only_connected=False): - """Fixed label_connected for NetworkX 3.0+ compatibility.""" - if issparse(adj): - adj = adj.toarray() - adj = adj.copy() - np.fill_diagonal(adj, 0) - adj = np.triu(adj) - g = nx.from_numpy_array(adj) - labels = np.zeros(adj.shape[0], dtype=int) - for icomp, comp in enumerate(nx.connected_components(g)): - for node in comp: - labels[node] = icomp - if only_connected: - iso_mask = np.array([len(c) == 1 for c in nx.connected_components(g)]) - labels[np.isin(labels, np.where(iso_mask)[0])] = -1 - return labels - - cnmf_module.label_connected = label_connected_fixed - - # Fix for sparse array auto-densification - import sparse - import 
sparse.numba_backend._sparse_array as sparse_mod - sparse_mod.AUTO_DENSIFY = True - - # Fix for darr.block with mixed sparse array types - _original_darr_block = darr.block - - def patched_darr_block(arrays, allow_unknown_chunksizes=False): - """Patched darr.block that ensures sparse array type consistency.""" - def convert_to_coo(arr): - if arr is None: - return arr - if isinstance(arr, sparse.COO): - return arr - if isinstance(arr, sparse.SparseArray): - return sparse.COO(arr) - if isinstance(arr, np.ndarray): - return sparse.COO.from_numpy(arr) - if hasattr(arr, 'todense'): - return sparse.COO.from_numpy(np.asarray(arr.todense())) - return arr - - def recursive_convert(obj): - if isinstance(obj, list): - return [recursive_convert(item) for item in obj] - elif isinstance(obj, np.ndarray) and obj.dtype == object: - result = np.empty_like(obj) - for idx in np.ndindex(obj.shape): - result[idx] = convert_to_coo(obj[idx]) - return result - else: - return convert_to_coo(obj) - - try: - return _original_darr_block(arrays, allow_unknown_chunksizes=allow_unknown_chunksizes) - except ValueError as e: - if "All arrays must be instances of SparseArray" in str(e): - converted = recursive_convert(arrays) - return _original_darr_block(converted, allow_unknown_chunksizes=allow_unknown_chunksizes) - raise - - darr.block = patched_darr_block - darr.core.block = patched_darr_block - - # ===== PATCH: sparse/dense concatenation compatibility ===== - import sparse - import sparse.numba_backend._coo.common as _sparse_coo_common - import numpy as np - - _original_sparse_concat = _sparse_coo_common.concatenate - - def _patched_sparse_concat(arrays, axis=0): - """Convert any dense arrays to sparse.COO before concatenation.""" - converted = [] - for arr in arrays: - if isinstance(arr, sparse.SparseArray): - converted.append(arr) - elif isinstance(arr, np.ndarray): - converted.append(sparse.COO.from_numpy(arr)) - else: - try: - converted.append(sparse.COO.from_numpy(np.asarray(arr))) - 
except Exception: - converted.append(arr) - return _original_sparse_concat(converted, axis=axis) - - _sparse_coo_common.concatenate = _patched_sparse_concat - - # ===== PATCH: sparse/dense concatenation compatibility ===== - import sparse - import numpy as np - import sparse.numba_backend._coo.common as _sparse_coo_common - - _original_check = _sparse_coo_common.check_consistent_fill_value - - def _patched_check(arrays): - """Auto-convert any dense arrays to sparse.COO before validation.""" - for i, arr in enumerate(arrays): - if not isinstance(arr, sparse.SparseArray): - arrays[i] = sparse.COO.from_numpy(np.asarray(arr)) - return _original_check(arrays) - - _sparse_coo_common.check_consistent_fill_value = _patched_check - # Patch update_temporal to handle sparse.COO arrays - _original_update_temporal = cnmf_module.update_temporal - - def patched_update_temporal(A, C, b=None, f=None, Y=None, YrA=None, - noise_freq=0.25, p=2, add_lag="p", jac_thres=0.1, - sparse_penal=1, bseg=None, med_wd=None, - zero_thres=1e-8, max_iters=200, use_smooth=True, - normalize=True, warm_start=False, post_scal=False, - scs_fallback=False, concurrent_update=False): - """Patched update_temporal that handles sparse.COO arrays properly.""" - _original_csc_matrix = scipy.sparse.csc_matrix - - class PatchedCSCMatrix(scipy.sparse.csc_matrix): - def __new__(cls, arg1, shape=None, dtype=None, copy=False): - if hasattr(arg1, 'todense'): - arg1 = arg1.todense() - return _original_csc_matrix(arg1, shape=shape, dtype=dtype, copy=copy) - - scipy.sparse.csc_matrix = PatchedCSCMatrix - try: - result = _original_update_temporal( - A, C, b=b, f=f, Y=Y, YrA=YrA, noise_freq=noise_freq, - p=p, add_lag=add_lag, jac_thres=jac_thres, sparse_penal=sparse_penal, - bseg=bseg, med_wd=med_wd, zero_thres=zero_thres, max_iters=max_iters, - use_smooth=use_smooth, normalize=normalize, warm_start=warm_start, - post_scal=post_scal, scs_fallback=scs_fallback, - concurrent_update=concurrent_update - ) - finally: - 
scipy.sparse.csc_matrix = _original_csc_matrix - return result - - cnmf_module.update_temporal = patched_update_temporal - - # <<< NEW PATCH: scipy.linalg.solve_triangular NaN/Inf guard >>> - # Prevents ValueError('array must not contain infs or NaNs') from - # crashing Dask tasks and triggering infinite retry → OOM loops. - from distributed import WorkerPlugin - - class SolveTriangularNaNGuard(WorkerPlugin): - """Patch scipy.linalg.solve_triangular on each Dask worker process - to sanitize NaN/Inf inputs instead of crashing.""" - - def setup(self, worker): - import scipy.linalg - import numpy as np - - _original = scipy.linalg.solve_triangular - - def patched_solve_triangular(a, b, **kwargs): - a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0) - b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0) - return _original(a, b, **kwargs) - - scipy.linalg.solve_triangular = patched_solve_triangular - - logger.info("Applied minian compatibility patches (NetworkX, sparse arrays, dask.block, solve_triangular NaN guard)") - - # Now import minian modules (after patches are applied) - from minian.cnmf import ( - get_noise_fft, - unit_merge, - update_spatial, - update_temporal, - update_background, - ) - from minian.initialization import ( - initA, - initC, - ks_refine, - pnr_refine, - seeds_init, - seeds_merge, - ) - from minian.motion_correction import apply_transform, estimate_motion - from minian.preprocessing import denoise, remove_background - from minian.utilities import ( - TaskAnnotation, - get_optimal_chk, - load_videos, - save_minian, - ) - from minian.visualization import write_video - - # ===== CONTAINER-AWARE MEMORY DETECTION ===== - def get_container_memory_limit(): - """Get memory limit respecting container cgroups (v1 and v2).""" - try: - with open("/sys/fs/cgroup/memory.max") as f: - limit = f.read().strip() - if limit != "max": - return int(limit) - except (FileNotFoundError, PermissionError): - pass - try: - with 
open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f: - limit = int(f.read().strip()) - if limit < 9223372036854771712: - return limit - except (FileNotFoundError, PermissionError): - pass - return psutil.virtual_memory().total - - # ===== DASK CLUSTER CONFIGURATION ===== - memory_total = get_container_memory_limit() - n_workers = int( - os.getenv( - "MINIAN_NWORKERS", - max(1, min(int(multiprocessing.cpu_count() * 0.4), 8)), - ) - ) - memory_per_worker = int(memory_total * 0.8 / n_workers) - - memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT") - if memory_limit_env: - memory_limit = ( - memory_limit_env - if any(c.isalpha() for c in memory_limit_env) - else f"{memory_limit_env}GB" - ) - else: - memory_limit = f"{memory_per_worker // (1024**3)}GB" - - # Set minian intermediate storage paths - minian_data_path = str(output_dir / "minian_data") - os.makedirs(minian_data_path, exist_ok=True) - os.environ["MINIAN_INTERMEDIATE"] = minian_data_path - - # Helper function to clean intermediate files - def clean_intermediate_files(directory_path, file_names): - """Remove intermediate zarr files to avoid conflicts.""" - for fname in file_names: - path = os.path.join(directory_path, f"{fname}.zarr") - if os.path.exists(path): - shutil.rmtree(path) - - def sanitize_array(arr, name="array"): - """Replace NaN/Inf in an xarray DataArray with 0. - Works with both lazy (dask-backed) and in-memory arrays.""" - logger.info(f"Sanitizing {name} (replacing NaN/Inf with 0)...") - if hasattr(arr.data, "dask"): - sanitized = xr.apply_ufunc( - lambda x: np.nan_to_num( - x, nan=0.0, posinf=0.0, neginf=0.0 - ), - arr, - dask="parallelized", - output_dtypes=[arr.dtype], - ) - else: - sanitized = xr.DataArray( - np.nan_to_num( - arr.values, nan=0.0, posinf=0.0, neginf=0.0 - ), - coords=arr.coords, - dims=arr.dims, - ) - return sanitized - - # Start Dask cluster - logger.info( - f"Starting Minian processing with {n_workers} workers, " - f"{memory_limit} memory limit per worker..." 
- ) - cluster = LocalCluster( - n_workers=n_workers, - memory_limit=memory_limit, - resources={"MEM": 1}, - threads_per_worker=2, - dashboard_address=None, - ) - annotation_plugin = TaskAnnotation() - cluster.scheduler.add_plugin(annotation_plugin) - client = Client(cluster) - client.register_worker_plugin(SolveTriangularNaNGuard()) - - try: - # ===== LOAD VIDEOS ===== - logger.info("Loading videos...") - default_load_params = { - "pattern": r".*\.avi$", - "dtype": np.uint8, - "downsample": dict(frame=1, height=1, width=1), - "downsample_strategy": "subset", - } - param_load_videos = { - **default_load_params, - **params.get("param_load_videos", {}), - } - varr = load_videos( - str(pathlib.Path(avi_files[0]).parent), **param_load_videos - ) - chk, _ = get_optimal_chk(varr, dtype=float) - - # Save raw video to zarr - logger.info("Saving raw video to zarr...") - varr = save_minian( - varr.chunk({"frame": chk["frame"], "height": -1, "width": -1}).rename("varr"), - minian_data_path, - overwrite=True, - ) - logger.info( - f"Loaded video: {varr.sizes['frame']} frames, " - f"{varr.sizes['height']}x{varr.sizes['width']} pixels" - ) - - # ===== PREPROCESSING ===== - logger.info("Preprocessing: glow removal...") - varr_min = varr.min("frame").compute() - varr_ref = varr - varr_min - varr_ref = varr_ref.clip(min=0) - - logger.info("Preprocessing: denoising...") - param_denoise = params.get( - "param_denoise", {"method": "median", "ksize": 7} - ) - varr_ref = denoise(varr_ref, **param_denoise) - - logger.info("Preprocessing: background removal...") - param_background_removal = params.get( - "param_background_removal", {"method": "tophat", "wnd": 10} - ) - varr_ref = remove_background(varr_ref, **param_background_removal) - - # Keep it chunked - varr_ref = varr_ref.chunk({"frame": chk["frame"], "height": -1, "width": -1}) - - # ===== MOTION CORRECTION ===== - logger.info("Estimating motion...") - param_estimate_motion = params.get( - "param_estimate_motion", {"dim": "frame"} - 
) - motion = estimate_motion(varr_ref, **param_estimate_motion) - motion = save_minian( - motion.rename("motion").chunk({"frame": chk["frame"]}), - minian_data_path, - overwrite=True, - ) - - logger.info("Applying motion correction...") - Y = apply_transform(varr_ref, motion, fill=0) - - # <<< NEW: Sanitize after motion correction >>> - # apply_transform can introduce NaN at frame borders where the - # shift goes beyond the image boundary. The fill=0 param should - # handle this, but edge cases slip through on large datasets - # with float64 precision. This prevents downstream - # solve_triangular crashes in CNMF. - Y = sanitize_array(Y, "Y_post_motion_correction") - - # Save two versions with different chunking - logger.info("Saving motion-corrected video (frame-chunked)...") - Y_fm_chk = save_minian( - Y.astype(np.float32).rename("Y_fm_chk"), - minian_data_path, - overwrite=True, - ) - - logger.info("Saving motion-corrected video (spatial-chunked)...") - Y_hw_chk = save_minian( - Y_fm_chk.rename("Y_hw_chk"), - minian_data_path, - overwrite=True, - chunks={"frame": -1, "height": chk["height"], "width": chk["width"]}, - ) - - # # Save motion corrected video as mp4 - # logger.info("Writing motion corrected video...") - # write_video(Y_fm_chk, "motion_corrected.mp4", str(output_dir)) - - # Create and save max projection - logger.info("Computing max projection...") - max_proj = Y_fm_chk.max("frame").compute() - max_proj = save_minian(max_proj.rename("max_proj"), minian_data_path, overwrite=True) - - # ===== SEED INITIALIZATION ===== - logger.info("Initializing seeds...") - param_seeds_init = params.get( - "param_seeds_init", - { - "wnd_size": 1000, - "method": "rolling", - "stp_size": 500, - "max_wnd": 15, - "diff_thres": 3, - }, - ) - seeds = seeds_init(Y_fm_chk, **param_seeds_init) - logger.info(f"Initial seeds: {len(seeds)}") - - logger.info("Refining seeds with PNR...") - param_pnr_refine = params.get( - "param_pnr_refine", {"noise_freq": 0.06, "thres": 1} - ) - 
seeds, pnr, gmm = pnr_refine(Y_hw_chk, seeds, **param_pnr_refine) - logger.info(f"Seeds after PNR refine: {seeds['mask_pnr'].sum()} / {len(seeds)}") - - logger.info("Refining seeds with KS test...") - param_ks_refine = params.get("param_ks_refine", {"sig": 0.05}) - seeds = ks_refine(Y_hw_chk, seeds, **param_ks_refine) - logger.info(f"Seeds after KS refine: {seeds['mask_ks'].sum()} / {len(seeds)}") - - logger.info("Merging seeds...") - param_seeds_merge = params.get( - "param_seeds_merge", - {"thres_dist": 10, "thres_corr": 0.8, "noise_freq": 0.06}, - ) - seeds_final = seeds[seeds["mask_ks"] & seeds["mask_pnr"]].reset_index(drop=True) - seeds_final = seeds_merge(Y_hw_chk, max_proj, seeds_final, **param_seeds_merge) - n_seeds = seeds_final["mask_mrg"].sum() - logger.info(f"Seeds after merge: {n_seeds} / {len(seeds_final)}") - - if n_seeds == 0: - raise ValueError( - "No seeds remaining after refinement. " - "Consider adjusting param_seeds_init or param_pnr_refine thresholds." - ) - - # ===== INITIALIZE A AND C ===== - logger.info("Initializing spatial footprints (A)...") - param_initialize = params.get( - "param_initialize", {"thres_corr": 0.8, "wnd": 10, "noise_freq": 0.06} - ) - A_init = initA(Y_hw_chk, seeds_final[seeds_final["mask_mrg"]], **param_initialize) - A_init = save_minian(A_init.rename("A_init"), minian_data_path, overwrite=True) - logger.info(f"A_init shape: {A_init.shape}") - - logger.info("Initializing temporal traces (C)...") - C_init = initC(Y_fm_chk, A_init) - C_init = save_minian( - C_init.rename("C_init"), - minian_data_path, - overwrite=True, - chunks={"unit_id": 1, "frame": -1}, - ) - logger.info(f"C_init shape: {C_init.shape}") - - # Initial unit merge - logger.info("Initial unit merge...") - param_init_merge = params.get("param_init_merge", {"thres_corr": 0.8}) - A, C = unit_merge(A_init, C_init, **param_init_merge) - A = save_minian(A.rename("A"), minian_data_path, overwrite=True) - C = save_minian(C.rename("C"), minian_data_path, 
overwrite=True) - C_chk = save_minian( - C.rename("C_chk"), - minian_data_path, - overwrite=True, - chunks={"unit_id": -1, "frame": chk["frame"]}, - ) - logger.info(f"Units after initial merge: {A.sizes['unit_id']}") - - # ===== INITIALIZE BACKGROUND ===== - logger.info("Initializing background terms...") - b, f = update_background(Y_fm_chk, A, C_chk) - f = save_minian(f.rename("f"), minian_data_path, overwrite=True) - b = save_minian(b.rename("b"), minian_data_path, overwrite=True) - logger.info(f"Background initialized - b: {b.shape}, f: {f.shape}") - - # ===== COMPUTE NOISE STATISTICS ===== - logger.info("Computing noise statistics...") - param_get_noise = params.get("param_get_noise", {"noise_range": (0.06, 0.5)}) - sn_spatial = get_noise_fft(Y_hw_chk, **param_get_noise) - sn_spatial = save_minian(sn_spatial.rename("sn_spatial"), minian_data_path, overwrite=True) - - # ======================================== - # CNMF ITERATION 1 - # ======================================== - - # ----- First Spatial Update ----- - logger.info("CNMF Iteration 1: Spatial update...") - param_first_spatial = params.get( - "param_first_spatial", - {"dl_wnd": 10, "sparse_penal": 0.01, "size_thres": (25, None)}, - ) - A_new, mask, norm_fac = update_spatial( - Y_hw_chk, A, C, sn_spatial, **param_first_spatial - ) - C_new = save_minian( - (C.sel(unit_id=mask) * norm_fac).rename("C_new"), - minian_data_path, - overwrite=True, - ) - C_chk_new = save_minian( - (C_chk.sel(unit_id=mask) * norm_fac).rename("C_chk_new"), - minian_data_path, - overwrite=True, - ) - logger.info(f"Units after first spatial update: {A_new.sizes['unit_id']}") - - # Update background after first spatial - logger.info("Updating background...") - b_new, f_new = update_background(Y_fm_chk, A_new, C_chk_new) - - # ----- First Temporal Update ----- - logger.info("CNMF Iteration 1: Temporal update...") - param_first_temporal = params.get( - "param_first_temporal", - { - "noise_freq": 0.06, - "sparse_penal": 1, - "p": 1, - 
"add_lag": 20, - "jac_thres": 0.2, - }, - ) - C_new, S_new, b0_new, c0_new, g, mask = update_temporal( - A_new, C_new, - Y=Y_fm_chk, - b=b_new, f=f_new, - **param_first_temporal - ) - logger.info(f"Units after first temporal update: {C_new.sizes['unit_id']}") - C_new = sanitize_array(C_new, "C_new_pre_merge") - S_new = sanitize_array(S_new, "S_new_pre_merge") - # ----- First Merge ----- - logger.info("CNMF Iteration 1: Merging units...") - param_first_merge = params.get("param_first_merge", {"thres_corr": 0.8}) - - # <<< PATCHED: unit_merge coordinate fix - # update_temporal returns C_new/S_new already filtered by mask internally. - # Sync A with C_new's actual coordinates instead of using the boolean mask, - # which can cause alignment mismatches on large datasets. - A_filtered = A_new.sel(unit_id=C_new.coords["unit_id"].values) - logger.info( - f"Unit sync check — A: {A_filtered.sizes['unit_id']}, " - f"C: {C_new.sizes['unit_id']}, S: {S_new.sizes['unit_id']}" - ) - A_mrg, C_mrg, [S_mrg] = unit_merge( - A_filtered, C_new, [S_new], **param_first_merge - ) - # >>> END PATCH - logger.info(f"Units after first merge: {A_mrg.sizes['unit_id']}") - A_mrg = save_minian(A_mrg.rename("A_mrg"), minian_data_path, overwrite=True) - C_mrg = save_minian(C_mrg.rename("C_mrg"), minian_data_path, overwrite=True) - S_mrg = save_minian(S_mrg.rename("S_mrg"), minian_data_path, overwrite=True) - gc.collect() - logger.info("Saved merged results to zarr for iteration 2") - - # ======================================== - # CNMF ITERATION 2 - # ======================================== - - # ----- Second Spatial Update ----- - logger.info("CNMF Iteration 2: Spatial update...") - param_second_spatial = params.get( - "param_second_spatial", - {"dl_wnd": 10, "sparse_penal": 0.01, "size_thres": (25, None)}, - ) - A_new2, mask2, norm_fac2 = update_spatial( - Y_hw_chk, A_mrg, C_mrg, sn_spatial, **param_second_spatial - ) - C_new2 = C_mrg.sel(unit_id=mask2) * norm_fac2 - logger.info(f"Units after 
second spatial update: {A_new2.sizes['unit_id']}") - - # Update background after second spatial - logger.info("Updating background...") - b_new2, f_new2 = update_background(Y_fm_chk, A_new2, C_new2) - - # ----- Second Temporal Update ----- - logger.info("CNMF Iteration 2: Temporal update...") - param_second_temporal = params.get( - "param_second_temporal", - { - "noise_freq": 0.06, - "sparse_penal": 1, - "p": 1, - "add_lag": 20, - "jac_thres": 0.4, - }, - ) - C_final, S_final, b0_final, c0_final, g_final, mask_final = update_temporal( - A_new2, C_new2, - Y=Y_fm_chk, - b=b_new2, f=f_new2, - **param_second_temporal - ) - C_final = sanitize_array(C_final, "C_final_post_temporal_2") - S_final = sanitize_array(S_final, "S_final_post_temporal_2") - - # <<< PATCHED: unit_merge coordinate fix (same pattern as iteration 1) - # Sync A with C_final's actual coordinates - A_final = A_new2.sel(unit_id=C_final.coords["unit_id"].values) - # >>> END PATCH - logger.info(f"Final units: {A_final.sizes['unit_id']}") - - # ===== SAVE FINAL RESULTS ===== - logger.info("Saving final results to output directory...") - final_save_params = {"dpath": str(output_dir), "overwrite": True} - - A_final = save_minian(A_final.rename("A"), **final_save_params) - C_final = save_minian(C_final.rename("C"), **final_save_params) - S_final = save_minian(S_final.rename("S"), **final_save_params) - b_final = save_minian(b_new2.rename("b"), **final_save_params) - f_final = save_minian(f_new2.rename("f"), **final_save_params) - motion_final = save_minian(motion.rename("motion"), **final_save_params) - max_proj_final = save_minian(max_proj.rename("max_proj"), **final_save_params) - - logger.info( - f"Minian processing complete. {A_final.sizes['unit_id']} units detected." 
- ) - - except Exception as e: - logger.error(f"Minian processing failed: {e}") - raise e - finally: - client.close() - cluster.close() - - # Load results and prepare for insertion - minian_loader = MinianLoader(minian_data_path) - key["processing_time"] = minian_loader.creation_time - - # Get minian version if available - try: - import minian - key["package_version"] = getattr(minian, "__version__", "") - except (ImportError, AttributeError): - key["package_version"] = "" - - file_entries = [ - { - **key, - "file_name": f.name, - "file": f.as_posix(), - } - for f in output_dir.rglob("*") - if f.is_file() - ] + file_entries = [ + { + **key, + "file_name": f.name, + "file": f.as_posix(), + } + for f in output_dir.rglob("*") + if f.is_file() + ] else: raise ValueError(f"Unknown task mode: {task_mode}") From db39743f6802b5ce61fad5894da652e74a3676da Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Tue, 24 Feb 2026 10:09:58 -0500 Subject: [PATCH 34/36] use dockerized minian --- element_miniscope/miniscope.py | 361 ++++++++++++++++++++++++--------- 1 file changed, 264 insertions(+), 97 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 1c6e96e..cc2f90e 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -76,6 +76,17 @@ def activate( add_objects=_linking_module.__dict__, ) +# ─── Configuration ──────────────────────────────────────────────────────── + +# Docker image for minian processing. +# Override with env var MINIAN_DOCKER_IMAGE if using a custom registry. +MINIAN_DOCKER_IMAGE = os.getenv( + "MINIAN_DOCKER_IMAGE", "datajoint/minian-base:0.1.0" +) + +# Timeout for the docker run command (seconds). Default: 24 hours. +# Large datasets (70k+ frames) can take many hours. 
+MINIAN_DOCKER_TIMEOUT = int(os.getenv("MINIAN_DOCKER_TIMEOUT", "86400")) # Functions required by the element-miniscope ----------------------------------------- @@ -781,6 +792,17 @@ def make_compute( raise NotImplementedError( f"Loading of {method} data is not yet supported" ) + file_entries = [ + { + **key, + "file_name": f.relative_to( + get_processed_root_data_dir() + ).as_posix(), + "file": f.as_posix(), + } + for f in output_dir.rglob("*") + if f.is_file() + ] elif task_mode == "trigger": avi_files = [ find_full_path(get_miniscope_root_data_dir(), avi_file).as_posix() @@ -967,102 +989,25 @@ def _run_processing(): for f in output_dir.rglob("*") if f.is_file() ] - elif method == "minian": - import docker as docker_sdk - import json as json_mod - - docker_image = os.environ.get( - "MINIAN_DOCKER_IMAGE", "datajoint/minian-py38:latest" - ) - logger.info(f"Running minian via Docker container: {docker_image}") - - # Resolve host-level paths for sibling container volume mounts - host_s3_root = os.environ["HOST_S3_ROOT"] - host_outbox = os.environ["HOST_OUTBOX"] - container_raw_root = os.environ.get( - "RAW_ROOT_DATA_DIR", "/home/jovyan/s3/inbox" - ) - container_processed_root = os.environ.get( - "PROCESSED_ROOT_DATA_DIR", "/home/jovyan/efs/outbox" - ) + logger.info("Running minian via Docker container...") - # Map container paths -> host paths for sibling container mounts - input_dir = str(pathlib.Path(avi_files[0]).parent) - input_dir_host = input_dir.replace( - container_raw_root, host_s3_root + "/inbox", 1 - ) - output_dir_host = str(output_dir).replace( - container_processed_root, host_outbox, 1 + # Run the minian pipeline in a Docker container + status = _run_minian_in_container( + avi_files=avi_files, + output_dir=output_dir, + params=params, ) - # Write config JSON to shared filesystem - n_workers = int(os.getenv("MINIAN_NWORKERS", 2)) - memory_limit_env = os.getenv("MINIAN_MEMORY_LIMIT", "4") - memory_limit = ( - memory_limit_env - if any(c.isalpha() for c in 
memory_limit_env) - else f"{memory_limit_env}GB" + # Set processing time from container output + key["processing_time"] = status.get( + "timestamp", time.strftime("%Y-%m-%dT%H:%M:%S") ) - container_mem_limit = os.getenv("MINIAN_CONTAINER_MEM_LIMIT", "24g") - - config = { - "input_dir": "/data/input", - "output_dir": "/data/output", - "intermediate_dir": "/data/output/minian_data", - "params": params, - "n_workers": n_workers, - "memory_limit": memory_limit, - } - config_path = output_dir / "minian_config.json" - with open(config_path, "w") as f: - json_mod.dump(config, f, indent=2, default=str) - - # Spawn sibling container - docker_client = docker_sdk.from_env() - container_result = docker_client.containers.run( - image=docker_image, - command=[ - "python", - "/opt/run_minian.py", - "/data/output/minian_config.json", - ], - volumes={ - input_dir_host: {"bind": "/data/input", "mode": "ro"}, - output_dir_host: {"bind": "/data/output", "mode": "rw"}, - }, - mem_limit=container_mem_limit, - environment={ - "MINIAN_NWORKERS": str(n_workers), - "MINIAN_MEMORY_LIMIT": memory_limit_env, - "MKL_NUM_THREADS": "1", - "OPENBLAS_NUM_THREADS": "1", - "OMP_NUM_THREADS": "1", - }, - remove=True, - detach=False, - stdout=True, - stderr=True, - ) - logger.info(f"Container output:\n{container_result.decode()}") - - # Verify completion marker - if (output_dir / ".minian_error").exists(): - with open(output_dir / ".minian_error") as f: - err = json_mod.load(f) - raise RuntimeError( - f"Minian container error: {err.get('error')}" - ) - if not (output_dir / ".minian_complete").exists(): - raise RuntimeError( - "Minian container exited without completion marker" - ) - # Load results - minian_loader = MinianLoader(str(output_dir)) - key["processing_time"] = minian_loader.creation_time - key["package_version"] = "1.2.1-py38-docker" + # Get minian version (from status.json or default) + key["package_version"] = status.get("minian_version", "") + # Collect output files for insertion into 
DataJoint file_entries = [ { **key, @@ -1072,9 +1017,8 @@ def _run_processing(): for f in output_dir.rglob("*") if f.is_file() ] - - else: - raise ValueError(f"Unknown task mode: {task_mode}") + else: + raise ValueError(f"Unknown task mode: {task_mode}") return (file_entries, output_dir) def make_insert(self, key, file_entries, output_dir): @@ -1087,9 +1031,14 @@ def make_insert(self, key, file_entries, output_dir): ).as_posix(), } ) - self.insert1(dict(**key, processing_time=datetime.now(timezone.utc))) - # for file in file_entries: - # self.File.insert1(file, ignore_extra_fields=True) + self.insert1( + dict( + **key, + processing_time=key.get( + "processing_time", datetime.now(timezone.utc) + ), + ) + ) # Motion Correction -------------------------------------------------------------------- @@ -1805,10 +1754,22 @@ def __init__(self, output_dir): Args: output_dir: Path to the directory containing Minian zarr outputs. """ - from minian.utilities import open_minian + import xarray as xr self.output_dir = pathlib.Path(output_dir) - self._minian_ds = open_minian(str(self.output_dir)) + + # Load each zarr variable into a combined Dataset + # (replicates what minian's open_minian does without requiring + # the minian package to be installed) + ds = xr.Dataset() + for zarr_path in sorted(self.output_dir.glob("*.zarr")): + try: + var_ds = xr.open_zarr(str(zarr_path)) + for name, da in var_ds.data_vars.items(): + ds[name] = da + except Exception: + logger.debug(f"Skipping non-zarr path: {zarr_path}") + self._minian_ds = ds # Load core arrays self._A = self._minian_ds.get( @@ -1996,3 +1957,209 @@ def get_loader_result(key, table, full_output_dir=None) -> tuple: raise NotImplementedError("Unknown/unimplemented method: {}".format(method)) return method, loaded_output + + +def _run_minian_in_container( + avi_files: list, + output_dir: pathlib.Path, + params: dict, + docker_image: str = MINIAN_DOCKER_IMAGE, + timeout: int = MINIAN_DOCKER_TIMEOUT, +): + """ + Run the minian 
pipeline inside a Docker container. + + This function: + 1. Prepares a params.json file + 2. Identifies the input directory containing the AVI files + 3. Runs `docker run` with volume mounts for input, output, and params + 4. Streams container logs to the pipeline logger + 5. Checks the exit code and status.json for success/failure + + Args: + avi_files: List of paths to AVI video files (must all be in same directory) + output_dir: Path where minian results will be saved + params: Dictionary of minian pipeline parameters + docker_image: Docker image to use (default: datajoint/minian-base:0.1.0) + timeout: Max seconds to wait for the container (default: 86400 = 24h) + + Raises: + RuntimeError: If the container exits with non-zero code or status.json + reports an error. + subprocess.TimeoutExpired: If the container exceeds the timeout. + """ + import json + import subprocess + import tempfile + + # ── Resolve input directory ────────────────────────────────────────── + # All AVI files should be in the same directory (or we use the parent + # of the first file). If files are in different directories, we'd need + # to copy them — but that's not typical for miniscope recordings. + input_dir = pathlib.Path(avi_files[0]).parent.resolve() + + # Verify all files are in the same directory + for avi in avi_files: + if pathlib.Path(avi).parent.resolve() != input_dir: + raise ValueError( + f"All AVI files must be in the same directory. 
" + f"Found files in {input_dir} and {pathlib.Path(avi).parent}" + ) + + # ── Prepare params ─────────────────────────────────────────────────── + # Write params to a temp directory that will be mounted into the container + params_dir = tempfile.mkdtemp(prefix="minian_params_") + params_file = os.path.join(params_dir, "params.json") + + # Convert any tuple values to lists for JSON serialization + def _serialize_params(obj): + if isinstance(obj, tuple): + return list(obj) + if isinstance(obj, dict): + return {k: _serialize_params(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_serialize_params(v) for v in obj] + return obj + + serializable_params = _serialize_params(params) + + with open(params_file, "w") as f: + json.dump(serializable_params, f, indent=2) + + logger.info(f"Params written to {params_file}") + + # ── Prepare output directory ───────────────────────────────────────── + output_dir = pathlib.Path(output_dir).resolve() + os.makedirs(output_dir, exist_ok=True) + + # ── Build docker run command ───────────────────────────────────────── + # + # Volume mounts: + # input_dir -> /data/input (read-only: we never modify source videos) + # output_dir -> /data/output (read-write: results written here) + # params_dir -> /data/params (read-only: params.json) + # + # We also pass through resource-related env vars if set. 
+ docker_cmd = [ + "docker", + "run", + "--rm", # Auto-remove container after exit + # Volume mounts + "-v", + f"{input_dir}:/data/input:ro", + "-v", + f"{output_dir}:/data/output", + "-v", + f"{params_dir}:/data/params:ro", + ] + + # Pass through resource configuration env vars + for env_var in ["MINIAN_NWORKERS", "MINIAN_MEMORY_LIMIT"]: + val = os.getenv(env_var) + if val: + docker_cmd.extend(["-e", f"{env_var}={val}"]) + + # Memory limit for the container itself (if set) + container_memory = os.getenv("MINIAN_CONTAINER_MEMORY") + if container_memory: + docker_cmd.extend(["--memory", container_memory]) + + # CPU limit (if set) + container_cpus = os.getenv("MINIAN_CONTAINER_CPUS") + if container_cpus: + docker_cmd.extend(["--cpus", container_cpus]) + + # SHM size — Dask workers need shared memory for inter-process comms + docker_cmd.extend(["--shm-size", os.getenv("MINIAN_SHM_SIZE", "8g")]) + + # The image + docker_cmd.append(docker_image) + + logger.info(f"Docker command: {' '.join(docker_cmd)}") + + # ── Pull image if not present ──────────────────────────────────────── + try: + subprocess.run( + ["docker", "image", "inspect", docker_image], + capture_output=True, + check=True, + ) + logger.info(f"Docker image {docker_image} found locally") + except subprocess.CalledProcessError: + logger.info(f"Pulling Docker image {docker_image}...") + subprocess.run( + ["docker", "pull", docker_image], + check=True, + timeout=600, # 10 min pull timeout + ) + + # ── Run the container ──────────────────────────────────────────────── + logger.info("Starting minian container...") + start_time = time.time() + + process = subprocess.Popen( + docker_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, # Line-buffered + ) + + # Stream container logs to the pipeline logger in real time + try: + for line in process.stdout: + line = line.rstrip() + if line: + logger.info(f"[minian-container] {line}") + + process.wait(timeout=timeout) + except 
subprocess.TimeoutExpired: + logger.error( + f"Minian container timed out after {timeout}s. Killing..." + ) + process.kill() + process.wait() + raise RuntimeError( + f"Minian container exceeded timeout of {timeout} seconds" + ) + + elapsed = time.time() - start_time + logger.info( + f"Container exited with code {process.returncode} " + f"after {elapsed:.1f}s ({elapsed/3600:.1f}h)" + ) + + # ── Check results ──────────────────────────────────────────────────── + status_file = output_dir / "status.json" + + if process.returncode != 0: + error_msg = f"Minian container exited with code {process.returncode}" + if status_file.exists(): + with open(status_file) as f: + status = json.load(f) + error_msg += f": {status.get('message', 'unknown error')}" + raise RuntimeError(error_msg) + + if not status_file.exists(): + raise RuntimeError( + "Minian container exited successfully but no status.json found" + ) + + with open(status_file) as f: + status = json.load(f) + + if status.get("status") != "success": + raise RuntimeError( + f"Minian container reported failure: {status.get('message', 'unknown')}" + ) + + logger.info( + f"Minian processing complete: {status.get('n_units', '?')} units detected" + ) + + # ── Cleanup temp params dir ────────────────────────────────────────── + import shutil + + shutil.rmtree(params_dir, ignore_errors=True) + + return status From cc0440f396ea34b650c8fec981350e8d000e904d Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Tue, 10 Mar 2026 15:01:23 -0400 Subject: [PATCH 35/36] Update minian Docker integration with host path translation and Dask tuning - Change default image to minian-py38:latest (conda-forge based) - Refactor _run_minian_in_container: write params as unified config JSON, translate container paths to host paths via HOST_S3_ROOT/HOST_OUTBOX env vars, pass Dask timeout and memory allocator env vars to container - Add test_minian_pipeline.py for end-to-end Minian integration testing Co-Authored-By: Claude Opus 4.6 --- 
element_miniscope/miniscope.py | 108 ++++---- notebooks/test_minian_pipeline.py | 396 ++++++++++++++++++++++++++++++ 2 files changed, 449 insertions(+), 55 deletions(-) create mode 100644 notebooks/test_minian_pipeline.py diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index cc2f90e..777ad77 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -7,6 +7,7 @@ import json import os import pathlib +import time from datetime import datetime, timezone from typing import Union @@ -81,7 +82,7 @@ def activate( # Docker image for minian processing. # Override with env var MINIAN_DOCKER_IMAGE if using a custom registry. MINIAN_DOCKER_IMAGE = os.getenv( - "MINIAN_DOCKER_IMAGE", "datajoint/minian-base:0.1.0" + "MINIAN_DOCKER_IMAGE", "datajoint/minian-py38:latest" ) # Timeout for the docker run command (seconds). Default: 24 hours. @@ -1966,39 +1967,12 @@ def _run_minian_in_container( docker_image: str = MINIAN_DOCKER_IMAGE, timeout: int = MINIAN_DOCKER_TIMEOUT, ): - """ - Run the minian pipeline inside a Docker container. - - This function: - 1. Prepares a params.json file - 2. Identifies the input directory containing the AVI files - 3. Runs `docker run` with volume mounts for input, output, and params - 4. Streams container logs to the pipeline logger - 5. Checks the exit code and status.json for success/failure - - Args: - avi_files: List of paths to AVI video files (must all be in same directory) - output_dir: Path where minian results will be saved - params: Dictionary of minian pipeline parameters - docker_image: Docker image to use (default: datajoint/minian-base:0.1.0) - timeout: Max seconds to wait for the container (default: 86400 = 24h) - - Raises: - RuntimeError: If the container exits with non-zero code or status.json - reports an error. - subprocess.TimeoutExpired: If the container exceeds the timeout. 
- """ import json import subprocess - import tempfile # ── Resolve input directory ────────────────────────────────────────── - # All AVI files should be in the same directory (or we use the parent - # of the first file). If files are in different directories, we'd need - # to copy them — but that's not typical for miniscope recordings. input_dir = pathlib.Path(avi_files[0]).parent.resolve() - # Verify all files are in the same directory for avi in avi_files: if pathlib.Path(avi).parent.resolve() != input_dir: raise ValueError( @@ -2006,12 +1980,15 @@ def _run_minian_in_container( f"Found files in {input_dir} and {pathlib.Path(avi).parent}" ) - # ── Prepare params ─────────────────────────────────────────────────── - # Write params to a temp directory that will be mounted into the container - params_dir = tempfile.mkdtemp(prefix="minian_params_") - params_file = os.path.join(params_dir, "params.json") + # ── Prepare output directory ───────────────────────────────────────── + output_dir = pathlib.Path(output_dir).resolve() + os.makedirs(output_dir, exist_ok=True) + + # ── Prepare params (under output_dir so it's on a shared volume) ───── + params_dir = output_dir / ".params" + os.makedirs(params_dir, exist_ok=True) + params_file = params_dir / "params.json" - # Convert any tuple values to lists for JSON serialization def _serialize_params(obj): if isinstance(obj, tuple): return list(obj) @@ -2020,37 +1997,43 @@ def _serialize_params(obj): if isinstance(obj, list): return [_serialize_params(v) for v in obj] return obj - + serializable_params = _serialize_params(params) + config = { + "input_dir": "/data/input", + "output_dir": "/data/output", + "params": serializable_params, + } + with open(params_file, "w") as f: - json.dump(serializable_params, f, indent=2) + json.dump(config, f, indent=2) logger.info(f"Params written to {params_file}") - # ── Prepare output directory ───────────────────────────────────────── - output_dir = pathlib.Path(output_dir).resolve() - 
os.makedirs(output_dir, exist_ok=True) + # ── Translate container paths → host paths ─────────────────────────── + # The host Docker daemon resolves bind-mount paths on the HOST + # filesystem, not inside this worker container. We use env vars + # set by docker-compose to map container paths to host paths. + host_s3_root = os.getenv("HOST_S3_ROOT") + host_outbox = os.getenv("HOST_OUTBOX") + + def _to_host_path(container_path, container_prefix, host_prefix): + s = str(container_path) + if host_prefix and s.startswith(container_prefix): + return host_prefix + s[len(container_prefix):] + return s # fallback: use as-is (works if running directly on host) + + host_input_dir = _to_host_path(input_dir, "/home/jovyan/s3", host_s3_root) + host_output_dir = _to_host_path(output_dir, "/home/jovyan/efs/outbox", host_outbox) + host_params_dir = _to_host_path(params_dir, "/home/jovyan/efs/outbox", host_outbox) # ── Build docker run command ───────────────────────────────────────── - # - # Volume mounts: - # input_dir -> /data/input (read-only: we never modify source videos) - # output_dir -> /data/output (read-write: results written here) - # params_dir -> /data/params (read-only: params.json) - # - # We also pass through resource-related env vars if set. 
docker_cmd = [ - "docker", - "run", - "--rm", # Auto-remove container after exit - # Volume mounts - "-v", - f"{input_dir}:/data/input:ro", - "-v", - f"{output_dir}:/data/output", - "-v", - f"{params_dir}:/data/params:ro", + "docker", "run", "--rm", + "-v", f"{host_input_dir}:/data/input:ro", + "-v", f"{host_output_dir}:/data/output", + "-v", f"{host_params_dir}:/data/params:ro", ] # Pass through resource configuration env vars @@ -2059,6 +2042,20 @@ def _serialize_params(obj): if val: docker_cmd.extend(["-e", f"{env_var}={val}"]) + # Dask timeout config (prevents worker kills during long computations) + dask_env = { + "DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL": "3600s", + "DASK_DISTRIBUTED__COMM__TIMEOUTS__TCP": "7200s", + "DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT": "300s", + "DASK_DISTRIBUTED__SCHEDULER__WORK_STEALING": "False", + } + for key, val in dask_env.items(): + docker_cmd.extend(["-e", f"{key}={val}"]) + + # Memory allocator tuning (reduces heap fragmentation under heavy load) + docker_cmd.extend(["-e", "MALLOC_TRIM_THRESHOLD_=131072"]) + docker_cmd.extend(["-e", "MALLOC_MMAP_THRESHOLD_=131072"]) + # Memory limit for the container itself (if set) container_memory = os.getenv("MINIAN_CONTAINER_MEMORY") if container_memory: @@ -2074,6 +2071,7 @@ def _serialize_params(obj): # The image docker_cmd.append(docker_image) + docker_cmd.append("/data/params/params.json") logger.info(f"Docker command: {' '.join(docker_cmd)}") diff --git a/notebooks/test_minian_pipeline.py b/notebooks/test_minian_pipeline.py new file mode 100644 index 0000000..28d4e6f --- /dev/null +++ b/notebooks/test_minian_pipeline.py @@ -0,0 +1,396 @@ +""" +Test script for Minian integration in element-miniscope. + +This script sets up the database, populates required tables, and runs +the Minian processing pipeline. 
+ +Prerequisites: +- DataJoint database connection configured (dj.config) +- Minian package installed +- Test miniscope video files available + +Usage: + python test_minian_pipeline.py --data-dir /path/to/miniscope/videos +""" + +import os +import sys +import argparse +import datajoint as dj +import numpy as np +from pathlib import Path +from datetime import datetime + +# ============================================================================ +# Configuration - Modify these paths for your environment +# ============================================================================ + +# Root directory containing raw miniscope data +# This should be the parent directory containing session folders with .avi files +DEFAULT_DATA_DIR = "./miniscope_test/" + +# Root directory for processed outputs +DEFAULT_PROCESSED_DIR = "./processed/" + +# Database schema prefix (schemas will be named: {prefix}_miniscope, etc.) +SCHEMA_PREFIX = "test" + + +# ============================================================================ +# Linking Module - Required functions for element-miniscope +# ============================================================================ + +def get_miniscope_root_data_dir(): + """Return the root directory for raw miniscope data.""" + return [os.environ.get("MINISCOPE_ROOT_DATA_DIR", DEFAULT_DATA_DIR)] + + +def get_processed_root_data_dir(): + """Return the root directory for processed data.""" + processed_dir = os.environ.get("MINISCOPE_PROCESSED_DIR", DEFAULT_PROCESSED_DIR) + os.makedirs(processed_dir, exist_ok=True) + return processed_dir + + +def get_session_directory(session_key): + """Return the session directory for a given session key. 
+ + For testing, we assume the directory structure is: + {root_data_dir}/{subject}/{session_date}/ + """ + from element_miniscope import miniscope + + # For a simple test setup, use the session key directly + subject = session_key.get("subject", "test_subject") + session_datetime = session_key.get("session_datetime", datetime.now()) + + if isinstance(session_datetime, datetime): + session_date = session_datetime.strftime("%Y-%m-%d") + else: + session_date = str(session_datetime).split()[0] + + return f"{subject}/{session_date}" + + +# Create a module-like object for the linking module +class LinkingModule: + get_miniscope_root_data_dir = staticmethod(get_miniscope_root_data_dir) + get_processed_root_data_dir = staticmethod(get_processed_root_data_dir) + get_session_directory = staticmethod(get_session_directory) + + +# ============================================================================ +# Schema Setup +# ============================================================================ + +def setup_schemas(): + """Activate the miniscope schemas.""" + from element_miniscope import miniscope + from element_miniscope import miniscope_report + + # Create minimal upstream tables for testing + schema = dj.schema(f"{SCHEMA_PREFIX}_lab") + + @schema + class Subject(dj.Manual): + definition = """ + subject: varchar(32) + """ + + @schema + class Session(dj.Manual): + definition = """ + -> Subject + session_datetime: datetime + """ + + @schema + class Device(dj.Lookup): + definition = """ + device: varchar(32) + """ + contents = [("Miniscope_V4",)] + + @schema + class AnatomicalLocation(dj.Lookup): + definition = """ + location: varchar(32) + """ + contents = [("CA1",), ("mPFC",)] + + # Add required attributes to linking module + LinkingModule.Subject = Subject + LinkingModule.Session = Session + LinkingModule.Device = Device + LinkingModule.AnatomicalLocation = AnatomicalLocation + + # Activate miniscope schema + miniscope.activate( + f"{SCHEMA_PREFIX}_miniscope", + 
linking_module=LinkingModule, + create_schema=True, + create_tables=True, + ) + + # Activate report schema + miniscope_report.activate( + f"{SCHEMA_PREFIX}_miniscope_report", + create_schema=True, + create_tables=True, + ) + + print(f"Schemas activated: {SCHEMA_PREFIX}_lab, {SCHEMA_PREFIX}_miniscope, {SCHEMA_PREFIX}_miniscope_report") + + return Subject, Session, Device, miniscope + + +def populate_test_data(Subject, Session, miniscope, data_dir): + """Populate the database with test data entries.""" + + # Insert test subject + subject_key = {"subject": "test_mouse"} + Subject.insert1(subject_key, skip_duplicates=True) + print(f"Inserted subject: {subject_key}") + + # Insert test session + session_key = { + **subject_key, + "session_datetime": datetime.now().replace(microsecond=0), + } + Session.insert1(session_key, skip_duplicates=True) + print(f"Inserted session: {session_key}") + + # Insert recording + recording_key = { + **session_key, + "recording_id": 0, + "acq_software": "Miniscope-DAQ-V4", + } + miniscope.Recording.insert1(recording_key, skip_duplicates=True) + print(f"Inserted recording: {recording_key}") + + # Populate RecordingInfo (this reads metadata from the video files) + miniscope.RecordingInfo.populate(display_progress=True) + print("Populated RecordingInfo") + + return session_key, recording_key + + +def create_minian_paramset(miniscope): + """Create a ProcessingParamSet for Minian analysis.""" + + minian_params = { + # Video loading + "load_videos": { + "pattern": r".*\.avi$", + "dtype": "uint8", + "downsample": {"frame": 1, "height": 1, "width": 1}, + "downsample_strategy": "subset", + }, + # Preprocessing + "denoise": {"method": "median", "ksize": 7}, + "background_removal": {"method": "tophat", "wnd": 10}, + # Motion correction + "estimate_motion": {"dim": "frame"}, + # Seeds initialization + "seeds_init": { + "wnd_size": 1000, + "method": "rolling", + "stp_size": 500, + "max_wnd": 15, + "diff_thres": 6.5, + }, + "pnr_refine": {"noise_freq": 
0.05, "thres": 1}, + "ks_refine": {"sig": 0.05}, + "seeds_merge": {"thres_dist": 10, "thres_corr": 0.8, "noise_freq": 0.06}, + "initialize": {"thres_corr": 0.8, "wnd": 10, "noise_freq": 0.06}, + "init_merge": {"thres_corr": 0.8}, + # CNMF + "get_noise": {"noise_range": (0.06, 0.5)}, + "first_spatial": { + "dl_wnd": 5, + "sparse_penal": 0.001, + "size_thres": (20, None), + }, + "first_temporal": { + "noise_freq": 0.06, + "sparse_penal": 0.001, + "p": 1, + "add_lag": 10, + "jac_thres": 0.1, + }, + "first_merge": {"thres_corr": 0.6}, + "second_spatial": { + "dl_wnd": 10, + "sparse_penal": 0.001, + "size_thres": (20, None), + }, + "second_temporal": { + "noise_freq": 0.06, + "sparse_penal": 0.01, + "p": 1, + "add_lag": 10, + "jac_thres": 0.2, + "zero_thres": 1e-10, + }, + } + + paramset_idx = 0 + + miniscope.ProcessingParamSet.insert_new_params( + processing_method="minian", + paramset_idx=paramset_idx, + paramset_desc="Default Minian parameters for miniscope analysis", + params=minian_params, + ) + + print(f"Created Minian ProcessingParamSet with idx={paramset_idx}") + return paramset_idx + + +def create_processing_task(miniscope, recording_key, paramset_idx): + """Create a ProcessingTask entry.""" + + task_key = { + **recording_key, + "paramset_idx": paramset_idx, + } + + # Infer output directory + output_dir = miniscope.ProcessingTask.infer_output_dir( + task_key, relative=True, mkdir=True + ) + + miniscope.ProcessingTask.insert1( + { + **task_key, + "processing_output_dir": str(output_dir), + "task_mode": "trigger", + }, + skip_duplicates=True, + ) + + print(f"Created ProcessingTask: {task_key}") + print(f"Output directory: {output_dir}") + + return task_key + + +def run_processing(miniscope): + """Run the processing pipeline.""" + print("\n" + "=" * 60) + print("Starting Minian Processing...") + print("=" * 60 + "\n") + + miniscope.Processing.populate(display_progress=True) + + print("\nProcessing complete!") + print(f"Entries in Processing table: 
{len(miniscope.Processing())}") + + +def populate_downstream_tables(miniscope, miniscope_report): + """Populate downstream analysis tables.""" + print("\n" + "=" * 60) + print("Populating downstream tables...") + print("=" * 60 + "\n") + + print("Populating MotionCorrection...") + miniscope.MotionCorrection.populate(display_progress=True) + + print("Populating Segmentation...") + miniscope.Segmentation.populate(display_progress=True) + + print("Populating Fluorescence...") + miniscope.Fluorescence.populate(display_progress=True) + + print("Populating Activity...") + miniscope.Activity.populate(display_progress=True) + + print("Populating visualizations...") + miniscope_report.MinianProcessingVisualization.populate(display_progress=True) + + print("\nDownstream tables populated!") + + +def verify_results(miniscope): + """Print summary of results.""" + print("\n" + "=" * 60) + print("Results Summary") + print("=" * 60 + "\n") + + print(f"Processing entries: {len(miniscope.Processing())}") + print(f"MotionCorrection entries: {len(miniscope.MotionCorrection())}") + print(f"Segmentation entries: {len(miniscope.Segmentation())}") + print(f"Segmentation.Mask entries: {len(miniscope.Segmentation.Mask())}") + print(f"Fluorescence entries: {len(miniscope.Fluorescence())}") + print(f"Fluorescence.Trace entries: {len(miniscope.Fluorescence.Trace())}") + print(f"Activity entries: {len(miniscope.Activity())}") + + # Show some mask statistics if available + if len(miniscope.Segmentation.Mask()) > 0: + masks = miniscope.Segmentation.Mask.fetch() + print(f"\nDetected {len(masks)} ROIs/cells") + + +def main(): + parser = argparse.ArgumentParser(description="Test Minian integration") + parser.add_argument( + "--data-dir", + type=str, + default=DEFAULT_DATA_DIR, + help="Path to directory containing miniscope video files", + ) + parser.add_argument( + "--processed-dir", + type=str, + default=DEFAULT_PROCESSED_DIR, + help="Path to directory for processed outputs", + ) + 
parser.add_argument( + "--skip-processing", + action="store_true", + help="Skip processing and just verify existing results", + ) + args = parser.parse_args() + + # Set environment variables + os.environ["MINISCOPE_ROOT_DATA_DIR"] = os.path.abspath(args.data_dir) + os.environ["MINISCOPE_PROCESSED_DIR"] = os.path.abspath(args.processed_dir) + + print(f"Data directory: {os.environ['MINISCOPE_ROOT_DATA_DIR']}") + print(f"Processed directory: {os.environ['MINISCOPE_PROCESSED_DIR']}") + + # Import after setting paths + from element_miniscope import miniscope_report + + # Setup schemas + Subject, Session, Device, miniscope = setup_schemas() + + if not args.skip_processing: + # Populate test data + session_key, recording_key = populate_test_data( + Subject, Session, miniscope, args.data_dir + ) + + # Create parameter set + paramset_idx = create_minian_paramset(miniscope) + + # Create processing task + task_key = create_processing_task(miniscope, recording_key, paramset_idx) + + # Run processing + run_processing(miniscope) + + # Populate downstream tables + populate_downstream_tables(miniscope, miniscope_report) + + # Verify results + verify_results(miniscope) + + print("\n" + "=" * 60) + print("Test complete!") + print("=" * 60) + + +if __name__ == "__main__": + main() From ac6d5dac9acab42e954a2518827035f9134c9ba7 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi Date: Wed, 18 Mar 2026 13:48:41 -0400 Subject: [PATCH 36/36] Fix MinianLoader summary images and container env var passthrough MinianLoader: - Load ref_image and mean_proj zarrs directly instead of computing from full varr_ref video (which was never in the final output) - Remove dead varr_ref fallback from max_proj_image _run_minian_in_container: - Pass through Dask env vars from environment instead of hardcoding - Include DASK_DISTRIBUTED__WORKER__MEMORY__* vars - Fix MINIAN_CONTAINER_MEMORY -> MINIAN_CONTAINER_MEM_LIMIT to match docker-compose naming - Remove unused /data/params/params.json CLI argument 
Co-Authored-By: Claude Opus 4.6 (1M context) --- element_miniscope/miniscope.py | 46 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/element_miniscope/miniscope.py b/element_miniscope/miniscope.py index 777ad77..97f16ce 100644 --- a/element_miniscope/miniscope.py +++ b/element_miniscope/miniscope.py @@ -1788,9 +1788,10 @@ def __init__(self, output_dir): # Try to load motion correction data self._motion = self._minian_ds.get("motion") - # Try to load reference/max projection images + # Summary images self._max_proj = self._minian_ds.get("max_proj") - self._varr_ref = self._minian_ds.get("varr_ref") + self._mean_proj = self._minian_ds.get("mean_proj") + self._ref_image = self._minian_ds.get("ref_image") @property def minian_dataset(self): @@ -1845,16 +1846,15 @@ def extract_rigid_mc(self): @property def ref_image(self): """Return reference image used for motion correction.""" - if self._varr_ref is not None: - # Take mean across frames for reference - return self._varr_ref.mean(dim="frame").compute().values[np.newaxis, :, :] + if self._ref_image is not None: + return self._ref_image.compute().values[np.newaxis, :, :] return None @property def mean_image(self): """Return mean image (average across frames).""" - if self._varr_ref is not None: - return self._varr_ref.mean(dim="frame").compute().values[np.newaxis, :, :] + if self._mean_proj is not None: + return self._mean_proj.compute().values[np.newaxis, :, :] return None @property @@ -1862,8 +1862,6 @@ def max_proj_image(self): """Return maximum projection image.""" if self._max_proj is not None: return self._max_proj.compute().values[np.newaxis, :, :] - elif self._varr_ref is not None: - return self._varr_ref.max(dim="frame").compute().values[np.newaxis, :, :] return None @property @@ -2036,28 +2034,31 @@ def _to_host_path(container_path, container_prefix, host_prefix): "-v", f"{host_params_dir}:/data/params:ro", ] - # Pass through resource configuration env vars - 
for env_var in ["MINIAN_NWORKERS", "MINIAN_MEMORY_LIMIT"]: + # Pass through resource configuration and Dask env vars + _passthrough_env_vars = [ + "MINIAN_NWORKERS", + "MINIAN_MEMORY_LIMIT", + # Dask distributed config (set via docker-compose with defaults) + "DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL", + "DASK_DISTRIBUTED__COMM__TIMEOUTS__TCP", + "DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT", + "DASK_DISTRIBUTED__SCHEDULER__WORK_STEALING", + "DASK_DISTRIBUTED__WORKER__MEMORY__TARGET", + "DASK_DISTRIBUTED__WORKER__MEMORY__SPILL", + "DASK_DISTRIBUTED__WORKER__MEMORY__PAUSE", + "DASK_DISTRIBUTED__WORKER__MEMORY__TERMINATE", + ] + for env_var in _passthrough_env_vars: val = os.getenv(env_var) if val: docker_cmd.extend(["-e", f"{env_var}={val}"]) - # Dask timeout config (prevents worker kills during long computations) - dask_env = { - "DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL": "3600s", - "DASK_DISTRIBUTED__COMM__TIMEOUTS__TCP": "7200s", - "DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT": "300s", - "DASK_DISTRIBUTED__SCHEDULER__WORK_STEALING": "False", - } - for key, val in dask_env.items(): - docker_cmd.extend(["-e", f"{key}={val}"]) - # Memory allocator tuning (reduces heap fragmentation under heavy load) docker_cmd.extend(["-e", "MALLOC_TRIM_THRESHOLD_=131072"]) docker_cmd.extend(["-e", "MALLOC_MMAP_THRESHOLD_=131072"]) # Memory limit for the container itself (if set) - container_memory = os.getenv("MINIAN_CONTAINER_MEMORY") + container_memory = os.getenv("MINIAN_CONTAINER_MEM_LIMIT") if container_memory: docker_cmd.extend(["--memory", container_memory]) @@ -2071,7 +2072,6 @@ def _to_host_path(container_path, container_prefix, host_prefix): # The image docker_cmd.append(docker_image) - docker_cmd.append("/data/params/params.json") logger.info(f"Docker command: {' '.join(docker_cmd)}")