From 8c2401a4672144ebf76b3ae1ca38011dde9fc34b Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Tue, 14 Apr 2026 16:40:26 +0200 Subject: [PATCH 1/2] Wait for encode thread before decoding to avoid race with warmup --- src/develop/masks/object.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/develop/masks/object.c b/src/develop/masks/object.c index 8a55a2f88d77..be04062c8a33 100644 --- a/src/develop/masks/object.c +++ b/src/develop/masks/object.c @@ -425,12 +425,9 @@ static gpointer _encode_thread_func(gpointer data) dt_seg_disk_cache_save(d->seg, imgid, distort_hash, rgb, out_w, out_h); - // signal ready immediately so the user can start placing points, - // the warmup below continues on this background thread - if the user - // clicks before it finishes, ORT serializes concurrent Run() calls on - // the same session, so the decode simply waits for the warmup to - // complete first - in practice, users need a moment to position their - // cursor, so the ~1 s warmup usually finishes before the first click + // signal ready so the user can start placing points; warmup continues + // on this thread; _run_decoder joins the thread on the first click to + // avoid a race with warmup on the shared segmentation context g_atomic_int_set(&d->encode_state, ok ? ENCODE_READY : ENCODE_ERROR); // warm up decoder with real encoder embeddings so the first user click @@ -690,6 +687,13 @@ static void _run_decoder(dt_masks_form_gui_t *gui) if(gui->guipoints_count <= 0) return; + // wait for encode thread: warmup may still be running after ENCODE_READY + if(d->encode_thread) + { + g_thread_join(d->encode_thread); + d->encode_thread = NULL; + } + dt_gui_cursor_set_busy(); const float *gp = dt_masks_dynbuf_buffer(gui->guipoints); From 6382e43a6be03a071684168a523d6f52f295d2be Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Tue, 14 Apr 2026 17:42:28 +0200 Subject: [PATCH 2/2] Keep decoder output shape arrays alive through dt_ai_run --- src/common/ai/segmentation.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/common/ai/segmentation.c b/src/common/ai/segmentation.c index 115c049c914d..946d26749d4c 100644 --- a/src/common/ai/segmentation.c +++ b/src/common/ai/segmentation.c @@ -570,6 +570,9 @@ void dt_seg_warmup_decoder(dt_seg_context_t *ctx) .shape = has_mask_shape, .ndim = 1}; int64_t masks_shape[4] = {1, nm, dec_h, dec_w}; + // shapes must outlive outputs[] used by dt_ai_run below + int64_t iou_shape[2] = {1, nm}; + int64_t lr_shape[4] = {1, nm, pm_dim, pm_dim}; float iou_buf[MAX_NUM_MASKS]; dt_ai_tensor_t outputs[3]; @@ -577,8 +580,6 @@ void dt_seg_warmup_decoder(dt_seg_context_t *ctx) if(is_sam) { - int64_t iou_shape[2] = {1, nm}; - int64_t lr_shape[4] = {1, nm, pm_dim, pm_dim}; const int dec_outputs = dt_ai_get_output_count(ctx->decoder); outputs[0] = (dt_ai_tensor_t){ @@ -847,6 +848,9 @@ float *dt_seg_compute_mask(dt_seg_context_t *ctx, dt_ai_tensor_t dec_outputs[3]; int n_dec_out; int64_t masks_shape[4] = {1, nm, dec_h, dec_w}; + // shapes must outlive dec_outputs[] used by dt_ai_run below + int64_t iou_shape[2] = {1, nm}; + int64_t low_res_shape[4] = {1, nm, pm_dim, pm_dim}; float iou_pred[MAX_NUM_MASKS]; float *low_res = NULL; @@ -854,7 +858,6 @@ float *dt_seg_compute_mask(dt_seg_context_t *ctx, if(is_sam) { // SAM: masks [1,N,H,W] + iou [1,N], optionally low_res [1,N,pm,pm] - int64_t iou_shape[2] = {1, nm}; const int dec_out_count = dt_ai_get_output_count(ctx->decoder); dec_outputs[0] = (dt_ai_tensor_t){ @@ -875,7 +878,6 @@ float *dt_seg_compute_mask(dt_seg_context_t *ctx, g_free(masks); return NULL; } - int64_t low_res_shape[4] = {1, nm, pm_dim, pm_dim}; dec_outputs[2] = (dt_ai_tensor_t){ .data = low_res, .type = DT_AI_FLOAT, .shape = low_res_shape, .ndim = 4}; n_dec_out = 3;