@@ -89,7 +89,6 @@ class DwtFlowHelper : public SchedulerStandard
8989 imageComponentFlow_[compno] = new ImageComponentFlow (numRes);
9090 if (regionDecompress)
9191 imageComponentFlow_[compno]->setRegionDecompression ();
92- // addTo must be called before graph to initialize composition tasks
9392 imageComponentFlow_[compno]->addTo (*this );
9493 SchedulerStandard::graph (compno);
9594 }
@@ -116,11 +115,17 @@ bool SchedulerFreebyrd::decompressTile(ITileProcessor* tileProcessor)
116115{
117116 success_ = true ;
118117
118+ #ifdef GRK_USE_SCX_SCHEDULING
119+ // SCX path: sequential T1 → DWT (Phase 3)
119120 if (!decodeBlocks (tileProcessor))
120121 return false ;
121-
122122 if (!runDWT (tileProcessor))
123123 return false ;
124+ #else
125+ // Interleaved T1+DWT: per-component DAG with T1 and DWT tasks (Phase 4)
126+ if (!decodeAndTransform (tileProcessor))
127+ return false ;
128+ #endif
124129
125130 if (!postProcess (tileProcessor))
126131 return false ;
@@ -377,6 +382,169 @@ bool SchedulerFreebyrd::runDWT(ITileProcessor* tileProcessor)
377382 return true ;
378383}
379384
385+ #ifndef GRK_USE_SCX_SCHEDULING
386+ bool SchedulerFreebyrd::decodeAndTransform (ITileProcessor* tileProcessor)
387+ {
388+ auto tcp = tileProcessor->getTCP ();
389+ bool cacheAll =
390+ (tileProcessor->getTileCacheStrategy () & GRK_TILE_CACHE_ALL) == GRK_TILE_CACHE_ALL;
391+ uint32_t num_threads = (uint32_t )TFSingleton::num_threads ();
392+ bool finalLayer = tcp->layersToDecompress_ == tcp->numLayers_ ;
393+
394+ for (uint16_t compno = 0 ; compno < numcomps_; ++compno)
395+ {
396+ if (!tileProcessor->shouldDecodeComponent (compno))
397+ continue ;
398+
399+ auto tccp = tcp->tccps_ + compno;
400+ uint16_t cbw = tccp->cblkw_expn_ ? (uint16_t )(1 << tccp->cblkw_expn_ ) : 0U ;
401+ uint16_t cbh = tccp->cblkh_expn_ ? (uint16_t )(1 << tccp->cblkh_expn_ ) : 0U ;
402+ auto activePool = &coderPool_;
403+ if (streamPool_ && streamPool_->contains (tccp->cblkw_expn_ , tccp->cblkh_expn_ ))
404+ activePool = streamPool_;
405+
406+ if (!cacheAll)
407+ {
408+ activePool->makeCoders (
409+ num_threads, tccp->cblkw_expn_ , tccp->cblkh_expn_ ,
410+ [tcp, cbw, cbh, tileProcessor]() -> std::shared_ptr<t1::ICoder> {
411+ return std::shared_ptr<t1::ICoder>(t1::CoderFactory::makeCoder (
412+ tcp->isHT (), false , cbw, cbh, tileProcessor->getTileCacheStrategy ()));
413+ });
414+ }
415+
416+ auto tilec = tileProcessor->getTile ()->comps_ + compno;
417+ auto wholeTileDecoding = tilec->isWholeTileDecoding ();
418+ uint8_t resBegin =
419+ cacheAll ? (uint8_t )tilec->currentPacketProgressionState_ .numResolutionsRead () : 0 ;
420+ uint8_t resUpperBound = tilec->nextPacketProgressionState_ .numResolutionsRead ();
421+ uint8_t numRes = tilec->nextPacketProgressionState_ .numResolutionsRead ();
422+
423+ if (numRes == 0 )
424+ continue ;
425+
426+ // 1. Set up ImageComponentFlow for this component
427+ dwtHelper_->release ();
428+ dwtHelper_->clear ();
429+ dwtHelper_->setupComponentFlow (compno, numRes, !wholeTileDecoding);
430+
431+ // 2. Collect blocks per resolution and schedule T1 decode into flow
432+ // Combine first two resolution levels (0+1) into a single flow slot (like standard scheduler)
433+ uint8_t flowResIdx = 0 ;
434+ for (uint8_t resno = resBegin; resno < resUpperBound; ++resno)
435+ {
436+ auto res = tilec->resolutions_ + resno;
437+ for (uint8_t bandIndex = 0 ; bandIndex < res->numBands_ ; ++bandIndex)
438+ {
439+ auto band = res->band + bandIndex;
440+ auto paddedBandWindow = tilec->getBandWindowPadded (resno, band->orientation_ );
441+ for (auto precinct : band->precincts_ )
442+ {
443+ if (!wholeTileDecoding && !paddedBandWindow->nonEmptyIntersection (precinct))
444+ continue ;
445+ for (uint32_t cblkno = 0 ; cblkno < precinct->getNumCblks (); ++cblkno)
446+ {
447+ auto cblkBounds = precinct->getCodeBlockBounds (cblkno);
448+ if (!wholeTileDecoding && !paddedBandWindow->nonEmptyIntersection (&cblkBounds))
449+ continue ;
450+
451+ auto cblk = precinct->getDecompressBlock (cblkno);
452+ auto block = std::make_shared<t1::DecompressBlockExec>(cacheAll);
453+ block->x = cblk->x0 ();
454+ block->y = cblk->y0 ();
455+ block->postProcessor_ =
456+ tcp->isHT () ? t1::DecompressBlockPostProcessor<int32_t >(
457+ [tilec](int32_t * srcData, t1::DecompressBlockExec* blk,
458+ uint16_t stride) {
459+ tilec->postProcessBlockHT (srcData, blk, stride);
460+ })
461+ : t1::DecompressBlockPostProcessor<int32_t >(
462+ [tilec](int32_t * srcData, t1::DecompressBlockExec* blk,
463+ [[maybe_unused]] uint16_t stride) {
464+ tilec->postProcessBlock (srcData, blk);
465+ });
466+ block->bandIndex = bandIndex;
467+ block->bandNumbps = band->maxBitPlanes_ ;
468+ block->bandOrientation = band->orientation_ ;
469+ block->cblk = cblk;
470+ block->cblk_sty = tccp->cblkStyle_ ;
471+ block->qmfbid = tccp->qmfbid_ ;
472+ block->resno = resno;
473+ block->roishift = tccp->roishift_ ;
474+ block->stepsize = band->stepsize_ ;
475+ block->k_msbs = (uint8_t )(band->maxBitPlanes_ - cblk->numbps ());
476+ block->R_b = prec_ + gain_b[band->orientation_ ];
477+ block->finalLayer_ = finalLayer;
478+
479+ // Schedule T1 decode into the flow
480+ auto imageComponentFlow = dwtHelper_->getImageComponentFlow (compno);
481+ auto resFlow = imageComponentFlow->getResflow (flowResIdx);
482+ resFlow->blocks_ ->nextTask ().work (
483+ [this , activePool, block, tccp, cbw, cbh, cacheAll, tileProcessor]() {
484+ if (!success_)
485+ {
486+ block.reset ();
487+ return ;
488+ }
489+ t1::ICoder* coder = nullptr ;
490+ if (block->needsCachedCoder ())
491+ {
492+ coder = t1::CoderFactory::makeCoder (tileProcessor->getTCP ()->isHT (), false , cbw,
493+ cbh, tileProcessor->getTileCacheStrategy ());
494+ }
495+ else if (!cacheAll)
496+ {
497+ auto threadnum = TFSingleton::get ().this_worker_id ();
498+ coder =
499+ activePool->getCoder ((size_t )threadnum, tccp->cblkw_expn_ , tccp->cblkh_expn_ )
500+ .get ();
501+ }
502+ try
503+ {
504+ if (!block->open (coder))
505+ success_ = false ;
506+ }
507+ catch (const std::runtime_error& rerr)
508+ {
509+ grklog.error (rerr.what ());
510+ success_ = false ;
511+ }
512+ });
513+ }
514+ }
515+ }
516+ // Combine res 0 with res 1 into the same flow slot (same as standard scheduler)
517+ if (resno == 0 && resUpperBound > 1 )
518+ continue ;
519+ flowResIdx++;
520+ }
521+ tilec->currentPacketProgressionState_ = tilec->nextPacketProgressionState_ ;
522+
523+ if (!success_)
524+ return false ;
525+
526+ // 3. Schedule DWT (if more than 1 resolution)
527+ if (numRes > 1 )
528+ {
529+ auto maxDim = std::max (tileProcessor->getCodingParams ()->t_width_ ,
530+ tileProcessor->getCodingParams ()->t_height_ );
531+
532+ WaveletReverse wavelet (dwtHelper_, tilec, compno, tilec->windowUnreducedBounds (), numRes,
533+ tccp->qmfbid_ , maxDim, tcp->wholeTileDecompress_ , waveletPoolData_);
534+ if (!wavelet.decompress ())
535+ return false ;
536+ }
537+
538+ // 4. Run the complete T1+DWT flow
539+ TFSingleton::get ().run (*dwtHelper_).wait ();
540+ dwtHelper_->release ();
541+ dwtHelper_->clear ();
542+ }
543+
544+ return success_;
545+ }
546+ #endif
547+
380548bool SchedulerFreebyrd::runCascadeDWT97 ([[maybe_unused]] ITileProcessor* tileProcessor,
381549 [[maybe_unused]] uint16_t compno)
382550{
0 commit comments