@@ -148,6 +148,12 @@ class _Source:
148148 # ComplexSource extras
149149 scale : float | None = None
150150 offset : float | None = None
151+ # GDAL ``<ComplexSource><ResampleAlg>`` values are ``NearestNeighbour``
152+ # (default), ``Bilinear``, ``Cubic``, ``CubicSpline``, ``Lanczos``,
153+ # ``Average``, ``Mode``. We parse the value so we can advertise it,
154+ # but only nearest-neighbour is implemented in the placement step
155+ # (issue #1694). Higher-quality resamplers are tracked for follow-up.
156+ resample_alg : str | None = None
151157
152158
153159@dataclass
@@ -330,6 +336,7 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
330336 # ComplexSource extras
331337 scale = None
332338 offset = None
339+ resample_alg = None
333340 if tag == 'ComplexSource' :
334341 scale_str = _text (src_elem , 'ScaleOffset' )
335342 offset_str = _text (src_elem , 'ScaleRatio' )
@@ -338,6 +345,11 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
338345 scale = float (offset_str )
339346 if scale_str :
340347 offset = float (scale_str )
348+ # ``<ResampleAlg>`` is informational for the placement
349+ # step: read_vrt currently always nearest-neighbours, so
350+ # we only record the value for later honouring. See
351+ # issue #1694.
352+ resample_alg = _text (src_elem , 'ResampleAlg' )
341353
342354 sources .append (_Source (
343355 filename = filename ,
@@ -347,6 +359,7 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
347359 nodata = src_nodata ,
348360 scale = scale ,
349361 offset = offset ,
362+ resample_alg = resample_alg ,
350363 ))
351364
352365 bands .append (_VRTBand (
@@ -367,6 +380,69 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
367380 )
368381
369382
383+ def _resample_nearest (src_arr : np .ndarray ,
384+ out_h : int , out_w : int ) -> np .ndarray :
385+ """Nearest-neighbour resample ``src_arr`` to ``(out_h, out_w)``.
386+
387+ Mirrors GDAL's ``SimpleSource`` semantics: each output pixel samples
388+ the source pixel whose centre is closest in the source grid. The
389+ centre-of-pixel mapping is ``src_idx = (out_idx + 0.5) * src_size /
390+ out_size``; ``floor`` of that, clamped into range, gives the index.
391+
392+ Used by :func:`read_vrt` to honour ``<SrcRect>``/``<DstRect>``
393+ scaling. See issue #1694 -- before this helper, a source array was
394+ pasted directly into the destination with no resample step, which
395+ raised a broadcast error for downsampled sources and left holes for
396+ upsampled ones.
397+
398+ Integer-ratio cases short-circuit to ``np.repeat`` (integer
399+ upsample) or strided slicing (integer downsample) to avoid building
400+ an index array for the common GDAL ``SrcRect`` shapes.
401+ """
402+ src_h , src_w = src_arr .shape [:2 ]
403+ # Reject an empty source up front: a SimpleSource with SrcRect
404+ # ``xSize=0`` / ``ySize=0`` (or a windowed read that clamps to an
405+ # empty slice) would otherwise reach the integer-ratio fast paths
406+ # below and divide / modulo by zero on ``src_h`` or ``src_w``. Raise
407+ # an explicit ValueError so the bad VRT surfaces with a clear cause
408+ # instead of an opaque ZeroDivisionError.
409+ if src_h == 0 or src_w == 0 :
410+ raise ValueError (
411+ "_resample_nearest received an empty source array; "
412+ "the VRT SourceFilename probably has SrcRect with zero size"
413+ )
414+ if src_h == out_h and src_w == out_w :
415+ return src_arr
416+ # Fast paths for integer ratios -- common when a VRT downsamples by
417+ # 2x / 4x or upsamples by an integer factor. ``np.repeat`` is a
418+ # cheaper nearest-neighbour upsample than the gather below.
419+ if (out_h % src_h == 0 and out_w % src_w == 0
420+ and out_h >= src_h and out_w >= src_w ):
421+ ry = out_h // src_h
422+ rx = out_w // src_w
423+ return np .repeat (np .repeat (src_arr , ry , axis = 0 ), rx , axis = 1 )
424+ if (src_h % out_h == 0 and src_w % out_w == 0
425+ and src_h >= out_h and src_w >= out_w ):
426+ sy = src_h // out_h
427+ sx = src_w // out_w
428+ # Sample the centre of each output pixel's source block (offset
429+ # by half a block, floored) so the integer-ratio downsample
430+ # agrees with the general non-integer path for the same shapes.
431+ return src_arr [sy // 2 ::sy , sx // 2 ::sx ][:out_h , :out_w ]
432+ # General case: build per-axis nearest indices and gather. ``floor``
433+ # plus a clamp matches GDAL's nearest-neighbour rounding for
434+ # non-integer ratios.
435+ y_idx = np .minimum (
436+ np .floor ((np .arange (out_h ) + 0.5 ) * src_h / out_h ).astype (np .intp ),
437+ src_h - 1 ,
438+ )
439+ x_idx = np .minimum (
440+ np .floor ((np .arange (out_w ) + 0.5 ) * src_w / out_w ).astype (np .intp ),
441+ src_w - 1 ,
442+ )
443+ return src_arr [y_idx [:, None ], x_idx [None , :]]
444+
445+
370446def read_vrt (vrt_path : str , * , window = None ,
371447 band : int | None = None ,
372448 max_pixels : int | None = None ) -> tuple [np .ndarray , VRTDataset ]:
@@ -479,15 +555,38 @@ def read_vrt(vrt_path: str, *, window=None,
479555 if clip_r0 >= clip_r1 or clip_c0 >= clip_c1 :
480556 continue # no overlap
481557
482- # Map back to source coordinates
483- # Scale factor: source pixels per destination pixel
484- scale_y = sr .y_size / dr .y_size if dr .y_size > 0 else 1.0
485- scale_x = sr .x_size / dr .x_size if dr .x_size > 0 else 1.0
486-
487- src_r0 = sr .y_off + int ((clip_r0 - dst_r0 ) * scale_y )
488- src_c0 = sr .x_off + int ((clip_c0 - dst_c0 ) * scale_x )
489- src_r1 = sr .y_off + int ((clip_r1 - dst_r0 ) * scale_y )
490- src_c1 = sr .x_off + int ((clip_c1 - dst_c0 ) * scale_x )
558+ # SrcRect / DstRect scaling. When sizes match (the common
559+ # SimpleSource case GDAL writes for tile mosaics), the source
560+ # rect maps 1:1 to the destination rect and we can issue a
561+ # windowed read for only the overlap region -- cheaper than
562+ # decoding the whole source rect. When the sizes differ the
563+ # source band is being upsampled or downsampled into its
564+ # destination cell, so we read the full source rect, apply
565+ # nodata masking, resample to ``(dr.y_size, dr.x_size)`` with
566+ # nearest-neighbour (matching GDAL's SimpleSource semantics),
567+ # and *then* take the clip subwindow. Reading the whole
568+ # source rect first keeps the resample math local to the
569+ # source/destination shapes; trying to resample a windowed
570+ # source slice independently introduces fence-post errors at
571+ # the clip boundary and breaks the integer-ratio fast paths.
572+ # See issue #1694.
573+ needs_resample = (sr .y_size != dr .y_size
574+ or sr .x_size != dr .x_size )
575+ if needs_resample :
576+ # TODO(#1704): when the caller passes a small ``window=``
577+ # this still reads the full SrcRect. Computing the
578+ # inverse of the nearest-neighbour mapping to read only
579+ # the SrcRect subset that feeds ``window`` would cut the
580+ # decode/memory cost for large SrcRects.
581+ read_r0 = sr .y_off
582+ read_c0 = sr .x_off
583+ read_r1 = sr .y_off + sr .y_size
584+ read_c1 = sr .x_off + sr .x_size
585+ else :
586+ read_r0 = sr .y_off + (clip_r0 - dst_r0 )
587+ read_c0 = sr .x_off + (clip_c0 - dst_c0 )
588+ read_r1 = sr .y_off + (clip_r1 - dst_r0 )
589+ read_c1 = sr .x_off + (clip_c1 - dst_c0 )
491590
492591 # Read from source file using windowed read.
493592 #
@@ -508,7 +607,7 @@ def read_vrt(vrt_path: str, *, window=None,
508607 try :
509608 src_arr , _ = read_to_array (
510609 src .filename ,
511- window = (src_r0 , src_c0 , src_r1 , src_c1 ),
610+ window = (read_r0 , read_c0 , read_r1 , read_c1 ),
512611 band = src .band - 1 , # convert 1-based to 0-based
513612 )
514613 except (
@@ -541,6 +640,16 @@ def read_vrt(vrt_path: str, *, window=None,
541640 # fallback: the earlier ``src.nodata or nodata`` shortcut treated
542641 # ``0.0`` as falsy and silently replaced it with the band-level
543642 # sentinel (issue #1655).
643+ #
644+ # Apply nodata masking *before* the resample. Nearest-neighbour
645+ # carries the sentinel value through unchanged, which is what
646+ # we want -- the resampled pixel inherits the NaN (or integer
647+ # sentinel) of whichever source pixel it sampled. Resampling
648+ # first and masking after would still work for the float case
649+ # but would force the integer-into-float promotion to allocate
650+ # the resampled-shape buffer before discovering the mask,
651+ # which is a waste when the source rect is much larger than
652+ # the destination rect.
544653 src_nodata = src .nodata if src .nodata is not None else nodata
545654 if src_nodata is not None and src_arr .dtype .kind == 'f' :
546655 src_arr = src_arr .copy ()
@@ -582,6 +691,24 @@ def read_vrt(vrt_path: str, *, window=None,
582691 if src .offset is not None and src .offset != 0.0 :
583692 src_arr = src_arr .astype (np .float64 ) + src .offset
584693
694+ if needs_resample :
695+ # Resample the full source rect to the destination rect
696+ # shape, then clip to the window subregion. Doing the
697+ # resample on the full rect keeps the nearest-neighbour
698+ # index math in one place; the post-resample slice is a
699+ # plain numpy view.
700+ src_arr = _resample_nearest (src_arr , dr .y_size , dr .x_size )
701+ # Take the clip subwindow out of the resampled array.
702+ # ``dst_r0`` / ``dst_c0`` anchor the resampled array in
703+ # destination coordinates; the clip rect is in
704+ # destination coordinates too, so the subtract gives the
705+ # right offsets.
706+ sub_r0 = clip_r0 - dst_r0
707+ sub_c0 = clip_c0 - dst_c0
708+ sub_r1 = clip_r1 - dst_r0
709+ sub_c1 = clip_c1 - dst_c0
710+ src_arr = src_arr [sub_r0 :sub_r1 , sub_c0 :sub_c1 ]
711+
585712 # Place into output
586713 out_r0 = clip_r0 - r0
587714 out_c0 = clip_c0 - c0
0 commit comments