4545except ImportError :
4646 cupy = None
4747
48- from .. utils import is_cupy_array , ngjit
48+ from .utils import ArrayTypeFunctionMapping , is_cupy_array , ngjit
4949
5050_regions_dtype = np .uint32
5151_visited_dtype = np .uint8
@@ -417,15 +417,14 @@ def _transform_points(
417417
418418@ngjit
419419def _scan (
420+ regions : np .ndarray , # _regions_dtype, shape (nx*ny,)
420421 values : np .ndarray , # shape (nx*ny,)
421422 mask : Optional [np .ndarray ], # shape (nx*ny,)
422423 connectivity_8 : bool ,
423424 transform : Optional [np .ndarray ], # shape (6,)
424425 nx : int ,
425426 ny : int ,
426427) -> Tuple [List [Union [int , float ]], List [List [np .ndarray ]]]:
427- regions = _calculate_regions (values , mask , connectivity_8 , nx , ny )
428-
429428 # Visited flags used to denote where boundaries have already been
430429 # followed and hence are not future start positions.
431430 visited = np .zeros_like (values , dtype = _visited_dtype )
@@ -503,6 +502,23 @@ def _to_spatialpandas(
503502 return df
504503
505504
def _to_geojson(
    column: List[Union[int, float]],
    polygon_points: List[List[np.ndarray]],
    column_name: str,
) -> dict:
    """Convert polygonize output to a GeoJSON FeatureCollection dict.

    Parameters
    ----------
    column
        One value per polygon; stored as the single property of the
        corresponding feature.
    polygon_points
        One list of rings per polygon. Each ring is an array that is
        converted with ``tolist()`` and used as one entry of the
        polygon's ``coordinates``.
        NOTE(review): presumably the exterior ring comes first followed
        by any holes, as GeoJSON expects -- confirm against ``_scan``.
    column_name
        Key under which each value is stored in ``properties``.

    Returns
    -------
    dict
        ``{"type": "FeatureCollection", "features": [...]}`` with one
        ``Polygon`` feature per ``(value, rings)`` pair. Pairs are formed
        with ``zip``, so any surplus entries in the longer input are
        ignored.
    """
    features = []
    for value, rings in zip(column, polygon_points):
        # NumPy scalars (e.g. np.int64) are rejected by json.dumps, so
        # hand back native Python numbers to keep the result serializable.
        if isinstance(value, np.generic):
            value = value.item()
        features.append({
            "type": "Feature",
            "properties": {column_name: value},
            "geometry": {
                "type": "Polygon",
                "coordinates": [ring.tolist() for ring in rings],
            },
        })
    return {"type": "FeatureCollection", "features": features}
520+
521+
506522def _polygonize_numpy (
507523 values : np .ndarray ,
508524 mask : Optional [np .ndarray ],
@@ -523,12 +539,12 @@ def _polygonize_numpy(
523539 mask = np .zeros_like (values , dtype = bool )
524540 mask [:, 0 ] = True
525541
526- values = values .ravel ()
527- if mask is not None :
528- mask = mask .ravel ()
542+ values_flat = values .ravel ()
543+ mask_flat = mask .ravel () if mask is not None else None
529544
545+ regions = _calculate_regions (values_flat , mask_flat , connectivity_8 , nx , ny )
530546 column , polygon_points = _scan (
531- values , mask , connectivity_8 , transform , nx , ny )
547+ regions , values_flat , mask_flat , connectivity_8 , transform , nx , ny )
532548
533549 return column , polygon_points
534550
@@ -537,6 +553,100 @@ def _polygonize_numpy(
537553_DIR_ANGLE = {(1 , 0 ): 0 , (0 , 1 ): 1 , (- 1 , 0 ): 2 , (0 , - 1 ): 3 }
538554
539555
def _calculate_regions_cupy(data, mask_data, connectivity_8):
    """CuPy GPU backend for connected-component labeling.

    Labels each distinct value of ``data`` separately with
    ``cupyx.scipy.ndimage.label`` and merges the results into a single
    array of region IDs, offsetting each value's labels so that IDs stay
    unique across values.

    Parameters
    ----------
    data : cupy.ndarray
        2D array of values to label (the 3x3 structuring element below
        fixes the rank).
    mask_data : cupy.ndarray or None
        Optional array of the same shape as ``data``; only pixels where
        it is truthy are labeled. It is never modified.
    connectivity_8 : bool
        If True, diagonal neighbours belong to the same region;
        otherwise only edge-sharing neighbours do.

    Returns
    -------
    cupy.ndarray
        ``uint32`` array with the shape of ``data``. 0 marks pixels that
        belong to no region (masked out, or NaN for floating dtypes).
        IDs are grouped per value, not ordered spatially.
    """
    import cupy as cp
    from cupyx.scipy.ndimage import label as cp_label

    if connectivity_8:
        structure = cp.ones((3, 3), dtype=cp.int32)
    else:
        structure = cp.array(
            [[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=cp.int32)

    regions = cp.zeros(data.shape, dtype=cp.uint32)

    # Build the validity mask: caller's mask combined with "not NaN" for
    # floating-point data.
    is_float = cp.issubdtype(data.dtype, cp.floating)
    if mask_data is not None:
        valid = cp.asarray(mask_data, dtype=bool)
        if is_float:
            # Out-of-place AND on purpose: cp.asarray returns mask_data
            # itself when it is already a boolean cupy array, so an
            # in-place ``&=`` would overwrite the caller's mask.
            valid = valid & ~cp.isnan(data)
    else:
        valid = ~cp.isnan(data) if is_float else None

    unique_vals = data[valid] if valid is not None else data.ravel()
    unique_vals = cp.unique(unique_vals)

    uid = 1  # next unused region ID
    for v in unique_vals:
        bin_mask = (data == v)  # fresh array, safe to modify in place
        if valid is not None:
            bin_mask &= valid
        labeled, n_features = cp_label(bin_mask, structure=structure)
        if n_features == 0:
            continue
        # Vectorized assignment: offset this value's labels by (uid - 1)
        # so label 1 -> uid, label 2 -> uid + 1, and so on.
        where = labeled > 0
        regions[where] = (labeled[where].astype(cp.uint32) +
                          cp.uint32(uid - 1))
        uid += n_features

    return regions
600+
601+
@ngjit
def _renumber_regions(regions, nx, ny):
    """Relabel ``regions`` in place so IDs follow first-encounter order.

    The flat array is walked from index 0 to ``nx * ny - 1``. The first
    non-zero ID met becomes 1, the next previously unseen ID becomes 2,
    and so on. Zero entries are left untouched.

    Per the original note on this function, ``_scan`` expects region 1 to
    have the smallest flat index, region 2 the next, etc., whereas the GPU
    labeling numbers regions per value rather than spatially.

    Parameters
    ----------
    regions : 1D array of region IDs, modified in place.
    nx, ny : grid dimensions; only their product is used here.

    Returns
    -------
    The same ``regions`` array, after relabeling.
    """
    total = nx * ny

    # The largest existing ID determines the size of the lookup table.
    highest = 0
    for k in range(total):
        if regions[k] > highest:
            highest = regions[k]

    # lookup[old_id] holds the new ID, or 0 while old_id is still unseen.
    lookup = np.zeros(highest + 1, dtype=_regions_dtype)
    next_id = _regions_dtype(0)
    for pos in range(total):
        current = regions[pos]
        if current != 0:
            if lookup[current] == 0:
                next_id += 1
                lookup[current] = next_id
            regions[pos] = lookup[current]

    return regions
629+
630+
def _polygonize_cupy(data, mask_data, connectivity_8, transform):
    """Polygonize a cupy raster: region labeling on GPU, tracing on CPU.

    The values (and mask, if given) are copied to host memory. Regions
    are labeled on the device by ``_calculate_regions_cupy``, copied back,
    renumbered by ``_renumber_regions`` and handed to ``_scan`` together
    with the flattened host arrays.

    Returns the ``(column, polygon_points)`` pair produced by ``_scan``,
    or by ``_polygonize_numpy`` for single-column input.
    """
    host_values = cupy.asnumpy(data)
    if mask_data is None:
        host_mask = None
    else:
        host_mask = cupy.asnumpy(mask_data)
    ny, nx = host_values.shape

    if nx == 1:
        # Single-column rasters take the pure numpy route, which pads
        # the array itself.
        return _polygonize_numpy(
            host_values, host_mask, connectivity_8, transform)

    device_labels = _calculate_regions_cupy(data, mask_data, connectivity_8)
    labels = cupy.asnumpy(device_labels).ravel()
    # _scan needs region IDs in raster-scan order.
    labels = _renumber_regions(labels, nx, ny)

    flat_mask = None if host_mask is None else host_mask.ravel()
    values_column, rings = _scan(
        labels, host_values.ravel(), flat_mask,
        connectivity_8, transform, nx, ny)
    return values_column, rings
648+
649+
540650def _to_numpy (arr ):
541651 """Convert array to numpy, handling cupy arrays."""
542652 if cupy is not None and isinstance (arr , cupy .ndarray ):
@@ -921,8 +1031,8 @@ def polygonize(
9211031
9221032 return_type: str, default="numpy"
9231033 Format of returned data. Allowed values are "numpy", "spatialpandas",
924- "geopandas" and "awkward ". Only "numpy" is always available, the
925- others require optional dependencies.
1034+ "geopandas", "awkward" and "geojson ". "numpy" and "geojson" are
1035+ always available, the others require optional dependencies.
9261036
9271037 Returns
9281038 -------
@@ -970,22 +1080,14 @@ def polygonize(
9701080 raise ValueError (
9711081 f"Incorrect transform length of { len (transform )} instead of 6" )
9721082
973- if isinstance (raster .data , np .ndarray ):
974- column , polygon_points = _polygonize_numpy (
975- raster .data , mask_data , connectivity_8 , transform )
976- elif cupy is not None and is_cupy_array (raster .data ):
977- # Hybrid GPU/CPU: transfer to CPU for boundary tracing.
978- np_data = cupy .asnumpy (raster .data )
979- np_mask = cupy .asnumpy (mask_data ) if mask_data is not None else None
980- column , polygon_points = _polygonize_numpy (
981- np_data , np_mask , connectivity_8 , transform )
982- elif da is not None and isinstance (raster .data , da .Array ):
983- # Handles both dask+numpy and dask+cupy (chunks converted in
984- # _polygonize_chunk).
985- column , polygon_points = _polygonize_dask (
986- raster .data , mask_data , connectivity_8 , transform )
987- else :
988- raise TypeError (f"Unsupported array type: { type (raster .data )} " )
1083+ mapper = ArrayTypeFunctionMapping (
1084+ numpy_func = _polygonize_numpy ,
1085+ cupy_func = _polygonize_cupy ,
1086+ dask_func = _polygonize_dask ,
1087+ dask_cupy_func = _polygonize_dask ,
1088+ )
1089+ column , polygon_points = mapper (raster )(
1090+ raster .data , mask_data , connectivity_8 , transform )
9891091
9901092 # Convert to requested return_type.
9911093 if return_type == "numpy" :
@@ -996,5 +1098,7 @@ def polygonize(
9961098 return _to_geopandas (column , polygon_points , column_name )
9971099 elif return_type == "spatialpandas" :
9981100 return _to_spatialpandas (column , polygon_points , column_name )
1101+ elif return_type == "geojson" :
1102+ return _to_geojson (column , polygon_points , column_name )
9991103 else :
10001104 raise ValueError (f"Invalid return_type '{ return_type } '" )
0 commit comments