Skip to content

Commit ff09e62

Browse files
Add support for scalar geospatial functions ( part 7 ) (#3857)
1 parent 82e510d commit ff09e62

3 files changed

Lines changed: 315 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,18 @@
88

99
- Added a new function `service` in `snowflake.snowpark.functions` that allows users to create a callable representing a Snowpark Container Services (SPCS) service.
1010
- Added support for `Session.begin_transaction`, `Session.commit` and `Session.rollback`.
11+
- Added support for the following functions in `functions.py`:
12+
- Geospatial functions:
13+
- `st_interpolate`
14+
- `st_intersection`
15+
- `st_intersection_agg`
16+
- `st_intersects`
17+
- `st_isvalid`
18+
- `st_length`
19+
- `st_makegeompoint`
20+
- `st_makeline`
21+
- `st_makepolygon`
22+
- `st_makepolygonoriented`
1123

1224
#### Bug Fixes
1325

docs/source/snowpark/functions.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,16 @@ Functions
451451
st_covers
452452
st_difference
453453
st_dimension
454+
st_interpolate
455+
st_intersection
456+
st_intersection_agg
457+
st_intersects
458+
st_isvalid
459+
st_length
460+
st_makegeompoint
461+
st_makeline
462+
st_makepolygon
463+
st_makepolygonoriented
454464
substr
455465
substring
456466
sum

src/snowflake/snowpark/_functions/scalar_functions.py

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2502,3 +2502,296 @@ def st_dimension(
25022502
"""
25032503
c = _to_col_if_str(geography_or_geometry_expression, "st_dimension")
25042504
return builtin("st_dimension", _emit_ast=_emit_ast)(c)
2505+
2506+
2507+
@publicapi
def st_interpolate(
    geography_expression: ColumnOrName,
    tolerance: ColumnOrName = None,
    _emit_ast: bool = True,
) -> Column:
    """
    Builds a call to ``st_interpolate``, which returns a geography object with extra points
    interpolated along the edges of the input geography.

    Args:
        geography_expression (ColumnOrName): The geography value, as a column or a column name.
        tolerance (ColumnOrName, optional): The maximum distance, in meters, between interpolated
            points. When left as ``None`` only the geography argument is passed to the function.

    Returns:
        Column: A geography object with interpolated points along its edges.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([
        ...     ['POLYGON((2.365837 48.862456,-76.992874 39.009046,-16.091194 18.013997,2.365837 48.862456))']
        ... ], schema=["geog_wkt"])
        >>> df.select(st_interpolate(to_geography(df["geog_wkt"])).alias("interpolated")).collect()
        [Row(INTERPOLATED='{\\n "coordinates": [\\n [\\n [\\n 2.365837000000000e+00,\\n 4.886245600000001e+01\\n ... ],\\n [\\n 2.365837000000000e+00,\\n 4.886245600000001e+01\\n ]\\n ]\\n ],\\n "type": "Polygon"\\n}')]

        >>> df.select(st_interpolate(to_geography(df["geog_wkt"]), lit(1000)).alias("interpolated_with_tolerance")).collect()
        [Row(INTERPOLATED_WITH_TOLERANCE='{\\n "coordinates": [\\n [\\n [\\n 2.365837000000000e+00,\\n 4.886245600000... ],\\n [\\n 2.365837000000000e+00,\\n 4.886245600000001e+01\\n ]\\n ]\\n ],\\n "type": "Polygon"\\n}')]
    """
    call_args = [_to_col_if_str(geography_expression, "st_interpolate")]
    # The tolerance is forwarded only when the caller actually supplied one.
    if tolerance is not None:
        call_args.append(_to_col_if_str(tolerance, "st_interpolate"))
    return builtin("st_interpolate", _emit_ast=_emit_ast)(*call_args)
2544+
2545+
2546+
@publicapi
def st_intersection(
    geography_expression_1: ColumnOrName,
    geography_expression_2: ColumnOrName,
    _emit_ast: bool = True,
) -> Column:
    """
    Computes the intersection of two GEOGRAPHY objects. When the objects do not intersect,
    the result is an empty geometry collection.

    Args:
        geography_expression_1 (ColumnOrName): First operand: a column of GEOGRAPHY objects or a geography expression.
        geography_expression_2 (ColumnOrName): Second operand: a column of GEOGRAPHY objects or a geography expression.

    Returns:
        Column: The intersection of the two inputs, as a GEOGRAPHY object.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([
        ...     ['POLYGON((0 0, 1 0, 2 1, 1 2, 2 3, 1 4, 0 4, 0 0))', 'POLYGON((3 0, 3 4, 2 4, 1 3, 2 2, 1 1, 2 0, 3 0))']
        ... ], schema=["geog1", "geog2"])
        >>> df.select(st_intersection(to_geography(df["geog1"]), to_geography(df["geog2"])).alias("intersection")).collect()
        [Row(INTERSECTION='{\\n "coordinates": [\\n [\\n [\\n [\\n 1.500000000000000e+00,\\n 5.000571197534015e-01\\n ],\\n [\\n 2.000000000000000e+00,\\n 1.000000000000000e+00\\n ],\\n [\\n 1.500000000000000e+00,\\n 1.500171359265506e+00\\n ],\\n [\\n 9.999999999999998e-01,\\n 1.000000000000000e+00\\n ],\\n [\\n 1.500000000000000e+00,\\n 5.000571197534015e-01\\n ]\\n ]\\n ],\\n [\\n [\\n [\\n 1.500000000000000e+00,\\n 2.500285598878384e+00\\n ],\\n [\\n 2.000000000000000e+00,\\n 3.000000000000000e+00\\n ],\\n [\\n 1.500000000000000e+00,\\n 3.500399838942360e+00\\n ],\\n [\\n 1.000000000000000e+00,\\n 3.000000000000000e+00\\n ],\\n [\\n 1.500000000000000e+00,\\n 2.500285598878384e+00\\n ]\\n ]\\n ]\\n ],\\n "type": "MultiPolygon"\\n}')]
    """
    # Both operands are normalised to Column objects before the SQL function is built.
    left = _to_col_if_str(geography_expression_1, "st_intersection")
    right = _to_col_if_str(geography_expression_2, "st_intersection")
    intersection_fn = builtin("st_intersection", _emit_ast=_emit_ast)
    return intersection_fn(left, right)
2573+
2574+
2575+
@publicapi
def st_intersection_agg(
    geography_column: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Aggregate function that returns the intersection of all geography objects in a group.

    Args:
        geography_column (ColumnOrName): The column of geography objects to intersect within each group.

    Returns:
        Column: The intersection of all geography objects in the group.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([
        ...     ['POLYGON((10 10, 11 11, 11 10, 10 10))'],
        ...     ['POLYGON((10 10, 11 10, 10 11, 10 10))'],
        ...     ['POLYGON((10.5 10.5, 10 10, 11 10, 10.5 10.5))']
        ... ], schema=["g"])
        >>> df.select(st_intersection_agg(to_geography(df["g"])).alias("intersection")).collect()
        [Row(INTERSECTION='{\\n "coordinates": [\\n [\\n [\\n 1.050000000000000e+01,\\n 1.050000000000000e+01\\n ... ],\\n [\\n 1.050000000000000e+01,\\n 1.050000000000000e+01\\n ]\\n ]\\n ],\\n "type": "Polygon"\\n}')]
    """
    grouped_geographies = _to_col_if_str(geography_column, "st_intersection_agg")
    aggregate_fn = builtin("st_intersection_agg", _emit_ast=_emit_ast)
    return aggregate_fn(grouped_geographies)
2600+
2601+
2602+
@publicapi
def st_intersects(
    geography_or_geometry_expression_1: ColumnOrName,
    geography_or_geometry_expression_2: ColumnOrName,
    _emit_ast: bool = True,
) -> Column:
    """
    Tests whether two GEOGRAPHY or GEOMETRY objects intersect, i.e. share at least one point.
    Returns True when they do and False otherwise.

    Args:
        geography_or_geometry_expression_1 (ColumnOrName): First operand: a column of GEOGRAPHY or GEOMETRY objects, or a column name.
        geography_or_geometry_expression_2 (ColumnOrName): Second operand: a column of GEOGRAPHY or GEOMETRY objects, or a column name.

    Returns:
        Column: Boolean values indicating whether the two objects intersect.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([
        ...     ["POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))", "POLYGON((1 1, 3 1, 3 3, 1 3, 1 1))"],
        ...     ["POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", "POLYGON((2 2, 3 2, 3 3, 2 3, 2 2))"]
        ... ], schema=["geog1", "geog2"])
        >>> df.select(st_intersects(to_geography(df["geog1"]), to_geography(df["geog2"])).alias("intersects")).collect()
        [Row(INTERSECTS=True), Row(INTERSECTS=False)]
    """
    # Tuple elements are evaluated left to right, so the first operand is converted first.
    first, second = (
        _to_col_if_str(geography_or_geometry_expression_1, "st_intersects"),
        _to_col_if_str(geography_or_geometry_expression_2, "st_intersects"),
    )
    return builtin("st_intersects", _emit_ast=_emit_ast)(first, second)
2630+
2631+
2632+
@publicapi
def st_isvalid(
    geography_or_geometry_expression: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Checks whether the input GEOGRAPHY or GEOMETRY object is valid: TRUE if it is, FALSE otherwise.

    Args:
        geography_or_geometry_expression (ColumnOrName): The column of GEOGRAPHY or GEOMETRY objects to validate.

    Returns:
        Column: Boolean values indicating whether each object is valid.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([["POLYGON((-93.086 37.557,-86.699 37.497,-93.198 35.123,-93.086 37.557))"]],schema=["geom"])
        >>> df.select(st_isvalid(to_geography(df["geom"])).alias("is_valid")).collect()
        [Row(IS_VALID=True)]
    """
    shape = _to_col_if_str(geography_or_geometry_expression, "st_isvalid")
    validity_fn = builtin("st_isvalid", _emit_ast=_emit_ast)
    return validity_fn(shape)
2653+
2654+
2655+
@publicapi
def st_length(
    geography_or_geometry_expression: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Returns the length of a GEOGRAPHY or GEOMETRY object as a REAL value.

    Args:
        geography_or_geometry_expression (ColumnOrName): A GEOGRAPHY or GEOMETRY object.

    Returns:
        Column: A REAL value that represents the length:
            - For GEOGRAPHY input values, the length is in meters.
            - For GEOMETRY input values, the length is computed with the same units used to define the input coordinates.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([
        ...     "LINESTRING(0 0, 1 1)",
        ...     "POINT(1 1)",
        ...     "POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))"
        ... ], schema=["geometry_col"])
        >>> df.select(st_length(to_geography(df["geometry_col"])).alias("length")).collect()
        [Row(LENGTH=157249.6280925079), Row(LENGTH=0.0), Row(LENGTH=0.0)]
    """
    measured = _to_col_if_str(geography_or_geometry_expression, "st_length")
    length_fn = builtin("st_length", _emit_ast=_emit_ast)
    return length_fn(measured)
2684+
2685+
2686+
@publicapi
def st_makegeompoint(
    longitude: ColumnOrName, latitude: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Builds a GEOMETRY point from the given longitude and latitude values.

    Args:
        longitude (ColumnOrName): A column or column name holding the longitude values.
        latitude (ColumnOrName): A column or column name holding the latitude values.

    Returns:
        Column: GEOMETRY objects representing points.

    Examples::
        >>> df = session.create_dataframe([[-122.35, 37.55], [-74.006, 40.7128]], schema=["longitude", "latitude"])
        >>> df.select(st_makegeompoint(df["longitude"], df["latitude"]).alias("geom_point")).collect()
        [Row(GEOM_POINT='{\\n "coordinates": [\\n -1.223500000000000e+02,\\n 3.755000000000000e+01\\n ],\\n "type": "Point"\\n}'), Row(GEOM_POINT='{\\n "coordinates": [\\n -7.400600000000000e+01,\\n 4.071280000000000e+01\\n ],\\n "type": "Point"\\n}')]
    """
    # Longitude is converted (and passed) first, then latitude.
    lon = _to_col_if_str(longitude, "st_makegeompoint")
    lat = _to_col_if_str(latitude, "st_makegeompoint")
    make_point = builtin("st_makegeompoint", _emit_ast=_emit_ast)
    return make_point(lon, lat)
2708+
2709+
2710+
@publicapi
def st_makeline(
    geography_or_geometry_expression_1: ColumnOrName,
    geography_or_geometry_expression_2: ColumnOrName,
    _emit_ast: bool = True,
) -> Column:
    """
    Builds a LINESTRING object that connects the input objects in the order they are passed.

    Args:
        geography_or_geometry_expression_1 (ColumnOrName): A GEOGRAPHY or GEOMETRY object holding the first point or set of points in the line.
        geography_or_geometry_expression_2 (ColumnOrName): A GEOGRAPHY or GEOMETRY object holding the second point or set of points in the line.

    Returns:
        Column: A LINESTRING object connecting the input objects.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([
        ...     ["POINT(37.0 45.0)", "POINT(38.5 46.5)"],
        ...     ["POINT(-122.306067 37.55412)", "MULTIPOINT((-122.32328 37.561801), (-122.325879 37.586852))"]
        ... ], schema=["geog1", "geog2"])
        >>> df.select(st_makeline(to_geography(df["geog1"]), to_geography(df["geog2"])).alias("makeline")).collect()
        [Row(MAKELINE='{\\n "coordinates": [\\n [\\n 37,\\n 45\\n ],\\n [\\n 38.5,\\n 46.5\\n ]\\n ],\\n "type": "LineString"\\n}'), Row(MAKELINE='{\\n "coordinates": [\\n [\\n -122.306067,\\n 37.55412\\n ],\\n [\\n -122.32328,\\n 37.561801\\n ],\\n [\\n -122.325879,\\n 37.586852\\n ]\\n ],\\n "type": "LineString"\\n}')]
    """
    # Argument order is preserved because it determines the order of points in the line.
    endpoints = [
        _to_col_if_str(geography_or_geometry_expression_1, "st_makeline"),
        _to_col_if_str(geography_or_geometry_expression_2, "st_makeline"),
    ]
    return builtin("st_makeline", _emit_ast=_emit_ast)(*endpoints)
2738+
2739+
2740+
@publicapi
def st_makepolygon(
    geography_or_geometry_expression: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Builds a polygon whose exterior ring is the given linestring.

    Args:
        geography_or_geometry_expression (ColumnOrName): A column or column name holding a GEOGRAPHY or GEOMETRY linestring that forms the exterior ring of the polygon.

    Returns:
        Column: The polygon created from the input linestring.

    Examples::
        >>> from snowflake.snowpark.functions import to_geometry
        >>> df = session.create_dataframe([["LINESTRING(0.0 0.0, 1.0 0.0, 1.0 2.0, 0.0 2.0, 0.0 0.0)"]], schema=["linestring"])
        >>> df.select(st_makepolygon(to_geometry(df["linestring"])).alias("polygon")).collect()
        [Row(POLYGON='{\\n "coordinates": [\\n [\\n [\\n 0.000000000000000e+00,\\n 0.000000000000000e+00\\n ],\\n [\\n 1.000000000000000e+00,\\n 0.000000000000000e+00\\n ],\\n [\\n 1.000000000000000e+00,\\n 2.000000000000000e+00\\n ],\\n [\\n 0.000000000000000e+00,\\n 2.000000000000000e+00\\n ],\\n [\\n 0.000000000000000e+00,\\n 0.000000000000000e+00\\n ]\\n ]\\n ],\\n "type": "Polygon"\\n}')]
    """
    exterior_ring = _to_col_if_str(geography_or_geometry_expression, "st_makepolygon")
    make_polygon = builtin("st_makepolygon", _emit_ast=_emit_ast)
    return make_polygon(exterior_ring)
2761+
2762+
2763+
@publicapi
def st_makepolygonoriented(
    geography_expression: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Constructs a GEOGRAPHY object that represents a polygon without holes, using the input
    LineString as the outer loop.

    Unlike :func:`st_makepolygon`, this function does not attempt to correct the orientation
    of the loop, which allows the creation of polygons that span more than half of the globe.

    Args:
        geography_expression (ColumnOrName): A column or column name holding the GEOGRAPHY
            LineString that forms the outer loop of the polygon.

    Returns:
        Column: The GEOGRAPHY polygon built from the input LineString.

    Examples::
        >>> from snowflake.snowpark.functions import col, to_geography
        >>> import json
        >>> df = session.create_dataframe([
        ...     "LINESTRING(0.0 0.0, 1.0 0.0, 1.0 2.0, 0.0 2.0, 0.0 0.0)"
        ... ], schema=["linestring"])
        >>> result = df.select(st_makepolygonoriented(to_geography(col("linestring"))).alias("polygon")).collect()
        >>> assert json.loads(result[0]["POLYGON"]) == {
        ...     "coordinates": [
        ...         [
        ...             [0, 0],
        ...             [1, 0],
        ...             [1, 2],
        ...             [0, 2],
        ...             [0, 0]
        ...         ]
        ...     ],
        ...     "type": "Polygon"
        ... }
    """
    outer_loop = _to_col_if_str(geography_expression, "st_makepolygonoriented")
    make_oriented_polygon = builtin("st_makepolygonoriented", _emit_ast=_emit_ast)
    return make_oriented_polygon(outer_loop)

0 commit comments

Comments
 (0)