Skip to content

Commit ec7b377

Browse files
SNOW-2380327: Add support for scalar geospatial functions - part6 (#3827)
1 parent ff09e62 commit ec7b377

3 files changed

Lines changed: 315 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@
2020
- `st_makeline`
2121
- `st_makepolygon`
2222
- `st_makepolygonoriented`
23+
- `st_disjoint`
24+
- `st_distance`
25+
- `st_dwithin`
26+
- `st_endpoint`
27+
- `st_envelope`
28+
- `st_geohash`
29+
- `st_geomfromgeohash`
30+
- `st_geompointfromgeohash`
31+
- `st_hausdorffdistance`
32+
- `st_makepoint`
2333

2434
#### Bug Fixes
2535

docs/source/snowpark/functions.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,16 @@ Functions
451451
st_covers
452452
st_difference
453453
st_dimension
454+
st_disjoint
455+
st_distance
456+
st_dwithin
457+
st_endpoint
458+
st_envelope
459+
st_geohash
460+
st_geomfromgeohash
461+
st_geompointfromgeohash
462+
st_hausdorffdistance
463+
st_makepoint
454464
st_interpolate
455465
st_intersection
456466
st_intersection_agg

src/snowflake/snowpark/_functions/scalar_functions.py

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,3 +2795,298 @@ def st_makepolygonoriented(
27952795
"""
27962796
c = _to_col_if_str(geography_expression, "st_makepolygonoriented")
27972797
return builtin("st_makepolygonoriented", _emit_ast=_emit_ast)(c)
2798+
2799+
2800+
@publicapi
def st_makepoint(
    longitude: ColumnOrName, latitude: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Builds a GEOGRAPHY point from a longitude value and a latitude value.

    Args:
        longitude (ColumnOrName): The longitude values.
        latitude (ColumnOrName): The latitude values.

    Returns:
        Column: GEOGRAPHY objects representing the points.

    Example::
        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([[37.5, 45.5], [-122.35, 37.55]], schema=["longitude", "latitude"])
        >>> df.select(st_makepoint(col("longitude"), col("latitude")).alias("point")).collect()
        [Row(POINT='{\\n "coordinates": [\\n 3.750000000000000e+01,\\n 4.550000000000000e+01\\n ],\\n "type": "Point"\\n}'), Row(POINT='{\\n "coordinates": [\\n -1.223500000000000e+02,\\n 3.755000000000000e+01\\n ],\\n "type": "Point"\\n}')]
    """
    # Convert the inputs in argument order (longitude, then latitude) before
    # the builtin is looked up, so the call sequence matches the SQL signature.
    point_args = [
        _to_col_if_str(coordinate, "st_makepoint")
        for coordinate in (longitude, latitude)
    ]
    return builtin("st_makepoint", _emit_ast=_emit_ast)(*point_args)
2823+
2824+
2825+
@publicapi
def st_disjoint(
    geography_or_geometry_expression_1: ColumnOrName,
    geography_or_geometry_expression_2: ColumnOrName,
    _emit_ast: bool = True,
) -> Column:
    """
    Tests whether two GEOGRAPHY or GEOMETRY objects are disjoint, i.e. do not
    intersect. The result is TRUE when they are disjoint and FALSE otherwise.

    Args:
        geography_or_geometry_expression_1 (ColumnOrName): A GEOGRAPHY or GEOMETRY object.
        geography_or_geometry_expression_2 (ColumnOrName): A GEOGRAPHY or GEOMETRY object.

    Returns:
        Column: Boolean values indicating whether the two geography or geometry objects are disjoint.

    Examples::
        >>> from snowflake.snowpark.functions import col, to_geography
        >>> df = session.create_dataframe([
        ...     ["POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))", "POLYGON((3 3, 5 3, 5 5, 3 5, 3 3))"],
        ...     ["POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))", "POLYGON((1 1, 3 1, 3 3, 1 3, 1 1))"]
        ... ], schema=["geog1", "geog2"])
        >>> df.select(st_disjoint(to_geography(col("geog1")), to_geography(col("geog2"))).alias("disjoint")).collect()
        [Row(DISJOINT=True), Row(DISJOINT=False)]
    """
    left = _to_col_if_str(geography_or_geometry_expression_1, "st_disjoint")
    right = _to_col_if_str(geography_or_geometry_expression_2, "st_disjoint")
    disjoint_fn = builtin("st_disjoint", _emit_ast=_emit_ast)
    return disjoint_fn(left, right)
2853+
2854+
2855+
@publicapi
def st_distance(
    geography_or_geometry_expression_1: ColumnOrName,
    geography_or_geometry_expression_2: ColumnOrName,
    _emit_ast: bool = True,
) -> Column:
    """
    Computes the minimum geodesic distance between two GEOGRAPHY objects, or the
    Euclidean distance between two GEOMETRY objects.

    Args:
        geography_or_geometry_expression_1 (ColumnOrName): A GEOGRAPHY or GEOMETRY object.
        geography_or_geometry_expression_2 (ColumnOrName): A GEOGRAPHY or GEOMETRY object.

    Returns:
        Column: The distance between the two geographic or geometric objects.

    Examples::
        >>> df = session.sql("select TO_GEOGRAPHY('POINT(0 0)') as point1, TO_GEOGRAPHY('POINT(1 0)') as point2")
        >>> df.select(st_distance(df.point1, df.point2).alias("distance")).collect()
        [Row(DISTANCE=111195.10117748393)]
    """
    # Use fresh locals rather than rebinding the parameters.
    first = _to_col_if_str(geography_or_geometry_expression_1, "st_distance")
    second = _to_col_if_str(geography_or_geometry_expression_2, "st_distance")
    return builtin("st_distance", _emit_ast=_emit_ast)(first, second)
2885+
2886+
2887+
@publicapi
def st_dwithin(
    geography_expression_1: ColumnOrName,
    geography_expression_2: ColumnOrName,
    distance_in_meters: ColumnOrName,
    _emit_ast: bool = True,
) -> Column:
    """
    Checks whether two GEOGRAPHY objects lie within the given distance (in meters)
    of each other.

    Args:
        geography_expression_1 (ColumnOrName): The first geography expression to compare.
        geography_expression_2 (ColumnOrName): The second geography expression to compare.
        distance_in_meters (ColumnOrName): The maximum distance in meters for the comparison.

    Returns:
        Column: A boolean column indicating whether the two geography objects are within the specified distance.

    Examples::
        >>> from snowflake.snowpark.functions import st_makepoint, lit
        >>> df = session.create_dataframe([
        ...     [0.0, 0.0, 1.0, 0.0, 150000.0],
        ...     [0.0, 0.0, 2.0, 0.0, 150000.0]
        ... ], schema=["x1", "y1", "x2", "y2", "distance"])
        >>> df.select(
        ...     st_dwithin(
        ...         st_makepoint(df["x1"], df["y1"]),
        ...         st_makepoint(df["x2"], df["y2"]),
        ...         df["distance"]
        ...     ).alias("within_distance")
        ... ).collect()
        [Row(WITHIN_DISTANCE=True), Row(WITHIN_DISTANCE=False)]
    """
    # Convert all three inputs in positional order before dispatching.
    operands = [
        _to_col_if_str(operand, "st_dwithin")
        for operand in (
            geography_expression_1,
            geography_expression_2,
            distance_in_meters,
        )
    ]
    return builtin("st_dwithin", _emit_ast=_emit_ast)(*operands)
2924+
2925+
2926+
@publicapi
def st_endpoint(
    geography_or_geometry_expression: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Extracts the last point of a LINESTRING or MULTILINESTRING geometry or geography object.

    Args:
        geography_or_geometry_expression (ColumnOrName): A column containing LINESTRING or
            MULTILINESTRING geometry or geography data.

    Returns:
        Column: A column containing the endpoint as a POINT geometry or geography object.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([['LINESTRING(1 1, 2 2, 3 3, 4 4)']], schema=["linestring"])
        >>> df.select(st_endpoint(to_geography(df["linestring"])).alias("endpoint")).collect()
        [Row(ENDPOINT='{\\n "coordinates": [\\n 4.000000000000000e+00,\\n 4.000000000000000e+00\\n ],\\n "type": "Point"\\n}')]
    """
    line_col = _to_col_if_str(geography_or_geometry_expression, "st_endpoint")
    endpoint_fn = builtin("st_endpoint", _emit_ast=_emit_ast)
    return endpoint_fn(line_col)
2947+
2948+
2949+
@publicapi
def st_envelope(
    geography_or_geometry_expression: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Returns the minimum bounding box (envelope) that contains the input GEOGRAPHY or GEOMETRY object.

    Args:
        geography_or_geometry_expression (ColumnOrName): The GEOGRAPHY or GEOMETRY data.

    Returns:
        Column: The envelope as a GEOGRAPHY or GEOMETRY object.

    Example::

        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([
        ...     ['POLYGON((-122.306067 37.55412, -122.32328 37.561801, -122.325879 37.586852, -122.306067 37.55412))'],
        ...     ['POINT(-122.32328 37.561801)'],
        ...     ['LINESTRING(-122.32328 37.561801, -122.32328 37.562001)']
        ... ], schema=["geom"])
        >>> df.select(st_envelope(to_geography(df["geom"])).alias("envelope")).collect()
        [Row(ENVELOPE='{\\n "coordinates": [\\n [\\n [\\n -1.223258790000000e+02,\\n 3.755411999999995e+01\\n ],\\n [\\n -1.223060670000000e+02,\\n 3.755411999999995e+01\\n ],\\n [\\n -1.223060670000000e+02,\\n 3.758685200000006e+01\\n ],\\n [\\n -1.223258790000000e+02,\\n 3.758685200000006e+01\\n ],\\n [\\n -1.223258790000000e+02,\\n 3.755411999999995e+01\\n ]\\n ]\\n ],\\n "type": "Polygon"\\n}'), Row(ENVELOPE='{\\n "coordinates": [\\n -1.223232800000000e+02,\\n 3.756180100000000e+01\\n ],\\n "type": "Point"\\n}'), Row(ENVELOPE='{\\n "coordinates": [\\n [\\n -1.223232800000000e+02,\\n 3.756180099999997e+01\\n ],\\n [\\n -1.223232800000000e+02,\\n 3.756200100000003e+01\\n ]\\n ],\\n "type": "LineString"\\n}')]
    """
    # The signature now carries the same `_emit_ast: bool` and `-> Column`
    # annotations as the sibling geospatial wrappers; runtime behavior is unchanged.
    c = _to_col_if_str(geography_or_geometry_expression, "st_envelope")
    return builtin("st_envelope", _emit_ast=_emit_ast)(c)
2973+
2974+
2975+
@publicapi
def st_geohash(
    geography_or_geometry_expression: ColumnOrName,
    precision: ColumnOrName = None,
    _emit_ast: bool = True,
) -> Column:
    """
    Returns the geohash for a GEOGRAPHY or GEOMETRY object.

    Args:
        geography_or_geometry_expression (ColumnOrName): A GEOGRAPHY or GEOMETRY object for which to calculate the geohash.
        precision (ColumnOrName, optional): The precision of the geohash. If not specified, uses the default precision.

    Returns:
        Column: A string representing the geohash of the input geography or geometry object.

    Examples::
        >>> from snowflake.snowpark.functions import lit, to_geography
        >>> df = session.create_dataframe([["POINT(-122.306100 37.554162)"]], schema=["geom"])
        >>> df.select(st_geohash(to_geography(df["geom"])).alias("geohash")).collect()
        [Row(GEOHASH='9q9j8ue2v71y5zzy0s4q')]

        >>> df2 = session.create_dataframe([["POINT(-122.306100 37.554162)"]], schema=["geom"])
        >>> df2.select(st_geohash(to_geography(df2["geom"]), lit(5)).alias("geohash")).collect()
        [Row(GEOHASH='9q9j8')]
    """
    # Named `geo_col` rather than `col` so the local does not shadow the
    # `col` helper that sibling examples import from snowpark.functions.
    geo_col = _to_col_if_str(geography_or_geometry_expression, "st_geohash")

    # Without a precision, call the one-argument form of the builtin.
    if precision is None:
        return builtin("st_geohash", _emit_ast=_emit_ast)(geo_col)

    precision_col = _to_col_if_str(precision, "st_geohash")
    return builtin("st_geohash", _emit_ast=_emit_ast)(geo_col, precision_col)
3008+
3009+
3010+
@publicapi
def st_geomfromgeohash(
    geohash: ColumnOrName, precision: ColumnOrName = None, _emit_ast: bool = True
) -> Column:
    """
    Builds a GEOMETRY object from a geohash string.

    Args:
        geohash (ColumnOrName): A column or string containing the geohash value.
        precision (ColumnOrName, optional): A column or value specifying the precision level for the geohash conversion.

    Returns:
        Column: A GEOMETRY object representing the polygon area covered by the geohash.

    Examples::
        >>> from snowflake.snowpark.functions import col, lit
        >>> df = session.create_dataframe([["9q9j8ue2v71y5zzy0s4q"], ["9q9j8u"]], schema=["geohash"])
        >>> df.select(st_geomfromgeohash(col("geohash")).alias("geometry")).collect()
        [Row(GEOMETRY='{\\n "coordinates": [\\n [\\n [\\n -1.223061000000001e+02,\\n 3.755416199999996e+01\\n ],\\n [\\n -1.223061000000001e+02,\\n 3.755416200000012e+01\\n ],\\n [\\n -1.223060999999998e+02,\\n 3.755416200000012e+01\\n ],\\n [\\n -1.223060999999998e+02,\\n 3.755416199999996e+01\\n ],\\n [\\n -1.223061000000001e+02,\\n 3.755416199999996e+01\\n ]\\n ]\\n ],\\n "type": "Polygon"\\n}'), Row(GEOMETRY='{\\n "coordinates": [\\n [\\n [\\n -1.223107910156250e+02,\\n 3.755126953125000e+01\\n ],\\n [\\n -1.223107910156250e+02,\\n 3.755676269531250e+01\\n ],\\n [\\n -1.222998046875000e+02,\\n 3.755676269531250e+01\\n ],\\n [\\n -1.222998046875000e+02,\\n 3.755126953125000e+01\\n ],\\n [\\n -1.223107910156250e+02,\\n 3.755126953125000e+01\\n ]\\n ]\\n ],\\n "type": "Polygon"\\n}')]

        >>> df2 = session.create_dataframe([["9q9j8ue2v71y5zzy0s4q"]], schema=["geohash"])
        >>> df2.select(st_geomfromgeohash(col("geohash"), lit(6)).alias("geometry")).collect()
        [Row(GEOMETRY='{\\n "coordinates": [\\n [\\n [\\n -1.223107910156250e+02,\\n 3.755126953125000e+01\\n ],\\n [\\n -1.223107910156250e+02,\\n 3.755676269531250e+01\\n ],\\n [\\n -1.222998046875000e+02,\\n 3.755676269531250e+01\\n ],\\n [\\n -1.222998046875000e+02,\\n 3.755126953125000e+01\\n ],\\n [\\n -1.223107910156250e+02,\\n 3.755126953125000e+01\\n ]\\n ]\\n ],\\n "type": "Polygon"\\n}')]
    """
    # Build the positional argument list; precision is added only when supplied.
    call_args = [_to_col_if_str(geohash, "st_geomfromgeohash")]
    if precision is not None:
        call_args.append(_to_col_if_str(precision, "st_geomfromgeohash"))
    return builtin("st_geomfromgeohash", _emit_ast=_emit_ast)(*call_args)
3043+
3044+
3045+
@publicapi
def st_geompointfromgeohash(geohash: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Builds a GEOMETRY point from a geohash string.

    Args:
        geohash (ColumnOrName): A column or string representing the geohash value to convert to a geometry point.

    Returns:
        Column: A GEOMETRY object representing the point decoded from the geohash.

    Examples::
        >>> df = session.create_dataframe([['9q9j8ue2v71y5zzy0s4q'], ['9q9hpyb25d']], schema=["geohash"])
        >>> df.select(st_geompointfromgeohash(df["geohash"]).alias("geometry_point")).collect()
        [Row(GEOMETRY_POINT='{\\n "coordinates": [\\n -1.223061000000001e+02,\\n 3.755416199999996e+01\\n ],\\n "type": "Point"\\n}'), Row(GEOMETRY_POINT='{\\n "coordinates": [\\n -1.220026749372482e+02,\\n 3.730271726846695e+01\\n ],\\n "type": "Point"\\n}')]
    """
    geohash_col = _to_col_if_str(geohash, "st_geompointfromgeohash")
    point_from_geohash = builtin("st_geompointfromgeohash", _emit_ast=_emit_ast)
    return point_from_geohash(geohash_col)
3063+
3064+
3065+
@publicapi
def st_hausdorffdistance(
    geography_expression_1: ColumnOrName,
    geography_expression_2: ColumnOrName,
    _emit_ast: bool = True,
) -> Column:
    """
    Computes the Hausdorff distance between two GEOGRAPHY objects.

    Args:
        geography_expression_1 (ColumnOrName): A column containing GEOGRAPHY objects or a geography expression.
        geography_expression_2 (ColumnOrName): A column containing GEOGRAPHY objects or a geography expression.

    Returns:
        Column: A column containing the Hausdorff distance between the two geography objects.

    Examples::
        >>> from snowflake.snowpark.functions import to_geography
        >>> df = session.create_dataframe([
        ...     ("POINT(0 0)", "POINT(0 1)"),
        ...     ("POLYGON((-1 0, 0 1, 1 0, 0 -1, -1 0))", "POLYGON((-1 0, 0 1, 2 0, 0 -1, -1 0))")
        ... ], schema=["geog1", "geog2"])
        >>> df.select(st_hausdorffdistance(to_geography(df["geog1"]), to_geography(df["geog2"]))).collect()
        [Row(ST_HAUSDORFFDISTANCE(TO_GEOGRAPHY("GEOG1"), TO_GEOGRAPHY("GEOG2"))=1.0), Row(ST_HAUSDORFFDISTANCE(TO_GEOGRAPHY("GEOG1"), TO_GEOGRAPHY("GEOG2"))=1.0)]
    """
    # Convert both operands in order before dispatching to the builtin.
    geographies = tuple(
        _to_col_if_str(expr, "st_hausdorffdistance")
        for expr in (geography_expression_1, geography_expression_2)
    )
    return builtin("st_hausdorffdistance", _emit_ast=_emit_ast)(*geographies)

0 commit comments

Comments
 (0)