From b6d4b324ab20564f92f9f8e84a4c6455ace1534a Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 9 Sep 2025 12:09:16 -0700 Subject: [PATCH 01/25] adding docs and navigation for second bug bash --- docs/index.md | 6 - docs/reference/read-parquet-files.md | 41 ++ docs/reference/sql-joins.md | 35 ++ docs/reference/sql.md | 560 ++++++++++++++++++++++++++- docs/stylesheets/extra.css | 57 +++ mkdocs.yml | 88 +++-- 6 files changed, 734 insertions(+), 53 deletions(-) create mode 100644 docs/reference/read-parquet-files.md create mode 100644 docs/reference/sql-joins.md create mode 100644 docs/stylesheets/extra.css diff --git a/docs/index.md b/docs/index.md index 1395c6be1..cc64218c5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -78,12 +78,6 @@ Here’s how to install SedonaDB with various build tools: pip install "apache-sedona[db]" ``` -=== "Rust" - - ```rust - cargo add sedona - ``` - === "R" ```bash diff --git a/docs/reference/read-parquet-files.md b/docs/reference/read-parquet-files.md new file mode 100644 index 000000000..47b4b6a71 --- /dev/null +++ b/docs/reference/read-parquet-files.md @@ -0,0 +1,41 @@ +# Reading Parquet Files + +To read a Parquet file, you must use the dedicated `sd.read_parquet()` method. You cannot query a file path directly within the `sd.sql()` `FROM` clause. + +The `sd.sql()` function is designed to query tables that have already been registered in the session. When you pass a path like `'s3://...'` to `FROM`, the SQL engine searches for a registered table with that literal name and fails when it's not found, producing a `table not found` error. + +## Usage + +The correct process is a three-step approach: + +1. **Load** the Parquet file into a DataFrame using `sd.read_parquet()`. +1. **Register** the DataFrame as a temporary view using `.createOrReplaceTempView()`. +1. **Query** the view using `sd.sql()`. + +```python +# 1. 
Load the Parquet file from a URL into a DataFrame +df = sd.read_parquet('s3://wherobots-benchmark-prod/SpatialBench_sf=1_format=parquet/building/building.parquet') + +# 2. Register the DataFrame as a temporary view named 'buildings' +df.createOrReplaceTempView('buildings') + +# 3. Now, query the view using SQL +sd.sql("SELECT * FROM buildings LIMIT 10").show() +``` + +### Common Errors + +Directly using a file path within `sd.sql()` is a common mistake that will result in an error. + +**Incorrect Code:** + +```python +# This will fail because the SQL engine looks for a table named 's3://...' +sd.sql("SELECT * FROM 's3://wherobots-benchmark-prod/SpatialBench_sf=1_format=parquet/building/building.parquet'") +``` + +**Resulting Error:** + +``` +sedonadb._lib.SedonaError: Error during planning: table '...s3://...' not found +``` diff --git a/docs/reference/sql-joins.md b/docs/reference/sql-joins.md new file mode 100644 index 000000000..7c39ea1d5 --- /dev/null +++ b/docs/reference/sql-joins.md @@ -0,0 +1,35 @@ +# Spatial Joins + +You can perform spatial joins using standard SQL `INNER JOIN` syntax. The join condition is defined in the `ON` clause using a spatial function that specifies the relationship between the geometries of the two tables. + +## General Spatial Join + +Use functions like `ST_Contains`, `ST_Intersects`, or `ST_Within` to join tables based on their spatial relationship. + +### Example + +Assign a country to each city by checking which country polygon contains each city point. + +```sql +SELECT + cities.name as city, + countries.name as country +FROM cities +INNER JOIN countries +ON ST_Contains(countries.geometry, cities.geometry) +``` + +## K-Nearest Neighbor (KNN) Join + +Use the specialized `ST_KNN` function to find the *k* nearest neighbors from one table for each geometry in another. This is useful for proximity analysis. + +### Example For each city, find the 5 other closest cities. 
+ +```sql +SELECT + cities_l.name AS city, + cities_r.name AS nearest_neighbor +FROM cities AS cities_l +INNER JOIN cities AS cities_r +ON ST_KNN(cities_l.geometry, cities_r.geometry, 5, false) +``` diff --git a/docs/reference/sql.md b/docs/reference/sql.md index 0b221b077..9364aa6c3 100644 --- a/docs/reference/sql.md +++ b/docs/reference/sql.md @@ -19,13 +19,16 @@ # SQL API Reference -SedonaDB SQL is a derivative of [DataFusion SQL](https://datafusion.apache.org/user-guide/sql/index.html) -with support for additional functions, data types, and file formats built in to SQL syntax. +The following SQL functions are available for SedonaDB. -See the [Apache Sedona SQL documentation](https://sedona.apache.org/latest/api/sql/Overview/) for -additional function documentation and examples. +You can query data directly from files and URLs by treating them like database tables. This feature supports formats like **Parquet**, **CSV**, and **JSON**. -Here are the markdown files for each SQL function found in the provided `.rs` files. +To query a file, place its path or URL in single quotes within the `FROM` clause. + +```python +# Query a remote Parquet file directly +sd.sql("SELECT * FROM 'https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet'").show() +``` ## ST_Analyze_Aggr @@ -608,3 +611,550 @@ Computes the symmetric difference between geomA and geomB. ```sql SELECT ST_SymDifference(ST_GeomFromText('POLYGON ((1 1, 11 1, 1 11, 0 0))'), ST_GeomFromText('POLYGON ((0 0, 10 0, 0 10, 0 0))')) AS val ``` + +## ST_Area + +### Description + +Return the area of a geometry. + +### Format + +`ST_Area (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_Area(ST_GeomFromWKT('POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))')); +``` + +## ST_Centroid + +### Description + +Returns the centroid of geom. 
+ +### Format + +`ST_Centroid (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_AsText(ST_Centroid(ST_GeomFromWKT('POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))'))); +``` + +## ST_Dimension + +### Description + +Return the dimension of the geometry. + +### Format + +`ST_Dimension (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_Dimension(ST_GeomFromWKT('POLYGON ((0 0, 1 0, 0 1, 0 0))')); +``` + +## ST_GeomFromWKB + +### Description + +Construct a Geometry from WKB. + +### Format + +`ST_GeomFromWKB (Wkb: Binary)` + +### Arguments + + * **WKB**: binary: Well-known binary representation of the geometry. + +### SQL Example + +```sql +-- Creates a POINT(1 2) geometry from its WKB representation +SELECT ST_AsText(ST_GeomFromWKB(FROM_HEX('0101000000000000000000F03F0000000000000040'))); +``` + +## ST_GeomFromWKT + +### Description + +Construct a Geometry from WKT. This function also has the alias **ST_GeomFromText**. + +### Format + +`ST_GeomFromWKT (Wkt: String)` + +### Arguments + + * **WKT**: string: Well-known text representation of the geometry. + +### SQL Example + +```sql +SELECT ST_AsText(ST_GeomFromWKT('POINT (30 10)')); +``` + +## ST_IsEmpty + +### Description + +Return true if the geometry is empty. + +### Format + +`ST_IsEmpty (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_IsEmpty(ST_GeomFromWKT('POLYGON EMPTY')); +``` + +## ST_Length + +### Description + +Returns the length of geom. This function only supports LineString, MultiLineString, and GeometryCollections containing linear geometries. Use ST_Perimeter for polygons. + +### Format + +`ST_Length (A: Geometry)` + +### Arguments + + * **geom**: geometry: Input geometry. 
+ +### SQL Example + +```sql +SELECT ST_Length(ST_GeomFromWKT('LINESTRING(0 0, 10 0)')); +``` + +## ST_Perimeter + +### Description + +This function calculates the 2D perimeter of a given geometry. It supports Polygon, MultiPolygon, and GeometryCollection geometries (as long as the GeometryCollection contains polygonal geometries). For other types, it returns 0. To measure lines, use ST_Length. + +To get the perimeter in meters, set **use_spheroid** to true. This calculates the geodesic perimeter using the WGS84 spheroid. When using use_spheroid, the **lenient** parameter defaults to true, assuming the geometry uses EPSG:4326. To throw an exception instead, set lenient to false. + +### Format + +`ST_Perimeter(geom: Geometry)` +`ST_Perimeter(geom: Geometry, use_spheroid: Boolean)` +`ST_Perimeter(geom: Geometry, use_spheroid: Boolean, lenient: Boolean = True)` + +### Arguments + + * **geom**: Input geometry. + * **use_spheroid**: If true, calculates the geodesic perimeter using the WGS84 spheroid. Defaults to false. + * **lenient**: If true, assumes the geometry uses EPSG:4326 when use_spheroid is true. Defaults to true. + +### SQL Example + +```sql +SELECT ST_Perimeter(ST_GeomFromWKT('POLYGON((0 0, 10 0, 10 10, 0 10, 0 0))')); +``` + +## ST_Point + +### Description + +Construct a Point Geometry from X and Y. + +### Format + +`ST_Point (x: Double, y: Double)` + +### Arguments + + * **x**: X value. + * **y**: Y value. + +### SQL Example + +```sql +SELECT ST_AsText(ST_Point(-74.0060, 40.7128)); +``` + +The following functions return the minimum and maximum coordinate values of a geometry's bounding box. + +## ST_XMin + +### Description + +Returns the minimum **X-coordinate** of a geometry's bounding box. + +### Format + +`ST_XMin (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. 
+ +### SQL Example + +```sql +SELECT ST_XMin(ST_GeomFromWKT('LINESTRING(1 5, 10 15)')); +-- Returns: 1 +``` + +## ST_XMax + +### Description + +Returns the maximum **X-coordinate** of a geometry's bounding box. + +### Format + +`ST_XMax (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_XMax(ST_GeomFromWKT('LINESTRING(1 5, 10 15)')); +-- Returns: 10 +``` + +## ST_YMin + +### Description + +Returns the minimum **Y-coordinate** of a geometry's bounding box. + +### Format + +`ST_YMin (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_YMin(ST_GeomFromWKT('LINESTRING(1 5, 10 15)')); +-- Returns: 5 +``` + +## ST_YMax + +### Description + +Returns the maximum **Y-coordinate** of a geometry's bounding box. + +### Format + +`ST_YMax (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_YMax(ST_GeomFromWKT('LINESTRING(1 5, 10 15)')); +-- Returns: 15 +``` + +## ST_ZMin + +### Description + +Returns the minimum **Z-coordinate** of a geometry's bounding box. + +### Format + +`ST_ZMin (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_ZMin(ST_GeomFromWKT('LINESTRING ZM (1 2 3 4, 5 6 7 8)')); +-- Returns: 3 +``` + +## ST_ZMax + +### Description + +Returns the maximum **Z-coordinate** of a geometry's bounding box. + +### Format + +`ST_ZMax (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_ZMax(ST_GeomFromWKT('LINESTRING ZM (1 2 3 4, 5 6 7 8)')); +-- Returns: 7 +``` + +## ST_MMin + +### Description + +Returns the minimum **M-coordinate** (measure) of a geometry's bounding box. + +### Format + +`ST_MMin (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. 
+ +### SQL Example + +```sql +SELECT ST_MMin(ST_GeomFromWKT('LINESTRING ZM (1 2 3 4, 5 6 7 8)')); +-- Returns: 4 +``` + +## ST_MMax + +### Description + +Returns the maximum **M-coordinate** (measure) of a geometry's bounding box. + +### Format + +`ST_MMax (A: Geometry)` + +### Arguments + + * **geom**: Input geometry. + +### SQL Example + +```sql +SELECT ST_MMax(ST_GeomFromWKT('LINESTRING ZM (1 2 3 4, 5 6 7 8)')); +-- Returns: 8 +``` + +## ST_AsBinary + +### Description + +Return the Well-Known Binary representation of a geometry or geography. This function also has the alias **ST_AsWKB**. + +### Format + +`ST_AsBinary (A: Geometry)` + +### Arguments + + * **geom**: Input geometry or geography. + +### SQL Example + +```sql +SELECT ST_AsBinary(ST_Point(1.0, 2.0)); +``` + +## ST_Buffer + +### Description + +Returns a geometry that represents all points whose distance from the input geometry is less than or equal to a specified distance. + +### Format + +`ST_Buffer (A: Geometry, distance: Double)` + +### Arguments + + * **geom**: Input geometry. + * **distance**: Radius of the buffer. + +### SQL Example + +```sql +SELECT ST_Buffer(ST_GeomFromText('POLYGON ((10 10, 11 10, 10 11, 10 10))'), 1.0); +``` + +## ST_DWithin + +### Description + +Returns true if two geometries are within a specified distance of each other. + +### Format + +`ST_DWithin (A: Geometry, B: Geometry, distance: Double)` + +### Arguments + + * **geomA**: Input geometry or geography. + * **geomB**: Input geometry or geography. + * **distance**: Distance in units of the geometry's coordinate system. + +### SQL Example + +```sql +SELECT ST_DWithin(ST_Point(0.25, 0.25), ST_GeomFromText('POLYGON ((0 0, 1 0, 0 1, 0 0))'), 0.5); +``` + +## ST_Envelope_Aggr + +### Description + +An aggregate function that returns the collective bounding box (envelope) of a set of geometries. + +### Format + +`ST_Envelope_Aggr (geom: Geometry)` + +### Arguments + + * **geom**: A column of geometries to be aggregated. 
+ +### SQL Example + +```sql +-- Create a table with geometries and calculate the aggregate envelope +WITH shapes(geom) AS ( + VALUES (ST_GeomFromWKT('POINT (0 1)')), + (ST_GeomFromWKT('POINT (10 11)')) +) +SELECT ST_AsText(ST_Envelope_Aggr(geom)) FROM shapes; +-- Returns: POLYGON ((0 1, 0 11, 10 11, 10 1, 0 1)) +``` + +## ST_Intersection_Aggr + +### Description + +An aggregate function that returns the geometric intersection of all geometries in a set. + +### Format + +`ST_Intersection_Aggr (geom: Geometry)` + +### Arguments + + * **geom**: A column of geometries to be aggregated. + +### SQL Example + +```sql +-- Create a table with overlapping polygons and find their common intersection +WITH shapes(geom) AS ( + VALUES (ST_GeomFromWKT('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))')), + (ST_GeomFromWKT('POLYGON((1 1, 3 1, 3 3, 1 3, 1 1))')) +) +SELECT ST_AsText(ST_Intersection_Aggr(geom)) FROM shapes; +-- Returns: POLYGON ((1 1, 1 2, 2 2, 2 1, 1 1)) +``` + +## ST_SetSRID + +### Description + +Sets the spatial reference system identifier (SRID) of a geometry. This only changes the metadata; it does not transform the coordinates. + +### Format + +`ST_SetSRID (geom: Geometry, srid: Integer)` + +### Arguments + + * **geom**: Input geometry or geography. + * **srid**: EPSG code to set (e.g., 4326). + +### SQL Example + +```sql +SELECT ST_SetSRID(ST_GeomFromWKT('POINT (-64.363049 45.091501)'), 4326); +``` + +## ST_Transform + +### Description + +Transforms the coordinates of a geometry from a source Coordinate Reference System (CRS) to a target CRS. + +If the source CRS is not specified, it will be read from the geometry's metadata. Sedona ensures that coordinates are handled in longitude/latitude order for geographic CRS transformations. + +### Format + +`ST_Transform (A: Geometry, TargetCRS: String)` +`ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String)` + +### Arguments + + * **geom**: Input geometry or geography. 
+ * **source_crs**: The source CRS code (e.g., 'EPSG:4326'). + * **target_crs**: The target CRS code to transform into. + * **lenient**: A boolean that, if true, assumes the source is EPSG:4326 if not specified. Defaults to true. + +### SQL Example + +```sql +-- Transform a WGS84 polygon to UTM zone 49N +SELECT ST_Transform(ST_SetSRID(ST_GeomFromWkt('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'), 4326), 'EPSG:32649'); +``` + +## ST_Union_Aggr + +### Description + +An aggregate function that returns the geometric union of all geometries in a set. + +### Format + +`ST_Union_Aggr (geom: Geometry)` + +### Arguments + + * **geom**: A column of geometries to be aggregated. + +### SQL Example + +```sql +-- Create a table with two separate polygons and unite them into a single multipolygon +WITH shapes(geom) AS ( + VALUES (ST_GeomFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))')), + (ST_GeomFromWKT('POLYGON ((2 2, 3 2, 3 3, 2 3, 2 2))')) +) +SELECT ST_AsText(ST_Union_Aggr(geom)) FROM shapes; +-- Returns: MULTIPOLYGON (((2 2, 3 2, 3 3, 2 3, 2 2)), ((0 0, 1 0, 1 1, 0 1, 0 0))) +``` diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 000000000..b197694e0 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,57 @@ +/* ========================================================================== + Global Color and Font Variables + ========================================================================== */ +:root { + --color-red: #CA463A; + --color-white: #fff; + --color-dark: #1C1C1C; + --font-inter: "Inter", + sans-serif; +} + +/* ========================================================================== + Header Styles + ========================================================================== */ + +/* Main header container (the top black bar) */ +.md-header { + box-shadow: none; + background-color: var(--color-dark); +} + +/* Inner content of the header */ +.md-header .md-header__inner { + background-color: var(--color-dark); 
+ min-height: 80px; +} + +/* Styles for the logo image */ +.md-header .md-header__inner .header-logo img, +.md-header .md-header__inner .header-logo svg { + height: 42px; + width: auto; +} + +/* ========================================================================== + Navigation Tabs Styles + ========================================================================== */ + +/* The main navigation bar container (the red bar) */ +.md-tabs { + background-color: var(--color-red); +} + +/* This ensures the navigation links are centered */ +.md-tabs .md-tabs__list { + justify-content: center; +} + +/* Styles for each link in the navigation bar */ +.md-tabs__link { + font-family: var(--font-inter); + color: var(--color-white); + + /* You can adjust the padding here to control spacing */ + /* The first value is top/bottom, the second is left/right. */ + padding: .5rem .6rem; +} diff --git a/mkdocs.yml b/mkdocs.yml index a14c5fb0f..a39f8f722 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,29 +1,21 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
site_name: SedonaDB site_description: "Documentation for Apache SedonaDB" nav: - Home: index.md - - CLI Quickstart: quickstart-cli.md - - Python Quickstart: quickstart-python.ipynb - - Development: development.md - - API Reference: - - Python: reference/python.md - - SQL: reference/sql.md + - SedonaDB Guides: + - CLI Quickstart: quickstart-cli.md + - Python Quickstart: quickstart-python.ipynb + - Development: development.md + - SedonaDB Reference: + - Python: + Functions: reference/python.md + - SQL: + - Functions: reference/sql.md + - Spatial Joins: reference/sql-joins.md + - Read Parquet Files: reference/read-parquet-files.md + - Blog: "https://sedona.apache.org/latest/blog/" + - Apache Software Foundation: "https://sedona.apache.org/latest/asf/asf/" + - Return to Sedona Homepage: "https://sedona.apache.org/latest/" repo_url: https://github.com/apache/sedona-db repo_name: apache/sedona-db theme: @@ -32,7 +24,7 @@ theme: font: false name: 'material' palette: - primary: 'deep orange' + primary: custom accent: 'green' favicon: image/sedona_logo_symbol.png logo: image/sedona_logo_symbol_white.svg @@ -47,9 +39,19 @@ theme: - search.suggest - navigation.footer - navigation.instant + - navigation.top + - navigation.sections - navigation.tabs - navigation.tabs.sticky - - navigation.top +extra: + version: + provider: mike + default: + - 0.1 + + +extra_css: + - stylesheets/extra.css copyright: Copyright © 2025 The Apache Software Foundation. Apache Sedona, Sedona, Apache, the Apache feather logo, and the Apache Sedona project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries. All other marks mentioned may be trademarks or registered trademarks of their respective owners. Please visit Apache Software Foundation for more details. 
@@ -113,21 +115,23 @@ plugins: manual: false - tags - mkdocstrings: - enable_inventory: true - handlers: - python: - paths: [python/sedonadb] - inventories: - - https://docs.python.org/3/objects.inv - - https://geopandas.org/en/stable/objects.inv - - https://pandas.pydata.org/docs/objects.inv - options: - docstring_section_style: list - docstring_style: google - line_length: 80 - separate_signature: true - show_root_heading: true - show_signature_annotations: true - show_source: false - show_symbol_type_toc: true - signature_crossrefs: true + handlers: + python: + # 'inventories' is a direct setting for the Python handler + inventories: + - https://docs.python.org/3/objects.inv + - https://geopandas.org/en/stable/objects.inv + - https://pandas.pydata.org/docs/objects.inv + # All display and path options go under a SINGLE 'options' block + options: + docstring_section_style: list + docstring_style: google + line_length: 80 + separate_signature: true + show_root_heading: true + show_signature_annotations: true + show_source: false + show_symbol_type_toc: true + signature_crossrefs: true + extra: + paths: ['python'] \ No newline at end of file From 276b74ba19d132496481c90f710af4a0635dfb5e Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 9 Sep 2025 12:25:15 -0700 Subject: [PATCH 02/25] adding Apache license --- README.md | 2 -- docs/index.md | 9 +++++++++ docs/reference/read-parquet-files.md | 20 ++++++++++++++++++++ docs/reference/sql-joins.md | 19 +++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6bbb4302a..c61d0bce7 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,6 @@ SedonaDB only runs on a single machine, so it’s perfect for processing smaller You can install Python SedonaDB with `pip install apache-sedona`. -You can also install Rust SedonaDB with `cargo add apache-sedona`. - ## Overture buildings example This section shows how to query the Overture buildings data. 
diff --git a/docs/index.md b/docs/index.md index cc64218c5..749f6f643 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,3 +1,8 @@ +--- +hide: + - toc +--- + + # Reading Parquet Files To read a Parquet file, you must use the dedicated `sd.read_parquet()` method. You cannot query a file path directly within the `sd.sql()` `FROM` clause. diff --git a/docs/reference/sql-joins.md b/docs/reference/sql-joins.md index 7c39ea1d5..1752ba272 100644 --- a/docs/reference/sql-joins.md +++ b/docs/reference/sql-joins.md @@ -1,3 +1,22 @@ + + # Spatial Joins You can perform spatial joins using standard SQL `INNER JOIN` syntax. The join condition is defined in the `ON` clause using a spatial function that specifies the relationship between the geometries of the two tables. From ea72545538e254417b20be4d8ff77cc1c298cd93 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 9 Sep 2025 12:29:46 -0700 Subject: [PATCH 03/25] fixing pre-commit --- docs/index.md | 16 +++++++++------- mkdocs.yml | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/index.md b/docs/index.md index 749f6f643..e8cc106a7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -69,13 +69,15 @@ SedonaDB only runs on a single machine, so it’s perfect for processing smaller ## Key features -* **Blazing fast**: SedonaDB runs on a single machine, optimized for geospatial workflows. -* SedonaDB is a **dependency-free**, **small binary** that is only XX KB. -* Supports **various file formats**, including GeoJSON, Shapefile, GeoParquet, CSV, and PostGIS. -* Exposes **several language APIs,** including SQL, Python, Rust, and R. -* **Portable**: Easy to run on the command line, locally or in the cloud with AWS Sagemaker, AWS Lambda, Azure Functions, Azure Machine Learning, or Google Colab. -* **Extensible**: You can extend SedonaDB to build your own geospatial compute engine custom for your needs. 
-* **Open source**: Apache Sedona is an open-source project managed according to the Apache Software Foundation's guidelines. +SedonaDB has several advantages: + +* **Blazing-Fast Performance:** Built in Rust to process massive geospatial datasets with exceptional speed. +* **Unified Geospatial Toolkit:** Access a comprehensive suite of functions for both vector and raster data in a single, powerful library. +* **Seamless Ecosystem Integration:** Built on Apache Arrow for smooth interoperability with popular data science libraries like GeoPandas, DuckDB, and Polars. +* **Flexible APIs:** Effortlessly switch between Python and SQL interfaces to match your preferred workflow and skillset. +* **Guaranteed CRS Propagation:** Automatically manages coordinate reference systems (CRS) to ensure spatial accuracy and prevent common errors. +* **Broad File Format Support:** Work with a wide range of both modern and legacy geospatial file formats such as GeoParquet. +* **Highly Extensible:** Easily customize and extend the library's functionality to meet your project's unique requirements. ## Installation diff --git a/mkdocs.yml b/mkdocs.yml index a39f8f722..88db366dd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -134,4 +134,4 @@ plugins: show_symbol_type_toc: true signature_crossrefs: true extra: - paths: ['python'] \ No newline at end of file + paths: ['python'] From d2719cd1436e6ebe75e4d6bc84b320d89e2d6e8b Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 9 Sep 2025 12:53:15 -0700 Subject: [PATCH 04/25] fixing pre-commit --- docs/index.md | 2 +- mkdocs.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/index.md b/docs/index.md index e8cc106a7..3a398a745 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ --- hide: - - toc + - navigation --- - -# CLI Quickstart - -SedonaDB's command-line interface provides an interactive SQL shell that can be used to -leverage the SedonaDB engine for SQL-only/shell-centric workflows. 
SedonaDB's CLI is -based on the [DataFusion CLI](https://datafusion.apache.org/user-guide/cli/index.html), -whose documentation may be useful for advanced features not covered in detail here. - -## Installation - -You can install `sedona-cli` using Cargo: - -```shell -cargo install sedona-cli -``` - -## Usage - -Running `sedona-cli` from a terminal will start an interactive SQL shell. Queries must end -in a semicolon (`;`) and can be cleared with `Control-C`. - -``` -Sedona CLI v0.0.1 -> SELECT ST_Point(0, 1) as geom; -┌────────────┐ -│ geom │ -│ wkb │ -╞════════════╡ -│ POINT(0 1) │ -└────────────┘ - -1 row(s)/1 column(s) fetched. -Elapsed 0.024 seconds. -``` - -See the [SQL Reference]() for details on the SQL functions and features available to the CLI. - -## Help - -From the interactive shell, use `\?` for special command help: - -``` -> \? -Command,Description -\d,list tables -\d name,describe table -\q,quit datafusion-cli -\?,help -\h,function list -\h function,search function -\quiet (true|false)?,print or set quiet mode -\pset [NAME [VALUE]],"set table output option -(format)" -``` - -From the command line, use `--help` to list launch options and/or options for interacting -with the CLI in a non-interactive context. - -``` -Command Line Client for Sedona's DataFusion-based query engine. - -Usage: sedona-cli [OPTIONS] - -Options: - -p, --data-path Path to your data, default to current directory - -c, --command [...] Execute the given command string(s), then exit. Commands are expected to be non empty. - -f, --file [...] Execute commands from file(s), then exit - -r, --rc [...] 
Run the provided files on startup instead of ~/.datafusionrc - --format [default: automatic] [possible values: csv, tsv, table, json, nd-json, automatic] - -q, --quiet Reduce printing other than the results and work quietly - --maxrows The max number of rows to display for 'Table' format - [possible values: numbers(0/10/...), inf(no limit)] [default: 40] - --color Enables console syntax highlighting - -h, --help Print help - -V, --version Print version -``` diff --git a/docs/reference/quickstart-python.md b/docs/reference/quickstart-python.md new file mode 100644 index 000000000..2f59d0e32 --- /dev/null +++ b/docs/reference/quickstart-python.md @@ -0,0 +1,176 @@ + + +# Python Quickstart + +SedonaDB for Python can be installed from **PyPI**: + +```shell +pip install "apache-sedona[db]" +``` + +## Import SedonaDB + +To get started, import the library and connect to a new session. You can run SQL queries directly on the session object. + +```python +import sedona.db + +sd = sedona.db.connect() +sd.sql("SELECT ST_Point(0, 1) as geom").show() +``` + +**Output:** + +```sh +┌────────────┐ +│ geom │ +│ wkb │ +╞════════════╡ +│ POINT(0 1) │ +└────────────┘ +``` + +## Spatial Join Example + +A common use case is performing a spatial join. +In this example, we'll find the country that each city belongs to by checking if the city's point geometry intersects with a country's polygon geometry. + +### Load Datasets + +First, load the cities and countries parquet files from their URLs into SedonaDB DataFrames. 
+ +```python +cities_url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet" +countries_url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet" + +cities = sd.read_parquet(cities_url) +countries = sd.read_parquet(countries_url) +``` + +### Register Views + +To query these DataFrames using SQL, they must be registered as temporary views in the session. + +```python +cities.to_view("cities") +countries.to_view("countries") +``` + +### Run the Join Query + +Now you can run a SQL query using `ST_Intersects` to join the two views. + +```python +# Join the cities and countries tables +sd.sql(""" + SELECT + cities.name AS city, + countries.name AS country, + countries.continent + FROM cities + JOIN countries + WHERE ST_Intersects(cities.geometry, countries.geometry) +""").show() +``` + +**Output:** + +``` +┌───────────────┬─────────────────────────────┬───────────────┐ +│ city ┆ country ┆ continent │ +│ utf8view ┆ utf8view ┆ utf8view │ +╞═══════════════╪═════════════════════════════╪═══════════════╡ +│ Suva ┆ Fiji ┆ Oceania │ +├───────────────┼─────────────────────────────┼───────────────┤ +│ Dodoma ┆ United Republic of Tanzania ┆ Africa │ +├───────────────┼─────────────────────────────┼───────────────┤ +│ Dar es Salaam ┆ United Republic of Tanzania ┆ Africa │ +├───────────────┼─────────────────────────────┼───────────────┤ +│ Bir Lehlou ┆ Western Sahara ┆ Africa │ +... +└───────────────┴─────────────────────────────┴───────────────┘ +``` + +## Creating a DataFrame Manually + +You can also create a SedonaDB DataFrame from scratch using SQL `VALUES` clauses and geometry functions like `ST_GeomFromWkt`. 
+ +```python +df = sd.sql(""" + SELECT * FROM (VALUES + ('one', ST_GeomFromWkt('POINT(1 2)')), + ('two', ST_GeomFromWkt('POLYGON((-74.0 40.7, -74.0 40.8, -73.9 40.8, -73.9 40.7, -74.0 40.7))')), + ('three', ST_GeomFromWkt('LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.8561 40.8484)'))) + AS t(val, point) +""") + +# Verify the object type +type(df) +``` + +**Output:** + +``` +sedonadb.dataframe.DataFrame +``` + +Once created, you can register it as a view and run further spatial operations on it. + +```python +df.to_view("fun_table") +sd.sql("SELECT *, ST_Centroid(point) AS centroid FROM fun_table").show() +``` + +**Output:** + +``` +┌───────┬─────────────────────────────────────────────┬────────────────────────────────────────────┐ +│ val ┆ point ┆ centroid │ +│ utf8 ┆ wkb ┆ wkb │ +╞═══════╪═════════════════════════════════════════════╪════════════════════════════════════════════╡ +│ one ┆ POINT(1 2) ┆ POINT(1 2) │ +├───────┼─────────────────────────────────────────────┼────────────────────────────────────────────┤ +│ two ┆ POLYGON((-74 40.7,-74 40.8,-73.9 40.8,-73.… ┆ POINT(-73.95000000000002 40.75) │ +├───────┼─────────────────────────────────────────────┼────────────────────────────────────────────┤ +│ three ┆ LINESTRING(-74.006 40.7128,-73.9352 40.730… ┆ POINT(-73.92111155675562 40.7664673976246… │ +└───────┴─────────────────────────────────────────────┴────────────────────────────────────────────┘ +``` + +## Interactive Mode + +For notebooks or interactive sessions, you can enable **interactive mode**. This eagerly prints the results of queries without requiring an explicit `.show()` call, which is useful for data exploration. 
+ +```python +sedona.db.options.interactive = True +sd.sql("SELECT ST_Point(0, 1) as geom") +``` + +**Output:** + +``` +┌────────────┐ +│ geom │ +│ wkb │ +╞════════════╡ +│ POINT(0 1) │ +└────────────┘ +``` + +For non-interactive scripts or when working with very large datasets, it's best to leave this option `False` to avoid accidentally pulling large amounts of data. diff --git a/docs/reference/read-parquet-files.md b/docs/reference/read-parquet-files.md index 927598df6..9d9f919ca 100644 --- a/docs/reference/read-parquet-files.md +++ b/docs/reference/read-parquet-files.md @@ -32,15 +32,25 @@ The correct process is a two-step approach: 1. **Register** the DataFrame as a temporary view using `.createOrReplaceTempView()`. 1. **Query** the view using `sd.sql()`. -```python -# 1. Load the Parquet file from a URL into a DataFrame +```python linenums="1" title="Read a parquet file with SedonaDB" + +import sedona.db +sd = sedona.db.connect() + +df = sd.read_parquet( + 's3://wherobots-benchmark-prod/SpatialBench_sf=1_format=parquet/' + 'building/building.parquet' +) + +# Load the Parquet file, which creates a Pandas DataFrame df = sd.read_parquet('s3://wherobots-benchmark-prod/SpatialBench_sf=1_format=parquet/building/building.parquet') -# 2. Register the DataFrame as a temporary view named 'buildings' -df.createOrReplaceTempView('buildings') +# Convert the Pandas DataFrame to a Spark DataFrame AND +# register it as a temporary view in a single line. +spark.createDataFrame(df).createOrReplaceTempView('zone') -# 3. Now, query the view using SQL -sd.sql("SELECT * FROM buildings LIMIT 10").show() +# Now, query the view using SQL +sd.sql("SELECT * FROM zone LIMIT 10").show() ``` ### Common Errors @@ -56,6 +66,6 @@ sd.sql("SELECT * FROM 's3://wherobots-benchmark-prod/SpatialBench_sf=1_format=pa **Resulting Error:** -``` +```bash sedonadb._lib.SedonaError: Error during planning: table '...s3://...' 
not found ``` diff --git a/mkdocs.yml b/mkdocs.yml index 3c4af3677..7c591e0d5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,10 +1,11 @@ site_name: SedonaDB site_description: "Documentation for Apache SedonaDB" +site_url: https://sedona.apache.org/sedonadb/ nav: - - Home: index.md + - SedonaDB: index.md - SedonaDB Guides: - - CLI Quickstart: quickstart-cli.md - Python Quickstart: quickstart-python.ipynb + - Python Quickstart (Markdown): reference/quickstart-python.md - Development: development.md - SedonaDB Reference: - Python: @@ -14,8 +15,9 @@ nav: - Spatial Joins: reference/sql-joins.md - Read Parquet Files: reference/read-parquet-files.md - Blog: "https://sedona.apache.org/latest/blog/" + - Community: "https://sedona.apache.org/latest/community/" - Apache Software Foundation: "https://sedona.apache.org/latest/asf/asf/" - - Return to Sedona Homepage: "https://sedona.apache.org/latest/" + - Sedona Homepage: "https://sedona.apache.org/latest/" repo_url: https://github.com/apache/sedona-db repo_name: apache/sedona-db theme: @@ -50,6 +52,7 @@ extra: - 0.1 + extra_css: - stylesheets/extra.css From f75ebe75906f4d1eda8f1cf930ca99e835947a15 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Thu, 11 Sep 2025 13:05:47 -0700 Subject: [PATCH 07/25] additional changes, removal of .ipynb --- docs/index.md | 18 ++++++++---------- docs/quickstart-python.ipynb | 4 ++-- mkdocs.yml | 1 - 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/docs/index.md b/docs/index.md index 3a398a745..0e93ab80a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,8 @@ --- hide: - navigation + +title: Introducing SedonaDB --- -# SedonaDB +SedonaDB is a high-performance, dependency-free geospatial compute engine designed for single-node processing, making it ideal for smaller datasets on local machines or cloud instances. -SedonaDB is a high-performance, dependency-free geospatial compute engine. 
+The initial `0.1` release supports a core set of vector operations, with comprehensive vector and raster computation capabilities planned for the near future. -You can easily run SedonaDB locally or in the cloud. The first release supports a core set of vector operations, but the full-suite of common vector and raster computations will be supported soon. +## Run a query in SQL, Python, or Rust -SedonaDB only runs on a single machine, so it’s perfect for processing smaller datasets. You can use SedonaSpark, SedonaFlink, or SedonaSnow for operations on larger datasets. +SedonaDB querying in SQL, Python, or Rust. === "SQL" @@ -81,7 +83,7 @@ SedonaDB has several advantages: ## Installation -Here’s how to install SedonaDB with various build tools: +Here's how to install SedonaDB with various build tools: === "pip" @@ -95,12 +97,8 @@ Here’s how to install SedonaDB with various build tools: install.packages("sedonadb", repos = "https://community.r-multiverse.org") ``` -## SedonaDB example with vector data - -TODO - ## Have questions? -Feel free to start a GitHub Discussion or join the Discord community to ask the developers any questions you may have. +Start a GitHub Discussion or join the [Discord community](https://discord.com/invite/9A3k5dEBsY) and ask the developers any questions you may have. We look forward to collaborating with you! 
diff --git a/docs/quickstart-python.ipynb b/docs/quickstart-python.ipynb index 99ef7c868..0323c0d9d 100644 --- a/docs/quickstart-python.ipynb +++ b/docs/quickstart-python.ipynb @@ -355,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "3629c6e6", "metadata": {}, "outputs": [ @@ -376,7 +376,7 @@ } ], "source": [ - "sedonadb.options.interactive = True\n", + "sedona.db.options.interactive = True\n", "sd.sql(\"SELECT ST_Point(0, 1) as geom\")" ] }, diff --git a/mkdocs.yml b/mkdocs.yml index 7c591e0d5..1b36d56ec 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -4,7 +4,6 @@ site_url: https://sedona.apache.org/sedonadb/ nav: - SedonaDB: index.md - SedonaDB Guides: - - Python Quickstart: quickstart-python.ipynb - Python Quickstart (Markdown): reference/quickstart-python.md - Development: development.md - SedonaDB Reference: From e2a1a71c02685cb616a5f1db2eaed1ccd93b00d6 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Thu, 11 Sep 2025 20:38:56 -0700 Subject: [PATCH 08/25] continued fixes, removing interactive mode --- docs/index.md | 2 + docs/quickstart-python.ipynb | 43 ------- docs/reference/quickstart-python.md | 176 ---------------------------- mkdocs.yml | 2 +- 4 files changed, 3 insertions(+), 220 deletions(-) delete mode 100644 docs/reference/quickstart-python.md diff --git a/docs/index.md b/docs/index.md index b217bed7c..7eb6c8bdf 100644 --- a/docs/index.md +++ b/docs/index.md @@ -29,6 +29,8 @@ SedonaDB is a high-performance, dependency-free geospatial compute engine design The initial `0.1` release supports a core set of vector operations, with comprehensive vector and raster computation capabilities planned for the near future. +TODO Insert video + ## Run a query in SQL, Python, or Rust SedonaDB querying in SQL, Python, or Rust. 
diff --git a/docs/quickstart-python.ipynb b/docs/quickstart-python.ipynb index 0323c0d9d..bfec4b4c0 100644 --- a/docs/quickstart-python.ipynb +++ b/docs/quickstart-python.ipynb @@ -344,49 +344,6 @@ "source": [ "sd.sql(\"select *, ST_Centroid(point) as centroid from fun_table\").show()" ] - }, - { - "cell_type": "markdown", - "id": "d5244831-100e-4d8c-b5df-b796631bffc8", - "metadata": {}, - "source": [ - "## Interactive mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3629c6e6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌────────────┐\n", - "│ geom │\n", - "│ wkb │\n", - "╞════════════╡\n", - "│ POINT(0 1) │\n", - "└────────────┘" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sedona.db.options.interactive = True\n", - "sd.sql(\"SELECT ST_Point(0, 1) as geom\")" - ] - }, - { - "cell_type": "markdown", - "id": "fc34cc93", - "metadata": {}, - "source": [ - "Most SedonaDB Python users will want to turn on interactive mode when developing code in a notebook or interactive session. Interactive mode prints results eagerly, which is usually what you want when interacting with a new data source or constructing a query. When interacting with large remote data sources or non-interactive workloads, this is usually *not* what you want; however, you can use an explicit `.show()` to force executing enough of a query to show the first few rows." - ] } ], "metadata": { diff --git a/docs/reference/quickstart-python.md b/docs/reference/quickstart-python.md deleted file mode 100644 index 2f59d0e32..000000000 --- a/docs/reference/quickstart-python.md +++ /dev/null @@ -1,176 +0,0 @@ - - -# Python Quickstart - -SedonaDB for Python can be installed from **PyPI**: - -```shell -pip install "apache-sedona[db]" -``` - -## Import SedonaDB - -To get started, import the library and connect to a new session. You can run SQL queries directly on the session object. 
- -```python -import sedona.db - -sd = sedona.db.connect() -sd.sql("SELECT ST_Point(0, 1) as geom").show() -``` - -**Output:** - -```sh -┌────────────┐ -│ geom │ -│ wkb │ -╞════════════╡ -│ POINT(0 1) │ -└────────────┘ -``` - -## Spatial Join Example - -A common use case is performing a spatial join. -In this example, we'll find the country that each city belongs to by checking if the city's point geometry intersects with a country's polygon geometry. - -### Load Datasets - -First, load the cities and countries parquet files from their URLs into SedonaDB DataFrames. - -```python -cities_url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet" -countries_url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet" - -cities = sd.read_parquet(cities_url) -countries = sd.read_parquet(countries_url) -``` - -### Register Views - -To query these DataFrames using SQL, they must be registered as temporary views in the session. - -```python -cities.to_view("cities") -countries.to_view("countries") -``` - -### Run the Join Query - -Now you can run a SQL query using `ST_Intersects` to join the two views. 
- -```python -# Join the cities and countries tables -sd.sql(""" - SELECT - cities.name AS city, - countries.name AS country, - countries.continent - FROM cities - JOIN countries - WHERE ST_Intersects(cities.geometry, countries.geometry) -""").show() -``` - -**Output:** - -``` -┌───────────────┬─────────────────────────────┬───────────────┐ -│ city ┆ country ┆ continent │ -│ utf8view ┆ utf8view ┆ utf8view │ -╞═══════════════╪═════════════════════════════╪═══════════════╡ -│ Suva ┆ Fiji ┆ Oceania │ -├───────────────┼─────────────────────────────┼───────────────┤ -│ Dodoma ┆ United Republic of Tanzania ┆ Africa │ -├───────────────┼─────────────────────────────┼───────────────┤ -│ Dar es Salaam ┆ United Republic of Tanzania ┆ Africa │ -├───────────────┼─────────────────────────────┼───────────────┤ -│ Bir Lehlou ┆ Western Sahara ┆ Africa │ -... -└───────────────┴─────────────────────────────┴───────────────┘ -``` - -## Creating a DataFrame Manually - -You can also create a SedonaDB DataFrame from scratch using SQL `VALUES` clauses and geometry functions like `ST_GeomFromWkt`. - -```python -df = sd.sql(""" - SELECT * FROM (VALUES - ('one', ST_GeomFromWkt('POINT(1 2)')), - ('two', ST_GeomFromWkt('POLYGON((-74.0 40.7, -74.0 40.8, -73.9 40.8, -73.9 40.7, -74.0 40.7))')), - ('three', ST_GeomFromWkt('LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.8561 40.8484)'))) - AS t(val, point) -""") - -# Verify the object type -type(df) -``` - -**Output:** - -``` -sedonadb.dataframe.DataFrame -``` - -Once created, you can register it as a view and run further spatial operations on it. 
- -```python -df.to_view("fun_table") -sd.sql("SELECT *, ST_Centroid(point) AS centroid FROM fun_table").show() -``` - -**Output:** - -``` -┌───────┬─────────────────────────────────────────────┬────────────────────────────────────────────┐ -│ val ┆ point ┆ centroid │ -│ utf8 ┆ wkb ┆ wkb │ -╞═══════╪═════════════════════════════════════════════╪════════════════════════════════════════════╡ -│ one ┆ POINT(1 2) ┆ POINT(1 2) │ -├───────┼─────────────────────────────────────────────┼────────────────────────────────────────────┤ -│ two ┆ POLYGON((-74 40.7,-74 40.8,-73.9 40.8,-73.… ┆ POINT(-73.95000000000002 40.75) │ -├───────┼─────────────────────────────────────────────┼────────────────────────────────────────────┤ -│ three ┆ LINESTRING(-74.006 40.7128,-73.9352 40.730… ┆ POINT(-73.92111155675562 40.7664673976246… │ -└───────┴─────────────────────────────────────────────┴────────────────────────────────────────────┘ -``` - -## Interactive Mode - -For notebooks or interactive sessions, you can enable **interactive mode**. This eagerly prints the results of queries without requiring an explicit `.show()` call, which is useful for data exploration. - -```python -sedona.db.options.interactive = True -sd.sql("SELECT ST_Point(0, 1) as geom") -``` - -**Output:** - -``` -┌────────────┐ -│ geom │ -│ wkb │ -╞════════════╡ -│ POINT(0 1) │ -└────────────┘ -``` - -For non-interactive scripts or when working with very large datasets, it's best to leave this option `False` to avoid accidentally pulling large amounts of data. 
diff --git a/mkdocs.yml b/mkdocs.yml index eed58785f..e46f80428 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -4,7 +4,7 @@ site_url: https://sedona.apache.org/sedonadb/ nav: - SedonaDB: index.md - SedonaDB Guides: - - Python Quickstart: reference/quickstart-python.md + - Python Quickstart quickstart-python.md - Development: development.md - SedonaDB Reference: - Python: From bece09448a8a73ef11f45a2c41f71d7c1ee15c55 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Thu, 11 Sep 2025 21:43:19 -0700 Subject: [PATCH 09/25] changing introduction --- docs/index.md | 14 ++++++-------- mkdocs.yml | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/index.md b/docs/index.md index 7eb6c8bdf..8475a65af 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,8 +3,7 @@ hide: - navigation title: Introducing SedonaDB -======= - +--- + +# Python Quickstart + +SedonaDB for Python can be installed from **PyPI**: + +```shell +pip install "apache-sedona[db]" +``` + +## Import SedonaDB + +To get started, import the library and connect to a new session. You can run SQL queries directly on the session object. + +```python +import sedona.db + +sd = sedona.db.connect() +sd.sql("SELECT ST_Point(0, 1) as geom").show() +``` + +**Output:** + +```sh +┌────────────┐ +│ geom │ +│ wkb │ +╞════════════╡ +│ POINT(0 1) │ +└────────────┘ +``` + +## Spatial Join Example + +A common use case is performing a spatial join. +In this example, we'll find the country that each city belongs to by checking if the city's point geometry intersects with a country's polygon geometry. + +### Load Datasets + +First, load the cities and countries parquet files from their URLs into SedonaDB DataFrames. 
+ +```python +cities_url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet" +countries_url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet" + +cities = sd.read_parquet(cities_url) +countries = sd.read_parquet(countries_url) +``` + +### Register Views + +To query these DataFrames using SQL, they must be registered as temporary views in the session. + +```python +cities.to_view("cities") +countries.to_view("countries") +``` + +### Run the Join Query + +Now you can run a SQL query using `ST_Intersects` to join the two views. + +```python +# Join the cities and countries tables +sd.sql(""" + SELECT + cities.name AS city, + countries.name AS country, + countries.continent + FROM cities + JOIN countries + WHERE ST_Intersects(cities.geometry, countries.geometry) +""").show() +``` + +**Output:** + +``` +┌───────────────┬─────────────────────────────┬───────────────┐ +│ city ┆ country ┆ continent │ +│ utf8view ┆ utf8view ┆ utf8view │ +╞═══════════════╪═════════════════════════════╪═══════════════╡ +│ Suva ┆ Fiji ┆ Oceania │ +├───────────────┼─────────────────────────────┼───────────────┤ +│ Dodoma ┆ United Republic of Tanzania ┆ Africa │ +├───────────────┼─────────────────────────────┼───────────────┤ +│ Dar es Salaam ┆ United Republic of Tanzania ┆ Africa │ +├───────────────┼─────────────────────────────┼───────────────┤ +│ Bir Lehlou ┆ Western Sahara ┆ Africa │ +... +└───────────────┴─────────────────────────────┴───────────────┘ +``` + +## Creating a DataFrame Manually + +You can also create a SedonaDB DataFrame from scratch using SQL `VALUES` clauses and geometry functions like `ST_GeomFromWkt`. 
+ +```python +df = sd.sql(""" + SELECT * FROM (VALUES + ('one', ST_GeomFromWkt('POINT(1 2)')), + ('two', ST_GeomFromWkt('POLYGON((-74.0 40.7, -74.0 40.8, -73.9 40.8, -73.9 40.7, -74.0 40.7))')), + ('three', ST_GeomFromWkt('LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.8561 40.8484)'))) + AS t(val, point) +""") + +# Verify the object type +type(df) +``` + +**Output:** + +``` +sedonadb.dataframe.DataFrame +``` + +Once created, you can register it as a view and run further spatial operations on it. + +```python +df.to_view("fun_table") +sd.sql("SELECT *, ST_Centroid(point) AS centroid FROM fun_table").show() +``` + +**Output:** + +``` +┌───────┬─────────────────────────────────────────────┬────────────────────────────────────────────┐ +│ val ┆ point ┆ centroid │ +│ utf8 ┆ wkb ┆ wkb │ +╞═══════╪═════════════════════════════════════════════╪════════════════════════════════════════════╡ +│ one ┆ POINT(1 2) ┆ POINT(1 2) │ +├───────┼─────────────────────────────────────────────┼────────────────────────────────────────────┤ +│ two ┆ POLYGON((-74 40.7,-74 40.8,-73.9 40.8,-73.… ┆ POINT(-73.95000000000002 40.75) │ +├───────┼─────────────────────────────────────────────┼────────────────────────────────────────────┤ +│ three ┆ LINESTRING(-74.006 40.7128,-73.9352 40.730… ┆ POINT(-73.92111155675562 40.7664673976246… │ +└───────┴─────────────────────────────────────────────┴────────────────────────────────────────────┘ +``` From c40f423df067b430ff7dc4a4ea02f5b917d22425 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Fri, 12 Sep 2025 11:05:38 -0700 Subject: [PATCH 11/25] Adding link to downloadable ipynb --- docs/quickstart-python.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/quickstart-python.md b/docs/quickstart-python.md index 2e4479c14..1ff5c1627 100644 --- a/docs/quickstart-python.md +++ b/docs/quickstart-python.md @@ -19,11 +19,13 @@ # Python Quickstart -SedonaDB for Python can be installed from **PyPI**: +SedonaDB for Python can be 
installed from PyPI: ```shell pip install "apache-sedona[db]" ``` +!!!tip "Run this tutorial as an interactive notebook" + You can also download the `.ipynb` version of this file from the [SedonaDB GitHub](https://github.com/apache/sedona-db/blob/main/docs/quickstart-python.ipynb) and run it as an interactive notebook. ## Import SedonaDB From 67c307785c67d5bdec5f3b287e3a14221790902f Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 10:19:57 -0700 Subject: [PATCH 12/25] Finish applying stashed changes and resolve conflict --- docs/geopandas-interop.ipynb | 6 +++--- docs/programming-guide.ipynb | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/docs/geopandas-interop.ipynb b/docs/geopandas-interop.ipynb index ce111166e..cef23d963 100644 --- a/docs/geopandas-interop.ipynb +++ b/docs/geopandas-interop.ipynb @@ -14,15 +14,15 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "0434bead-2628-4844-a3f6-2f9c15a21899", "metadata": {}, "outputs": [], "source": [ - "import sedonadb\n", + "import sedona.db\n", "import geopandas as gpd\n", "\n", - "sd = sedonadb.connect()" + "sd = sedona.db.connect()" ] }, { diff --git a/docs/programming-guide.ipynb b/docs/programming-guide.ipynb index 392fdbd93..ba460fdc5 100644 --- a/docs/programming-guide.ipynb +++ b/docs/programming-guide.ipynb @@ -20,14 +20,24 @@ }, { "cell_type": "code", +<<<<<<< HEAD "execution_count": 2, +======= + "execution_count": null, +>>>>>>> 33072a3 (Finish applying stashed changes and resolve conflict) "id": "53c3b7a8-c42a-407a-a454-6ee1e943fbcc", "metadata": {}, "outputs": [], "source": [ +<<<<<<< HEAD "import sedonadb\n", "\n", "sd = sedonadb.connect()" +======= + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()\n" +>>>>>>> 33072a3 (Finish applying stashed changes and resolve conflict) ] }, { @@ -246,7 +256,11 @@ "source": [ "df = sd.sql(\"\"\"\n", "SELECT name, ST_Point(lng, lat) AS location\n", +<<<<<<< HEAD
"FROM (VALUES \n", +======= + "FROM (VALUES\n", +>>>>>>> 33072a3 (Finish applying stashed changes and resolve conflict) " (101, -74.0, 40.7, 'Pizza Palace'),\n", " (102, -73.99, 40.69, 'Burger Barn'),\n", " (103, -74.02, 40.72, 'Taco Town'),\n", @@ -259,7 +273,11 @@ "\n", "df = sd.sql(\"\"\"\n", "SELECT name, ST_Point(lng, lat) AS location\n", +<<<<<<< HEAD "FROM (VALUES \n", +======= + "FROM (VALUES\n", +>>>>>>> 33072a3 (Finish applying stashed changes and resolve conflict) " (1, -74.0, 40.7, 'Alice'),\n", " (2, -73.9, 40.8, 'Bob'),\n", " (3, -74.1, 40.6, 'Carol')\n", From 2590c990ccec80cfccfcde8905a37bf6da6ee387 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 10:20:15 -0700 Subject: [PATCH 13/25] Finish applying stashed changes and resolve conflict --- docs/programming-guide.ipynb | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/docs/programming-guide.ipynb b/docs/programming-guide.ipynb index ba460fdc5..7183dc6ba 100644 --- a/docs/programming-guide.ipynb +++ b/docs/programming-guide.ipynb @@ -20,24 +20,14 @@ }, { "cell_type": "code", -<<<<<<< HEAD "execution_count": 2, -======= - "execution_count": null, ->>>>>>> 33072a3 (Finish applying stashed changes and resolve conflict) "id": "53c3b7a8-c42a-407a-a454-6ee1e943fbcc", "metadata": {}, "outputs": [], "source": [ -<<<<<<< HEAD - "import sedonadb\n", - "\n", - "sd = sedonadb.connect()" -======= "import sedona.db\n", "\n", "sd = sedona.db.connect()\n" ->>>>>>> 33072a3 (Finish applying stashed changes and resolve conflict) ] }, { @@ -51,7 +41,7 @@ "\n", "**Manually creating SedonaDB DataFrame**\n", "\n", - "Here’s how to manually create a SedonaDB DataFrame:" + "Here's how to manually create a SedonaDB DataFrame:" ] }, { From 02fa7931f5ba184aea6750df98a8524d81ec8cc4 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 11:00:08 -0700 Subject: [PATCH 14/25] fixing --- docs/programming-guide.ipynb | 12 ++---------- 1 file changed, 2 insertions(+), 10 
deletions(-) diff --git a/docs/programming-guide.ipynb b/docs/programming-guide.ipynb index 7183dc6ba..220e022da 100644 --- a/docs/programming-guide.ipynb +++ b/docs/programming-guide.ipynb @@ -27,7 +27,7 @@ "source": [ "import sedona.db\n", "\n", - "sd = sedona.db.connect()\n" + "sd = sedona.db.connect()" ] }, { @@ -246,11 +246,7 @@ "source": [ "df = sd.sql(\"\"\"\n", "SELECT name, ST_Point(lng, lat) AS location\n", -<<<<<<< HEAD - "FROM (VALUES \n", -======= "FROM (VALUES\n", ->>>>>>> 33072a3 (Finish applying stashed changes and resolve conflict) " (101, -74.0, 40.7, 'Pizza Palace'),\n", " (102, -73.99, 40.69, 'Burger Barn'),\n", " (103, -74.02, 40.72, 'Taco Town'),\n", @@ -263,12 +259,8 @@ "\n", "df = sd.sql(\"\"\"\n", "SELECT name, ST_Point(lng, lat) AS location\n", -<<<<<<< HEAD - "FROM (VALUES \n", -======= "FROM (VALUES\n", ->>>>>>> 33072a3 (Finish applying stashed changes and resolve conflict) - " (1, -74.0, 40.7, 'Alice'),\n", + " (1, -74.0, 40.7, 'Ali ce'),\n", " (2, -73.9, 40.8, 'Bob'),\n", " (3, -74.1, 40.6, 'Carol')\n", ") AS t(id, lng, lat, name)\n", From 7136a7d22018dd57d8ed0001e3ad34025ffcf5ea Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 11:55:44 -0700 Subject: [PATCH 15/25] fix notebook --- docs/programming-guide.ipynb | 36 +++++++++++++++++++++--------------- docs/stylesheets/extra.css | 24 ++++++++++++------------ 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/docs/programming-guide.ipynb b/docs/programming-guide.ipynb index 220e022da..042beac66 100644 --- a/docs/programming-guide.ipynb +++ b/docs/programming-guide.ipynb @@ -11,11 +11,11 @@ "\n", "You will learn how to create SedonaDB DataFrames, run spatial queries, and perform I/O operations with various types of files.\n", "\n", - "Let’s start by establishing a SedonaDB connection.\n", + "Let's start by establishing a SedonaDB connection.\n", "\n", "## Establish SedonaDB connection\n", "\n", - "Here’s how to create the SedonaDB connection:" + "Here's 
how to create the SedonaDB connection:" ] }, { @@ -35,7 +35,7 @@ "id": "7aeaa60f-2325-418c-8e72-4344bd4a75fe", "metadata": {}, "source": [ - "Now let’s see how to create SedonaDB DataFrames.\n", + "Now, let's see how to create SedonaDB DataFrames.\n", "\n", "## Create SedonaDB DataFrame\n", "\n", @@ -95,7 +95,7 @@ "source": [ "**Create SedonaDB DataFrame from files in S3**\n", "\n", - "For most production applications, you will create SedonaDB DataFrames by reading data from a file. Let’s see how to read GeoParquet files in AWS S3 into a SedonaDB DataFrame." + "For most production applications, you will create SedonaDB DataFrames by reading data from a file. Let's see how to read GeoParquet files in AWS S3 into a SedonaDB DataFrame." ] }, { @@ -116,7 +116,7 @@ "id": "858fcc66-816d-4c71-8875-82b74169eccd", "metadata": {}, "source": [ - "Let’s now run some spatial queries.\n", + "Now, let's run some spatial queries.\n", "\n", "**Read from GeoPandas DataFrame**\n", "\n", @@ -181,11 +181,11 @@ "source": [ "## Spatial queries\n", "\n", - "Let’s see how to run spatial operations like filtering, joins, and clustering algorithms.\n", + "Let's see how to run spatial operations like filtering, joins, and clustering algorithms.\n", "\n", - "***Spatial filtering***\n", + "**Spatial filtering**\n", "\n", - "Let’s run a spatial filtering operation to fetch all the objects in the following polygon:" + "Let's run a spatial filtering operation to fetch all the objects in the following polygon:" ] }, { @@ -232,7 +232,7 @@ "source": [ "You can see it only includes the divisions in the Nova Scotia area. Skip to the visualization section to see how this data can be graphed on a map.\n", "\n", - "***K-nearest neighbors (KNN) joins***\n", + "**K-nearest neighbors (KNN) joins**\n", "\n", "Create `restaurants` and `customers` tables so we can demonstrate the KNN join functionality." 
] @@ -349,17 +349,23 @@ "id": "2e93fe6a-b0a7-4ec0-952c-dde9edcacdc4", "metadata": {}, "source": [ - "Notice how each customer has two rows - one for each of the two closest restaurants.\n", - "\n", - "## Files\n", + "Notice how each customer has two rows - one for each of the two closest restaurants." + ] + }, + { + "cell_type": "markdown", + "id": "3cb1e53b", + "metadata": {}, + "source": [ + "## GeoParquet support\n", "\n", - "You can read GeoParquet files with SedonaDB, see the following example:\n", + "You can also read GeoParquet files in SedonaDB using `read_parquet()`:\n", "\n", "```python\n", - "df = sd.read_parquet(\"some_file.parquet\")\n", + "df = sd.read_parquet(\"DATA_FILE.parquet\")\n", "```\n", "\n", - "Once you read the file, you can easily expose it as a view and query it with spatial SQL, as we demonstrated in the example above." + "Once you read the file, you can easily expose it as a view and query it with spatial SQL, as we demonstrated in the example above.\n" ] } ], diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css index b197694e0..2ed787430 100644 --- a/docs/stylesheets/extra.css +++ b/docs/stylesheets/extra.css @@ -32,26 +32,26 @@ width: auto; } -/* ========================================================================== - Navigation Tabs Styles - ========================================================================== */ +/* --- Refined Navigation Tabs Styling --- */ -/* The main navigation bar container (the red bar) */ +/* 1. Set the background color for the main navigation bar */ .md-tabs { - background-color: var(--color-red); + background-color: var(--color-red); /* Using your original variable for the red color */ } -/* This ensures the navigation links are centered */ +/* 2. Center the list of navigation links inside the bar */ .md-tabs .md-tabs__list { justify-content: center; } -/* Styles for each link in the navigation bar */ +/* 3. 
Style the individual navigation links */ .md-tabs__link { - font-family: var(--font-inter); - color: var(--color-white); + /* Use a normal font weight, not bold */ + font-weight: 400; + + /* Ensure the text color is slightly soft white */ + color: rgba(255, 255, 255, 0.85); - /* You can adjust the padding here to control spacing */ - /* The first value is top/bottom, the second is left/right. */ - padding: .5rem .6rem; + /* Adjusted spacing for the links: slightly less horizontal padding */ + padding: 0.8rem 0.9rem; /* Reduced from 1.3rem to 0.9rem */ } From b10b081a1d1ad91c113668efe06749f19b60715e Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 11:58:57 -0700 Subject: [PATCH 16/25] Update docs/index.md Co-authored-by: Dewey Dunnington --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 8475a65af..f7060a3a8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -32,7 +32,7 @@ The initial `0.1` release supports a core set of vector operations, with compreh SedonaDB offers a flexible query interface in SQL, Python, or Rust. -Engineered for speed, SedonaDB provides blazing-fast geospatial processing on a single machine. This makes it perfect for the rapid analysis of smaller datasets, whether you're working locally or on a cloud server. While the initial release focuses on core vector operations, a full suite of vector and raster computations is on the roadmap. +Engineered for speed, SedonaDB provides performant geospatial processing on a single machine. This makes it perfect for the rapid analysis of smaller datasets, whether you're working locally or on a cloud server. While the initial release focuses on core vector operations, a full suite of vector and raster computations is on the roadmap. For massive, distributed workloads, you can leverage the power of SedonaSpark, SedonaFlink, or SedonaSnow. 
From 00961d13c98615b2d0938b4ca88ce319a3e59f59 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 12:58:50 -0700 Subject: [PATCH 17/25] fixing styling --- docs/index.md | 26 +++++++++++++------------- docs/stylesheets/extra.css | 24 ++++++++++++------------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/index.md b/docs/index.md index f7060a3a8..fc1e07ac3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -74,19 +74,7 @@ SedonaFlink, or SedonaSnow. sd_sql("SELECT ST_Point(0, 1) as geom") ``` -## Key features - -SedonaDB has several advantages: - -* **Blazing-Fast Performance:** Built in Rust to process massive geospatial datasets with exceptional speed. -* **Unified Geospatial Toolkit:** Access a comprehensive suite of functions for both vector and raster data in a single, powerful library. -* **Seamless Ecosystem Integration:** Built on Apache Arrow for smooth interoperability with popular data science libraries like GeoPandas, DuckDB, and Polars. -* **Flexible APIs:** Effortlessly switch between Python and SQL interfaces to match your preferred workflow and skillset. -* **Guaranteed CRS Propagation:** Automatically manages coordinate reference systems (CRS) to ensure spatial accuracy and prevent common errors. -* **Broad File Format Support:** Work with a wide range of both modern and legacy geospatial file formats like geoparquet. -* **Highly Extensible:** Easily customize and extend the library's functionality to meet your project's unique requirements. - -## Installation +## Install SedonaDB Here's how to install SedonaDB with various build tools: @@ -102,6 +90,18 @@ Here's how to install SedonaDB with various build tools: install.packages("sedonadb", repos = "https://community.r-multiverse.org") ``` +## Key features + +SedonaDB has several advantages: + +* **Blazing-Fast Performance:** Built in Rust to process massive geospatial datasets with exceptional speed. 
+* **Unified Geospatial Toolkit:** Access a comprehensive suite of functions for both vector and raster data in a single, powerful library. +* **Seamless Ecosystem Integration:** Built on Apache Arrow for smooth interoperability with popular data science libraries like GeoPandas, DuckDB, and Polars. +* **Flexible APIs:** Effortlessly switch between Python and SQL interfaces to match your preferred workflow and skillset. +* **Guaranteed CRS Propagation:** Automatically manages coordinate reference systems (CRS) to ensure spatial accuracy and prevent common errors. +* **Broad File Format Support:** Work with a wide range of both modern and legacy geospatial file formats like geoparquet. +* **Highly Extensible:** Easily customize and extend the library's functionality to meet your project's unique requirements. + ## Have questions? Start a [GitHub Discussion ](https://github.com/apache/sedona-db/issues)or join the [Discord community](https://discord.com/invite/9A3k5dEBsY) and ask the developers any questions you may have. diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css index 2ed787430..b75b5e526 100644 --- a/docs/stylesheets/extra.css +++ b/docs/stylesheets/extra.css @@ -32,26 +32,26 @@ width: auto; } -/* --- Refined Navigation Tabs Styling --- */ +/* --- Definitive Navigation CSS (Final Version) --- */ -/* 1. Set the background color for the main navigation bar */ +/* 1. Set the height of the main navigation bar */ .md-tabs { - background-color: var(--color-red); /* Using your original variable for the red color */ + background-color: var(--color-red); + height: 2.5rem; /* Set an explicit, predictable height for the bar */ } -/* 2. Center the list of navigation links inside the bar */ +/* 2. 
Control the alignment of the links within the bar */ .md-tabs .md-tabs__list { - justify-content: center; + height: 100%; /* Make the link container fill the bar's height */ + justify-content: center; /* Center links horizontally */ + align-items: center; /* NEW: Center links vertically */ + flex-wrap: wrap; /* Allow wrapping on small screens */ } /* 3. Style the individual navigation links */ .md-tabs__link { - /* Use a normal font weight, not bold */ font-weight: 400; - - /* Ensure the text color is slightly soft white */ color: rgba(255, 255, 255, 0.85); - - /* Adjusted spacing for the links: slightly less horizontal padding */ - padding: 0.8rem 0.9rem; /* Reduced from 1.3rem to 0.9rem */ -} + /* We no longer need vertical padding for spacing */ + padding: 0 0.9rem; +} \ No newline at end of file From 1f9e34248c7872b7949a3c5443f0a8e018ba9b31 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 13:43:41 -0700 Subject: [PATCH 18/25] adding lint to check for pre-commit, adding navigation styling, adding conversion to pre-commit --- .github/workflows/lint.yml | 11 +++++++++++ .pre-commit-config.yaml | 13 +++++++++++++ docs/stylesheets/extra.css | 1 + 3 files changed, 25 insertions(+) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 000000000..997eb07c2 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,11 @@ +name: Lint and Test + +on: [push, pull_request] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - uses: pre-commit/action@v3.0.1 \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fefea8bb6..22a5e7fdd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -57,3 +57,16 @@ repos: types_or: [c, c++] # Don't run on vendored files exclude: 
"^c/(sedona-geoarrow-c/src/geoarrow|sedona-geoarrow-c/src/nanoarrow|sedona-tg/src/tg)/.*" + + - repo: local + hooks: + - id: jupyter-nbconvert + name: Convert Jupyter Notebook to Markdown + # IMPORTANT: Change the path below to your actual notebook file + entry: jupyter nbconvert --to markdown --execute docs/quickstart-python.ipynb + language: system + types: [jupyter] + # This ensures the hook only runs when this specific notebook changes + files: ^docs/quickstart-python.ipynb$ + # Always run this hook at the commit stage + stages: [commit] \ No newline at end of file diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css index b75b5e526..38d4d13ec 100644 --- a/docs/stylesheets/extra.css +++ b/docs/stylesheets/extra.css @@ -54,4 +54,5 @@ color: rgba(255, 255, 255, 0.85); /* We no longer need vertical padding for spacing */ padding: 0 0.9rem; + font-size: 0.65rem; /* NEW: Adjust font size */ } \ No newline at end of file From 6b9a819af8ff6676aa92dded465b50d6ecc2c848 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 13:59:53 -0700 Subject: [PATCH 19/25] fixing notebook command --- .github/workflows/lint.yml | 2 +- .pre-commit-config.yaml | 2 +- docs/index.md | 2 +- docs/quickstart-python.ipynb | 75 +++++++++++------ docs/quickstart-python.md | 156 ----------------------------------- docs/stylesheets/extra.css | 2 +- 6 files changed, 54 insertions(+), 185 deletions(-) delete mode 100644 docs/quickstart-python.md diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 997eb07c2..4f1ab3c0c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,4 +8,4 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - - uses: pre-commit/action@v3.0.1 \ No newline at end of file + - uses: pre-commit/action@v3.0.1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 22a5e7fdd..76253a3ef 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -69,4 +69,4 @@ 
repos: # This ensures the hook only runs when this specific notebook changes files: ^docs/quickstart-python.ipynb$ # Always run this hook at the commit stage - stages: [commit] \ No newline at end of file + stages: [pre-commit] diff --git a/docs/index.md b/docs/index.md index fc1e07ac3..920f4ee4e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -104,6 +104,6 @@ SedonaDB has several advantages: ## Have questions? -Start a [GitHub Discussion ](https://github.com/apache/sedona-db/issues)or join the [Discord community](https://discord.com/invite/9A3k5dEBsY) and ask the developers any questions you may have. +Start a [GitHub Discussion](https://github.com/apache/sedona-db/issues)or join the [Discord community](https://discord.com/invite/9A3k5dEBsY) and ask the developers any questions you may have. We look forward to collaborating with you! diff --git a/docs/quickstart-python.ipynb b/docs/quickstart-python.ipynb index bfec4b4c0..931b35cfd 100644 --- a/docs/quickstart-python.ipynb +++ b/docs/quickstart-python.ipynb @@ -10,7 +10,7 @@ "SedonaDB for Python can be installed from [PyPI](https://pypi.org):\n", "\n", "```shell\n", - "pip install apache-sedona[db]\n", + "pip install \"apache-sedona[db]\"\n", "```\n", "\n", "If you can import the module and connect to a new session, you're good to go!" 
@@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "a06df76b", "metadata": {}, "outputs": [ @@ -28,7 +28,7 @@ "text": [ "┌────────────┐\n", "│ geom │\n", - "│ wkb │\n", + "│ geometry │\n", "╞════════════╡\n", "│ POINT(0 1) │\n", "└────────────┘\n" @@ -74,7 +74,7 @@ "text": [ "┌──────────────┬───────────────────────────────┐\n", "│ name ┆ geometry │\n", - "│ utf8view ┆ wkb_view │\n", + "│ utf8view ┆ geometry │\n", "╞══════════════╪═══════════════════════════════╡\n", "│ Vatican City ┆ POINT(12.4533865 41.9032822) │\n", "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", @@ -127,7 +127,7 @@ "text": [ "┌─────────────────────────────┬───────────────┬────────────────────────────────────────────────────┐\n", "│ name ┆ continent ┆ geometry │\n", - "│ utf8view ┆ utf8view ┆ wkb_view │\n", + "│ utf8view ┆ utf8view ┆ geometry │\n", "╞═════════════════════════════╪═══════════════╪════════════════════════════════════════════════════╡\n", "│ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180 -16.067132663642447,180 -16.55… │\n", "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", @@ -179,7 +179,7 @@ "text": [ "┌───────────────┬──────────────────────┬─────────────────────┬───────────────┬─────────────────────┐\n", "│ name ┆ geometry ┆ name ┆ continent ┆ geometry │\n", - "│ utf8view ┆ wkb_view ┆ utf8view ┆ utf8view ┆ wkb_view - -# Python Quickstart - -SedonaDB for Python can be installed from PyPI: - -```shell -pip install "apache-sedona[db]" -``` -!!!tip "Run this tutorial as in interactive notebook" - You can also download the `.ipynb` version of this file from the [SedonaDB GitHub](https://github.com/apache/sedona-db/blob/main/docs/quickstart-python.ipynb)and run it as an interactive notebook. - -## Import SedonaDB - -To get started, import the library and connect to a new session. You can run SQL queries directly on the session object. 
- -```python -import sedona.db - -sd = sedona.db.connect() -sd.sql("SELECT ST_Point(0, 1) as geom").show() -``` - -**Output:** - -```sh -┌────────────┐ -│ geom │ -│ wkb │ -╞════════════╡ -│ POINT(0 1) │ -└────────────┘ -``` - -## Spatial Join Example - -A common use case is performing a spatial join. -In this example, we'll find the country that each city belongs to by checking if the city's point geometry intersects with a country's polygon geometry. - -### Load Datasets - -First, load the cities and countries parquet files from their URLs into SedonaDB DataFrames. - -```python -cities_url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet" -countries_url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet" - -cities = sd.read_parquet(cities_url) -countries = sd.read_parquet(countries_url) -``` - -### Register Views - -To query these DataFrames using SQL, they must be registered as temporary views in the session. - -```python -cities.to_view("cities") -countries.to_view("countries") -``` - -### Run the Join Query - -Now you can run a SQL query using `ST_Intersects` to join the two views. 
- -```python -# Join the cities and countries tables -sd.sql(""" - SELECT - cities.name AS city, - countries.name AS country, - countries.continent - FROM cities - JOIN countries - WHERE ST_Intersects(cities.geometry, countries.geometry) -""").show() -``` - -**Output:** - -``` -┌───────────────┬─────────────────────────────┬───────────────┐ -│ city ┆ country ┆ continent │ -│ utf8view ┆ utf8view ┆ utf8view │ -╞═══════════════╪═════════════════════════════╪═══════════════╡ -│ Suva ┆ Fiji ┆ Oceania │ -├───────────────┼─────────────────────────────┼───────────────┤ -│ Dodoma ┆ United Republic of Tanzania ┆ Africa │ -├───────────────┼─────────────────────────────┼───────────────┤ -│ Dar es Salaam ┆ United Republic of Tanzania ┆ Africa │ -├───────────────┼─────────────────────────────┼───────────────┤ -│ Bir Lehlou ┆ Western Sahara ┆ Africa │ -... -└───────────────┴─────────────────────────────┴───────────────┘ -``` - -## Creating a DataFrame Manually - -You can also create a SedonaDB DataFrame from scratch using SQL `VALUES` clauses and geometry functions like `ST_GeomFromWkt`. - -```python -df = sd.sql(""" - SELECT * FROM (VALUES - ('one', ST_GeomFromWkt('POINT(1 2)')), - ('two', ST_GeomFromWkt('POLYGON((-74.0 40.7, -74.0 40.8, -73.9 40.8, -73.9 40.7, -74.0 40.7))')), - ('three', ST_GeomFromWkt('LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.8561 40.8484)'))) - AS t(val, point) -""") - -# Verify the object type -type(df) -``` - -**Output:** - -``` -sedonadb.dataframe.DataFrame -``` - -Once created, you can register it as a view and run further spatial operations on it. 
- -```python -df.to_view("fun_table") -sd.sql("SELECT *, ST_Centroid(point) AS centroid FROM fun_table").show() -``` - -**Output:** - -``` -┌───────┬─────────────────────────────────────────────┬────────────────────────────────────────────┐ -│ val ┆ point ┆ centroid │ -│ utf8 ┆ wkb ┆ wkb │ -╞═══════╪═════════════════════════════════════════════╪════════════════════════════════════════════╡ -│ one ┆ POINT(1 2) ┆ POINT(1 2) │ -├───────┼─────────────────────────────────────────────┼────────────────────────────────────────────┤ -│ two ┆ POLYGON((-74 40.7,-74 40.8,-73.9 40.8,-73.… ┆ POINT(-73.95000000000002 40.75) │ -├───────┼─────────────────────────────────────────────┼────────────────────────────────────────────┤ -│ three ┆ LINESTRING(-74.006 40.7128,-73.9352 40.730… ┆ POINT(-73.92111155675562 40.7664673976246… │ -└───────┴─────────────────────────────────────────────┴────────────────────────────────────────────┘ -``` diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css index 38d4d13ec..a7ad72235 100644 --- a/docs/stylesheets/extra.css +++ b/docs/stylesheets/extra.css @@ -55,4 +55,4 @@ /* We no longer need vertical padding for spacing */ padding: 0 0.9rem; font-size: 0.65rem; /* NEW: Adjust font size */ -} \ No newline at end of file +} From f67701215a526cd88f01219c9a5498b3d194cce5 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 14:08:19 -0700 Subject: [PATCH 20/25] adding file --- docs/quickstart-python.md | 217 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 docs/quickstart-python.md diff --git a/docs/quickstart-python.md b/docs/quickstart-python.md new file mode 100644 index 000000000..975069cc6 --- /dev/null +++ b/docs/quickstart-python.md @@ -0,0 +1,217 @@ +# Python Quickstart + +SedonaDB for Python can be installed from [PyPI](https://pypi.org): + +```shell +pip install "apache-sedona[db]" +``` + +If you can import the module and connect to a new session, you're good to go! 
+ + +```python +import sedona.db + +sd = sedona.db.connect() +sd.sql("SELECT ST_Point(0, 1) as geom").show() +``` + + ┌────────────┐ + │ geom │ + │ geometry │ + ╞════════════╡ + │ POINT(0 1) │ + └────────────┘ + +## Point in polygon join + +```python +cities = sd.read_parquet( + "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet" +) +``` + +```python +cities.show() +``` + + ┌──────────────┬───────────────────────────────┐ + │ name ┆ geometry │ + │ utf8view ┆ geometry │ + ╞══════════════╪═══════════════════════════════╡ + │ Vatican City ┆ POINT(12.4533865 41.9032822) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ San Marino ┆ POINT(12.4417702 43.9360958) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Vaduz ┆ POINT(9.5166695 47.1337238) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Lobamba ┆ POINT(31.1999971 -26.4666675) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Luxembourg ┆ POINT(6.1300028 49.6116604) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Palikir ┆ POINT(158.1499743 6.9166437) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Majuro ┆ POINT(171.3800002 7.1030043) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Funafuti ┆ POINT(179.2166471 -8.516652) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Melekeok ┆ POINT(134.6265485 7.4873962) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Bir Lehlou ┆ POINT(-9.6525222 26.1191667) │ + └──────────────┴───────────────────────────────┘ + +```python +countries = sd.read_parquet( + "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet" +) +``` + +```python +countries.show() +``` + + ┌─────────────────────────────┬───────────────┬────────────────────────────────────────────────────┐ + │ name ┆ continent ┆ geometry │ + │ utf8view ┆ utf8view ┆ geometry │ + 
╞═════════════════════════════╪═══════════════╪════════════════════════════════════════════════════╡ + │ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180 -16.067132663642447,180 -16.55… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ United Republic of Tanzania ┆ Africa ┆ POLYGON((33.90371119710453 -0.9500000000000001,34… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Western Sahara ┆ Africa ┆ POLYGON((-8.665589565454809 27.656425889592356,-8… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Canada ┆ North America ┆ MULTIPOLYGON(((-122.84000000000003 49.00000000000… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ United States of America ┆ North America ┆ MULTIPOLYGON(((-122.84000000000003 49.00000000000… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Kazakhstan ┆ Asia ┆ POLYGON((87.35997033076265 49.21498078062912,86.5… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Uzbekistan ┆ Asia ┆ POLYGON((55.96819135928291 41.30864166926936,55.9… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Papua New Guinea ┆ Oceania ┆ MULTIPOLYGON(((141.00021040259185 -2.600151055515… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Indonesia ┆ Asia ┆ MULTIPOLYGON(((141.00021040259185 -2.600151055515… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Argentina ┆ South America ┆ MULTIPOLYGON(((-68.63401022758323 -52.63637045887… │ + └─────────────────────────────┴───────────────┴────────────────────────────────────────────────────┘ + +```python 
+cities.to_view("cities") +countries.to_view("countries") +``` + +```python +# join the cities and countries tables +sd.sql(""" +select * from cities +join countries +where ST_Intersects(cities.geometry, countries.geometry) +""").show() +``` + + ┌───────────────┬──────────────────────┬─────────────────────┬───────────────┬─────────────────────┐ + │ name ┆ geometry ┆ name ┆ continent ┆ geometry │ + │ utf8view ┆ geometry ┆ utf8view ┆ utf8view ┆ geometry │ + ╞═══════════════╪══════════════════════╪═════════════════════╪═══════════════╪═════════════════════╡ + │ Suva ┆ POINT(178.4417073 -… ┆ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Dodoma ┆ POINT(35.7500036 -6… ┆ United Republic of… ┆ Africa ┆ POLYGON((33.903711… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Dar es Salaam ┆ POINT(39.266396 -6.… ┆ United Republic of… ┆ Africa ┆ POLYGON((33.903711… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Bir Lehlou ┆ POINT(-9.6525222 26… ┆ Western Sahara ┆ Africa ┆ POLYGON((-8.665589… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Ottawa ┆ POINT(-75.7019612 4… ┆ Canada ┆ North America ┆ MULTIPOLYGON(((-12… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Vancouver ┆ POINT(-123.1235901 … ┆ Canada ┆ North America ┆ MULTIPOLYGON(((-12… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Toronto ┆ POINT(-79.389458554… ┆ Canada ┆ North America ┆ MULTIPOLYGON(((-12… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ San Francisco ┆ POINT(-122.39959956… ┆ United States of A… ┆ North America ┆ MULTIPOLYGON(((-12… │ + 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Denver ┆ POINT(-104.9859618 … ┆ United States of A… ┆ North America ┆ MULTIPOLYGON(((-12… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Houston ┆ POINT(-95.348436256… ┆ United States of A… ┆ North America ┆ MULTIPOLYGON(((-12… │ + └───────────────┴──────────────────────┴─────────────────────┴───────────────┴─────────────────────┘ + +## Manually create SedonaDB DataFrames + +Let's create a DataFrame with one string column and one geometry column to show some of the functionality of the SedonaDB Python interface. + +```python +df = sd.sql(""" +SELECT * FROM (VALUES + ('one', ST_GeomFromWkt('POINT(1 2)')), + ('two', ST_GeomFromWkt('POLYGON((-74.0 40.7, -74.0 40.8, -73.9 40.8, -73.9 40.7, -74.0 40.7))')), + ('three', ST_GeomFromWkt('LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.8561 40.8484)'))) +AS t(val, point)""") +``` + +```python +df.show() +``` + + ┌───────┬──────────────────────────────────────────────────────────────────────────────────────────┐ + │ val ┆ point │ + │ utf8 ┆ binary │ + ╞═══════╪══════════════════════════════════════════════════════════════════════════════════════════╡ + │ one ┆ 0101000000000000000000f03f0000000000000040 │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ two ┆ 0103000000010000000500000000000000008052c09a9999999959444000000000008052c06666666666664… │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ three ┆ 010200000003000000aaf1d24d628052c05e4bc8073d5b444007ce1951da7b52c0933a014d845d4440c286a… │ + └───────┴──────────────────────────────────────────────────────────────────────────────────────────┘ + + +Verify that this object is a SedonaDB DataFrame. 
+ + +```python +type(df) +``` + + + + + sedonadb.dataframe.DataFrame + + + +Expose the DataFrame as a view and run a SQL operation on the geometry data. + + +```python +df.to_view("fun_table") +``` + + +```python +sd.sql("DESCRIBE fun_table").show() +``` + + ┌─────────────┬───────────┬─────────────┐ + │ column_name ┆ data_type ┆ is_nullable │ + │ utf8 ┆ utf8 ┆ utf8 │ + ╞═════════════╪═══════════╪═════════════╡ + │ val ┆ Utf8 ┆ YES │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ point ┆ Binary ┆ YES │ + └─────────────┴───────────┴─────────────┘ + + + +```python +sd.sql("SELECT *, ST_Centroid(ST_GeomFromWKB(point)) as centroid from fun_table").show() +``` + + ┌───────┬─────────────────────────────────────────────┬────────────────────────────────────────────┐ + │ val ┆ point ┆ centroid │ + │ utf8 ┆ binary ┆ geometry │ + ╞═══════╪═════════════════════════════════════════════╪════════════════════════════════════════════╡ + │ one ┆ 0101000000000000000000f03f0000000000000040 ┆ POINT(1 2) │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ two ┆ 0103000000010000000500000000000000008052c0… ┆ POINT(-73.95 40.75) │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ three ┆ 010200000003000000aaf1d24d628052c05e4bc807… ┆ POINT(-73.92111155675562 40.7664673976246… │ + └───────┴─────────────────────────────────────────────┴────────────────────────────────────────────┘ + From 5cbbd6f2cd57c2cac170a171748261957de5b3b3 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 15:30:14 -0700 Subject: [PATCH 21/25] adding generated file from notebook --- docs/index.md | 60 ++++++++++++++++++++++++++++++++------- docs/quickstart-python.md | 12 ++++++++ mkdocs.yml | 1 - 3 files changed, 62 insertions(+), 11 deletions(-) diff --git a/docs/index.md b/docs/index.md index 920f4ee4e..85ea8a9e2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -28,6 +28,18 @@ SedonaDB 
is a high-performance, dependency-free geospatial compute engine design The initial `0.1` release supports a core set of vector operations, with comprehensive vector and raster computation capabilities planned for the near future. +## Key features + +SedonaDB has several advantages: + +* **Blazing-Fast Performance:** Built in Rust to process massive geospatial datasets with exceptional speed. +* **Unified Geospatial Toolkit:** Access a comprehensive suite of functions for both vector and raster data in a single, powerful library. +* **Seamless Ecosystem Integration:** Built on Apache Arrow for smooth interoperability with popular data science libraries like GeoPandas, DuckDB, and Polars. +* **Flexible APIs:** Effortlessly switch between Python and SQL interfaces to match your preferred workflow and skill set. +* **Guaranteed CRS Propagation:** Automatically manages coordinate reference systems (CRS) to ensure spatial accuracy and prevent common errors. +* **Broad File Format Support:** Work with a wide range of both modern and legacy geospatial file formats like geoparquet. +* **Highly Extensible:** Easily customize and extend the library's functionality to meet your project's unique requirements. + ## Run a query in SQL, Python, or Rust SedonaDB offers a flexible query interface in SQL, Python, or Rust. @@ -90,20 +102,48 @@ Here's how to install SedonaDB with various build tools: install.packages("sedonadb", repos = "https://community.r-multiverse.org") ``` -## Key features +## Install SedonaDB CLI -SedonaDB has several advantages: +The SedonaDB command-line interface (CLI) is an interactive SQL shell for data analysis. For advanced usage, see the [DataFusion CLI docs](https://datafusion.apache.org/user-guide/cli/index.html). -* **Blazing-Fast Performance:** Built in Rust to process massive geospatial datasets with exceptional speed. 
-* **Unified Geospatial Toolkit:** Access a comprehensive suite of functions for both vector and raster data in a single, powerful library. -* **Seamless Ecosystem Integration:** Built on Apache Arrow for smooth interoperability with popular data science libraries like GeoPandas, DuckDB, and Polars. -* **Flexible APIs:** Effortlessly switch between Python and SQL interfaces to match your preferred workflow and skillset. -* **Guaranteed CRS Propagation:** Automatically manages coordinate reference systems (CRS) to ensure spatial accuracy and prevent common errors. -* **Broad File Format Support:** Work with a wide range of both modern and legacy geospatial file formats like geoparquet. -* **Highly Extensible:** Easily customize and extend the library's functionality to meet your project's unique requirements. +Install via Cargo: + +```shell +cargo install sedona-cli +``` + +### Usage + +Start the interactive shell by running `sedona-cli` in your terminal. All SQL queries must end with a semicolon (`;`). + +```shell +> sedona-cli +Sedona CLI v0.0.1 +``` + +```shell +> SELECT ST_Point(0, 1) as geom; + +┌────────────┐ +│ geom │ +│ wkb │ +╞════════════╡ +│ POINT(0 1) │ +└────────────┘ +1 row(s)/1 column(s) fetched. +Elapsed 0.024 seconds. + +``` + +For a full list of supported SQL functions, see the [SQL Reference](https://sedona.apache.org/latest/api/sql/Overview/). + +### Help + +* **Interactive Shell:** Use `\?` inside the shell to see special commands like `\d` (list tables) or `\q` (quit). +* **Command Line:** Use `sedona-cli --help` in your terminal to view launch options, such as setting a data path (`-p`) or executing a command (`-c`). ## Have questions? -Start a [GitHub Discussion](https://github.com/apache/sedona-db/issues)or join the [Discord community](https://discord.com/invite/9A3k5dEBsY) and ask the developers any questions you may have. 
+Start a [GitHub Discussion](https://github.com/apache/sedona-db/issues) or join the [Discord community](https://discord.com/invite/9A3k5dEBsY) and ask the developers any questions you may have. We look forward to collaborating with you! diff --git a/docs/quickstart-python.md b/docs/quickstart-python.md index 975069cc6..254535c62 100644 --- a/docs/quickstart-python.md +++ b/docs/quickstart-python.md @@ -23,14 +23,17 @@ sd.sql("SELECT ST_Point(0, 1) as geom").show() │ POINT(0 1) │ └────────────┘ + ## Point in polygon join + ```python cities = sd.read_parquet( "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet" ) ``` + ```python cities.show() ``` @@ -60,12 +63,15 @@ cities.show() │ Bir Lehlou ┆ POINT(-9.6525222 26.1191667) │ └──────────────┴───────────────────────────────┘ + + ```python countries = sd.read_parquet( "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet" ) ``` + ```python countries.show() ``` @@ -95,11 +101,14 @@ countries.show() │ Argentina ┆ South America ┆ MULTIPOLYGON(((-68.63401022758323 -52.63637045887… │ └─────────────────────────────┴───────────────┴────────────────────────────────────────────────────┘ + + ```python cities.to_view("cities") countries.to_view("countries") ``` + ```python # join the cities and countries tables sd.sql(""" @@ -134,10 +143,12 @@ where ST_Intersects(cities.geometry, countries.geometry) │ Houston ┆ POINT(-95.348436256… ┆ United States of A… ┆ North America ┆ MULTIPOLYGON(((-12… │ └───────────────┴──────────────────────┴─────────────────────┴───────────────┴─────────────────────┘ + ## Manually create SedonaDB DataFrames Let's create a DataFrame with one string column and one geometry column to show some of the functionality of the SedonaDB Python interface. 
+ ```python df = sd.sql(""" SELECT * FROM (VALUES @@ -147,6 +158,7 @@ SELECT * FROM (VALUES AS t(val, point)""") ``` + ```python df.show() ``` diff --git a/mkdocs.yml b/mkdocs.yml index b920833f5..f8f4d66ab 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,7 +6,6 @@ nav: - SedonaDB Guides: - Python Quickstart: quickstart-python.md - SedonaDB Guide: programming-guide.ipynb - - CLI Quickstart: quickstart-cli.md - Development: development.md - SedonaDB Reference: - Python: From 9d59ae444b052af93061e3157efaf2a8b725855d Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 15:43:40 -0700 Subject: [PATCH 22/25] adding to requirements.txt --- docs/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index a6c075f2c..7b12c371b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ +jupyter mike mkdocs-git-revision-date-localized-plugin mkdocs-glightbox @@ -5,5 +6,7 @@ mkdocs-jupyter mkdocs-macros-plugin mkdocs-material mkdocstrings[python] +nbconvert ruff +pre-commit pyproj From 9c7d825c73f1ce9c89a82e02f9f1b31258fc06e6 Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 15:58:22 -0700 Subject: [PATCH 23/25] changing lint file --- .github/workflows/lint.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 4f1ab3c0c..32c15672f 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -6,6 +6,11 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - uses: pre-commit/action@v3.0.1 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Install dependencies + run: pip install -r requirements.txt + + # The pre-commit/action is replaced with this manual command + - name: Run pre-commit checks + run: pre-commit run --all-files From 10c011a6435e43d05f4788dc8b13a6290ad982ef Mon 
Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 21:18:25 -0700 Subject: [PATCH 24/25] integrating feedback --- .github/workflows/lint.yml | 16 -------------- .pre-commit-config.yaml | 13 ----------- docs/index.md | 42 +----------------------------------- docs/programming-guide.ipynb | 4 ++-- docs/quickstart-python.md | 1 - docs/requirements.txt | 1 - 6 files changed, 3 insertions(+), 74 deletions(-) delete mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index 32c15672f..000000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: Lint and Test - -on: [push, pull_request] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - name: Install dependencies - run: pip install -r requirements.txt - - # The pre-commit/action is replaced with this manual command - - name: Run pre-commit checks - run: pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 76253a3ef..fefea8bb6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -57,16 +57,3 @@ repos: types_or: [c, c++] # Don't run on vendored files exclude: "^c/(sedona-geoarrow-c/src/geoarrow|sedona-geoarrow-c/src/nanoarrow|sedona-tg/src/tg)/.*" - - - repo: local - hooks: - - id: jupyter-nbconvert - name: Convert Jupyter Notebook to Markdown - # IMPORTANT: Change the path below to your actual notebook file - entry: jupyter nbconvert --to markdown --execute docs/quickstart-python.ipynb - language: system - types: [jupyter] - # This ensures the hook only runs when this specific notebook changes - files: ^docs/quickstart-python.ipynb$ - # Always run this hook at the commit stage - stages: [pre-commit] diff --git a/docs/index.md b/docs/index.md index 85ea8a9e2..45b2119b2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -32,7 +32,7 @@ The initial `0.1` release supports a core set of 
vector operations, with compreh SedonaDB has several advantages: -* **Blazing-Fast Performance:** Built in Rust to process massive geospatial datasets with exceptional speed. +* **Exceptional Performance:** Built in Rust to process massive geospatial datasets with exceptional speed. * **Unified Geospatial Toolkit:** Access a comprehensive suite of functions for both vector and raster data in a single, powerful library. * **Seamless Ecosystem Integration:** Built on Apache Arrow for smooth interoperability with popular data science libraries like GeoPandas, DuckDB, and Polars. * **Flexible APIs:** Effortlessly switch between Python and SQL interfaces to match your preferred workflow and skill set. @@ -102,46 +102,6 @@ Here's how to install SedonaDB with various build tools: install.packages("sedonadb", repos = "https://community.r-multiverse.org") ``` -## Install SedonaDB CLI - -The SedonaDB command-line interface (CLI) is an interactive SQL shell for data analysis. For advanced usage, see the [DataFusion CLI docs](https://datafusion.apache.org/user-guide/cli/index.html). - -Install via Cargo: - -```shell -cargo install sedona-cli -``` - -### Usage - -Start the interactive shell by running `sedona-cli` in your terminal. All SQL queries must end with a semicolon (`;`). - -```shell -> sedona-cli -Sedona CLI v0.0.1 -``` - -```shell -> SELECT ST_Point(0, 1) as geom; - -┌────────────┐ -│ geom │ -│ wkb │ -╞════════════╡ -│ POINT(0 1) │ -└────────────┘ -1 row(s)/1 column(s) fetched. -Elapsed 0.024 seconds. - -``` - -For a full list of supported SQL functions, see the [SQL Reference](https://sedona.apache.org/latest/api/sql/Overview/). - -### Help - -* **Interactive Shell:** Use `\?` inside the shell to see special commands like `\d` (list tables) or `\q` (quit). -* **Command Line:** Use `sedona-cli --help` in your terminal to view launch options, such as setting a data path (`-p`) or executing a command (`-c`). - ## Have questions? 
Start a [GitHub Discussion](https://github.com/apache/sedona-db/issues) or join the [Discord community](https://discord.com/invite/9A3k5dEBsY) and ask the developers any questions you may have. diff --git a/docs/programming-guide.ipynb b/docs/programming-guide.ipynb index 042beac66..93c7208d2 100644 --- a/docs/programming-guide.ipynb +++ b/docs/programming-guide.ipynb @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "deaa36db-2fee-4ba2-ab79-1dc756cb1655", "metadata": {}, "outputs": [], @@ -260,7 +260,7 @@ "df = sd.sql(\"\"\"\n", "SELECT name, ST_Point(lng, lat) AS location\n", "FROM (VALUES\n", - " (1, -74.0, 40.7, 'Ali ce'),\n", + " (1, -74.0, 40.7, 'Alice'),\n", " (2, -73.9, 40.8, 'Bob'),\n", " (3, -74.1, 40.6, 'Carol')\n", ") AS t(id, lng, lat, name)\n", diff --git a/docs/quickstart-python.md b/docs/quickstart-python.md index 254535c62..836d2bde0 100644 --- a/docs/quickstart-python.md +++ b/docs/quickstart-python.md @@ -226,4 +226,3 @@ sd.sql("SELECT *, ST_Centroid(ST_GeomFromWKB(point)) as centroid from fun_table" ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ three ┆ 010200000003000000aaf1d24d628052c05e4bc807… ┆ POINT(-73.92111155675562 40.7664673976246… │ └───────┴─────────────────────────────────────────────┴────────────────────────────────────────────┘ - diff --git a/docs/requirements.txt b/docs/requirements.txt index 7b12c371b..f6a1590e3 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -8,5 +8,4 @@ mkdocs-material mkdocstrings[python] nbconvert ruff -pre-commit pyproj From 1e9954538f060c07afcff57b7a467daacf280adb Mon Sep 17 00:00:00 2001 From: Kelly-Ann Dolor Date: Tue, 16 Sep 2025 21:22:48 -0700 Subject: [PATCH 25/25] changing lint file --- docs/reference/read-parquet-files.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/reference/read-parquet-files.md b/docs/reference/read-parquet-files.md 
index 9d9f919ca..6dc4836e4 100644 --- a/docs/reference/read-parquet-files.md +++ b/docs/reference/read-parquet-files.md @@ -28,8 +28,8 @@ The `sd.sql()` function is designed to query tables that have already been regis The correct process is a two-step approach: -1. **Load** the Parquet file into a DataFrame using `sd.read_parquet()`. -1. **Register** the DataFrame as a temporary view using `.createOrReplaceTempView()`. +1. **Load** the Parquet file into a data frame using `sd.read_parquet()`. +1. **Register** the data frame as a view with `to_view()`. 1. **Query** the view using `sd.sql()`. ```python linenums="1" title="Read a parquet file with SedonaDB" @@ -42,12 +42,12 @@ df = sd.read_parquet( 'building/building.parquet' ) -# Load the Parquet file, which creates a Pandas DataFrame +# Load the Parquet file, which creates a Pandas data frame df = sd.read_parquet('s3://wherobots-benchmark-prod/SpatialBench_sf=1_format=parquet/building/building.parquet') -# Convert the Pandas DataFrame to a Spark DataFrame AND +# Convert the Pandas data frame to a Spark data frame AND # register it as a temporary view in a single line. -spark.createDataFrame(df).createOrReplaceTempView('zone') +spark.createDataFrame(df).to_view("zone") # Now, query the view using SQL sd.sql("SELECT * FROM zone LIMIT 10").show()