Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ import org.json4s.{DefaultFormats, Extraction, JField, JNothing, JNull, JObject,
* @param geometryTypes
* The geometry types of all geometries, or an empty array if they are not known.
* @param bbox
* Bounding Box of the geometries in the file, formatted according to RFC 7946, section 5.
* Bounding Box of the geometries in the file, formatted according to RFC 7946, section 5. None
* if the file contains no geometries (per the GeoParquet 1.1 spec, bbox is optional and should
* be omitted when there is no extent to describe).
* @param crs
* The CRS of the geometries in the file. None if crs metadata is absent, Some(JNull) if crs is
* null, Some(value) if the crs is present and not null.
Expand All @@ -44,7 +46,7 @@ import org.json4s.{DefaultFormats, Extraction, JField, JNothing, JNull, JObject,
case class GeometryFieldMetaData(
encoding: String,
geometryTypes: Seq[String],
bbox: Seq[Double],
bbox: Option[Seq[Double]],
crs: Option[JValue] = None,
covering: Option[Covering] = None)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ object GeoParquetSpatialFilter {
extends GeoParquetSpatialFilter {
def evaluate(columns: Map[String, GeometryFieldMetaData]): Boolean = {
columns.get(columnName).forall { column =>
val bbox = column.bbox
val bbox = column.bbox.getOrElse(return true)
if (bbox.isEmpty) {
return true
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,13 +245,19 @@ class GeoParquetWriteSupport extends WriteSupport[InternalRow] with Logging {
val columns = geometryColumnInfoMap.map { case (ordinal, columnInfo) =>
val columnName = schema.fields(ordinal).name
val geometryTypes = columnInfo.seenGeometryTypes.toSeq
// Omit bbox from column metadata when no geometries were observed (e.g. an empty
// Spark partition produces a zero-row file). Per the GeoParquet 1.1 spec, bbox is
// optional and represents the extent of the geometries in the file; emitting
// [0, 0, 0, 0] for an empty file falsely advertises data at Null Island and breaks
// bbox-based file pruning in downstream readers.
val bbox = if (geometryTypes.nonEmpty) {
Seq(
columnInfo.bbox.minX,
columnInfo.bbox.minY,
columnInfo.bbox.maxX,
columnInfo.bbox.maxY)
} else Seq(0.0, 0.0, 0.0, 0.0)
Some(
Seq(
columnInfo.bbox.minX,
columnInfo.bbox.minY,
columnInfo.bbox.maxX,
columnInfo.bbox.maxY))
} else None
val crs = geoParquetColumnCrsMap.getOrElse(
columnName, {
if (!userExplicitlySetDefaultCrs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ case class StacBatch(
val geometryFieldMetaData = GeometryFieldMetaData(
encoding = "WKB",
geometryTypes = geometryTypes,
bbox = bbox,
bbox = Some(bbox),
crs = None,
covering = None)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,12 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
validateGeoParquetMetadata(geoParquetSavePath) { geo =>
implicit val formats: org.json4s.Formats = org.json4s.DefaultFormats
val g0Types = (geo \ "columns" \ "g" \ "geometry_types").extract[Seq[String]]
val g0BBox = (geo \ "columns" \ "g" \ "bbox").extract[Seq[Double]]
assert(g0Types.isEmpty)
assert(g0BBox == Seq(0.0, 0.0, 0.0, 0.0))
// Per the GeoParquet spec, bbox is optional and represents the extent of the geometries
// in the file; for a file with no geometries we omit it entirely rather than emit a
// bogus [0, 0, 0, 0] (which would falsely advertise data at Null Island and break
// bbox-based file pruning in downstream readers). See issue #2880.
assert((geo \ "columns" \ "g" \ "bbox") == org.json4s.JNothing)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ object GeoParquetMetadataPartitionReaderFactory {
val columnMetadataFields: Array[Any] = Array(
UTF8String.fromString(columnMetadata.encoding),
new GenericArrayData(columnMetadata.geometryTypes.map(UTF8String.fromString).toArray),
new GenericArrayData(columnMetadata.bbox.toArray),
columnMetadata.bbox.map(b => new GenericArrayData(b.toArray)).orNull,
columnMetadata.crs
.map(projjson => UTF8String.fromString(compact(render(projjson))))
.getOrElse(UTF8String.fromString("")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ object GeoParquetMetadataPartitionReaderFactory {
val columnMetadataFields: Array[Any] = Array(
UTF8String.fromString(columnMetadata.encoding),
new GenericArrayData(columnMetadata.geometryTypes.map(UTF8String.fromString).toArray),
new GenericArrayData(columnMetadata.bbox.toArray),
columnMetadata.bbox.map(b => new GenericArrayData(b.toArray)).orNull,
columnMetadata.crs
.map(projjson => UTF8String.fromString(compact(render(projjson))))
.getOrElse(UTF8String.fromString("")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ object GeoParquetMetadataPartitionReaderFactory {
val columnMetadataFields: Array[Any] = Array(
UTF8String.fromString(columnMetadata.encoding),
new GenericArrayData(columnMetadata.geometryTypes.map(UTF8String.fromString).toArray),
new GenericArrayData(columnMetadata.bbox.toArray),
columnMetadata.bbox.map(b => new GenericArrayData(b.toArray)).orNull,
columnMetadata.crs
.map(projjson => UTF8String.fromString(compact(render(projjson))))
.getOrElse(UTF8String.fromString("")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ object GeoParquetMetadataPartitionReaderFactory {
val columnMetadataFields: Array[Any] = Array(
UTF8String.fromString(columnMetadata.encoding),
new GenericArrayData(columnMetadata.geometryTypes.map(UTF8String.fromString).toArray),
new GenericArrayData(columnMetadata.bbox.toArray),
columnMetadata.bbox.map(b => new GenericArrayData(b.toArray)).orNull,
columnMetadata.crs
.map(projjson => UTF8String.fromString(compact(render(projjson))))
.getOrElse(UTF8String.fromString("")),
Expand Down
Loading