From c9a95882dc1af92787f749d8108e8a07de0852d3 Mon Sep 17 00:00:00 2001 From: Emilio Lahr-Vivaz Date: Mon, 18 May 2026 10:07:03 -0400 Subject: [PATCH] GEOMESA-3582 FSDS - Support optimized stats count and min/max queries --- .../geomesa/fs/data/FileSystemDataStore.scala | 13 ++- .../fs/data/stats/FileSystemStats.scala | 102 ++++++++++++++++++ .../fs/data/FileSystemDataStoreTest.scala | 9 ++ .../index/geotools/GeoMesaFeatureSource.scala | 2 +- .../index/geotools/GeoMesaFeatureStore.scala | 2 +- .../geomesa/index/geotools/package.scala | 2 +- 6 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/stats/FileSystemStats.scala diff --git a/geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/FileSystemDataStore.scala b/geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/FileSystemDataStore.scala index 62187ef5c784..4f70503b356b 100644 --- a/geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/FileSystemDataStore.scala +++ b/geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/FileSystemDataStore.scala @@ -16,8 +16,8 @@ import org.geotools.api.feature.simple.SimpleFeatureType import org.geotools.data.store.{ContentDataStore, ContentEntry, ContentFeatureSource} import org.geotools.feature.NameImpl import org.locationtech.geomesa.fs.data.FileSystemDataStore.FileSystemDataStoreConfig +import org.locationtech.geomesa.fs.data.stats.FileSystemStats import org.locationtech.geomesa.fs.storage.core.{FileSystemContext, FileSystemStorage, FileSystemStorageFactory, StorageMetadataCatalog} -import org.locationtech.geomesa.index.stats.RunnableStats.UnoptimizedRunnableStats import org.locationtech.geomesa.index.stats.{GeoMesaStats, HasGeoMesaStats} import org.locationtech.geomesa.utils.geotools.SimpleFeatureTypes import org.locationtech.geomesa.utils.index.GeoMesaSchemaValidator @@ -45,7 +45,7 @@ class FileSystemDataStore(storageFactory: FileSystemStorageFactory, catalog: Sto config.context.namespace.foreach(setNamespaceURI) - override val stats: GeoMesaStats = new UnoptimizedRunnableStats(this) + override val stats: GeoMesaStats = new FileSystemStats(this) override def createTypeNames(): java.util.List[Name] = { val names = new java.util.ArrayList[Name]() @@ -100,6 +100,15 @@ class FileSystemDataStore(storageFactory: FileSystemStorageFactory, catalog: Sto } object FileSystemDataStore { + + /** + * Config options + * + * @param context handle to the file system + * @param readThreads number of threads per read + * @param writeTimeout write timeout + * @param queryTimeout read timeout + */ case class FileSystemDataStoreConfig( context: FileSystemContext, readThreads: Int, diff --git a/geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/stats/FileSystemStats.scala b/geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/stats/FileSystemStats.scala new file mode 100644 index 000000000000..ba365a001ed6 --- /dev/null +++ b/geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/stats/FileSystemStats.scala @@ -0,0 +1,102 @@ +/*********************************************************************** + * Copyright (c) 2013-2025 General Atomics Integrated Intelligence, Inc. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, Version 2.0 + * which accompanies this distribution and is available at + * https://www.apache.org/licenses/LICENSE-2.0 + ***********************************************************************/ + +package org.locationtech.geomesa.fs.data.stats + +import org.geotools.api.feature.simple.SimpleFeatureType +import org.geotools.api.filter.Filter +import org.geotools.util.factory.Hints +import org.locationtech.geomesa.features.ScalaSimpleFeature +import org.locationtech.geomesa.fs.data.FileSystemDataStore +import org.locationtech.geomesa.index.index.attribute.AttributeIndexKey +import org.locationtech.geomesa.index.stats.RunnableStats.UnoptimizedRunnableStats +import org.locationtech.geomesa.index.stats.impl.MinMax +import org.locationtech.geomesa.index.stats.impl.MinMax.MinMaxDefaults +import org.locationtech.jts.geom.Point + +/** + * Optimized stats using per-file bounds for non-exact cases + * + * @param ds datastore + */ +class FileSystemStats(ds: FileSystemDataStore) extends UnoptimizedRunnableStats(ds) { + + import org.locationtech.geomesa.fs.storage.core.RichSimpleFeatureType + + override def getCount( + sft: SimpleFeatureType, + filter: Filter, + exact: Boolean, + queryHints: Hints): Option[Long] = { + if (!exact || filter == Filter.INCLUDE) { + Some(ds.storage(sft.getTypeName).metadata.getFiles(filter).map(_.count).sum) + } else { + super.getCount(sft, filter, exact, queryHints) + } + } + + override def getMinMax[T]( + sft: SimpleFeatureType, + attribute: String, + filter: Filter, + exact: Boolean): Option[MinMax[T]] = { + if (!exact || filter == Filter.INCLUDE) { + val i = sft.indexOf(attribute) + if (sft.nonSpatialBounds().contains(i)) { + val binding = sft.getDescriptor(i).getType.getBinding + val minMax = new MinMax[T](sft, sft.getDescriptor(i).getLocalName)(MinMaxDefaults(binding)) + + var min: String = null + var max: String = null + ds.storage(sft.getTypeName).metadata.getFiles(filter).foreach { file => + file.attributeBounds.filter(_.attribute == i).foreach { bounds => + if (max == null || max < bounds.upper) { + max = bounds.upper + } + if (min == null || min > bounds.lower) { + min = bounds.lower + } + } + } + if (min != null) { + val alias = AttributeIndexKey.alias(binding) + val sf = new ScalaSimpleFeature(sft, "") + Seq(min, max).foreach { value => + sf.setAttribute(i, AttributeIndexKey.decode(alias, value)) + minMax.observe(sf) + } + } + Some(minMax) + } else if (sft.spatialBounds().contains(i) && sft.getDescriptor(i).getType.getBinding == classOf[Point]) { + val minMax = new MinMax[T](sft, sft.getDescriptor(i).getLocalName)(MinMaxDefaults(classOf[Point])) + + var xmin, ymin, xmax, ymax: java.lang.Double = null + ds.storage(sft.getTypeName).metadata.getFiles(filter).foreach { file => + file.spatialBounds.filter(_.attribute == i).foreach { bounds => + xmin = if (xmin == null) { bounds.xmin } else { math.min(xmin, bounds.xmin) } + xmax = if (xmax == null) { bounds.xmax } else { math.max(xmax, bounds.xmax) } + ymin = if (ymin == null) { bounds.ymin } else { math.min(ymin, bounds.ymin) } + ymax = if (ymax == null) { bounds.ymax } else { math.max(ymax, bounds.ymax) } + } + } + if (xmin != null) { + val sf = new ScalaSimpleFeature(sft, "") + Seq(xmin -> ymin, xmax -> ymax).foreach { case (x, y) => + sf.setAttribute(i, s"POINT ($x $y)") + minMax.observe(sf) + } + } + Some(minMax) + } else { + super.getMinMax(sft, attribute, filter, exact) + } + } else { + super.getMinMax(sft, attribute, filter, exact) + } + } +} diff --git a/geomesa-fs/geomesa-fs-datastore/src/test/scala/org/locationtech/geomesa/fs/data/FileSystemDataStoreTest.scala b/geomesa-fs/geomesa-fs-datastore/src/test/scala/org/locationtech/geomesa/fs/data/FileSystemDataStoreTest.scala index adbd5bc4da0e..c0898fa601d1 100644 --- a/geomesa-fs/geomesa-fs-datastore/src/test/scala/org/locationtech/geomesa/fs/data/FileSystemDataStoreTest.scala +++ b/geomesa-fs/geomesa-fs-datastore/src/test/scala/org/locationtech/geomesa/fs/data/FileSystemDataStoreTest.scala @@ -173,6 +173,15 @@ class FileSystemDataStoreTest extends SpecificationWithJUnit with BeforeAfterAll fs2.getCount(Query.ALL) must beEqualTo(10) fs2.getBounds must equalTo(new ReferencedEnvelope(10.0, 10.0, 10.0, 10.9, CRS_EPSG_4326)) } + + // test stats queries + ds.stats.getCount(sft) must beSome(10L) + ds.stats.getCount(sft, exact = true) must beSome(10L) + val minMax = ds.stats.getMinMax[String](sft, "name").orNull + minMax must not(beNull) + minMax.min mustEqual "test0" + minMax.max mustEqual "test9" + ds.stats.getMinMax[Int](sft, "age") must beNone // only attributes with fs.bounds will return cached stats } } } diff --git a/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureSource.scala b/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureSource.scala index 76ad8fa293cd..64ceca38eb3d 100644 --- a/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureSource.scala +++ b/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureSource.scala @@ -30,7 +30,7 @@ import java.util import java.util.Collections import scala.util.Try -class GeoMesaFeatureSource(val ds: GeoMeasBaseStore, val sft: SimpleFeatureType) +class GeoMesaFeatureSource(val ds: GeoMesaBaseStore, val sft: SimpleFeatureType) extends SimpleFeatureSource with LazyLogging { lazy private val hints = Collections.unmodifiableSet(Collections.emptySet[Key]) diff --git a/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureStore.scala b/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureStore.scala index 8dfd8edf0fa3..5482ec611e0a 100644 --- a/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureStore.scala +++ b/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureStore.scala @@ -24,7 +24,7 @@ import org.locationtech.geomesa.utils.io.WithClose import java.util.Collections import scala.collection.mutable.ArrayBuffer -class GeoMesaFeatureStore(ds: GeoMeasBaseStore, sft: SimpleFeatureType) +class GeoMesaFeatureStore(ds: GeoMesaBaseStore, sft: SimpleFeatureType) extends GeoMesaFeatureSource(ds, sft) with SimpleFeatureStore { private var transaction: Transaction = Transaction.AUTO_COMMIT diff --git a/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/package.scala b/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/package.scala index 1279273dcf66..c65c2989f781 100644 --- a/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/package.scala +++ b/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/package.scala @@ -14,5 +14,5 @@ import org.locationtech.geomesa.index.stats.HasGeoMesaStats package object geotools { - type GeoMeasBaseStore = DataStore with HasGeoMesaFeatureReader with HasGeoMesaStats + type GeoMesaBaseStore = DataStore with HasGeoMesaFeatureReader with HasGeoMesaStats }