Skip to content

Commit 9a8df2a

Browse files
authored
GEOMESA-3582 FSDS - Support optimized stats count and min/max queries (#3549)
1 parent a2e351e commit 9a8df2a

6 files changed

Lines changed: 125 additions & 5 deletions

File tree

geomesa-fs/geomesa-fs-datastore/src/main/scala/org/locationtech/geomesa/fs/data/FileSystemDataStore.scala

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ import org.geotools.api.feature.simple.SimpleFeatureType
1616
import org.geotools.data.store.{ContentDataStore, ContentEntry, ContentFeatureSource}
1717
import org.geotools.feature.NameImpl
1818
import org.locationtech.geomesa.fs.data.FileSystemDataStore.FileSystemDataStoreConfig
19+
import org.locationtech.geomesa.fs.data.stats.FileSystemStats
1920
import org.locationtech.geomesa.fs.storage.core.{FileSystemContext, FileSystemStorage, FileSystemStorageFactory, StorageMetadataCatalog}
20-
import org.locationtech.geomesa.index.stats.RunnableStats.UnoptimizedRunnableStats
2121
import org.locationtech.geomesa.index.stats.{GeoMesaStats, HasGeoMesaStats}
2222
import org.locationtech.geomesa.utils.geotools.SimpleFeatureTypes
2323
import org.locationtech.geomesa.utils.index.GeoMesaSchemaValidator
@@ -45,7 +45,7 @@ class FileSystemDataStore(storageFactory: FileSystemStorageFactory, catalog: Sto
4545

4646
config.context.namespace.foreach(setNamespaceURI)
4747

48-
override val stats: GeoMesaStats = new UnoptimizedRunnableStats(this)
48+
override val stats: GeoMesaStats = new FileSystemStats(this)
4949

5050
override def createTypeNames(): java.util.List[Name] = {
5151
val names = new java.util.ArrayList[Name]()
@@ -100,6 +100,15 @@ class FileSystemDataStore(storageFactory: FileSystemStorageFactory, catalog: Sto
100100
}
101101

102102
object FileSystemDataStore {
103+
104+
/**
105+
* Config options
106+
*
107+
* @param context handle to the file system
108+
* @param readThreads number of threads per read
109+
* @param writeTimeout write timeout
110+
* @param queryTimeout read timeout
111+
*/
103112
case class FileSystemDataStoreConfig(
104113
context: FileSystemContext,
105114
readThreads: Int,
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/***********************************************************************
2+
* Copyright (c) 2013-2025 General Atomics Integrated Intelligence, Inc.
3+
* All rights reserved. This program and the accompanying materials
4+
* are made available under the terms of the Apache License, Version 2.0
5+
* which accompanies this distribution and is available at
6+
* https://www.apache.org/licenses/LICENSE-2.0
7+
***********************************************************************/
8+
9+
package org.locationtech.geomesa.fs.data.stats
10+
11+
import org.geotools.api.feature.simple.SimpleFeatureType
12+
import org.geotools.api.filter.Filter
13+
import org.geotools.util.factory.Hints
14+
import org.locationtech.geomesa.features.ScalaSimpleFeature
15+
import org.locationtech.geomesa.fs.data.FileSystemDataStore
16+
import org.locationtech.geomesa.index.index.attribute.AttributeIndexKey
17+
import org.locationtech.geomesa.index.stats.RunnableStats.UnoptimizedRunnableStats
18+
import org.locationtech.geomesa.index.stats.impl.MinMax
19+
import org.locationtech.geomesa.index.stats.impl.MinMax.MinMaxDefaults
20+
import org.locationtech.jts.geom.Point
21+
22+
/**
23+
* Optimized stats using per-file bounds for non-exact cases
24+
*
25+
* @param ds datastore
26+
*/
27+
class FileSystemStats(ds: FileSystemDataStore) extends UnoptimizedRunnableStats(ds) {
28+
29+
import org.locationtech.geomesa.fs.storage.core.RichSimpleFeatureType
30+
31+
override def getCount(
32+
sft: SimpleFeatureType,
33+
filter: Filter,
34+
exact: Boolean,
35+
queryHints: Hints): Option[Long] = {
36+
if (!exact || filter == Filter.INCLUDE) {
37+
Some(ds.storage(sft.getTypeName).metadata.getFiles(filter).map(_.count).sum)
38+
} else {
39+
super.getCount(sft, filter, exact, queryHints)
40+
}
41+
}
42+
43+
override def getMinMax[T](
44+
sft: SimpleFeatureType,
45+
attribute: String,
46+
filter: Filter,
47+
exact: Boolean): Option[MinMax[T]] = {
48+
if (!exact || filter == Filter.INCLUDE) {
49+
val i = sft.indexOf(attribute)
50+
if (sft.nonSpatialBounds().contains(i)) {
51+
val binding = sft.getDescriptor(i).getType.getBinding
52+
val minMax = new MinMax[T](sft, sft.getDescriptor(i).getLocalName)(MinMaxDefaults(binding))
53+
54+
var min: String = null
55+
var max: String = null
56+
ds.storage(sft.getTypeName).metadata.getFiles(filter).foreach { file =>
57+
file.attributeBounds.filter(_.attribute == i).foreach { bounds =>
58+
if (max == null || max < bounds.upper) {
59+
max = bounds.upper
60+
}
61+
if (min == null || min > bounds.lower) {
62+
min = bounds.lower
63+
}
64+
}
65+
}
66+
if (min != null) {
67+
val alias = AttributeIndexKey.alias(binding)
68+
val sf = new ScalaSimpleFeature(sft, "")
69+
Seq(min, max).foreach { value =>
70+
sf.setAttribute(i, AttributeIndexKey.decode(alias, value))
71+
minMax.observe(sf)
72+
}
73+
}
74+
Some(minMax)
75+
} else if (sft.spatialBounds().contains(i) && sft.getDescriptor(i).getType.getBinding == classOf[Point]) {
76+
val minMax = new MinMax[T](sft, sft.getDescriptor(i).getLocalName)(MinMaxDefaults(classOf[Point]))
77+
78+
var xmin, ymin, xmax, ymax: java.lang.Double = null
79+
ds.storage(sft.getTypeName).metadata.getFiles(filter).foreach { file =>
80+
file.spatialBounds.filter(_.attribute == i).foreach { bounds =>
81+
xmin = if (xmin == null) { bounds.xmin } else { math.min(xmin, bounds.xmin) }
82+
xmax = if (xmax == null) { bounds.xmax } else { math.max(xmax, bounds.xmax) }
83+
ymin = if (ymin == null) { bounds.ymin } else { math.min(ymin, bounds.ymin) }
84+
ymax = if (ymax == null) { bounds.ymax } else { math.max(ymax, bounds.ymax) }
85+
}
86+
}
87+
if (xmin != null) {
88+
val sf = new ScalaSimpleFeature(sft, "")
89+
Seq(xmin -> ymin, xmax -> ymax).foreach { case (x, y) =>
90+
sf.setAttribute(i, s"POINT ($x $y)")
91+
minMax.observe(sf)
92+
}
93+
}
94+
Some(minMax)
95+
} else {
96+
super.getMinMax(sft, attribute, filter, exact)
97+
}
98+
} else {
99+
super.getMinMax(sft, attribute, filter, exact)
100+
}
101+
}
102+
}

geomesa-fs/geomesa-fs-datastore/src/test/scala/org/locationtech/geomesa/fs/data/FileSystemDataStoreTest.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,15 @@ class FileSystemDataStoreTest extends SpecificationWithJUnit with BeforeAfterAll
173173
fs2.getCount(Query.ALL) must beEqualTo(10)
174174
fs2.getBounds must equalTo(new ReferencedEnvelope(10.0, 10.0, 10.0, 10.9, CRS_EPSG_4326))
175175
}
176+
177+
// test stats queries
178+
ds.stats.getCount(sft) must beSome(10L)
179+
ds.stats.getCount(sft, exact = true) must beSome(10L)
180+
val minMax = ds.stats.getMinMax[String](sft, "name").orNull
181+
minMax must not(beNull)
182+
minMax.min mustEqual "test0"
183+
minMax.max mustEqual "test9"
184+
ds.stats.getMinMax[Int](sft, "age") must beNone // only attributes with fs.bounds will return cached stats
176185
}
177186
}
178187
}

geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureSource.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import java.util
3030
import java.util.Collections
3131
import scala.util.Try
3232

33-
class GeoMesaFeatureSource(val ds: GeoMeasBaseStore, val sft: SimpleFeatureType)
33+
class GeoMesaFeatureSource(val ds: GeoMesaBaseStore, val sft: SimpleFeatureType)
3434
extends SimpleFeatureSource with LazyLogging {
3535

3636
lazy private val hints = Collections.unmodifiableSet(Collections.emptySet[Key])

geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/GeoMesaFeatureStore.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import org.locationtech.geomesa.utils.io.WithClose
2424
import java.util.Collections
2525
import scala.collection.mutable.ArrayBuffer
2626

27-
class GeoMesaFeatureStore(ds: GeoMeasBaseStore, sft: SimpleFeatureType)
27+
class GeoMesaFeatureStore(ds: GeoMesaBaseStore, sft: SimpleFeatureType)
2828
extends GeoMesaFeatureSource(ds, sft) with SimpleFeatureStore {
2929

3030
private var transaction: Transaction = Transaction.AUTO_COMMIT

geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/geotools/package.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ import org.locationtech.geomesa.index.stats.HasGeoMesaStats
1414

1515
package object geotools {
1616

17-
type GeoMeasBaseStore = DataStore with HasGeoMesaFeatureReader with HasGeoMesaStats
17+
type GeoMesaBaseStore = DataStore with HasGeoMesaFeatureReader with HasGeoMesaStats
1818
}

0 commit comments

Comments
 (0)