Skip to content

Commit b1c0133

Browse files
authored
[feature](function) Add ST_NumGeometries, ST_NumPoints, ST_Geometries functions (#63049)
… functions

### What problem does this PR solve?

Issue Number: ref #48203

Related PR: apache/doris-website#3623

Problem Summary: Add three new spatial functions for geometry collection operations:

- `ST_NumGeometries`: Returns the number of sub-geometries in a geometry object.
- `ST_NumPoints`: Returns the total number of vertices (points) in a geometry object.
- `ST_Geometries`: Decomposes a geometry object into an array of its sub-geometries.
1 parent ff1012d commit b1c0133

12 files changed

Lines changed: 1892 additions & 0 deletions

File tree

be/src/exprs/function/geo/functions_geo.cpp

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@
2828
#include "core/block/block.h"
2929
#include "core/block/column_with_type_and_name.h"
3030
#include "core/column/column.h"
31+
#include "core/column/column_array.h"
3132
#include "core/column/column_execute_util.h"
3233
#include "core/column/column_nullable.h"
34+
#include "core/data_type/data_type_array.h"
3335
#include "core/data_type/data_type_nullable.h"
3436
#include "core/data_type/data_type_number.h"
3537
#include "core/data_type/data_type_string.h"
@@ -917,6 +919,165 @@ struct StDistance {
917919
}
918920
};
919921

922+
struct StNumGeometries {
923+
static constexpr auto NAME = "st_numgeometries";
924+
static const size_t NUM_ARGS = 1;
925+
using Type = DataTypeInt64;
926+
927+
static Status execute(Block& block, const ColumnNumbers& arguments, size_t result) {
928+
DCHECK_EQ(arguments.size(), 1);
929+
930+
auto col = ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[0]).column);
931+
const auto size = col.size();
932+
933+
auto res = ColumnInt64::create();
934+
res->reserve(size);
935+
936+
auto null_map = ColumnUInt8::create(size, 0);
937+
auto& null_map_data = null_map->get_data();
938+
939+
for (int row = 0; row < size; ++row) {
940+
auto value = col.value_at(row);
941+
auto shape = GeoShape::from_encoded(value.data, value.size);
942+
if (!shape) {
943+
null_map_data[row] = 1;
944+
res->insert_default();
945+
continue;
946+
}
947+
948+
res->insert_value(shape->num_geometries());
949+
}
950+
951+
block.replace_by_position(result,
952+
ColumnNullable::create(std::move(res), std::move(null_map)));
953+
return Status::OK();
954+
}
955+
};
956+
957+
class FunctionStGeometries final : public IFunction {
958+
public:
959+
static constexpr auto name = "st_geometries";
960+
961+
static FunctionPtr create() { return std::make_shared<FunctionStGeometries>(); }
962+
963+
String get_name() const override { return name; }
964+
965+
size_t get_number_of_arguments() const override { return 1; }
966+
967+
bool is_variadic() const override { return false; }
968+
969+
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
970+
return make_nullable(
971+
std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
972+
}
973+
974+
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
975+
uint32_t result, size_t input_rows_count) const override {
976+
DCHECK_EQ(arguments.size(), 1);
977+
978+
auto col = ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[0]).column);
979+
const auto size = col.size();
980+
981+
auto nested_data = ColumnString::create();
982+
auto offsets_col = ColumnArray::ColumnOffsets::create();
983+
auto& offsets = offsets_col->get_data();
984+
offsets.reserve(size);
985+
986+
auto null_map = ColumnUInt8::create(size, 0);
987+
auto& null_map_data = null_map->get_data();
988+
989+
size_t current_offset = 0;
990+
std::string buf;
991+
992+
for (size_t row = 0; row < size; ++row) {
993+
auto shape_value = col.value_at(row);
994+
auto shape = GeoShape::from_encoded(shape_value.data, shape_value.size);
995+
996+
if (!shape) {
997+
null_map_data[row] = 1;
998+
offsets.push_back(current_offset);
999+
continue;
1000+
}
1001+
1002+
if (shape->type() == GEO_SHAPE_MULTI_POLYGON) {
1003+
auto* multi_polygon = static_cast<GeoMultiPolygon*>(shape.get());
1004+
const auto& polygons = multi_polygon->polygons();
1005+
1006+
if (polygons.empty()) {
1007+
null_map_data[row] = 1;
1008+
offsets.push_back(current_offset);
1009+
continue;
1010+
}
1011+
1012+
for (const auto& polygon : polygons) {
1013+
DCHECK(polygon != nullptr);
1014+
buf.clear();
1015+
polygon->encode_to(&buf);
1016+
nested_data->insert_data(buf.data(), buf.size());
1017+
++current_offset;
1018+
}
1019+
} else {
1020+
nested_data->insert_data(shape_value.data, shape_value.size);
1021+
++current_offset;
1022+
}
1023+
1024+
offsets.push_back(current_offset);
1025+
}
1026+
1027+
auto nested_null_map = ColumnUInt8::create(nested_data->size(), 0);
1028+
auto nested_nullable =
1029+
ColumnNullable::create(std::move(nested_data), std::move(nested_null_map));
1030+
auto array_col = ColumnArray::create(std::move(nested_nullable), std::move(offsets_col));
1031+
1032+
block.replace_by_position(
1033+
result, ColumnNullable::create(std::move(array_col), std::move(null_map)));
1034+
1035+
return Status::OK();
1036+
}
1037+
};
1038+
1039+
struct StNumPoints {
1040+
static constexpr auto NAME = "st_numpoints";
1041+
static const size_t NUM_ARGS = 1;
1042+
using Type = DataTypeInt64;
1043+
1044+
static Status execute(Block& block, const ColumnNumbers& arguments, size_t result) {
1045+
DCHECK_EQ(arguments.size(), 1);
1046+
1047+
auto col = ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[0]).column);
1048+
const auto size = col.size();
1049+
1050+
auto res = ColumnInt64::create();
1051+
res->reserve(size);
1052+
1053+
auto null_map = ColumnUInt8::create(size, 0);
1054+
auto& null_map_data = null_map->get_data();
1055+
1056+
for (int row = 0; row < size; ++row) {
1057+
auto value = col.value_at(row);
1058+
auto shape = GeoShape::from_encoded(value.data, value.size);
1059+
if (!shape) {
1060+
null_map_data[row] = 1;
1061+
res->insert_default();
1062+
continue;
1063+
}
1064+
1065+
auto num_points = shape->num_points();
1066+
if (num_points < 0) {
1067+
null_map_data[row] = 1;
1068+
res->insert_default();
1069+
continue;
1070+
}
1071+
1072+
res->insert_value(num_points);
1073+
}
1074+
1075+
block.replace_by_position(result,
1076+
ColumnNullable::create(std::move(res), std::move(null_map)));
1077+
return Status::OK();
1078+
}
1079+
};
1080+
9201081
void register_function_geo(SimpleFunctionFactory& factory) {
9211082
factory.register_function<GeoFunction<StPoint>>();
9221083
factory.register_function<GeoFunction<StAsText<StAsWktName>>>();
@@ -947,6 +1108,10 @@ void register_function_geo(SimpleFunctionFactory& factory) {
9471108
factory.register_function<GeoFunction<StLength>>();
9481109
factory.register_function<GeoFunction<StGeometryType>>();
9491110
factory.register_function<GeoFunction<StDistance>>();
1111+
factory.register_function<GeoFunction<StNumGeometries>>();
1112+
factory.register_function<GeoFunction<StNumPoints>>();
1113+
factory.register_alias("st_numpoints", "st_npoints");
1114+
factory.register_function<FunctionStGeometries>();
9501115
}
9511116

9521117
} // namespace doris

be/src/exprs/function/geo/geo_types.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1929,6 +1929,21 @@ double GeoMultiPolygon::Distance(const GeoShape* rhs) const {
19291929
return (min_distance == std::numeric_limits<double>::max()) ? -1.0 : min_distance;
19301930
}
19311931

1932+
int GeoPolygon::num_points() const {
1933+
return _polygon->num_vertices() + _polygon->num_loops();
1934+
}
1935+
1936+
int GeoMultiPolygon::num_points() const {
1937+
int total = 0;
1938+
for (const auto& polygon : _polygons) {
1939+
DCHECK(polygon != nullptr);
1940+
int point_count = polygon->num_points();
1941+
DCHECK_GE(point_count, 0);
1942+
total += point_count;
1943+
}
1944+
return total;
1945+
}
1946+
19321947
double GeoCircle::Distance(const GeoShape* rhs) const {
19331948
// Both rhs and self are guaranteed to be valid by StDistance (functions_geo.cpp)
19341949
double circle_radius = S2Earth::ToMeters(_cap->radius());

be/src/exprs/function/geo/geo_types.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ class GeoShape {
8181

8282
static bool ComputeArea(GeoShape* rhs, double* angle, std::string square_unit);
8383

84+
virtual int num_geometries() const { return 1; }
85+
virtual int num_points() const { return -1; }
86+
8487
protected:
8588
virtual void encode(std::string* buf) = 0;
8689
virtual bool decode(const void* data, size_t size) = 0;
@@ -125,6 +128,9 @@ class GeoPoint : public GeoShape {
125128
double x() const;
126129
double y() const;
127130

131+
int num_geometries() const override { return 1; }
132+
int num_points() const override { return 1; }
133+
128134
protected:
129135
void encode(std::string* buf) override;
130136
bool decode(const void* data, size_t size) override;
@@ -161,6 +167,9 @@ class GeoLine : public GeoShape {
161167
int numPoint() const;
162168
const S2Point* getPoint(int i) const;
163169

170+
int num_geometries() const override { return 1; }
171+
int num_points() const override { return numPoint(); }
172+
164173
protected:
165174
void encode(std::string* buf) override;
166175
bool decode(const void* data, size_t size) override;
@@ -199,6 +208,9 @@ class GeoPolygon : public GeoShape {
199208
double Distance(const GeoShape* rhs) const override;
200209
S2Loop* getLoop(int i) const;
201210

211+
int num_geometries() const override { return 1; }
212+
int num_points() const override;
213+
202214
protected:
203215
void encode(std::string* buf) override;
204216
bool decode(const void* data, size_t size) override;
@@ -232,6 +244,9 @@ class GeoMultiPolygon : public GeoShape {
232244
double Length() const override;
233245
double Distance(const GeoShape* rhs) const override;
234246

247+
int num_geometries() const override { return static_cast<int>(_polygons.size()); }
248+
int num_points() const override;
249+
235250
protected:
236251
void encode(std::string* buf) override;
237252
bool decode(const void* data, size_t size) override;

0 commit comments

Comments
 (0)