2020#include " iceberg/sort_order.h"
2121
2222#include < format>
23+ #include < memory>
24+ #include < optional>
2325#include < ranges>
2426
27+ #include " iceberg/exception.h"
28+ #include " iceberg/expression/term.h"
29+ #include " iceberg/result.h"
30+ #include " iceberg/schema.h"
31+ #include " iceberg/sort_field.h"
32+ #include " iceberg/transform.h"
2533#include " iceberg/util/formatter.h" // IWYU pragma: keep
34+ #include " iceberg/util/macros.h"
2635
2736namespace iceberg {
2837
@@ -31,7 +40,7 @@ SortOrder::SortOrder(int32_t order_id, std::vector<SortField> fields)
3140
3241const std::shared_ptr<SortOrder>& SortOrder::Unsorted () {
3342 static const std::shared_ptr<SortOrder> unsorted =
34- std::make_shared<SortOrder>(/* order_id= */ 0 , std::vector<SortField>{});
43+ std::make_shared<SortOrder>(kUnsortedOrderId , std::vector<SortField>{});
3544 return unsorted;
3645}
3746
@@ -80,4 +89,114 @@ bool SortOrder::Equals(const SortOrder& other) const {
8089 return order_id_ == other.order_id_ && fields_ == other.fields_ ;
8190}
8291
// SortOrderBuilder implementation
93+
94+ struct SortOrderBuilder ::Impl {
95+ const Schema* schema;
96+ std::optional<int32_t > sort_id;
97+ std::vector<SortField> fields;
98+ bool case_sensitive{false };
99+
100+ explicit Impl (const Schema* schema) : schema(schema) {}
101+ };
102+
103+ SortOrderBuilder::~SortOrderBuilder () = default ;
104+
105+ SortOrderBuilder::SortOrderBuilder (SortOrderBuilder&&) noexcept = default ;
106+
107+ SortOrderBuilder& SortOrderBuilder::operator =(SortOrderBuilder&&) noexcept = default ;
108+
109+ SortOrderBuilder::SortOrderBuilder (const Schema* schema)
110+ : impl_(std::make_unique<Impl>(schema)) {}
111+
112+ std::unique_ptr<SortOrderBuilder> SortOrderBuilder::BuildFromSchema (
113+ const Schema* schema) {
114+ return std::unique_ptr<SortOrderBuilder>(new SortOrderBuilder (schema)); // NOLINT
115+ }
116+
117+ SortOrderBuilder& SortOrderBuilder::WithOrderId (int32_t sort_id) {
118+ impl_->sort_id = sort_id;
119+ return *this ;
120+ }
121+
122+ SortOrderBuilder& SortOrderBuilder::CaseSensitive (bool case_sensitive) {
123+ impl_->case_sensitive = case_sensitive;
124+ return *this ;
125+ }
126+
127+ Result<std::unique_ptr<SortOrder>> SortOrderBuilder::BuildUncheckd () {
128+ if (impl_->fields .empty ()) {
129+ if (impl_->sort_id .has_value () && impl_->sort_id != SortOrder::kUnsortedOrderId ) {
130+ return InvalidArgument (" Unsorted order ID must be 0" );
131+ }
132+ return std::make_unique<SortOrder>(SortOrder::kUnsortedOrderId ,
133+ std::vector<SortField>{});
134+ }
135+
136+ if (impl_->sort_id .has_value () && impl_->sort_id == SortOrder::kUnsortedOrderId ) {
137+ return InvalidArgument (" Sort order ID 0 is reserved for unsorted order" );
138+ }
139+
140+ // default ID to 1 as 0 is reserved for unsorted order
141+ return std::make_unique<SortOrder>(
142+ impl_->sort_id .value_or (SortOrder::kInitialSortOrderId ), std::move (impl_->fields ));
143+ }
144+
145+ Result<std::unique_ptr<SortOrder>> SortOrderBuilder::Build () {
146+ ICEBERG_ASSIGN_OR_RAISE (auto sort_order, BuildUncheckd ());
147+ ICEBERG_RETURN_UNEXPECTED (CheckCompatibility (sort_order, impl_->schema ));
148+ return sort_order;
149+ }
150+
151+ SortOrderBuilder& SortOrderBuilder::AddSortField (
152+ int32_t source_id, const std::shared_ptr<Transform>& transform,
153+ SortDirection direction, NullOrder null_order) {
154+ impl_->fields .emplace_back (source_id, transform, direction, null_order);
155+ return *this ;
156+ }
157+
// Appends a sort field described by an expression term and returns the builder
// for chaining.
//
// Accepted terms:
//  - NamedReference: bound to the schema; the field is added with
//    Transform::Identity().
//  - UnboundTransform: bound to the schema; the field is added with the bound
//    transform and the ID of the field it references.
//
// Binding uses the builder's case-sensitivity flag as set at the time of this
// call.
//
// Because the return type is a builder reference, failures cannot be returned
// as a Result:
//  - a null term, or a term of any other type, throws IcebergError;
//  - a builder created with a null schema throws IcebergError instead of
//    dereferencing the null pointer;
//  - a failed Bind() is reported through ICEBERG_CHECK.
//    NOTE(review): ICEBERG_CHECK presumably throws as well — confirm in
//    iceberg/util/macros.h.
SortOrderBuilder& SortOrderBuilder::AddSortField(const std::shared_ptr<Term>& term,
                                                 SortDirection direction,
                                                 NullOrder null_order) {
  // Classify the term first so an invalid term is always reported as such,
  // regardless of the schema state.
  const auto named_ref = std::dynamic_pointer_cast<NamedReference>(term);
  const auto unbound_transform =
      named_ref ? nullptr : std::dynamic_pointer_cast<UnboundTransform>(term);

  if (!named_ref && !unbound_transform) {
    throw IcebergError(std::format(
        "Invalid term: {}, expected either a named reference or an unbound transform",
        term ? term->ToString() : "null"));
  }

  // Both remaining paths dereference the schema to bind the term.
  if (impl_->schema == nullptr) {
    throw IcebergError("Cannot bind sort term: builder has no schema");
  }

  if (named_ref) {
    auto bound_ref = named_ref->Bind(*impl_->schema, impl_->case_sensitive);
    ICEBERG_CHECK(bound_ref.has_value(), "Failed to bind named reference to schema.");
    const int32_t source_id = bound_ref.value()->field().field_id();
    impl_->fields.emplace_back(source_id, Transform::Identity(), direction, null_order);
  } else {
    auto bound_transform =
        unbound_transform->Bind(*impl_->schema, impl_->case_sensitive);
    ICEBERG_CHECK(bound_transform.has_value(),
                  "Failed to bind unbound transform to schema.");
    const int32_t source_id =
        bound_transform.value()->reference()->field().field_id();
    impl_->fields.emplace_back(source_id, bound_transform.value()->transform(),
                               direction, null_order);
  }

  return *this;
}
181+
182+ Status SortOrderBuilder::CheckCompatibility (const std::unique_ptr<SortOrder>& sort_order,
183+ const Schema* schema) {
184+ for (const auto & field : sort_order->fields ()) {
185+ ICEBERG_ASSIGN_OR_RAISE (auto schema_field, schema->FindFieldById (field.source_id ()));
186+ if (schema_field == std::nullopt ) {
187+ return ValidationError (" Cannot find source column for sort field: {}" , field);
188+ }
189+
190+ const auto & source_type = schema_field.value ().get ().type ();
191+
192+ if (!source_type->is_primitive ()) {
193+ return ValidationError (" Cannot sort by non-primitive source field: {}" ,
194+ *source_type);
195+ }
196+
197+ ICEBERG_RETURN_UNEXPECTED (field.transform ()->ResultType (source_type));
198+ }
199+ return {};
200+ }
201+
83202} // namespace iceberg
0 commit comments