2020#include " iceberg/sort_order.h"
2121
2222#include < format>
23+ #include < memory>
24+ #include < optional>
2325#include < ranges>
2426
27+ #include " iceberg/exception.h"
28+ #include " iceberg/expression/term.h"
29+ #include " iceberg/result.h"
30+ #include " iceberg/schema.h"
31+ #include " iceberg/sort_field.h"
32+ #include " iceberg/transform.h"
33+ #include " iceberg/util/checked_cast.h"
2534#include " iceberg/util/formatter.h" // IWYU pragma: keep
35+ #include " iceberg/util/macros.h"
2636
2737namespace iceberg {
2838
@@ -31,7 +41,7 @@ SortOrder::SortOrder(int32_t order_id, std::vector<SortField> fields)
3141
3242const std::shared_ptr<SortOrder>& SortOrder::Unsorted () {
3343 static const std::shared_ptr<SortOrder> unsorted =
34- std::make_shared<SortOrder>(/* order_id= */ 0 , std::vector<SortField>{});
44+ std::make_shared<SortOrder>(kUnsortedOrderId , std::vector<SortField>{});
3545 return unsorted;
3646}
3747
@@ -80,4 +90,110 @@ bool SortOrder::Equals(const SortOrder& other) const {
8090 return order_id_ == other.order_id_ && fields_ == other.fields_ ;
8191}
8292
93+ // SortOrderBuilder implementation
94+
95+ struct SortOrderBuilder ::Impl {
96+ const std::shared_ptr<Schema>& schema;
97+ std::optional<int32_t > sort_id;
98+ std::vector<SortField> fields;
99+ bool case_sensitive{false };
100+
101+ explicit Impl (const std::shared_ptr<Schema>& schema) : schema(schema) {}
102+ };
103+
104+ SortOrderBuilder::~SortOrderBuilder () = default ;
105+
106+ SortOrderBuilder::SortOrderBuilder (const std::shared_ptr<Schema>& schema)
107+ : impl_(std::make_unique<Impl>(schema)) {}
108+
109+ std::unique_ptr<SortOrderBuilder> SortOrderBuilder::BuildFromSchema (
110+ const std::shared_ptr<Schema>& schema) {
111+ return std::unique_ptr<SortOrderBuilder>(new SortOrderBuilder (schema)); // NOLINT
112+ }
113+
114+ SortOrderBuilder& SortOrderBuilder::WithOrderId (int32_t sort_id) {
115+ impl_->sort_id = sort_id;
116+ return *this ;
117+ }
118+
119+ SortOrderBuilder& SortOrderBuilder::CaseSensitive (bool case_sensitive) {
120+ impl_->case_sensitive = case_sensitive;
121+ return *this ;
122+ }
123+
124+ Result<std::shared_ptr<SortOrder>> SortOrderBuilder::BuildUncheckd () {
125+ if (impl_->fields .empty ()) {
126+ if (impl_->sort_id .has_value () && impl_->sort_id != SortOrder::kUnsortedOrderId ) {
127+ return InvalidArgument (" Unsorted order ID must be 0" );
128+ }
129+ return SortOrder::Unsorted ();
130+ }
131+
132+ if (impl_->sort_id .has_value () && impl_->sort_id == SortOrder::kUnsortedOrderId ) {
133+ return InvalidArgument (" Sort order ID 0 is reserved for unsorted order" );
134+ }
135+
136+ // default ID to 1 as 0 is reserved for unsorted order
137+ return std::make_shared<SortOrder>(
138+ impl_->sort_id .value_or (SortOrder::kInitialSortOrderId ), std::move (impl_->fields ));
139+ }
140+
141+ Result<std::shared_ptr<SortOrder>> SortOrderBuilder::Build () {
142+ ICEBERG_ASSIGN_OR_RAISE (auto sort_order, BuildUncheckd ());
143+ ICEBERG_RETURN_UNEXPECTED (CheckCompatibility (sort_order, impl_->schema ));
144+ return sort_order;
145+ }
146+
147+ SortOrderBuilder& SortOrderBuilder::AddSortField (
148+ int32_t source_id, const std::shared_ptr<Transform>& transform,
149+ SortDirection direction, NullOrder null_order) {
150+ impl_->fields .emplace_back (source_id, transform, direction, null_order);
151+ return *this ;
152+ }
153+
// Appends a sort field described by an unbound expression term.
//
// `term` must be either a NamedReference (the field is sorted with
// Transform::Identity()) or an UnboundTransform (the field is sorted with the
// transform carried by the bound term). The term is bound against the
// builder's schema using the configured case sensitivity, and the bound
// field's ID becomes the sort field's source ID.
//
// Throws IcebergError when `term` is null or of any other kind. A failed Bind()
// is reported through ICEBERG_CHECK. Returns the builder itself so calls can be
// chained.
SortOrderBuilder& SortOrderBuilder::AddSortField(const std::shared_ptr<Term>& term,
                                                 SortDirection direction,
                                                 NullOrder null_order) {
  // Type discrimination needs a cast that yields nullptr on mismatch so the
  // next branch can be tried, hence std::dynamic_pointer_cast.
  // internal::checked_pointer_cast is intended for casts already known to be
  // valid (NOTE(review): it presumably degrades to static_pointer_cast in
  // release builds -- confirm), which would let the first branch accept any
  // non-null term.
  if (auto named_ref = std::dynamic_pointer_cast<NamedReference>(term)) {
    auto bound_ref = named_ref->Bind(*impl_->schema, impl_->case_sensitive);
    ICEBERG_CHECK(bound_ref.has_value(), " Failed to bind named reference to schema.");
    int32_t source_id = bound_ref.value()->field().field_id();
    impl_->fields.emplace_back(source_id, Transform::Identity(), direction, null_order);
  } else if (auto unbound_transform =
                 std::dynamic_pointer_cast<UnboundTransform>(term)) {
    auto bound_transform = unbound_transform->Bind(*impl_->schema, impl_->case_sensitive);
    ICEBERG_CHECK(bound_transform.has_value(),
                  " Failed to bind unbound transform to schema.");
    int32_t source_id = bound_transform.value()->reference()->field().field_id();
    impl_->fields.emplace_back(source_id, bound_transform.value()->transform(), direction,
                               null_order);
  } else {
    // Reached for a null term as well as for any unsupported term type.
    throw IcebergError(std::format(
        " Invalid term: {}, expected either a named reference or an unbound transform",
        term ? term->ToString() : " null"));
  }

  return *this;
}
178+
179+ Status SortOrderBuilder::CheckCompatibility (const std::shared_ptr<SortOrder>& sort_order,
180+ const std::shared_ptr<Schema>& schema) {
181+ for (const auto & field : sort_order->fields ()) {
182+ ICEBERG_ASSIGN_OR_RAISE (auto schema_field, schema->FindFieldById (field.source_id ()));
183+ if (schema_field == std::nullopt ) {
184+ return ValidationError (" Cannot find source column for sort field: {}" , field);
185+ }
186+
187+ const auto & source_type = schema_field.value ().get ().type ();
188+
189+ if (!source_type->is_primitive ()) {
190+ return ValidationError (" Cannot sort by non-primitive source field: {}" ,
191+ *source_type);
192+ }
193+
194+ ICEBERG_RETURN_UNEXPECTED (field.transform ()->ResultType (source_type));
195+ }
196+ return {};
197+ }
198+
83199} // namespace iceberg
0 commit comments