2121
2222#include < functional>
2323#include < memory>
24- #include < span>
25- #include < stack>
2624#include < string>
2725#include < string_view>
2826#include < unordered_map>
3129
3230#include " iceberg/result.h"
3331#include " iceberg/schema_field.h"
34- #include " iceberg/type.h"
35- #include " iceberg/util/checked_cast.h"
36- #include " iceberg/util/formatter_internal.h"
32+ #include " iceberg/type_fwd.h"
3733#include " iceberg/util/string_util.h"
38- #include " iceberg/util/visit_type.h"
3934
/// \file iceberg/util/type_util.h
/// Utility functions and visitors for Iceberg types.
@@ -86,38 +81,11 @@ class NameToIdVisitor {
8681// / \brief Visitor for building a map from field ID to position path.
8782class PositionPathVisitor {
8883 public:
89- Status Visit (const PrimitiveType& type) {
90- if (current_field_id_ == kUnassignedFieldId ) {
91- return InvalidSchema (" Current field id is not assigned, type: {}" , type.ToString ());
92- }
93-
94- if (auto ret = position_path_.try_emplace (current_field_id_, current_path_);
95- !ret.second ) {
96- return InvalidSchema (" Duplicate field id found: {}, prev path: {}, curr path: {}" ,
97- current_field_id_, ret.first ->second , current_path_);
98- }
99-
100- return {};
101- }
102-
103- Status Visit (const StructType& type) {
104- for (size_t i = 0 ; i < type.fields ().size (); ++i) {
105- const auto & field = type.fields ()[i];
106- current_field_id_ = field.field_id ();
107- current_path_.push_back (i);
108- ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*field.type (), this ));
109- current_path_.pop_back ();
110- }
111- return {};
112- }
113-
114- // Non-struct types are not supported yet, but it is not an error.
115- Status Visit (const ListType& type) { return {}; }
116- Status Visit (const MapType& type) { return {}; }
117-
118- std::unordered_map<int32_t , std::vector<size_t >> Finish () {
119- return std::move (position_path_);
120- }
84+ Status Visit (const PrimitiveType& type);
85+ Status Visit (const StructType& type);
86+ Status Visit (const ListType& type);
87+ Status Visit (const MapType& type);
88+ std::unordered_map<int32_t , std::vector<size_t >> Finish ();
12189
12290 private:
12391 constexpr static int32_t kUnassignedFieldId = -1 ;
@@ -137,83 +105,14 @@ class PositionPathVisitor {
137105class PruneColumnVisitor {
138106 public:
139107 PruneColumnVisitor (const std::unordered_set<int32_t >& selected_ids,
140- bool select_full_types)
141- : selected_ids_(selected_ids), select_full_types_(select_full_types) {}
142-
143- Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<Type>& type) const {
144- switch (type->type_id ()) {
145- case TypeId::kStruct :
146- return Visit (internal::checked_pointer_cast<StructType>(type));
147- case TypeId::kList :
148- return Visit (internal::checked_pointer_cast<ListType>(type));
149- case TypeId::kMap :
150- return Visit (internal::checked_pointer_cast<MapType>(type));
151- default :
152- return nullptr ;
153- }
154- }
155-
156- Result<std::shared_ptr<Type>> Visit (const SchemaField& field) const {
157- if (selected_ids_.contains (field.field_id ())) {
158- return (select_full_types_ || field.type ()->is_primitive ()) ? field.type ()
159- : Visit (field.type ());
160- }
161- return Visit (field.type ());
162- }
108+ bool select_full_types);
163109
164- static SchemaField MakeField (const SchemaField& field, std::shared_ptr<Type> type) {
165- return {field.field_id (), std::string (field.name ()), std::move (type),
166- field.optional (), std::string (field.doc ())};
167- }
168-
169- Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<StructType>& type) const {
170- bool same_types = true ;
171- std::vector<SchemaField> selected_fields;
172- for (const auto & field : type->fields ()) {
173- ICEBERG_ASSIGN_OR_RAISE (auto child_type, Visit (field));
174- if (child_type) {
175- same_types = same_types && (child_type == field.type ());
176- selected_fields.emplace_back (MakeField (field, std::move (child_type)));
177- }
178- }
179-
180- if (selected_fields.empty ()) {
181- return nullptr ;
182- } else if (same_types && selected_fields.size () == type->fields ().size ()) {
183- return type;
184- }
185- return std::make_shared<StructType>(std::move (selected_fields));
186- }
187-
188- Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<ListType>& type) const {
189- const auto & elem_field = type->fields ()[0 ];
190- ICEBERG_ASSIGN_OR_RAISE (auto elem_type, Visit (elem_field));
191- if (elem_type == nullptr ) {
192- return nullptr ;
193- } else if (elem_type == elem_field.type ()) {
194- return type;
195- }
196- return std::make_shared<ListType>(MakeField (elem_field, std::move (elem_type)));
197- }
198-
199- Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<MapType>& type) const {
200- const auto & key_field = type->fields ()[0 ];
201- const auto & value_field = type->fields ()[1 ];
202- ICEBERG_ASSIGN_OR_RAISE (auto key_type, Visit (key_field));
203- ICEBERG_ASSIGN_OR_RAISE (auto value_type, Visit (value_field));
204-
205- if (key_type == nullptr && value_type == nullptr ) {
206- return nullptr ;
207- } else if (value_type == value_field.type () &&
208- (key_type == key_field.type () || key_type == nullptr )) {
209- return type;
210- } else if (value_type == nullptr ) {
211- return InvalidArgument (" Cannot project Map without value field" );
212- }
213- return std::make_shared<MapType>(
214- (key_type == nullptr ? key_field : MakeField (key_field, std::move (key_type))),
215- MakeField (value_field, std::move (value_type)));
216- }
110+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<Type>& type) const ;
111+ Result<std::shared_ptr<Type>> Visit (const SchemaField& field) const ;
112+ static SchemaField MakeField (const SchemaField& field, std::shared_ptr<Type> type);
113+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<StructType>& type) const ;
114+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<ListType>& type) const ;
115+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<MapType>& type) const ;
217116
218117 private:
219118 const std::unordered_set<int32_t >& selected_ids_;
@@ -229,35 +128,6 @@ class PruneColumnVisitor {
229128// / - All field IDs must be unique across the entire schema hierarchy
230129// / If the struct is part of a Schema, these invariants are enforced by
231130// / StructType::InitFieldById which checks for duplicate field IDs.
232- static std::unordered_map<int32_t , int32_t > indexParents (const StructType& root_struct) {
233- std::unordered_map<int32_t , int32_t > id_to_parent;
234- std::stack<int32_t > parent_id_stack;
235-
236- // Recursive function to visit and build parent relationships
237- std::function<void (const Type&)> visit = [&](const Type& type) -> void {
238- switch (type.type_id ()) {
239- case TypeId::kStruct :
240- case TypeId::kList :
241- case TypeId::kMap : {
242- const auto & nested_type = static_cast <const NestedType&>(type);
243- for (const auto & field : nested_type.fields ()) {
244- if (!parent_id_stack.empty ()) {
245- id_to_parent[field.field_id ()] = parent_id_stack.top ();
246- }
247- parent_id_stack.push (field.field_id ());
248- visit (*field.type ());
249- parent_id_stack.pop ();
250- }
251- break ;
252- }
253-
254- default :
255- break ;
256- }
257- };
258-
259- visit (root_struct);
260- return id_to_parent;
261- }
131+ std::unordered_map<int32_t , int32_t > indexParents (const StructType& root_struct);
262132
263133} // namespace iceberg
0 commit comments