2727
2828#include " iceberg/expression/json_serde_internal.h"
2929#include " iceberg/expression/literal.h"
30+ #include " iceberg/expression/predicate.h"
31+ #include " iceberg/expression/term.h"
32+ #include " iceberg/transform.h"
3033#include " iceberg/util/checked_cast.h"
3134#include " iceberg/util/json_util_internal.h"
3235#include " iceberg/util/macros.h"
36+ #include " iceberg/util/transform_util.h"
3337
3438namespace iceberg {
3539namespace {
// Object keys used by the JSON representation of expressions, terms and predicates.
constexpr std::string_view kType{"type"};
constexpr std::string_view kTerm{"term"};
constexpr std::string_view kTransform{"transform"};
constexpr std::string_view kValue{"value"};
constexpr std::string_view kValues{"values"};
constexpr std::string_view kLeft{"left"};
constexpr std::string_view kRight{"right"};
constexpr std::string_view kChild{"child"};
3649// Expression type strings
3750constexpr std::string_view kTypeTrue = " true" ;
3851constexpr std::string_view kTypeFalse = " false" ;
@@ -58,6 +71,43 @@ constexpr std::string_view kTypeCountNull = "count-null";
5871constexpr std::string_view kTypeCountStar = " count-star" ;
5972constexpr std::string_view kTypeMin = " min" ;
6073constexpr std::string_view kTypeMax = " max" ;
74+
75+ // / Helper to check if a JSON term represents a transform
76+ bool IsTransformTerm (const nlohmann::json& json) {
77+ return json.is_object () && json.contains (kType ) && json[kType ] == kTransform &&
78+ json.contains (kTerm );
79+ }
80+
81+ // / Template helper to create predicates from JSON with the appropriate term type
82+ template <typename B>
83+ Result<std::unique_ptr<UnboundPredicate>> MakePredicateFromJson (
84+ Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
85+ const nlohmann::json& json) {
86+ if (IsUnaryOperation (op)) {
87+ return UnboundPredicateImpl<B>::Make (op, std::move (term));
88+ }
89+
90+ if (IsSetOperation (op)) {
91+ std::vector<Literal> literals;
92+ if (!json.contains (kValues ) || !json[kValues ].is_array ()) [[unlikely]] {
93+ return JsonParseError (" Missing or invalid 'values' field for set operation: {}" ,
94+ SafeDumpJson (json));
95+ }
96+ for (const auto & val : json[kValues ]) {
97+ ICEBERG_ASSIGN_OR_RAISE (auto lit, LiteralFromJson (val));
98+ literals.push_back (std::move (lit));
99+ }
100+ return UnboundPredicateImpl<B>::Make (op, std::move (term), std::move (literals));
101+ }
102+
103+ // Literal predicate
104+ if (!json.contains (kValue )) [[unlikely]] {
105+ return JsonParseError (" Missing 'value' field for literal predicate: {}" ,
106+ SafeDumpJson (json));
107+ }
108+ ICEBERG_ASSIGN_OR_RAISE (auto literal, LiteralFromJson (json[kValue ]));
109+ return UnboundPredicateImpl<B>::Make (op, std::move (term), std::move (literal));
110+ }
61111} // namespace
62112
63113bool IsUnaryOperation (Expression::Operation op) {
@@ -83,7 +133,7 @@ bool IsSetOperation(Expression::Operation op) {
83133}
84134
85135Result<Expression::Operation> OperationTypeFromJson (const nlohmann::json& json) {
86- if (!json.is_string ()) {
136+ if (!json.is_string ()) [[unlikely]] {
87137 return JsonParseError (" Unable to create operation. Json value is not a string" );
88138 }
89139 auto typeStr = json.get <std::string>();
@@ -123,27 +173,243 @@ nlohmann::json ToJson(Expression::Operation op) {
123173 return json;
124174}
125175
176+ nlohmann::json ToJson (const NamedReference& ref) { return ref.name (); }
177+
178+ Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson (
179+ const nlohmann::json& json) {
180+ if (!json.is_string ()) [[unlikely]] {
181+ return JsonParseError (" Expected string for named reference" );
182+ }
183+ return NamedReference::Make (json.get <std::string>());
184+ }
185+
186+ nlohmann::json ToJson (const UnboundTransform& transform) {
187+ auto & mutable_transform = const_cast <UnboundTransform&>(transform);
188+ nlohmann::json json;
189+ json[kType ] = kTransform ;
190+ json[kTransform ] = transform.transform ()->ToString ();
191+ json[kTerm ] = mutable_transform.reference ()->name ();
192+ return json;
193+ }
194+
195+ Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson (
196+ const nlohmann::json& json) {
197+ if (IsTransformTerm (json)) {
198+ ICEBERG_ASSIGN_OR_RAISE (auto transform_str,
199+ GetJsonValue<std::string>(json, kTransform ));
200+ ICEBERG_ASSIGN_OR_RAISE (auto transform, TransformFromString (transform_str));
201+ ICEBERG_ASSIGN_OR_RAISE (auto ref, NamedReferenceFromJson (json[kTerm ]));
202+ return UnboundTransform::Make (std::move (ref), std::move (transform));
203+ }
204+ return JsonParseError (" Invalid unbound transform json: {}" , SafeDumpJson (json));
205+ }
206+
207+ nlohmann::json ToJson (const Literal& literal) {
208+ if (literal.IsNull ()) {
209+ return nullptr ;
210+ }
211+
212+ const auto type_id = literal.type ()->type_id ();
213+ const auto & value = literal.value ();
214+
215+ switch (type_id) {
216+ case TypeId::kBoolean :
217+ return std::get<bool >(value);
218+ case TypeId::kInt :
219+ return std::get<int32_t >(value);
220+ case TypeId::kDate :
221+ return TransformUtil::HumanDay (std::get<int32_t >(value));
222+ case TypeId::kLong :
223+ return std::get<int64_t >(value);
224+ case TypeId::kTime :
225+ return TransformUtil::HumanTime (std::get<int64_t >(value));
226+ case TypeId::kTimestamp :
227+ return TransformUtil::HumanTimestamp (std::get<int64_t >(value));
228+ case TypeId::kTimestampTz :
229+ return TransformUtil::HumanTimestampWithZone (std::get<int64_t >(value));
230+ case TypeId::kFloat :
231+ return std::get<float >(value);
232+ case TypeId::kDouble :
233+ return std::get<double >(value);
234+ case TypeId::kString :
235+ return std::get<std::string>(value);
236+ case TypeId::kBinary :
237+ case TypeId::kFixed : {
238+ const auto & bytes = std::get<std::vector<uint8_t >>(value);
239+ std::string hex;
240+ hex.reserve (bytes.size () * 2 );
241+ for (uint8_t byte : bytes) {
242+ hex += std::format (" {:02X}" , byte);
243+ }
244+ return hex;
245+ }
246+ case TypeId::kDecimal : {
247+ return literal.ToString ();
248+ }
249+ case TypeId::kUuid :
250+ return std::get<Uuid>(value).ToString ();
251+ default :
252+ nlohmann::json json;
253+ return json;
254+ }
255+ }
256+
257+ Result<Literal> LiteralFromJson (const nlohmann::json& json) {
258+ if (json.is_null ()) {
259+ return Literal::Null (nullptr );
260+ }
261+ if (json.is_boolean ()) {
262+ return Literal::Boolean (json.get <bool >());
263+ }
264+ if (json.is_number_integer ()) {
265+ return Literal::Long (json.get <int64_t >());
266+ }
267+ if (json.is_number_float ()) {
268+ return Literal::Double (json.get <double >());
269+ }
270+ if (json.is_string ()) {
271+ // All strings are returned as String literals.
272+ // Conversion to binary/date/time/etc. happens during binding
273+ // when schema type information is available.
274+ return Literal::String (json.get <std::string>());
275+ }
276+ return JsonParseError (" Unsupported literal JSON type" );
277+ }
278+
279+ nlohmann::json TermToJson (const Term& term) {
280+ switch (term.kind ()) {
281+ case Term::Kind::kReference :
282+ return ToJson (static_cast <const NamedReference&>(term));
283+ case Term::Kind::kTransform :
284+ return ToJson (static_cast <const UnboundTransform&>(term));
285+ default :
286+ return nullptr ;
287+ }
288+ }
289+
290+ nlohmann::json ToJson (const UnboundPredicate& pred) {
291+ nlohmann::json json;
292+ json[kType ] = ToJson (pred.op ());
293+
294+ // Get term and literals by casting to the appropriate impl type
295+ std::span<const Literal> literals;
296+
297+ if (auto * ref_pred = dynamic_cast <const UnboundPredicateImpl<BoundReference>*>(&pred)) {
298+ json[kTerm ] = TermToJson (*ref_pred->term ());
299+ literals = ref_pred->literals ();
300+ } else if (auto * transform_pred =
301+ dynamic_cast <const UnboundPredicateImpl<BoundTransform>*>(&pred)) {
302+ json[kTerm ] = TermToJson (*transform_pred->term ());
303+ literals = transform_pred->literals ();
304+ }
305+
306+ if (!IsUnaryOperation (pred.op ())) {
307+ if (IsSetOperation (pred.op ())) {
308+ nlohmann::json values = nlohmann::json::array ();
309+ for (const auto & lit : literals) {
310+ values.push_back (ToJson (lit));
311+ }
312+ json[kValues ] = std::move (values);
313+ } else if (!literals.empty ()) {
314+ json[kValue ] = ToJson (literals[0 ]);
315+ }
316+ }
317+ return json;
318+ }
319+
320+ Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson (
321+ const nlohmann::json& json) {
322+ ICEBERG_ASSIGN_OR_RAISE (auto op, OperationTypeFromJson (json[kType ]));
323+
324+ const auto & term_json = json[kTerm ];
325+
326+ if (IsTransformTerm (term_json)) {
327+ ICEBERG_ASSIGN_OR_RAISE (auto term, UnboundTransformFromJson (term_json));
328+ return MakePredicateFromJson<BoundTransform>(op, std::move (term), json);
329+ }
330+
331+ ICEBERG_ASSIGN_OR_RAISE (auto term, NamedReferenceFromJson (term_json));
332+ return MakePredicateFromJson<BoundReference>(op, std::move (term), json);
333+ }
334+
126335Result<std::shared_ptr<Expression>> ExpressionFromJson (const nlohmann::json& json) {
127- // Handle boolean
336+ // Handle boolean constants
128337 if (json.is_boolean ()) {
129338 return json.get <bool >()
130339 ? internal::checked_pointer_cast<Expression>(True::Instance ())
131340 : internal::checked_pointer_cast<Expression>(False::Instance ());
132341 }
133- return JsonParseError (" Only booleans are currently supported." );
342+
343+ if (!json.is_object ()) [[unlikely]] {
344+ return JsonParseError (" Expression must be boolean or object" );
345+ }
346+
347+ ICEBERG_ASSIGN_OR_RAISE (auto op, OperationTypeFromJson (json[kType ]));
348+
349+ switch (op) {
350+ case Expression::Operation::kAnd : {
351+ if (!json.contains (kLeft ) || !json.contains (kRight )) [[unlikely]] {
352+ return JsonParseError (" AND expression missing 'left' or 'right' field" );
353+ }
354+ ICEBERG_ASSIGN_OR_RAISE (auto left, ExpressionFromJson (json[kLeft ]));
355+ ICEBERG_ASSIGN_OR_RAISE (auto right, ExpressionFromJson (json[kRight ]));
356+ ICEBERG_ASSIGN_OR_RAISE (auto result, And::Make (std::move (left), std::move (right)));
357+ return std::shared_ptr<Expression>(std::move (result));
358+ }
359+ case Expression::Operation::kOr : {
360+ if (!json.contains (kLeft ) || !json.contains (kRight )) [[unlikely]] {
361+ return JsonParseError (" OR expression missing 'left' or 'right' field" );
362+ }
363+ ICEBERG_ASSIGN_OR_RAISE (auto left, ExpressionFromJson (json[kLeft ]));
364+ ICEBERG_ASSIGN_OR_RAISE (auto right, ExpressionFromJson (json[kRight ]));
365+ ICEBERG_ASSIGN_OR_RAISE (auto result, Or::Make (std::move (left), std::move (right)));
366+ return std::shared_ptr<Expression>(std::move (result));
367+ }
368+ case Expression::Operation::kNot : {
369+ if (!json.contains (kChild )) [[unlikely]] {
370+ return JsonParseError (" NOT expression missing 'child' field" );
371+ }
372+ ICEBERG_ASSIGN_OR_RAISE (auto child, ExpressionFromJson (json[kChild ]));
373+ ICEBERG_ASSIGN_OR_RAISE (auto result, Not::Make (std::move (child)));
374+ return std::shared_ptr<Expression>(std::move (result));
375+ }
376+ default :
377+ // All other operations are predicates
378+ return UnboundPredicateFromJson (json);
379+ }
134380}
135381
136382nlohmann::json ToJson (const Expression& expr) {
137383 switch (expr.op ()) {
138384 case Expression::Operation::kTrue :
139385 return true ;
140-
141386 case Expression::Operation::kFalse :
142387 return false ;
388+ case Expression::Operation::kAnd : {
389+ const auto & and_expr = static_cast <const And&>(expr);
390+ nlohmann::json json;
391+ json[kType ] = ToJson (expr.op ());
392+ json[kLeft ] = ToJson (*and_expr.left ());
393+ json[kRight ] = ToJson (*and_expr.right ());
394+ return json;
395+ }
396+ case Expression::Operation::kOr : {
397+ const auto & or_expr = static_cast <const Or&>(expr);
398+ nlohmann::json json;
399+ json[kType ] = ToJson (expr.op ());
400+ json[kLeft ] = ToJson (*or_expr.left ());
401+ json[kRight ] = ToJson (*or_expr.right ());
402+ return json;
403+ }
404+ case Expression::Operation::kNot : {
405+ const auto & not_expr = static_cast <const Not&>(expr);
406+ nlohmann::json json;
407+ json[kType ] = ToJson (expr.op ());
408+ json[kChild ] = ToJson (*not_expr.child ());
409+ return json;
410+ }
143411 default :
144- // TODO(evindj): This code will be removed as we implemented the full expression
145- // serialization.
146- ICEBERG_CHECK_OR_DIE (false , " Only booleans are currently supported." );
412+ return ToJson (dynamic_cast <const UnboundPredicate&>(expr));
147413 }
148414}
149415
0 commit comments