1717 * under the License.
1818 */
1919
20- #include < format>
2120#include < ranges>
2221#include < string>
23- #include < utility>
2422#include < vector>
2523
2624#include < nlohmann/json.hpp>
2725
2826#include " iceberg/expression/json_serde_internal.h"
2927#include " iceberg/expression/literal.h"
28+ #include " iceberg/expression/predicate.h"
29+ #include " iceberg/expression/term.h"
30+ #include " iceberg/transform.h"
3031#include " iceberg/util/checked_cast.h"
3132#include " iceberg/util/json_util_internal.h"
3233#include " iceberg/util/macros.h"
34+ #include " iceberg/util/transform_util.h"
3335
3436namespace iceberg {
3537namespace {
// JSON field names.
// Fields read and written for predicates, terms and literals:
constexpr std::string_view kType{"type"};
constexpr std::string_view kTerm{"term"};
constexpr std::string_view kTransform{"transform"};
constexpr std::string_view kValue{"value"};
constexpr std::string_view kValues{"values"};
// Operand fields of the logical expressions (and / or / not):
constexpr std::string_view kLeft{"left"};
constexpr std::string_view kRight{"right"};
constexpr std::string_view kChild{"child"};
3647// Expression type strings
3748constexpr std::string_view kTypeTrue = " true" ;
3849constexpr std::string_view kTypeFalse = " false" ;
@@ -58,6 +69,53 @@ constexpr std::string_view kTypeCountNull = "count-null";
5869constexpr std::string_view kTypeCountStar = " count-star" ;
5970constexpr std::string_view kTypeMin = " min" ;
6071constexpr std::string_view kTypeMax = " max" ;
72+ constexpr std::string_view kTypeLiteral = " literal" ;
73+ constexpr std::string_view kTypeReference = " reference" ;
74+
75+ // / Helper to check if a JSON term represents a transform
76+ bool IsTransformTerm (const nlohmann::json& json) {
77+ return json.is_object () && json.contains (kType ) &&
78+ json[kType ].get <std::string>() == kTransform && json.contains (kTerm );
79+ }
80+
81+ // / Template helper to create predicates from JSON with the appropriate term type
82+ template <typename B>
83+ Result<std::unique_ptr<UnboundPredicate>> MakePredicateFromJson (
84+ Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
85+ const nlohmann::json& json) {
86+ if (IsUnaryOperation (op)) {
87+ if (json.contains (kValue )) [[unlikely]] {
88+ return JsonParseError (" Unary predicate has invalid 'value' field: {}" ,
89+ SafeDumpJson (json));
90+ }
91+ if (json.contains (kValues )) [[unlikely]] {
92+ return JsonParseError (" Unary predicate has invalid 'values' field: {}" ,
93+ SafeDumpJson (json));
94+ }
95+ return UnboundPredicateImpl<B>::Make (op, std::move (term));
96+ }
97+
98+ if (IsSetOperation (op)) {
99+ std::vector<Literal> literals;
100+ if (!json.contains (kValues ) || !json[kValues ].is_array ()) [[unlikely]] {
101+ return JsonParseError (" Missing or invalid 'values' field for set operation: {}" ,
102+ SafeDumpJson (json));
103+ }
104+ for (const auto & val : json[kValues ]) {
105+ ICEBERG_ASSIGN_OR_RAISE (auto lit, LiteralFromJson (val));
106+ literals.push_back (std::move (lit));
107+ }
108+ return UnboundPredicateImpl<B>::Make (op, std::move (term), std::move (literals));
109+ }
110+
111+ // Literal predicate
112+ if (!json.contains (kValue )) [[unlikely]] {
113+ return JsonParseError (" Missing 'value' field for literal predicate: {}" ,
114+ SafeDumpJson (json));
115+ }
116+ ICEBERG_ASSIGN_OR_RAISE (auto literal, LiteralFromJson (json[kValue ]));
117+ return UnboundPredicateImpl<B>::Make (op, std::move (term), std::move (literal));
118+ }
61119} // namespace
62120
63121bool IsUnaryOperation (Expression::Operation op) {
@@ -83,7 +141,7 @@ bool IsSetOperation(Expression::Operation op) {
83141}
84142
85143Result<Expression::Operation> OperationTypeFromJson (const nlohmann::json& json) {
86- if (!json.is_string ()) {
144+ if (!json.is_string ()) [[unlikely]] {
87145 return JsonParseError (" Unable to create operation. Json value is not a string" );
88146 }
89147 auto typeStr = json.get <std::string>();
@@ -123,27 +181,252 @@ nlohmann::json ToJson(Expression::Operation op) {
123181 return json;
124182}
125183
184+ nlohmann::json ToJson (const NamedReference& ref) { return ref.name (); }
185+
186+ Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson (
187+ const nlohmann::json& json) {
188+ if (json.is_object () && json.contains (kType ) &&
189+ json[kType ].get <std::string>() == kTypeReference && json.contains (kTerm )) {
190+ return NamedReference::Make (json[kTerm ].get <std::string>());
191+ }
192+ if (!json.is_string ()) [[unlikely]] {
193+ return JsonParseError (" Expected string for named reference" );
194+ }
195+ return NamedReference::Make (json.get <std::string>());
196+ }
197+
198+ nlohmann::json ToJson (const UnboundTransform& transform) {
199+ auto & mutable_transform = const_cast <UnboundTransform&>(transform);
200+ nlohmann::json json;
201+ json[kType ] = kTransform ;
202+ json[kTransform ] = transform.transform ()->ToString ();
203+ json[kTerm ] = mutable_transform.reference ()->name ();
204+ return json;
205+ }
206+
207+ Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson (
208+ const nlohmann::json& json) {
209+ if (IsTransformTerm (json)) {
210+ ICEBERG_ASSIGN_OR_RAISE (auto transform_str,
211+ GetJsonValue<std::string>(json, kTransform ));
212+ ICEBERG_ASSIGN_OR_RAISE (auto transform, TransformFromString (transform_str));
213+ ICEBERG_ASSIGN_OR_RAISE (auto ref, NamedReferenceFromJson (json[kTerm ]));
214+ return UnboundTransform::Make (std::move (ref), std::move (transform));
215+ }
216+ return JsonParseError (" Invalid unbound transform json: {}" , SafeDumpJson (json));
217+ }
218+
219+ nlohmann::json ToJson (const Literal& literal) {
220+ if (literal.IsNull ()) {
221+ return nullptr ;
222+ }
223+
224+ const auto type_id = literal.type ()->type_id ();
225+ const auto & value = literal.value ();
226+
227+ switch (type_id) {
228+ case TypeId::kBoolean :
229+ return std::get<bool >(value);
230+ case TypeId::kInt :
231+ return std::get<int32_t >(value);
232+ case TypeId::kDate :
233+ return TransformUtil::HumanDay (std::get<int32_t >(value));
234+ case TypeId::kLong :
235+ return std::get<int64_t >(value);
236+ case TypeId::kTime :
237+ return TransformUtil::HumanTime (std::get<int64_t >(value));
238+ case TypeId::kTimestamp :
239+ return TransformUtil::HumanTimestamp (std::get<int64_t >(value));
240+ case TypeId::kTimestampTz :
241+ return TransformUtil::HumanTimestampWithZone (std::get<int64_t >(value));
242+ case TypeId::kFloat :
243+ return std::get<float >(value);
244+ case TypeId::kDouble :
245+ return std::get<double >(value);
246+ case TypeId::kString :
247+ return std::get<std::string>(value);
248+ case TypeId::kBinary :
249+ case TypeId::kFixed : {
250+ const auto & bytes = std::get<std::vector<uint8_t >>(value);
251+ std::string hex;
252+ hex.reserve (bytes.size () * 2 );
253+ for (uint8_t byte : bytes) {
254+ hex += std::format (" {:02X}" , byte);
255+ }
256+ return hex;
257+ }
258+ case TypeId::kDecimal : {
259+ return literal.ToString ();
260+ }
261+ case TypeId::kUuid :
262+ return std::get<Uuid>(value).ToString ();
263+ default :
264+ nlohmann::json json;
265+ return json;
266+ }
267+ }
268+
269+ Result<Literal> LiteralFromJson (const nlohmann::json& json) {
270+ // Unwrap {"type": "literal", "value": <actual>} wrapper
271+ if (json.is_object () && json.contains (kType ) &&
272+ json[kType ].get <std::string>() == kTypeLiteral && json.contains (kValue )) {
273+ return LiteralFromJson (json[kValue ]);
274+ }
275+ if (json.is_null ()) {
276+ return Literal::Null (nullptr );
277+ }
278+ if (json.is_boolean ()) {
279+ return Literal::Boolean (json.get <bool >());
280+ }
281+ if (json.is_number_integer ()) {
282+ return Literal::Long (json.get <int64_t >());
283+ }
284+ if (json.is_number_float ()) {
285+ return Literal::Double (json.get <double >());
286+ }
287+ if (json.is_string ()) {
288+ // All strings are returned as String literals.
289+ // Conversion to binary/date/time/etc. happens during binding
290+ // when schema type information is available.
291+ return Literal::String (json.get <std::string>());
292+ }
293+ return JsonParseError (" Unsupported literal JSON type" );
294+ }
295+
296+ nlohmann::json TermToJson (const Term& term) {
297+ switch (term.kind ()) {
298+ case Term::Kind::kReference :
299+ return ToJson (static_cast <const NamedReference&>(term));
300+ case Term::Kind::kTransform :
301+ return ToJson (static_cast <const UnboundTransform&>(term));
302+ default :
303+ return nullptr ;
304+ }
305+ }
306+
307+ nlohmann::json ToJson (const UnboundPredicate& pred) {
308+ nlohmann::json json;
309+ json[kType ] = ToJson (pred.op ());
310+
311+ // Get term and literals by casting to the appropriate impl type
312+ std::span<const Literal> literals;
313+
314+ if (auto * ref_pred = dynamic_cast <const UnboundPredicateImpl<BoundReference>*>(&pred)) {
315+ json[kTerm ] = TermToJson (*ref_pred->term ());
316+ literals = ref_pred->literals ();
317+ } else if (auto * transform_pred =
318+ dynamic_cast <const UnboundPredicateImpl<BoundTransform>*>(&pred)) {
319+ json[kTerm ] = TermToJson (*transform_pred->term ());
320+ literals = transform_pred->literals ();
321+ }
322+
323+ if (!IsUnaryOperation (pred.op ())) {
324+ if (IsSetOperation (pred.op ())) {
325+ nlohmann::json values = nlohmann::json::array ();
326+ for (const auto & lit : literals) {
327+ values.push_back (ToJson (lit));
328+ }
329+ json[kValues ] = std::move (values);
330+ } else if (!literals.empty ()) {
331+ json[kValue ] = ToJson (literals[0 ]);
332+ }
333+ }
334+ return json;
335+ }
336+
337+ Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson (
338+ const nlohmann::json& json) {
339+ ICEBERG_ASSIGN_OR_RAISE (auto op, OperationTypeFromJson (json[kType ]));
340+
341+ const auto & term_json = json[kTerm ];
342+
343+ if (IsTransformTerm (term_json)) {
344+ ICEBERG_ASSIGN_OR_RAISE (auto term, UnboundTransformFromJson (term_json));
345+ return MakePredicateFromJson<BoundTransform>(op, std::move (term), json);
346+ }
347+
348+ ICEBERG_ASSIGN_OR_RAISE (auto term, NamedReferenceFromJson (term_json));
349+ return MakePredicateFromJson<BoundReference>(op, std::move (term), json);
350+ }
351+
126352Result<std::shared_ptr<Expression>> ExpressionFromJson (const nlohmann::json& json) {
127- // Handle boolean
353+ // Handle boolean constants
128354 if (json.is_boolean ()) {
129355 return json.get <bool >()
130356 ? internal::checked_pointer_cast<Expression>(True::Instance ())
131357 : internal::checked_pointer_cast<Expression>(False::Instance ());
132358 }
133- return JsonParseError (" Only booleans are currently supported." );
359+
360+ if (!json.is_object ()) [[unlikely]] {
361+ return JsonParseError (" Expression must be boolean or object" );
362+ }
363+
364+ ICEBERG_ASSIGN_OR_RAISE (auto op, OperationTypeFromJson (json[kType ]));
365+
366+ switch (op) {
367+ case Expression::Operation::kAnd : {
368+ if (!json.contains (kLeft ) || !json.contains (kRight )) [[unlikely]] {
369+ return JsonParseError (" AND expression missing 'left' or 'right' field" );
370+ }
371+ ICEBERG_ASSIGN_OR_RAISE (auto left, ExpressionFromJson (json[kLeft ]));
372+ ICEBERG_ASSIGN_OR_RAISE (auto right, ExpressionFromJson (json[kRight ]));
373+ ICEBERG_ASSIGN_OR_RAISE (auto result, And::Make (std::move (left), std::move (right)));
374+ return std::shared_ptr<Expression>(std::move (result));
375+ }
376+ case Expression::Operation::kOr : {
377+ if (!json.contains (kLeft ) || !json.contains (kRight )) [[unlikely]] {
378+ return JsonParseError (" OR expression missing 'left' or 'right' field" );
379+ }
380+ ICEBERG_ASSIGN_OR_RAISE (auto left, ExpressionFromJson (json[kLeft ]));
381+ ICEBERG_ASSIGN_OR_RAISE (auto right, ExpressionFromJson (json[kRight ]));
382+ ICEBERG_ASSIGN_OR_RAISE (auto result, Or::Make (std::move (left), std::move (right)));
383+ return std::shared_ptr<Expression>(std::move (result));
384+ }
385+ case Expression::Operation::kNot : {
386+ if (!json.contains (kChild )) [[unlikely]] {
387+ return JsonParseError (" NOT expression missing 'child' field" );
388+ }
389+ ICEBERG_ASSIGN_OR_RAISE (auto child, ExpressionFromJson (json[kChild ]));
390+ ICEBERG_ASSIGN_OR_RAISE (auto result, Not::Make (std::move (child)));
391+ return std::shared_ptr<Expression>(std::move (result));
392+ }
393+ default :
394+ // All other operations are predicates
395+ return UnboundPredicateFromJson (json);
396+ }
134397}
135398
136399nlohmann::json ToJson (const Expression& expr) {
137400 switch (expr.op ()) {
138401 case Expression::Operation::kTrue :
139402 return true ;
140-
141403 case Expression::Operation::kFalse :
142404 return false ;
405+ case Expression::Operation::kAnd : {
406+ const auto & and_expr = static_cast <const And&>(expr);
407+ nlohmann::json json;
408+ json[kType ] = ToJson (expr.op ());
409+ json[kLeft ] = ToJson (*and_expr.left ());
410+ json[kRight ] = ToJson (*and_expr.right ());
411+ return json;
412+ }
413+ case Expression::Operation::kOr : {
414+ const auto & or_expr = static_cast <const Or&>(expr);
415+ nlohmann::json json;
416+ json[kType ] = ToJson (expr.op ());
417+ json[kLeft ] = ToJson (*or_expr.left ());
418+ json[kRight ] = ToJson (*or_expr.right ());
419+ return json;
420+ }
421+ case Expression::Operation::kNot : {
422+ const auto & not_expr = static_cast <const Not&>(expr);
423+ nlohmann::json json;
424+ json[kType ] = ToJson (expr.op ());
425+ json[kChild ] = ToJson (*not_expr.child ());
426+ return json;
427+ }
143428 default :
144- // TODO(evindj): This code will be removed as we implemented the full expression
145- // serialization.
146- ICEBERG_CHECK_OR_DIE (false , " Only booleans are currently supported." );
429+ return ToJson (dynamic_cast <const UnboundPredicate&>(expr));
147430 }
148431}
149432
0 commit comments