Skip to content

Commit 37e4775

Browse files
author
Innocent
committed
feat: add json serde for expressions
1 parent 43b83c5 commit 37e4775

4 files changed

Lines changed: 516 additions & 28 deletions

File tree

src/iceberg/expression/json_serde.cc

Lines changed: 273 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,25 @@
2727

2828
#include "iceberg/expression/json_serde_internal.h"
2929
#include "iceberg/expression/literal.h"
30+
#include "iceberg/expression/predicate.h"
31+
#include "iceberg/expression/term.h"
32+
#include "iceberg/transform.h"
3033
#include "iceberg/util/checked_cast.h"
3134
#include "iceberg/util/json_util_internal.h"
3235
#include "iceberg/util/macros.h"
36+
#include "iceberg/util/transform_util.h"
3337

3438
namespace iceberg {
3539
namespace {
40+
// JSON field names: object keys read and written by the expression serializers in this file
41+
constexpr std::string_view kType = "type";
42+
constexpr std::string_view kTerm = "term";
43+
constexpr std::string_view kTransform = "transform";
44+
constexpr std::string_view kValue = "value";
45+
constexpr std::string_view kValues = "values";
46+
constexpr std::string_view kLeft = "left";
47+
constexpr std::string_view kRight = "right";
48+
constexpr std::string_view kChild = "child";
3649
// Expression type strings
3750
constexpr std::string_view kTypeTrue = "true";
3851
constexpr std::string_view kTypeFalse = "false";
@@ -58,6 +71,43 @@ constexpr std::string_view kTypeCountNull = "count-null";
5871
constexpr std::string_view kTypeCountStar = "count-star";
5972
constexpr std::string_view kTypeMin = "min";
6073
constexpr std::string_view kTypeMax = "max";
74+
75+
/// Helper to check if a JSON term represents a transform
76+
bool IsTransformTerm(const nlohmann::json& json) {
77+
return json.is_object() && json.contains(kType) && json[kType] == kTransform &&
78+
json.contains(kTerm);
79+
}
80+
81+
/// Template helper to create predicates from JSON with the appropriate term type
82+
template <typename B>
83+
Result<std::unique_ptr<UnboundPredicate>> MakePredicateFromJson(
84+
Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
85+
const nlohmann::json& json) {
86+
if (IsUnaryOperation(op)) {
87+
return UnboundPredicateImpl<B>::Make(op, std::move(term));
88+
}
89+
90+
if (IsSetOperation(op)) {
91+
std::vector<Literal> literals;
92+
if (!json.contains(kValues) || !json[kValues].is_array()) [[unlikely]] {
93+
return JsonParseError("Missing or invalid 'values' field for set operation: {}",
94+
SafeDumpJson(json));
95+
}
96+
for (const auto& val : json[kValues]) {
97+
ICEBERG_ASSIGN_OR_RAISE(auto lit, LiteralFromJson(val));
98+
literals.push_back(std::move(lit));
99+
}
100+
return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literals));
101+
}
102+
103+
// Literal predicate
104+
if (!json.contains(kValue)) [[unlikely]] {
105+
return JsonParseError("Missing 'value' field for literal predicate: {}",
106+
SafeDumpJson(json));
107+
}
108+
ICEBERG_ASSIGN_OR_RAISE(auto literal, LiteralFromJson(json[kValue]));
109+
return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literal));
110+
}
61111
} // namespace
62112

63113
bool IsUnaryOperation(Expression::Operation op) {
@@ -83,7 +133,7 @@ bool IsSetOperation(Expression::Operation op) {
83133
}
84134

85135
Result<Expression::Operation> OperationTypeFromJson(const nlohmann::json& json) {
86-
if (!json.is_string()) {
136+
if (!json.is_string()) [[unlikely]] {
87137
return JsonParseError("Unable to create operation. Json value is not a string");
88138
}
89139
auto typeStr = json.get<std::string>();
@@ -123,27 +173,243 @@ nlohmann::json ToJson(Expression::Operation op) {
123173
return json;
124174
}
125175

176+
/// Serializes a named reference as the JSON value of its name.
nlohmann::json ToJson(const NamedReference& ref) {
  nlohmann::json json = ref.name();
  return json;
}
177+
178+
/// Parses a named reference from a JSON string holding the reference name.
///
/// Returns a JSON parse error when `json` is not a string; otherwise returns
/// whatever `NamedReference::Make` produces for that name.
Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
    const nlohmann::json& json) {
  if (json.is_string()) [[likely]] {
    return NamedReference::Make(json.get<std::string>());
  }
  return JsonParseError("Expected string for named reference");
}
185+
186+
/// Serializes an unbound transform as a JSON object with three fields:
/// "type" (always "transform"), "transform" (the transform's string form) and
/// "term" (the name of the transform's reference).
nlohmann::json ToJson(const UnboundTransform& transform) {
  // NOTE(review): reference() is called through a non-const alias; presumably the
  // accessor is not const-qualified -- confirm, and drop the cast if it is.
  auto&& source = const_cast<UnboundTransform&>(transform).reference();

  nlohmann::json json;
  json[kType] = kTransform;
  json[kTransform] = transform.transform()->ToString();
  json[kTerm] = source->name();
  return json;
}
194+
195+
/// Parses an unbound transform from its JSON object form.
///
/// `json` must satisfy `IsTransformTerm`; the "transform" field is parsed with
/// `TransformFromString` and the "term" field with `NamedReferenceFromJson`.
/// Returns a JSON parse error when the shape check fails, and propagates any
/// error raised while reading those fields.
Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson(
    const nlohmann::json& json) {
  if (!IsTransformTerm(json)) {
    return JsonParseError("Invalid unbound transform json: {}", SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto transform_name,
                          GetJsonValue<std::string>(json, kTransform));
  ICEBERG_ASSIGN_OR_RAISE(auto parsed_transform, TransformFromString(transform_name));
  ICEBERG_ASSIGN_OR_RAISE(auto source_ref, NamedReferenceFromJson(json[kTerm]));
  return UnboundTransform::Make(std::move(source_ref), std::move(parsed_transform));
}
206+
207+
/// Serializes a literal to JSON according to the type id of the literal's type.
///
/// - A null literal becomes JSON null.
/// - boolean / int / long / float / double / string are emitted as the stored value.
/// - date / time / timestamp / timestamptz are emitted as the result of the matching
///   `TransformUtil::Human*` helper.
/// - binary / fixed are emitted as an upper-case hex string, two digits per byte.
/// - decimal is emitted as `literal.ToString()`, uuid as `Uuid::ToString()`.
/// - Any other type id yields a default-constructed json value.
nlohmann::json ToJson(const Literal& literal) {
  if (literal.IsNull()) {
    return nullptr;
  }

  const auto& payload = literal.value();

  switch (literal.type()->type_id()) {
    case TypeId::kBoolean:
      return std::get<bool>(payload);

    // Types stored as int32_t.
    case TypeId::kInt:
      return std::get<int32_t>(payload);
    case TypeId::kDate:
      return TransformUtil::HumanDay(std::get<int32_t>(payload));

    // Types stored as int64_t.
    case TypeId::kLong:
      return std::get<int64_t>(payload);
    case TypeId::kTime:
      return TransformUtil::HumanTime(std::get<int64_t>(payload));
    case TypeId::kTimestamp:
      return TransformUtil::HumanTimestamp(std::get<int64_t>(payload));
    case TypeId::kTimestampTz:
      return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(payload));

    case TypeId::kFloat:
      return std::get<float>(payload);
    case TypeId::kDouble:
      return std::get<double>(payload);
    case TypeId::kString:
      return std::get<std::string>(payload);

    case TypeId::kBinary:
    case TypeId::kFixed: {
      // Two upper-case hex digits per byte, high nibble first.
      static constexpr char kHexDigits[] = "0123456789ABCDEF";
      const auto& raw = std::get<std::vector<uint8_t>>(payload);
      std::string hex;
      hex.reserve(raw.size() * 2);
      for (const uint8_t octet : raw) {
        hex.push_back(kHexDigits[octet >> 4]);
        hex.push_back(kHexDigits[octet & 0x0F]);
      }
      return hex;
    }

    case TypeId::kDecimal:
      return literal.ToString();
    case TypeId::kUuid:
      return std::get<Uuid>(payload).ToString();

    default:
      // No serialization defined for this type id here.
      return nlohmann::json();
  }
}
256+
257+
/// Parses a literal from a JSON scalar, choosing the literal type from the JSON
/// value kind alone:
///
/// - null    -> `Literal::Null(nullptr)`
/// - boolean -> `Literal::Boolean`
/// - integer -> `Literal::Long`
/// - float   -> `Literal::Double`
/// - string  -> `Literal::String`
///
/// Returns a JSON parse error for any other JSON kind (arrays, objects, ...) and
/// for unsigned integers that do not fit in a signed 64-bit long.
Result<Literal> LiteralFromJson(const nlohmann::json& json) {
  if (json.is_null()) {
    return Literal::Null(nullptr);
  }
  if (json.is_boolean()) {
    return Literal::Boolean(json.get<bool>());
  }
  if (json.is_number_unsigned()) {
    // is_number_integer() is also true for unsigned values, so without this
    // branch a value above INT64_MAX would be converted by get<int64_t>() and
    // silently wrap to a negative long. A set top bit means it does not fit.
    const auto magnitude = json.get<uint64_t>();
    if ((magnitude >> 63) != 0) [[unlikely]] {
      return JsonParseError("Integer literal out of range for long: {}",
                            SafeDumpJson(json));
    }
    return Literal::Long(static_cast<int64_t>(magnitude));
  }
  if (json.is_number_integer()) {
    return Literal::Long(json.get<int64_t>());
  }
  if (json.is_number_float()) {
    return Literal::Double(json.get<double>());
  }
  if (json.is_string()) {
    // All strings are returned as String literals.
    // Conversion to binary/date/time/etc. happens during binding
    // when schema type information is available.
    return Literal::String(json.get<std::string>());
  }
  return JsonParseError("Unsupported literal JSON type");
}
278+
279+
/// Serializes a term by dispatching on `term.kind()`: references are serialized as
/// `NamedReference`, transforms as `UnboundTransform`. Any other kind yields JSON
/// null.
nlohmann::json TermToJson(const Term& term) {
  const auto kind = term.kind();
  if (kind == Term::Kind::kReference) {
    return ToJson(static_cast<const NamedReference&>(term));
  }
  if (kind == Term::Kind::kTransform) {
    return ToJson(static_cast<const UnboundTransform&>(term));
  }
  return nullptr;
}
289+
290+
/// Serializes an unbound predicate as a JSON object.
///
/// "type" is always written. "term" is written when the predicate is an
/// `UnboundPredicateImpl` over `BoundReference` or `BoundTransform`; for any other
/// dynamic type no term is emitted and the literal list is treated as empty.
/// Set operations write all literals under "values"; other non-unary operations
/// write the first literal (if any) under "value"; unary operations write neither.
nlohmann::json ToJson(const UnboundPredicate& pred) {
  const auto op = pred.op();

  nlohmann::json json;
  json[kType] = ToJson(op);

  // The term and literals are only reachable through the concrete impl type.
  std::span<const Literal> operands;
  const auto capture = [&](const auto* impl) {
    if (impl == nullptr) {
      return false;
    }
    json[kTerm] = TermToJson(*impl->term());
    operands = impl->literals();
    return true;
  };
  if (!capture(dynamic_cast<const UnboundPredicateImpl<BoundReference>*>(&pred))) {
    capture(dynamic_cast<const UnboundPredicateImpl<BoundTransform>*>(&pred));
  }

  if (IsUnaryOperation(op)) {
    return json;
  }

  if (IsSetOperation(op)) {
    auto values = nlohmann::json::array();
    for (const auto& operand : operands) {
      values.push_back(ToJson(operand));
    }
    json[kValues] = std::move(values);
    return json;
  }

  if (!operands.empty()) {
    json[kValue] = ToJson(operands.front());
  }
  return json;
}
319+
320+
/// Parses an unbound predicate from its JSON object form.
///
/// `json` must be an object carrying a "type" field (the operation) and a "term"
/// field. A term that satisfies `IsTransformTerm` is parsed as an unbound
/// transform; anything else is parsed as a named reference. Operands are then read
/// by `MakePredicateFromJson`.
///
/// Returns a JSON parse error when `json` is not an object or lacks "type" or
/// "term", and propagates errors from parsing the operation, term or operands.
Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
    const nlohmann::json& json) {
  // Validate before indexing: operator[] on a const json with a missing key (or on
  // a non-object) is not a recoverable error, so malformed input must be rejected
  // here rather than reaching json[kType] / json[kTerm].
  if (!json.is_object() || !json.contains(kType) || !json.contains(kTerm))
      [[unlikely]] {
    return JsonParseError(
        "Predicate must be an object with 'type' and 'term' fields: {}",
        SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));

  const auto& term_json = json[kTerm];

  if (IsTransformTerm(term_json)) {
    ICEBERG_ASSIGN_OR_RAISE(auto term, UnboundTransformFromJson(term_json));
    return MakePredicateFromJson<BoundTransform>(op, std::move(term), json);
  }

  ICEBERG_ASSIGN_OR_RAISE(auto term, NamedReferenceFromJson(term_json));
  return MakePredicateFromJson<BoundReference>(op, std::move(term), json);
}
334+
126335
/// Parses an expression from JSON.
///
/// - A JSON boolean maps to the `True` / `False` singleton.
/// - A JSON object is dispatched on its "type" field: "and" / "or" recurse into
///   "left" and "right", "not" recurses into "child", and every other operation
///   is parsed as an unbound predicate.
///
/// Returns a JSON parse error when `json` is neither a boolean nor an object, when
/// the object has no "type" field, or when a required child field is missing;
/// errors from nested parsing are propagated.
Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& json) {
  // Handle boolean constants
  if (json.is_boolean()) {
    return json.get<bool>()
               ? internal::checked_pointer_cast<Expression>(True::Instance())
               : internal::checked_pointer_cast<Expression>(False::Instance());
  }

  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Expression must be boolean or object");
  }

  // operator[] on a const json with a missing key is not a recoverable error, so
  // the presence of "type" has to be established before it is read.
  if (!json.contains(kType)) [[unlikely]] {
    return JsonParseError("Expression object missing 'type' field: {}",
                          SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));

  switch (op) {
    case Expression::Operation::kAnd: {
      if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] {
        return JsonParseError("AND expression missing 'left' or 'right' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft]));
      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, And::Make(std::move(left), std::move(right)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    case Expression::Operation::kOr: {
      if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] {
        return JsonParseError("OR expression missing 'left' or 'right' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft]));
      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, Or::Make(std::move(left), std::move(right)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    case Expression::Operation::kNot: {
      if (!json.contains(kChild)) [[unlikely]] {
        return JsonParseError("NOT expression missing 'child' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto child, ExpressionFromJson(json[kChild]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, Not::Make(std::move(child)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    default:
      // All other operations are predicates
      return UnboundPredicateFromJson(json);
  }
}
135381

136382
/// Serializes an expression to JSON.
///
/// `True` / `False` become JSON booleans. `And` / `Or` become objects with "type",
/// "left" and "right"; `Not` becomes an object with "type" and "child". Every
/// other operation is serialized as an `UnboundPredicate`.
///
/// The predicate fallback uses a reference `dynamic_cast`, so an expression that
/// is not an `UnboundPredicate` makes this function throw `std::bad_cast`.
nlohmann::json ToJson(const Expression& expr) {
  const auto op = expr.op();

  if (op == Expression::Operation::kTrue) {
    return true;
  }
  if (op == Expression::Operation::kFalse) {
    return false;
  }

  // Shared shape for the two binary logical nodes.
  const auto binary_to_json = [&](const auto& node) {
    nlohmann::json json;
    json[kType] = ToJson(op);
    json[kLeft] = ToJson(*node.left());
    json[kRight] = ToJson(*node.right());
    return json;
  };

  if (op == Expression::Operation::kAnd) {
    return binary_to_json(static_cast<const And&>(expr));
  }
  if (op == Expression::Operation::kOr) {
    return binary_to_json(static_cast<const Or&>(expr));
  }
  if (op == Expression::Operation::kNot) {
    const auto& negation = static_cast<const Not&>(expr);
    nlohmann::json json;
    json[kType] = ToJson(op);
    json[kChild] = ToJson(*negation.child());
    return json;
  }

  return ToJson(dynamic_cast<const UnboundPredicate&>(expr));
}
149415

0 commit comments

Comments
 (0)