Skip to content

Commit 78d3930

Browse files
author
Innocent
committed
feat: add json serde for expressions
1 parent 721e529 commit 78d3930

File tree

4 files changed

+549
-35
lines changed

4 files changed

+549
-35
lines changed

src/iceberg/expression/json_serde.cc

Lines changed: 292 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,33 @@
1717
* under the License.
1818
*/
1919

20-
#include <format>
2120
#include <ranges>
2221
#include <string>
23-
#include <utility>
2422
#include <vector>
2523

2624
#include <nlohmann/json.hpp>
2725

2826
#include "iceberg/expression/json_serde_internal.h"
2927
#include "iceberg/expression/literal.h"
28+
#include "iceberg/expression/predicate.h"
29+
#include "iceberg/expression/term.h"
30+
#include "iceberg/transform.h"
3031
#include "iceberg/util/checked_cast.h"
3132
#include "iceberg/util/json_util_internal.h"
3233
#include "iceberg/util/macros.h"
34+
#include "iceberg/util/transform_util.h"
3335

3436
namespace iceberg {
3537
namespace {
38+
// JSON field names: the object keys read and written by the expression
// serializers and parsers in this file. kTransform is additionally used as the
// value of the "type" tag on transform terms.
39+
constexpr std::string_view kType = "type";
40+
constexpr std::string_view kTerm = "term";
41+
constexpr std::string_view kTransform = "transform";
42+
constexpr std::string_view kValue = "value";
43+
constexpr std::string_view kValues = "values";
44+
constexpr std::string_view kLeft = "left";
45+
constexpr std::string_view kRight = "right";
46+
constexpr std::string_view kChild = "child";
3647
// Expression type strings
3748
constexpr std::string_view kTypeTrue = "true";
3849
constexpr std::string_view kTypeFalse = "false";
@@ -58,6 +69,53 @@ constexpr std::string_view kTypeCountNull = "count-null";
5869
constexpr std::string_view kTypeCountStar = "count-star";
5970
constexpr std::string_view kTypeMin = "min";
6071
constexpr std::string_view kTypeMax = "max";
72+
// "type" tag values recognized on the object-wrapped forms of literals and
// named references when parsing.
constexpr std::string_view kTypeLiteral = "literal";
73+
constexpr std::string_view kTypeReference = "reference";
74+
75+
/// Helper to check if a JSON term represents a transform
76+
bool IsTransformTerm(const nlohmann::json& json) {
77+
return json.is_object() && json.contains(kType) &&
78+
json[kType].get<std::string>() == kTransform && json.contains(kTerm);
79+
}
80+
81+
/// Template helper to create predicates from JSON with the appropriate term type
82+
template <typename B>
83+
Result<std::unique_ptr<UnboundPredicate>> MakePredicateFromJson(
84+
Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
85+
const nlohmann::json& json) {
86+
if (IsUnaryOperation(op)) {
87+
if (json.contains(kValue)) [[unlikely]] {
88+
return JsonParseError("Unary predicate has invalid 'value' field: {}",
89+
SafeDumpJson(json));
90+
}
91+
if (json.contains(kValues)) [[unlikely]] {
92+
return JsonParseError("Unary predicate has invalid 'values' field: {}",
93+
SafeDumpJson(json));
94+
}
95+
return UnboundPredicateImpl<B>::Make(op, std::move(term));
96+
}
97+
98+
if (IsSetOperation(op)) {
99+
std::vector<Literal> literals;
100+
if (!json.contains(kValues) || !json[kValues].is_array()) [[unlikely]] {
101+
return JsonParseError("Missing or invalid 'values' field for set operation: {}",
102+
SafeDumpJson(json));
103+
}
104+
for (const auto& val : json[kValues]) {
105+
ICEBERG_ASSIGN_OR_RAISE(auto lit, LiteralFromJson(val));
106+
literals.push_back(std::move(lit));
107+
}
108+
return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literals));
109+
}
110+
111+
// Literal predicate
112+
if (!json.contains(kValue)) [[unlikely]] {
113+
return JsonParseError("Missing 'value' field for literal predicate: {}",
114+
SafeDumpJson(json));
115+
}
116+
ICEBERG_ASSIGN_OR_RAISE(auto literal, LiteralFromJson(json[kValue]));
117+
return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literal));
118+
}
61119
} // namespace
62120

63121
bool IsUnaryOperation(Expression::Operation op) {
@@ -83,7 +141,7 @@ bool IsSetOperation(Expression::Operation op) {
83141
}
84142

85143
Result<Expression::Operation> OperationTypeFromJson(const nlohmann::json& json) {
86-
if (!json.is_string()) {
144+
if (!json.is_string()) [[unlikely]] {
87145
return JsonParseError("Unable to create operation. Json value is not a string");
88146
}
89147
auto typeStr = json.get<std::string>();
@@ -123,27 +181,252 @@ nlohmann::json ToJson(Expression::Operation op) {
123181
return json;
124182
}
125183

184+
/// Serializes a named reference as a bare JSON value holding its name.
nlohmann::json ToJson(const NamedReference& ref) {
  nlohmann::json name_json = ref.name();
  return name_json;
}
185+
186+
/// Parses a named reference from JSON.
///
/// Two encodings are accepted:
///   - a bare JSON string holding the column name;
///   - the object form {"type": "reference", "term": "<name>"}.
///
/// \param json the JSON value to parse
/// \return the reference, or a JsonParseError when the value is neither form
///         or when the object form carries a non-string "term"
Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
    const nlohmann::json& json) {
  if (json.is_string()) {
    return NamedReference::Make(json.get<std::string>());
  }

  // The "type" tag is only compared when it is a string: get<std::string>()
  // throws on any other JSON kind, which must not escape a Result-returning
  // parser.
  const bool is_reference_object =
      json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kTypeReference && json.contains(kTerm);
  if (is_reference_object) {
    const auto& name = json[kTerm];
    if (!name.is_string()) [[unlikely]] {
      return JsonParseError("Expected string 'term' for named reference: {}",
                            SafeDumpJson(json));
    }
    return NamedReference::Make(name.get<std::string>());
  }

  return JsonParseError("Expected string for named reference");
}
197+
198+
/// Serializes an unbound transform term as
/// {"type": "transform", "transform": <transform string>, "term": <ref name>}.
nlohmann::json ToJson(const UnboundTransform& transform) {
  // NOTE(review): reference() is reached through a non-const alias exactly as
  // before; presumably the accessor is not const-qualified — confirm, and drop
  // the const_cast if a const overload exists.
  auto& writable = const_cast<UnboundTransform&>(transform);

  nlohmann::json result;
  result[kType] = kTransform;
  result[kTransform] = transform.transform()->ToString();
  result[kTerm] = writable.reference()->name();
  return result;
}
206+
207+
/// Parses an unbound transform term from its JSON object form.
///
/// \param json the JSON value to parse; must satisfy IsTransformTerm
/// \return the transform term, or an error when the value is not a transform
///         term or when its "transform" string or "term" reference fails to
///         parse
Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson(
    const nlohmann::json& json) {
  if (!IsTransformTerm(json)) {
    return JsonParseError("Invalid unbound transform json: {}", SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto transform_name,
                          GetJsonValue<std::string>(json, kTransform));
  ICEBERG_ASSIGN_OR_RAISE(auto parsed_transform, TransformFromString(transform_name));
  ICEBERG_ASSIGN_OR_RAISE(auto source_ref, NamedReferenceFromJson(json[kTerm]));
  return UnboundTransform::Make(std::move(source_ref), std::move(parsed_transform));
}
218+
219+
/// Serializes a literal to its JSON representation.
///
/// Mapping by literal type:
///   - null literal                -> JSON null
///   - boolean / int / long        -> JSON boolean / number
///   - float / double              -> JSON number
///   - date / time / timestamp(tz) -> string produced by the matching
///                                    TransformUtil::Human* helper
///   - string                      -> JSON string
///   - binary / fixed              -> upper-case hexadecimal string, two
///                                    digits per byte
///   - decimal                     -> Literal::ToString()
///   - uuid                        -> Uuid::ToString()
///   - any other type              -> JSON null
nlohmann::json ToJson(const Literal& literal) {
  if (literal.IsNull()) {
    return nullptr;
  }

  const auto type_id = literal.type()->type_id();
  const auto& value = literal.value();

  switch (type_id) {
    case TypeId::kBoolean:
      return std::get<bool>(value);
    case TypeId::kInt:
      return std::get<int32_t>(value);
    case TypeId::kDate:
      return TransformUtil::HumanDay(std::get<int32_t>(value));
    case TypeId::kLong:
      return std::get<int64_t>(value);
    case TypeId::kTime:
      return TransformUtil::HumanTime(std::get<int64_t>(value));
    case TypeId::kTimestamp:
      return TransformUtil::HumanTimestamp(std::get<int64_t>(value));
    case TypeId::kTimestampTz:
      return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value));
    case TypeId::kFloat:
      return std::get<float>(value);
    case TypeId::kDouble:
      return std::get<double>(value);
    case TypeId::kString:
      return std::get<std::string>(value);
    case TypeId::kBinary:
    case TypeId::kFixed: {
      // Digit-table lookup instead of one std::format call per byte: it builds
      // no temporary strings and does not depend on <format>.
      constexpr std::string_view kHexDigits = "0123456789ABCDEF";
      const auto& bytes = std::get<std::vector<uint8_t>>(value);
      std::string hex;
      hex.reserve(bytes.size() * 2);
      for (const uint8_t byte : bytes) {
        hex.push_back(kHexDigits[byte >> 4]);
        hex.push_back(kHexDigits[byte & 0x0F]);
      }
      return hex;
    }
    case TypeId::kDecimal:
      return literal.ToString();
    case TypeId::kUuid:
      return std::get<Uuid>(value).ToString();
    default:
      // Types without a JSON encoding serialize as JSON null.
      return nlohmann::json();
  }
}
268+
269+
/// Parses a JSON value into a literal without schema type information.
///
/// Accepts either a bare JSON scalar or the wrapped form
/// {"type": "literal", "value": <scalar>}, which is unwrapped recursively.
/// Mapping: null -> Literal::Null, boolean -> Boolean, integer -> Long,
/// floating point -> Double, string -> String.
///
/// \param json the JSON value to parse
/// \return the literal, or a JsonParseError for unsupported JSON kinds
///         (arrays, other objects) and for unsigned integers that do not fit
///         in a signed 64-bit long
Result<Literal> LiteralFromJson(const nlohmann::json& json) {
  // Unwrap {"type": "literal", "value": <actual>} wrapper. The tag is compared
  // only when it is a string, because get<std::string>() throws otherwise.
  if (json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kTypeLiteral && json.contains(kValue)) {
    return LiteralFromJson(json[kValue]);
  }
  if (json.is_null()) {
    return Literal::Null(nullptr);
  }
  if (json.is_boolean()) {
    return Literal::Boolean(json.get<bool>());
  }
  if (json.is_number_unsigned()) {
    // is_number_integer() is also true for unsigned values, and get<int64_t>()
    // would silently wrap anything above INT64_MAX into a negative long.
    constexpr uint64_t kMaxLong = ~uint64_t{0} >> 1;
    if (json.get<uint64_t>() > kMaxLong) [[unlikely]] {
      return JsonParseError("Integer literal out of range for long: {}",
                            SafeDumpJson(json));
    }
  }
  if (json.is_number_integer()) {
    return Literal::Long(json.get<int64_t>());
  }
  if (json.is_number_float()) {
    return Literal::Double(json.get<double>());
  }
  if (json.is_string()) {
    // All strings are returned as String literals.
    // Conversion to binary/date/time/etc. happens during binding
    // when schema type information is available.
    return Literal::String(json.get<std::string>());
  }
  return JsonParseError("Unsupported literal JSON type");
}
295+
296+
/// Serializes a term by dispatching on its kind: references and transforms go
/// to their ToJson overloads; any other kind serializes as JSON null.
nlohmann::json TermToJson(const Term& term) {
  const auto kind = term.kind();
  if (kind == Term::Kind::kReference) {
    return ToJson(static_cast<const NamedReference&>(term));
  }
  if (kind == Term::Kind::kTransform) {
    return ToJson(static_cast<const UnboundTransform&>(term));
  }
  return nullptr;
}
306+
307+
/// Serializes an unbound predicate to a JSON object.
///
/// "type" always holds the operation. "term" is written when the predicate is
/// an UnboundPredicateImpl over BoundReference or BoundTransform. Unary
/// operations carry no operands; set operations write a "values" array (which
/// may be empty); all other operations write "value" from the first literal
/// when at least one literal is present.
nlohmann::json ToJson(const UnboundPredicate& pred) {
  nlohmann::json result;
  result[kType] = ToJson(pred.op());

  // The term and literals are only reachable through the concrete impl type,
  // so try each supported instantiation in turn.
  std::span<const Literal> operands;
  const auto absorb = [&result, &operands](const auto* impl) {
    if (impl == nullptr) {
      return false;
    }
    result[kTerm] = TermToJson(*impl->term());
    operands = impl->literals();
    return true;
  };
  if (!absorb(dynamic_cast<const UnboundPredicateImpl<BoundReference>*>(&pred))) {
    absorb(dynamic_cast<const UnboundPredicateImpl<BoundTransform>*>(&pred));
  }

  if (IsUnaryOperation(pred.op())) {
    return result;
  }
  if (IsSetOperation(pred.op())) {
    auto values = nlohmann::json::array();
    for (const auto& operand : operands) {
      values.push_back(ToJson(operand));
    }
    result[kValues] = std::move(values);
    return result;
  }
  if (!operands.empty()) {
    result[kValue] = ToJson(operands[0]);
  }
  return result;
}
336+
337+
/// Parses an unbound predicate from its JSON object form.
///
/// The object must carry a "type" field naming the operation and a "term"
/// field holding either a named reference or a transform term. Operand fields
/// ("value" / "values") are validated by MakePredicateFromJson.
///
/// \param json the JSON value to parse
/// \return the predicate, or a JsonParseError when the value is not an object,
///         when "type" or "term" is missing, or when any part fails to parse
Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
    const nlohmann::json& json) {
  // Const operator[] on a missing key is undefined behaviour in nlohmann::json
  // and throws on non-objects, so the shape is validated up front.
  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Predicate must be a JSON object: {}", SafeDumpJson(json));
  }
  if (!json.contains(kType)) [[unlikely]] {
    return JsonParseError("Missing 'type' field for predicate: {}",
                          SafeDumpJson(json));
  }
  if (!json.contains(kTerm)) [[unlikely]] {
    return JsonParseError("Missing 'term' field for predicate: {}",
                          SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));

  const auto& term_json = json[kTerm];

  if (IsTransformTerm(term_json)) {
    ICEBERG_ASSIGN_OR_RAISE(auto term, UnboundTransformFromJson(term_json));
    return MakePredicateFromJson<BoundTransform>(op, std::move(term), json);
  }

  ICEBERG_ASSIGN_OR_RAISE(auto term, NamedReferenceFromJson(term_json));
  return MakePredicateFromJson<BoundReference>(op, std::move(term), json);
}
351+
126352
/// Parses an expression from JSON.
///
/// Accepted forms:
///   - a JSON boolean, mapped to the True / False singleton;
///   - an object whose "type" names the operation: AND / OR require "left" and
///     "right" sub-expressions, NOT requires "child", and every other
///     operation is parsed as an unbound predicate.
///
/// \param json the JSON value to parse
/// \return the expression, or a JsonParseError when the value is neither a
///         boolean nor an object, when a required field is missing, or when a
///         nested part fails to parse
Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& json) {
  // Handle boolean constants
  if (json.is_boolean()) {
    return json.get<bool>()
               ? internal::checked_pointer_cast<Expression>(True::Instance())
               : internal::checked_pointer_cast<Expression>(False::Instance());
  }

  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Expression must be boolean or object");
  }

  // Const operator[] on a missing key is undefined behaviour in nlohmann::json,
  // so the presence of "type" is checked before it is read.
  if (!json.contains(kType)) [[unlikely]] {
    return JsonParseError("Expression missing 'type' field: {}", SafeDumpJson(json));
  }
  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));

  switch (op) {
    case Expression::Operation::kAnd: {
      if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] {
        return JsonParseError("AND expression missing 'left' or 'right' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft]));
      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, And::Make(std::move(left), std::move(right)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    case Expression::Operation::kOr: {
      if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] {
        return JsonParseError("OR expression missing 'left' or 'right' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft]));
      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, Or::Make(std::move(left), std::move(right)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    case Expression::Operation::kNot: {
      if (!json.contains(kChild)) [[unlikely]] {
        return JsonParseError("NOT expression missing 'child' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto child, ExpressionFromJson(json[kChild]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, Not::Make(std::move(child)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    default:
      // All other operations are predicates
      return UnboundPredicateFromJson(json);
  }
}
135398

136399
/// Serializes an expression to JSON.
///
/// True / False become JSON booleans. AND / OR become
/// {"type", "left", "right"} objects and NOT becomes {"type", "child"}, with
/// the operands serialized recursively. Every other operation is serialized as
/// an unbound predicate; because that path uses a reference dynamic_cast, an
/// expression that is not an UnboundPredicate raises std::bad_cast.
nlohmann::json ToJson(const Expression& expr) {
  // AND and OR share the same two-operand layout.
  const auto binary_to_json = [&expr](const auto& node) {
    nlohmann::json out;
    out[kType] = ToJson(expr.op());
    out[kLeft] = ToJson(*node.left());
    out[kRight] = ToJson(*node.right());
    return out;
  };

  switch (expr.op()) {
    case Expression::Operation::kTrue:
      return true;
    case Expression::Operation::kFalse:
      return false;
    case Expression::Operation::kAnd:
      return binary_to_json(static_cast<const And&>(expr));
    case Expression::Operation::kOr:
      return binary_to_json(static_cast<const Or&>(expr));
    case Expression::Operation::kNot: {
      const auto& negation = static_cast<const Not&>(expr);
      nlohmann::json out;
      out[kType] = ToJson(expr.op());
      out[kChild] = ToJson(*negation.child());
      return out;
    }
    default:
      return ToJson(dynamic_cast<const UnboundPredicate&>(expr));
  }
}
149432

0 commit comments

Comments
 (0)