Skip to content

Commit 2ddcf3c

Browse files
committed
add some fixes
1 parent cadf471 commit 2ddcf3c

File tree

3 files changed

+88
-137
lines changed

3 files changed

+88
-137
lines changed

src/iceberg/expression/json_serde.cc

Lines changed: 76 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,11 @@
1717
* under the License.
1818
*/
1919

20-
#include <ranges>
2120
#include <string>
2221
#include <vector>
2322

2423
#include <nlohmann/json.hpp>
2524

26-
#include "iceberg/expression/binder.h"
2725
#include "iceberg/expression/json_serde_internal.h"
2826
#include "iceberg/expression/literal.h"
2927
#include "iceberg/expression/predicate.h"
@@ -33,6 +31,7 @@
3331
#include "iceberg/util/checked_cast.h"
3432
#include "iceberg/util/json_util_internal.h"
3533
#include "iceberg/util/macros.h"
34+
#include "iceberg/util/string_util.h"
3635
#include "iceberg/util/transform_util.h"
3736

3837
namespace iceberg {
@@ -88,11 +87,26 @@ bool IsTransformTerm(const nlohmann::json& json) {
8887
json[kType].get<std::string>() == kTransform && json.contains(kTerm);
8988
}
9089

91-
/// Template helper to create predicates from JSON with the appropriate term type
90+
/// Template helper to create predicates from JSON with the appropriate term type.
9291
template <typename B>
9392
Result<std::unique_ptr<UnboundPredicate>> PredicateFromJson(
9493
Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
95-
const nlohmann::json& json) {
94+
const nlohmann::json& json, const Schema* schema) {
95+
// Bind the term against the schema so we can pass the resolved type to
96+
// LiteralFromJson for type-aware parsing.
97+
std::shared_ptr<B> bound_term;
98+
if (schema != nullptr) {
99+
ICEBERG_ASSIGN_OR_RAISE(bound_term, term->Bind(*schema, /*case_sensitive=*/false));
100+
}
101+
102+
// Helper that selects type-aware or naive literal parsing.
103+
auto parse_literal = [&](const nlohmann::json& val) -> Result<Literal> {
104+
if (bound_term != nullptr) {
105+
return LiteralFromJson(val, bound_term->type().get());
106+
}
107+
return LiteralFromJson(val);
108+
};
109+
96110
if (IsUnaryOperation(op)) {
97111
if (json.contains(kValue)) [[unlikely]] {
98112
return JsonParseError("Unary predicate has invalid 'value' field: {}",
@@ -115,7 +129,7 @@ Result<std::unique_ptr<UnboundPredicate>> PredicateFromJson(
115129
SafeDumpJson(json));
116130
}
117131
for (const auto& val : json[kValues]) {
118-
ICEBERG_ASSIGN_OR_RAISE(auto lit, LiteralFromJson(val));
132+
ICEBERG_ASSIGN_OR_RAISE(auto lit, parse_literal(val));
119133
literals.push_back(std::move(lit));
120134
}
121135
return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literals));
@@ -127,7 +141,7 @@ Result<std::unique_ptr<UnboundPredicate>> PredicateFromJson(
127141
"Literal predicate requires 'value' and must not include 'values': {}",
128142
SafeDumpJson(json));
129143
}
130-
ICEBERG_ASSIGN_OR_RAISE(auto literal, LiteralFromJson(json[kValue]));
144+
ICEBERG_ASSIGN_OR_RAISE(auto literal, parse_literal(json[kValue]));
131145
return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literal));
132146
}
133147
} // namespace
@@ -184,18 +198,30 @@ Result<Expression::Operation> OperationTypeFromJson(const nlohmann::json& json)
184198
if (typeStr == kMin) return Expression::Operation::kMin;
185199
if (typeStr == kMax) return Expression::Operation::kMax;
186200

187-
return JsonParseError("Unknown expression type: {}", typeStr);
201+
return JsonParseError("Unknown expression operation: '{}'", typeStr);
188202
}
189203

190204
nlohmann::json ToJson(Expression::Operation op) {
191205
std::string json(ToString(op));
192206
std::ranges::transform(json, json.begin(), [](unsigned char c) -> char {
193207
return (c == '_') ? '-' : static_cast<char>(std::tolower(c));
194208
});
195-
return nlohmann::json(std::move(json));
209+
return json;
210+
}
211+
212+
nlohmann::json ToJson(const NamedReference& ref) { return ref.name(); }
213+
214+
nlohmann::json ToJson(const UnboundTransform& transform) {
215+
auto& mut = const_cast<UnboundTransform&>(transform);
216+
return MakeTransformJson(transform.transform()->ToString(), mut.reference()->name());
196217
}
197218

198-
nlohmann::json ToJson(const NamedReference& ref) { return nlohmann::json(ref.name()); }
219+
nlohmann::json ToJson(const BoundReference& ref) { return ref.name(); }
220+
221+
nlohmann::json ToJson(const BoundTransform& transform) {
222+
auto& mut = const_cast<BoundTransform&>(transform);
223+
return MakeTransformJson(transform.transform()->ToString(), mut.reference()->name());
224+
}
199225

200226
Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
201227
const nlohmann::json& json) {
@@ -209,28 +235,16 @@ Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
209235
return NamedReference::Make(json.get<std::string>());
210236
}
211237

212-
nlohmann::json ToJson(const UnboundTransform& transform) {
213-
auto& mut = const_cast<UnboundTransform&>(transform);
214-
return MakeTransformJson(transform.transform()->ToString(), mut.reference()->name());
215-
}
216-
217-
nlohmann::json ToJson(const BoundReference& ref) { return nlohmann::json(ref.name()); }
218-
219-
nlohmann::json ToJson(const BoundTransform& transform) {
220-
auto& mut = const_cast<BoundTransform&>(transform);
221-
return MakeTransformJson(transform.transform()->ToString(), mut.reference()->name());
222-
}
223-
224238
Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson(
225239
const nlohmann::json& json) {
226-
if (IsTransformTerm(json)) {
227-
ICEBERG_ASSIGN_OR_RAISE(auto transform_str,
228-
GetJsonValue<std::string>(json, kTransform));
229-
ICEBERG_ASSIGN_OR_RAISE(auto transform, TransformFromString(transform_str));
230-
ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReferenceFromJson(json[kTerm]));
231-
return UnboundTransform::Make(std::move(ref), std::move(transform));
240+
if (!IsTransformTerm(json)) {
241+
return JsonParseError("Invalid unbound transform: {}", SafeDumpJson(json));
232242
}
233-
return JsonParseError("Invalid unbound transform json: {}", SafeDumpJson(json));
243+
ICEBERG_ASSIGN_OR_RAISE(auto transform_str,
244+
GetJsonValue<std::string>(json, kTransform));
245+
ICEBERG_ASSIGN_OR_RAISE(auto transform, TransformFromString(transform_str));
246+
ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReferenceFromJson(json[kTerm]));
247+
return UnboundTransform::Make(std::move(ref), std::move(transform));
234248
}
235249

236250
Result<nlohmann::json> ToJson(const Literal& literal) {
@@ -274,9 +288,8 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
274288
}
275289
return nlohmann::json(std::move(hex));
276290
}
277-
case TypeId::kDecimal: {
291+
case TypeId::kDecimal:
278292
return nlohmann::json(literal.ToString());
279-
}
280293
case TypeId::kUuid:
281294
return nlohmann::json(std::get<Uuid>(value).ToString());
282295
default:
@@ -285,6 +298,12 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
285298
}
286299
}
287300

301+
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* /*type*/) {
302+
// TODO(gangwu): implement type-aware literal parsing equivalent to Java's
303+
// SingleValueParser.fromJson(type, node).
304+
return LiteralFromJson(json);
305+
}
306+
288307
Result<Literal> LiteralFromJson(const nlohmann::json& json) {
289308
// Unwrap {"type": "literal", "value": <actual>} wrapper
290309
if (json.is_object() && json.contains(kType) &&
@@ -304,38 +323,34 @@ Result<Literal> LiteralFromJson(const nlohmann::json& json) {
304323
return Literal::Double(json.get<double>());
305324
}
306325
if (json.is_string()) {
307-
// All strings are returned as String literals.
308-
// Conversion to binary/date/time/etc. happens during binding
309-
// when schema type information is available.
310326
return Literal::String(json.get<std::string>());
311327
}
312-
return JsonParseError("Unsupported literal JSON type");
328+
return JsonParseError("Unsupported literal JSON: {}", SafeDumpJson(json));
313329
}
314330

315331
Result<nlohmann::json> ToJson(const Term& term) {
316332
switch (term.kind()) {
317333
case Term::Kind::kReference:
318-
if (term.is_unbound())
334+
if (term.is_unbound()) {
319335
return ToJson(internal::checked_cast<const NamedReference&>(term));
336+
}
320337
return ToJson(internal::checked_cast<const BoundReference&>(term));
321338
case Term::Kind::kTransform:
322-
if (term.is_unbound())
339+
if (term.is_unbound()) {
323340
return ToJson(internal::checked_cast<const UnboundTransform&>(term));
341+
}
324342
return ToJson(internal::checked_cast<const BoundTransform&>(term));
325343
default:
326-
return NotSupported(
327-
"Unsupported term kind for JSON serialization only reference and transform "
328-
"terms are supported");
344+
return NotSupported("Unsupported term for JSON serialization: {}", term.ToString());
329345
}
330346
}
331347

332348
Result<nlohmann::json> ToJson(const UnboundPredicate& pred) {
333349
nlohmann::json json;
334350
json[kType] = ToJson(pred.op());
335-
336351
ICEBERG_ASSIGN_OR_RAISE(json[kTerm], ToJson(pred.unbound_term()));
337-
std::span<const Literal> literals = pred.literals();
338352

353+
std::span<const Literal> literals = pred.literals();
339354
if (IsSetOperation(pred.op())) {
340355
nlohmann::json values = nlohmann::json::array();
341356
for (const auto& lit : literals) {
@@ -344,8 +359,9 @@ Result<nlohmann::json> ToJson(const UnboundPredicate& pred) {
344359
}
345360
json[kValues] = std::move(values);
346361
} else if (!literals.empty()) {
347-
ICEBERG_DCHECK(literals.size() == 1,
348-
"Expected exactly one literal for non-set predicate");
362+
ICEBERG_CHECK(literals.size() == 1,
363+
"Expected exactly one literal for non-set predicate but got {}: {}",
364+
literals.size(), pred.ToString());
349365
ICEBERG_ASSIGN_OR_RAISE(json[kValue], ToJson(literals[0]));
350366
}
351367
return json;
@@ -372,22 +388,21 @@ Result<nlohmann::json> ToJson(const BoundPredicate& pred) {
372388
}
373389

374390
Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
375-
const nlohmann::json& json) {
391+
const nlohmann::json& json, const Schema* schema) {
376392
if (!json.contains(kType) || !json.contains(kTerm)) [[unlikely]] {
377393
return JsonParseError("Invalid predicate JSON: missing 'type' or 'term' field : {}",
378394
SafeDumpJson(json));
379395
}
380396
ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));
381-
382397
const auto& term_json = json[kTerm];
383398

384399
if (IsTransformTerm(term_json)) {
385400
ICEBERG_ASSIGN_OR_RAISE(auto term, UnboundTransformFromJson(term_json));
386-
return PredicateFromJson<BoundTransform>(op, std::move(term), json);
401+
return PredicateFromJson<BoundTransform>(op, std::move(term), json, schema);
387402
}
388403

389404
ICEBERG_ASSIGN_OR_RAISE(auto term, NamedReferenceFromJson(term_json));
390-
return PredicateFromJson<BoundReference>(op, std::move(term), json);
405+
return PredicateFromJson<BoundReference>(op, std::move(term), json, schema);
391406
}
392407

393408
Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& json,
@@ -399,18 +414,17 @@ Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& jso
399414
: internal::checked_pointer_cast<Expression>(False::Instance());
400415
}
401416
if (json.is_string()) {
402-
auto s = json.get<std::string>();
403-
std::ranges::transform(s, s.begin(), [](unsigned char c) -> char {
404-
return static_cast<char>(std::tolower(c));
405-
});
406-
if (s == kTrue) return internal::checked_pointer_cast<Expression>(True::Instance());
407-
if (s == kFalse) return internal::checked_pointer_cast<Expression>(False::Instance());
417+
auto s = StringUtils::ToLower(json.get<std::string>());
418+
if (s == kTrue) return True::Instance();
419+
if (s == kFalse) return False::Instance();
420+
return JsonParseError("Unknown expression string constant: {}", s);
408421
}
409422

410423
if (!json.is_object() || !json.contains(kType)) [[unlikely]] {
411-
return JsonParseError("expression JSON must be an object with a 'type' field: {}",
424+
return JsonParseError("Expression JSON must be an object with a 'type' field: {}",
412425
SafeDumpJson(json));
413426
}
427+
414428
if (json[kType].get<std::string>() == kLiteral) {
415429
if (!json.contains(kValue) || !json[kValue].is_boolean()) [[unlikely]] {
416430
return JsonParseError(
@@ -457,13 +471,8 @@ Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& jso
457471
return NotSupported("Unsupported expression type for JSON deserialization: {}",
458472
ToString(op));
459473
}
460-
default: {
461-
ICEBERG_ASSIGN_OR_RAISE(auto pred, UnboundPredicateFromJson(json));
462-
if (schema != nullptr) {
463-
return pred->Bind(*schema, false);
464-
}
465-
return pred;
466-
}
474+
default:
475+
return UnboundPredicateFromJson(json, schema);
467476
}
468477
}
469478

@@ -497,10 +506,12 @@ Result<nlohmann::json> ToJson(const Expression& expr) {
497506
return json;
498507
}
499508
default:
500-
if (expr.is_unbound_predicate())
501-
return ToJson(internal::checked_cast<const UnboundPredicate&>(expr));
502-
if (expr.is_bound_predicate())
503-
return ToJson(internal::checked_cast<const BoundPredicate&>(expr));
509+
if (expr.is_unbound_predicate()) {
510+
return ToJson(dynamic_cast<const UnboundPredicate&>(expr));
511+
}
512+
if (expr.is_bound_predicate()) {
513+
return ToJson(dynamic_cast<const BoundPredicate&>(expr));
514+
}
504515
return NotSupported("Unsupported expression type for JSON serialization");
505516
}
506517
}

src/iceberg/expression/json_serde_internal.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,9 @@ ICEBERG_EXPORT nlohmann::json ToJson(Expression::Operation op);
4646

4747
/// \brief Deserializes a JSON object into an Expression.
4848
///
49-
/// When \p schema is nullptr the result is an unbound expression. When \p schema is
50-
/// provided the expression is bound against the schema before being returned.
51-
///
5249
/// \param json A JSON object representing an expression
5350
/// \param schema Optional schema used to bind field references and coerce literal
54-
/// types. When null, returns an unbound expression.
51+
/// types.
5552
/// \return A shared pointer to the deserialized Expression or an error
5653
ICEBERG_EXPORT Result<std::shared_ptr<Expression>> ExpressionFromJson(
5754
const nlohmann::json& json, const Schema* schema = nullptr);
@@ -100,6 +97,14 @@ ICEBERG_EXPORT Result<nlohmann::json> ToJson(const Literal& literal);
10097
/// \return The deserialized Literal or an error.
10198
ICEBERG_EXPORT Result<Literal> LiteralFromJson(const nlohmann::json& json);
10299

100+
/// \brief Deserializes a JSON value into a Literal with optional type context.
101+
///
102+
/// \param json A JSON value representing a literal.
103+
/// \param type Optional target type used to guide parsing.
104+
/// \return The deserialized Literal or an error.
105+
ICEBERG_EXPORT Result<Literal> LiteralFromJson(const nlohmann::json& json,
106+
const Type* type);
107+
103108
/// \brief Serializes an UnboundPredicate into its JSON representation.
104109
///
105110
/// \param pred The unbound predicate to serialize
@@ -118,9 +123,10 @@ ICEBERG_EXPORT Result<nlohmann::json> ToJson(const BoundPredicate& pred);
118123
/// \brief Deserializes a JSON object into an UnboundPredicate.
119124
///
120125
/// \param json A JSON object representing an unbound predicate
126+
/// \param schema Optional schema used to resolve literal types.
121127
/// \return A pointer to the deserialized UnboundPredicate or an error
122128
ICEBERG_EXPORT Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
123-
const nlohmann::json& json);
129+
const nlohmann::json& json, const Schema* schema = nullptr);
124130

125131
/// \brief Serializes a Term into its JSON representation.
126132
///

0 commit comments

Comments
 (0)