Skip to content

Commit 3a3879e

Browse files
committed
create inspect method for lazy values
1 parent 7b4ac8e commit 3a3879e

2 files changed

Lines changed: 96 additions & 16 deletions

File tree

src/mrb_cbor.c

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3012,9 +3012,6 @@ path_walk_wildcards(mrb_state *mrb, mrb_value node,
30123012
mrb_value resolved;
30133013
uint8_t major;
30143014

3015-
/* Chase sharedref chain until we land on a concrete array header.
3016-
* lazy_resolve_tags handles Tag 28 registration and Tag 29 resolution;
3017-
* we just need to re-seat `node` on each sharedref target and retry. */
30183015
for (;;) {
30193016
cbor_lazy_t *p = mrb_data_get_ptr(mrb, node, &cbor_lazy_type);
30203017
Reader r;
@@ -3024,28 +3021,36 @@ path_walk_wildcards(mrb_state *mrb, mrb_value node,
30243021
mrb_value sharedrefs = mrb_iv_get(mrb, node, MRB_SYM(sharedrefs));
30253022
mrb_assert(mrb_array_p(sharedrefs));
30263023

3027-
mrb_int sharedrefs_before = RARRAY_LEN(sharedrefs);
30283024
major = lazy_resolve_tags(mrb, &r, p->buf, sharedrefs, &resolved);
30293025

30303026
if (major == 4) {
30313027
mrb_int len = cbor_len_to_mrb_int(mrb, read_cbor_uint(mrb, &r, r.info));
30323028
mrb_int nseg = RARRAY_LEN(segments);
30333029
mrb_value next_steps = mrb_ary_ref(mrb, segments, depth + 1);
30343030
mrb_bool is_leaf = (depth == nseg - 2);
3031+
mrb_value kcache = mrb_iv_get(mrb, node, MRB_SYM(kcache));
3032+
mrb_value results = mrb_ary_new_capa(mrb, len);
3033+
int arena = mrb_gc_arena_save(mrb);
30353034

3036-
/* If lazy_resolve_tags consumed a Tag28 and registered the inner Lazy,
3037-
* use that Lazy (which starts at the array header, not the Tag28 byte)
3038-
* for cbor_lazy_aref calls. Without this, each call re-reads the Tag28
3039-
* and pushes a duplicate into sharedrefs, corrupting Tag29 resolution. */
3040-
mrb_value work_node = (RARRAY_LEN(sharedrefs) > sharedrefs_before)
3041-
? mrb_ary_ref(mrb, sharedrefs, RARRAY_LEN(sharedrefs) - 1)
3042-
: node;
3043-
3044-
mrb_value results = mrb_ary_new_capa(mrb, len);
3045-
mrb_int arena = mrb_gc_arena_save(mrb);
3035+
/* All elements cached iff kcache has at least len entries.
3036+
* In that case r.p tracking and skip_cbor are not needed. */
3037+
mrb_bool fully_cached = (mrb_hash_size(mrb, kcache) >= len);
30463038

30473039
for (mrb_int i = 0; i < len; i++) {
3048-
mrb_value elem = cbor_lazy_aref(mrb, work_node, mrb_convert_mrb_int(mrb, i));
3040+
mrb_value idx_v = mrb_convert_mrb_int(mrb, i);
3041+
mrb_value elem = mrb_hash_fetch(mrb, kcache, idx_v, mrb_undef_value());
3042+
3043+
if (mrb_undef_p(elem)) {
3044+
/* Cache miss — r.p is at element i, create lazy and advance. */
3045+
mrb_int elem_offset = cbor_pdiff(mrb, r.p, r.base);
3046+
elem = cbor_lazy_new(mrb, p->buf, elem_offset, sharedrefs);
3047+
mrb_hash_set(mrb, kcache, idx_v, elem);
3048+
}
3049+
3050+
if (!fully_cached) {
3051+
skip_cbor(mrb, &r, p->buf, sharedrefs);
3052+
}
3053+
30493054
mrb_value next = path_walk_steps(mrb, elem, next_steps);
30503055
mrb_value val = is_leaf
30513056
? cbor_lazy_value(mrb, next)
@@ -3058,7 +3063,6 @@ path_walk_wildcards(mrb_state *mrb, mrb_value node,
30583063
}
30593064

30603065
if (major == 0xFF) {
3061-
/* Tag 29 resolved — re-seat on target and loop. */
30623066
if (!mrb_data_check_get_ptr(mrb, resolved, &cbor_lazy_type))
30633067
mrb_raise(mrb, E_TYPE_ERROR,
30643068
"CBOR::Path [*]: sharedref target is not indexable");
@@ -3095,6 +3099,16 @@ mrb_cbor_path_at(mrb_state *mrb, mrb_value self)
30953099
: path_walk_wildcards(mrb, node, segments, 0);
30963100
}
30973101

3102+
/* Implementation of `inspect` for lazy CBOR values (registered on the lazy
 * class in the gem init function). Returns whatever CBOR.diagnose returns
 * for the bytes this lazy value points at. */
static mrb_value
3103+
cbor_lazy_inspect(mrb_state *mrb, mrb_value self)
3104+
{
3105+
cbor_lazy_t *p = mrb_data_get_ptr(mrb, self, &cbor_lazy_type);
3106+
/* The slice starts at this value's offset and runs to the END of the
 * backing buffer, not just to the end of this value.
 * NOTE(review): presumably CBOR.diagnose only renders the first item and
 * ignores trailing bytes -- confirm, otherwise siblings leak into output. */
mrb_value sub = mrb_str_byte_subseq(mrb, p->buf, p->offset,
3107+
RSTRING_LEN(p->buf) - p->offset);
3108+
/* Dispatch to the Ruby-visible CBOR.diagnose with the slice as sole arg. */
return mrb_funcall_argv(mrb, mrb_obj_value(mrb_module_get_id(mrb, MRB_SYM(CBOR))),
3109+
MRB_SYM(diagnose), 1, &sub);
3110+
}
3111+
30983112
MRB_BEGIN_DECL
30993113

31003114
MRB_API mrb_value
@@ -3209,6 +3223,7 @@ mrb_mruby_cbor_gem_init(mrb_state* mrb)
32093223
mrb_define_method_id(mrb, lazy, MRB_OPSYM(aref), cbor_lazy_aref_m, MRB_ARGS_REQ(1));
32103224
mrb_define_method_id(mrb, lazy, MRB_SYM(value), cbor_lazy_value, MRB_ARGS_NONE());
32113225
mrb_define_method_id(mrb, lazy, MRB_SYM(dig), cbor_lazy_dig, MRB_ARGS_ANY());
3226+
mrb_define_method_id(mrb, lazy, MRB_SYM(inspect),cbor_lazy_inspect,MRB_ARGS_NONE());
32123227

32133228
struct RClass *path =
32143229
mrb_define_class_under_id(mrb, cbor, MRB_SYM(Path), mrb->object_class);

test/test.rb

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4397,3 +4397,68 @@ class MutualB
43974397
assert_equal [1, 2], lazy["ref"].value
43984398
end
43994399

4400+
# After a wildcard path walk, chained [] access, [] on a held "items" handle
# and dig must all hand back the identical object (assert_same) for each
# element's "name".
assert('CBOR::Path wildcard + direct aref share same element lazies') do
4401+
docs = (0...5).map { |i| { "id" => i, "name" => "item#{i}" } }
4402+
buf = CBOR.encode({ "items" => docs })
4403+
path = CBOR::Path.compile("$.items[*].name")
4404+
lazy = CBOR.decode_lazy(buf)
4405+
4406+
# Run the wildcard walk first so it can populate kcache for each element;
# its return value is intentionally discarded here.
4407+
path.at(lazy)
4408+
4409+
# Every access route below must return the same Lazy object (cache hit).
4410+
items = lazy["items"]
4411+
(0...5).each do |i|
4412+
# NOTE(review): despite its name, via_path_elem is fetched by chained []
# on a fresh lazy["items"] lookup, not taken from path.at's result.
via_path_elem = lazy["items"][i]["name"]
4413+
via_aref_elem = items[i]["name"]
4414+
via_dig_elem = lazy.dig("items", i, "name")
4415+
assert_same via_path_elem, via_aref_elem
4416+
assert_same via_aref_elem, via_dig_elem
4417+
end
4418+
end
4419+
4420+
# Values materialized by a wildcard path walk must be the very same Ruby
# objects (assert_same) that chained [] + .value returns afterwards.
assert('CBOR::Path wildcard + .value cache consistent across apis') do
4421+
docs = (0...5).map { |i| { "id" => i, "name" => "item#{i}" } }
4422+
buf = CBOR.encode({ "items" => docs })
4423+
path = CBOR::Path.compile("$.items[*].name")
4424+
lazy = CBOR.decode_lazy(buf)
4425+
4426+
# Materialize via path first.
# names_via_path is indexed by i below, so it is presumably an Array with
# one entry per wildcard match, in element order -- confirm.
4427+
names_via_path = path.at(lazy)
4428+
4429+
# Then via direct aref + value -- must be same Ruby objects (vcache hit).
4430+
(0...5).each do |i|
4431+
via_aref = lazy["items"][i]["name"].value
4432+
assert_same names_via_path[i], via_aref
4433+
end
4434+
end
4435+
4436+
# Reverse order of the wildcard-first tests: warm the cache through chained
# [] access, run the wildcard walk, then check that [] still returns the
# objects obtained before the walk.
assert('direct aref first, then path.at reuses populated kcache') do
4437+
docs = (0...5).map { |i| { "id" => i, "name" => "item#{i}" } }
4438+
buf = CBOR.encode({ "items" => docs })
4439+
path = CBOR::Path.compile("$.items[*].name")
4440+
lazy = CBOR.decode_lazy(buf)
4441+
4442+
# Warm kcache via direct aref first; keep the returned objects for the
# identity comparison below.
4443+
pre = (0...5).map { |i| lazy["items"][i]["name"] }
4444+
4445+
# path.at must hit the existing cache rather than replace its entries;
# its own return value is not inspected by this test.
4446+
path.at(lazy)
4447+
(0...5).each do |i|
4448+
assert_same pre[i], lazy["items"][i]["name"]
4449+
end
4450+
end
4451+
4452+
# Same check as warming the cache by chained [] access, but the cache is
# warmed and re-read through dig instead.
assert('dig first, then path.at reuses populated kcache') do
4453+
docs = (0...5).map { |i| { "id" => i, "name" => "item#{i}" } }
4454+
buf = CBOR.encode({ "items" => docs })
4455+
path = CBOR::Path.compile("$.items[*].name")
4456+
lazy = CBOR.decode_lazy(buf)
4457+
4458+
# Objects returned by dig before the wildcard walk runs.
pre = (0...5).map { |i| lazy.dig("items", i, "name") }
4459+
4460+
# The walk's return value is discarded; only its effect on later dig
# results is checked (they must be the identical objects held in pre).
path.at(lazy)
4461+
(0...5).each do |i|
4462+
assert_same pre[i], lazy.dig("items", i, "name")
4463+
end
4464+
end

0 commit comments

Comments
 (0)