Skip to content

Commit 8a15c41

Browse files
committed
rewrite doc_end in c
1 parent e9b2694 commit 8a15c41

2 files changed

Lines changed: 199 additions & 80 deletions

File tree

mrblib/stream.rb

Lines changed: 0 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -83,73 +83,5 @@ def stream_socket(io, &block)
8383
end
8484
end
8585
end
86-
87-
# Find the offset just past the CBOR item that begins at +offset+ in +buf+.
#
# buf    - String holding the encoded bytes.
# offset - byte position of the item's header byte (default 0).
#
# Returns the Integer end offset, or nil when +buf+ ends before the item
# (or any item nested inside it) is complete.
# Raises TypeError when the low five bits of a header byte are 28..31.
def doc_end(buf, offset = 0)
  head = buf.getbyte(offset)
  return nil unless head

  major = head >> 5
  info = head & 0x1f
  offset += 1
  size = buf.bytesize

  # Decode the header argument: either the low five bits themselves or a
  # big-endian integer of 1, 2, 4 or 8 bytes following the header.
  case info
  when 24
    return nil if offset >= size
    arg = buf.getbyte(offset)
    offset += 1
  when 25
    return nil if offset + 2 > size
    arg = (buf.getbyte(offset) << 8) | buf.getbyte(offset + 1)
    offset += 2
  when 26
    return nil if offset + 4 > size
    arg = (buf.getbyte(offset) << 24) |
          (buf.getbyte(offset + 1) << 16) |
          (buf.getbyte(offset + 2) << 8) |
          buf.getbyte(offset + 3)
    offset += 4
  when 27
    return nil if offset + 8 > size
    arg = (buf.getbyte(offset) << 56) |
          (buf.getbyte(offset + 1) << 48) |
          (buf.getbyte(offset + 2) << 40) |
          (buf.getbyte(offset + 3) << 32) |
          (buf.getbyte(offset + 4) << 24) |
          (buf.getbyte(offset + 5) << 16) |
          (buf.getbyte(offset + 6) << 8) |
          buf.getbyte(offset + 7)
    offset += 8
  else
    raise TypeError, "payload at offset is not a number" unless info < 24
    arg = info
  end

  # Major types 0, 1 and 7 end right after the header and its argument.
  return offset if major == 0 || major == 1 || major == 7

  # Major type 6 wraps exactly one nested item.
  return doc_end(buf, offset) if major == 6

  # Major types 2 and 3 carry +arg+ payload bytes.
  if major == 2 || major == 3
    finish = offset + arg
    return finish > size ? nil : finish
  end

  # Major type 4 holds +arg+ nested items, major type 5 holds +arg+ pairs.
  remaining = major == 4 ? arg : arg * 2
  while remaining > 0
    offset = doc_end(buf, offset)
    return nil unless offset
    remaining -= 1
  end
  offset
end
15486
end
15587
end

src/mrb_cbor.c

Lines changed: 199 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,13 @@ static void encode_registered_tag(CborWriter *w, mrb_value obj, mrb_int tag
113113
static mrb_value decode_registered_tag(mrb_state *mrb, Reader *r, mrb_value src, mrb_value sharedrefs, mrb_value klass);
114114

115115
/* Safe pointer-diff -> mrb_int. Raises if negative or > MRB_INT_MAX. */
116-
static mrb_int
116+
static inline mrb_int
117117
cbor_pdiff(mrb_state *mrb, const uint8_t *p, const uint8_t *base)
118118
{
119-
ptrdiff_t d = p - base;
120-
if (likely(d >= 0 && d <= (ptrdiff_t)MRB_INT_MAX))
121-
return (mrb_int)d;
122-
mrb_raise(mrb, E_RANGE_ERROR, "CBOR offset out of range");
123-
return 0;
119+
mrb_int i = mrb_integer(mrb_to_int(mrb, mrb_convert_ptrdiff(mrb, p - base)));
120+
if (likely(i >= 0))
121+
return i;
122+
mrb_raise(mrb, E_RANGE_ERROR, "ptrdiff was negative");
124123
}
125124

126125
static uint8_t hex_nibble(uint8_t c)
@@ -516,7 +515,7 @@ decode_tagged_bignum(mrb_state* mrb, Reader* r, mrb_value src, mrb_value tag)
516515
r->p += len;
517516

518517
const uint8_t* buf = (const uint8_t*)RSTRING_PTR(src) + off;
519-
mrb_bool negative = (mrb_cmp(mrb, tag, mrb_fixnum_value(3)) == 0);
518+
const mrb_bool negative = (mrb_cmp(mrb, tag, mrb_fixnum_value(3)) == 0);
520519

521520
const uint8_t* bigbuf = buf;
522521

@@ -1666,7 +1665,7 @@ skip_cbor(mrb_state *mrb, Reader *r, mrb_value buf, mrb_value sharedrefs)
16661665

16671666
case 0: /* unsigned integer */
16681667
case 1: /* negative integer */
1669-
if (info >= 24) read_cbor_uint(mrb, r, info); /* consume extra bytes */
1668+
if (info >= 24) read_cbor_uint(mrb, r, info);
16701669
break;
16711670

16721671
case 2: /* byte string */
@@ -1689,16 +1688,18 @@ skip_cbor(mrb_state *mrb, Reader *r, mrb_value buf, mrb_value sharedrefs)
16891688
mrb_value len_v = read_cbor_uint(mrb, r, info);
16901689
mrb_int len = cbor_value_to_len(mrb, len_v);
16911690
for (mrb_int i = 0; i < len; i++) {
1692-
skip_cbor(mrb, r, buf, sharedrefs); /* key */
1693-
skip_cbor(mrb, r, buf, sharedrefs); /* value */
1691+
skip_cbor(mrb, r, buf, sharedrefs);
1692+
skip_cbor(mrb, r, buf, sharedrefs);
16941693
}
16951694
break;
16961695
}
16971696

16981697
case 6: {
16991698
mrb_value tag = read_cbor_uint(mrb, r, info);
17001699

1701-
if (mrb_cmp(mrb, tag, mrb_fixnum_value(28)) == 0) {
1700+
/* Only register Tag 28 if buf/sharedrefs provided (lazy mode) */
1701+
if (!mrb_undef_p(buf) && !mrb_undef_p(sharedrefs) &&
1702+
mrb_cmp(mrb, tag, mrb_fixnum_value(28)) == 0) {
17021703
if (likely(mrb_array_p(sharedrefs))) {
17031704
mrb_int inner_offset = cbor_pdiff(mrb, r->p, r->base);
17041705
mrb_value lazy = cbor_lazy_new(mrb, buf, inner_offset, sharedrefs);
@@ -1707,7 +1708,6 @@ skip_cbor(mrb_state *mrb, Reader *r, mrb_value buf, mrb_value sharedrefs)
17071708
mrb_raise(mrb, E_TYPE_ERROR, "sharedrefs is not a array");
17081709
}
17091710
}
1710-
/* tag 29: just skip the uint index, nothing to register */
17111711

17121712
skip_cbor(mrb, r, buf, sharedrefs);
17131713
break;
@@ -1736,6 +1736,192 @@ skip_cbor(mrb_state *mrb, Reader *r, mrb_value buf, mrb_value sharedrefs)
17361736
r->depth--;
17371737
}
17381738

1739+
1740+
/*
1741+
* Variant of skip logic for doc_end: returns false on incomplete buffer,
1742+
* no exception flow control, raises only for invalid CBOR.
1743+
*/
1744+
static mrb_bool
1745+
skip_cbor_try(mrb_state *mrb, Reader *r)
1746+
{
1747+
/* Early check: can't read header byte → incomplete */
1748+
if (unlikely(r->p >= r->end))
1749+
return FALSE;
1750+
1751+
if (unlikely(r->depth >= CBOR_MAX_DEPTH))
1752+
mrb_raise(mrb, E_RUNTIME_ERROR, "CBOR nesting depth exceeded");
1753+
r->depth++;
1754+
1755+
uint8_t b = *r->p++;
1756+
uint8_t major = b >> 5;
1757+
uint8_t info = b & 0x1F;
1758+
1759+
mrb_bool ok = TRUE;
1760+
1761+
switch (major) {
1762+
1763+
case 0: /* unsigned integer */
1764+
case 1: /* negative integer */
1765+
if (info >= 24) {
1766+
/* Check availability of length bytes before reading */
1767+
if (unlikely(info == 24 && r->p >= r->end)) ok = FALSE;
1768+
else if (unlikely(info == 25 && (r->end - r->p) < 2)) ok = FALSE;
1769+
else if (unlikely(info == 26 && (r->end - r->p) < 4)) ok = FALSE;
1770+
else if (unlikely(info == 27 && (r->end - r->p) < 8)) ok = FALSE;
1771+
else {
1772+
mrb_value len_v = read_cbor_uint(mrb, r, info);
1773+
if (unlikely(!mrb_integer_p(len_v))) ok = FALSE;
1774+
}
1775+
}
1776+
break;
1777+
1778+
case 2: /* byte string */
1779+
case 3: { /* text string */
1780+
if (unlikely(info == 24 && r->p >= r->end)) { ok = FALSE; break; }
1781+
if (unlikely(info == 25 && (r->end - r->p) < 2)) { ok = FALSE; break; }
1782+
if (unlikely(info == 26 && (r->end - r->p) < 4)) { ok = FALSE; break; }
1783+
if (unlikely(info == 27 && (r->end - r->p) < 8)) { ok = FALSE; break; }
1784+
1785+
mrb_value len_v = read_cbor_uint(mrb, r, info);
1786+
if (unlikely(!mrb_integer_p(len_v))) { ok = FALSE; break; }
1787+
1788+
mrb_int len = mrb_integer(len_v);
1789+
if (unlikely(len < 0)) { ok = FALSE; break; }
1790+
if (unlikely((mrb_int)(r->end - r->p) < len)) { ok = FALSE; break; }
1791+
1792+
r->p += len;
1793+
break;
1794+
}
1795+
1796+
case 4: { /* array */
1797+
if (unlikely(info == 24 && r->p >= r->end)) { ok = FALSE; break; }
1798+
if (unlikely(info == 25 && (r->end - r->p) < 2)) { ok = FALSE; break; }
1799+
if (unlikely(info == 26 && (r->end - r->p) < 4)) { ok = FALSE; break; }
1800+
if (unlikely(info == 27 && (r->end - r->p) < 8)) { ok = FALSE; break; }
1801+
1802+
mrb_value len_v = read_cbor_uint(mrb, r, info);
1803+
if (unlikely(!mrb_integer_p(len_v))) { ok = FALSE; break; }
1804+
1805+
mrb_int len = mrb_integer(len_v);
1806+
if (unlikely(len < 0)) { ok = FALSE; break; }
1807+
1808+
for (mrb_int i = 0; i < len; i++) {
1809+
if (!skip_cbor_try(mrb, r)) { ok = FALSE; break; }
1810+
}
1811+
break;
1812+
}
1813+
1814+
case 5: { /* map */
1815+
if (unlikely(info == 24 && r->p >= r->end)) { ok = FALSE; break; }
1816+
if (unlikely(info == 25 && (r->end - r->p) < 2)) { ok = FALSE; break; }
1817+
if (unlikely(info == 26 && (r->end - r->p) < 4)) { ok = FALSE; break; }
1818+
if (unlikely(info == 27 && (r->end - r->p) < 8)) { ok = FALSE; break; }
1819+
1820+
mrb_value len_v = read_cbor_uint(mrb, r, info);
1821+
if (unlikely(!mrb_integer_p(len_v))) { ok = FALSE; break; }
1822+
1823+
mrb_int len = mrb_integer(len_v);
1824+
if (unlikely(len < 0)) { ok = FALSE; break; }
1825+
1826+
for (mrb_int i = 0; i < len; i++) {
1827+
if (!skip_cbor_try(mrb, r)) { ok = FALSE; break; }
1828+
if (!skip_cbor_try(mrb, r)) { ok = FALSE; break; }
1829+
}
1830+
break;
1831+
}
1832+
1833+
case 6: {
1834+
if (unlikely(info == 24 && r->p >= r->end)) { ok = FALSE; break; }
1835+
if (unlikely(info == 25 && (r->end - r->p) < 2)) { ok = FALSE; break; }
1836+
if (unlikely(info == 26 && (r->end - r->p) < 4)) { ok = FALSE; break; }
1837+
if (unlikely(info == 27 && (r->end - r->p) < 8)) { ok = FALSE; break; }
1838+
1839+
mrb_value tag = read_cbor_uint(mrb, r, info);
1840+
if (unlikely(!mrb_integer_p(tag))) { ok = FALSE; break; }
1841+
1842+
if (!skip_cbor_try(mrb, r)) ok = FALSE;
1843+
break;
1844+
}
1845+
1846+
case 7: {
1847+
if (info < 24) break;
1848+
switch (info) {
1849+
case 24:
1850+
if (unlikely(r->p >= r->end)) ok = FALSE;
1851+
else r->p++;
1852+
break;
1853+
case 25:
1854+
if (unlikely((r->end - r->p) < 2)) ok = FALSE;
1855+
else r->p += 2;
1856+
break;
1857+
case 26:
1858+
if (unlikely((r->end - r->p) < 4)) ok = FALSE;
1859+
else r->p += 4;
1860+
break;
1861+
case 27:
1862+
if (unlikely((r->end - r->p) < 8)) ok = FALSE;
1863+
else r->p += 8;
1864+
break;
1865+
case 31:
1866+
mrb_raise(mrb, E_NOTIMP_ERROR, "indefinite-length items not supported");
1867+
ok = FALSE;
1868+
break;
1869+
default:
1870+
break;
1871+
}
1872+
break;
1873+
}
1874+
1875+
default:
1876+
mrb_raisef(mrb, E_NOTIMP_ERROR, "Not implemented CBOR major type '%d'", major);
1877+
ok = FALSE;
1878+
}
1879+
1880+
r->depth--;
1881+
return ok;
1882+
}
1883+
1884+
/*
1885+
* Find the byte offset of the end of a complete CBOR document.
1886+
* Returns mrb_fixnum or mrb_nil_value() if incomplete.
1887+
*/
1888+
static mrb_value
1889+
cbor_doc_end(mrb_state *mrb, const uint8_t *buf, size_t buf_len, mrb_int offset)
1890+
{
1891+
if (unlikely(offset < 0 || (size_t)offset >= buf_len))
1892+
return mrb_nil_value();
1893+
1894+
Reader r;
1895+
r.base = buf;
1896+
r.p = buf + offset;
1897+
r.end = buf + buf_len;
1898+
r.depth = 0;
1899+
1900+
if (unlikely(r.p >= r.end))
1901+
return mrb_nil_value();
1902+
1903+
if (!skip_cbor_try(mrb, &r))
1904+
return mrb_nil_value();
1905+
1906+
return mrb_convert_ptrdiff(mrb, (r.p - buf));
1907+
}
1908+
1909+
/* Ruby binding for doc_end */
1910+
static mrb_value
1911+
mrb_cbor_doc_end(mrb_state *mrb, mrb_value self)
1912+
{
1913+
mrb_value buf;
1914+
mrb_int offset = 0;
1915+
(void)self;
1916+
1917+
mrb_get_args(mrb, "S|i", &buf, &offset);
1918+
1919+
const uint8_t *data = (const uint8_t *)RSTRING_PTR(buf);
1920+
size_t len = (size_t)RSTRING_LEN(buf);
1921+
1922+
return cbor_doc_end(mrb, data, len, offset);
1923+
}
1924+
17391925
/* Lazy#value */
17401926
MRB_API mrb_value
17411927
cbor_lazy_value(mrb_state *mrb, mrb_value self)
@@ -2124,6 +2310,7 @@ mrb_mruby_cbor_gem_init(mrb_state* mrb)
21242310
mrb_define_module_function_id(mrb, cbor, MRB_SYM(decode), mrb_cbor_decode, MRB_ARGS_REQ(1));
21252311
mrb_define_module_function_id(mrb, cbor, MRB_SYM(register_tag), mrb_cbor_register_tag, MRB_ARGS_REQ(2));
21262312
mrb_define_module_function_id(mrb, cbor, MRB_SYM(encode), mrb_cbor_encode, MRB_ARGS_REQ(1)|MRB_ARGS_KEY(0, 1));
2313+
mrb_define_module_function_id(mrb, cbor, MRB_SYM(doc_end), mrb_cbor_doc_end, MRB_ARGS_ARG(1, 1));
21272314

21282315
struct RClass *lazy = mrb_define_class_under_id(mrb, cbor, MRB_SYM(Lazy), mrb->object_class);
21292316
MRB_SET_INSTANCE_TT(lazy, MRB_TT_CDATA);

0 commit comments

Comments
 (0)