Skip to content

Commit 3c7c03c

Browse files
committed
option to allow trailing characters while parsing
This adds an option `allowtrailing` to tolerate additional trailing characters in the buffer while parsing JSON. It is off by default, which keeps the parser strict: it tries to parse the entire buffer as JSON. When it is switched on, the parser reads one valid JSON value from the beginning of the buffer and ignores any additional characters that follow it, if present. This is useful in parsing scenarios where the buffer contains multiple JSON objects without a delimiter, e.g. `{"name": "value"}{"name": "value"}`, or a JSON value followed by other characters, e.g. `{"name": "value"} : this is...`. This also matches the pre-1.x behavior of this package.
1 parent f4fbb5a commit 3c7c03c

3 files changed

Lines changed: 21 additions & 3 deletions

File tree

src/lazy.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Currently supported keyword arguments include:
5959
- `inf::String = "Infinity"`: the string that will be used to parse `Inf` if `allownan=true`
6060
- `nan::String = "NaN"`: the string that will be used to parse `NaN` if `allownan=true`
6161
- `jsonlines::Bool = false`: whether the JSON input should be treated as an implicit array, with newlines separating individual JSON elements with no leading `'['` or trailing `']'` characters. Common in logging or streaming workflows. Defaults to `true` when used with `JSON.parsefile` and the filename extension is `.jsonl` or `.ndjson`. Note this ensures that parsing will _always_ return an array at the root-level.
62+
- `allowtrailing::Bool = false`: whether to tolerate trailing characters after parsing a valid JSON element
6263
6364
Note that validation is only fully done on `null`, `true`, and `false`,
6465
while other values are only lazily inferred from the first non-whitespace character:
@@ -80,6 +81,7 @@ function lazy end
8081
inf::String = "Infinity"
8182
nan::String = "NaN"
8283
jsonlines::Bool = false
84+
allowtrailing::Bool = false
8385
end
8486

8587
lazy(io::Union{IO, Base.AbstractCmd}; kw...) = lazy(Base.read(io); kw...)
@@ -163,6 +165,7 @@ getpos(x) = getfield(x, :pos)
163165
gettype(x) = getfield(x, :type)
164166
getopts(x) = getfield(x, :opts)
165167
getisroot(x) = getfield(x, :isroot)
168+
getallowtrailing(x) = getopts(x).allowtrailing
166169

167170
const LazyValues{T} = Union{LazyValue{T}, LazyObject{T}, LazyArray{T}}
168171

@@ -376,7 +379,7 @@ function applyarray(keyvalfunc, x::LazyValues)
376379
# for jsonlines, we need to make sure that recursive
377380
# lazy values *don't* consider individual lines *also*
378381
# to be jsonlines
379-
opts = LazyOptions(; allownan=opts.allownan, ninf=opts.ninf, inf=opts.inf, nan=opts.nan, jsonlines=false)
382+
opts = LazyOptions(; allownan=opts.allownan, ninf=opts.ninf, inf=opts.inf, nan=opts.nan, jsonlines=false, allowtrailing=opts.allowtrailing)
380383
end
381384
i = 1
382385
while true

src/parse.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ parse(x::LazyValue, ::Type{T}=Any; dicttype::Type{O}=DEFAULT_OBJECT_TYPE, null=n
191191

192192
function _parse(x::LazyValue, ::Type{T}, dicttype::Type{O}, null, style::StructStyle) where {T,O}
193193
y, pos = StructUtils.make(style, T, x)
194-
getisroot(x) && checkendpos(x, T, pos)
194+
getisroot(x) && !getallowtrailing(x) && checkendpos(x, T, pos)
195195
return y
196196
end
197197

@@ -205,7 +205,7 @@ end
205205
function _parse(x::LazyValue, ::Type{Any}, ::Type{DEFAULT_OBJECT_TYPE}, null, ::StructStyle)
206206
out = ValueClosure()
207207
pos = applyvalue(out, x, null)
208-
getisroot(x) && checkendpos(x, Any, pos)
208+
getisroot(x) && !getallowtrailing(x) && checkendpos(x, Any, pos)
209209
return out.value
210210
end
211211

test/parse.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,3 +771,18 @@ end
771771
@test_throws ArgumentError JSON.parse("{}", Tuple{Int, Int, Int})
772772
@test_throws ArgumentError JSON.parse("{\"a\":1,\"b\":2}", Tuple{Int, Int, Int})
773773
end
774+
775+
@testset "allowtrailing" begin
776+
# default behavior: trailing content causes an error
777+
@test_throws ArgumentError JSON.parse("{\"hello\": \"world\"} asdaa")
778+
@test_throws ArgumentError JSON.parse("[1,2,3] extra")
779+
@test_throws ArgumentError JSON.parse("123 {}")
780+
781+
# allowtrailing=true: trailing content is ignored
782+
@test JSON.parse("{\"hello\": \"world\"} asdaa", allowtrailing=true) == JSON.Object("hello" => "world")
783+
@test JSON.parse("[1,2,3] extra", allowtrailing=true) == Any[1, 2, 3]
784+
@test JSON.parse("123 {}", allowtrailing=true) == 123
785+
786+
# allowtrailing=true with typed parse
787+
@test JSON.parse("{\"a\": 1, \"b\": 2.0, \"c\": \"hi\"} trailing", D; allowtrailing=true) == D(1, 2.0, "hi")
788+
end

0 commit comments

Comments
 (0)