Skip to content

Commit ccdda2f

Browse files
authored
Merge pull request #108 from julia-vscode/sp/serialize-robustness
feat(symbolserver): increase (de)serialization robustness
2 parents 6fd45ab + 7269238 commit ccdda2f

10 files changed

Lines changed: 217 additions & 25 deletions

File tree

shared/symbolserver/faketypes.jl

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,10 @@ function FakeTypeName(@nospecialize(x))
4242
end
4343

4444
struct FakeTypeofBottom end
45+
# stands in for values the cache writer could not serialize (oversized tuples,
46+
# over-deep/cyclic subtrees)
47+
struct Unserializable end
48+
Base.show(io::IO, ::Unserializable) = print(io, "")
4549
struct FakeUnion
4650
a
4751
b

shared/symbolserver/serialize.jl

Lines changed: 68 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
module CacheStore
2-
using ..SymbolServer: VarRef, FakeTypeName, FakeTypeofBottom, FakeTypeVar, FakeUnion, FakeUnionAll
2+
using ..SymbolServer: VarRef, FakeTypeName, FakeTypeofBottom, FakeTypeVar, FakeUnion, FakeUnionAll, Unserializable
33
using ..SymbolServer: ModuleStore, Package, FunctionStore, MethodStore, DataTypeStore, GenericStore
44
@static if !(Vararg isa Type)
55
using ..SymbolServer: FakeTypeofVararg
@@ -27,6 +27,7 @@ const FalseHeader = 0x13
2727
const TupleHeader = 0x14
2828
const FakeTypeofVarargHeader = 0x15
2929
const UndefHeader = 0x16
30+
const UnserializableHeader = 0x17
3031

3132
# reserve 0x00-0xfe for type headers and indicate that this file is binary by not starting
3233
# with ASCII
@@ -50,6 +51,11 @@ function _check_len(io, n)
5051
end
5152

5253
const MAX_DEPTH = 256
54+
const MAX_TUPLE_LEN = 10_000
55+
# cutoff for degrading over-deep subtrees in Any-typed slots; the margin leaves
56+
# room for typed substructure below the last Any slot (VarRef chains etc.) so
57+
# written files always stay within the reader's MAX_DEPTH
58+
const MAX_ANY_DEPTH = MAX_DEPTH - 64
5359

5460
function write(io, x)
5561
_write_header(io)
@@ -61,6 +67,16 @@ function _write_header(io)
6167
Base.write(io, StoreVersion)
6268
end
6369

70+
# for values in Any-typed slots, which can absorb the sentinel: cut cycles and
71+
# over-deep nesting here instead of erroring out of the whole write
72+
function _write_any(io, x, depth::Int)
73+
if depth > MAX_ANY_DEPTH
74+
Base.write(io, UnserializableHeader)
75+
return
76+
end
77+
_write(io, x, depth)
78+
end
79+
6480
function _write(io, x::VarRef, depth::Int)
6581
depth > MAX_DEPTH && throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
6682
depth += 1
@@ -88,12 +104,16 @@ function _write(io, x::Symbol, depth::Int)
88104
Base.write(io, String(x))
89105
end
90106
function _write(io, x::NTuple{N,Any}, depth::Int) where N
107+
if N > MAX_TUPLE_LEN
108+
Base.write(io, UnserializableHeader)
109+
return
110+
end
91111
depth > MAX_DEPTH && throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
92112
depth += 1
93113
Base.write(io, TupleHeader)
94114
Base.write(io, N)
95115
for i = 1:N
96-
_write(io, x[i], depth)
116+
_write_any(io, x[i], depth)
97117
end
98118
end
99119
function _write(io, x::String, depth::Int)
@@ -114,31 +134,31 @@ function _write(io, x::FakeTypeVar, depth::Int)
114134
depth += 1
115135
Base.write(io, FakeTypeVarHeader)
116136
_write(io, x.name, depth)
117-
_write(io, x.lb, depth)
118-
_write(io, x.ub, depth)
137+
_write_any(io, x.lb, depth)
138+
_write_any(io, x.ub, depth)
119139
end
120140
function _write(io, x::FakeUnion, depth::Int)
121141
depth > MAX_DEPTH && throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
122142
depth += 1
123143
Base.write(io, FakeUnionHeader)
124-
_write(io, x.a, depth)
125-
_write(io, x.b, depth)
144+
_write_any(io, x.a, depth)
145+
_write_any(io, x.b, depth)
126146
end
127147
function _write(io, x::FakeUnionAll, depth::Int)
128148
depth > MAX_DEPTH && throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
129149
depth += 1
130150
Base.write(io, FakeUnionAllHeader)
131151
_write(io, x.var, depth)
132-
_write(io, x.body, depth)
152+
_write_any(io, x.body, depth)
133153
end
134154

135155
@static if !(Vararg isa Type)
136156
function _write(io, x::FakeTypeofVararg, depth::Int)
137157
depth > MAX_DEPTH && throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
138158
depth += 1
139159
Base.write(io, FakeTypeofVarargHeader)
140-
isdefined(x, :T) ? _write(io, x.T, depth) : Base.write(io, UndefHeader)
141-
isdefined(x, :N) ? _write(io, x.N, depth) : Base.write(io, UndefHeader)
160+
isdefined(x, :T) ? _write_any(io, x.T, depth) : Base.write(io, UndefHeader)
161+
isdefined(x, :N) ? _write_any(io, x.N, depth) : Base.write(io, UndefHeader)
142162
end
143163
end
144164

@@ -152,11 +172,11 @@ function _write(io, x::MethodStore, depth::Int)
152172
Base.write(io, x.line)
153173
Base.write(io, length(x.sig))
154174
for p in x.sig
155-
_write(io, p[1], depth)
156-
_write(io, p[2], depth)
175+
_write_any(io, p[1], depth)
176+
_write_any(io, p[2], depth)
157177
end
158178
_write_vector(io, x.kws, depth)
159-
_write(io, x.rt, depth)
179+
_write_any(io, x.rt, depth)
160180
end
161181

162182
function _write(io, x::FunctionStore, depth::Int)
@@ -189,7 +209,7 @@ function _write(io, x::GenericStore, depth::Int)
189209
depth += 1
190210
Base.write(io, GenericStoreHeader)
191211
_write(io, x.name, depth)
192-
_write(io, x.typ, depth)
212+
_write_any(io, x.typ, depth)
193213
_write(io, x.doc, depth)
194214
_write(io, x.exported, depth)
195215
end
@@ -200,9 +220,21 @@ function _write(io, x::ModuleStore, depth::Int)
200220
Base.write(io, ModuleStoreHeader)
201221
_write(io, x.name, depth)
202222
Base.write(io, length(x.vals))
223+
buf = IOBuffer()
203224
for p in x.vals
204225
_write(io, p[1], depth)
205-
_write(io, p[2], depth)
226+
# serialize the value out-of-band so one pathological binding (cycle,
227+
# unserializable type) degrades to a sentinel — which the reader drops —
228+
# instead of failing the whole cache file
229+
try
230+
_write_any(buf, p[2], depth)
231+
Base.write(io, take!(buf))
232+
catch err
233+
take!(buf)
234+
(err isa InterruptException || err isa OutOfMemoryError || err isa StackOverflowError) && rethrow()
235+
@debug "skipping unserializable module binding" key = p[1] exception = err
236+
Base.write(io, UnserializableHeader)
237+
end
206238
end
207239
_write(io, x.doc, depth)
208240
_write(io, x.exported, depth)
@@ -223,8 +255,11 @@ end
223255
function _write_vector(io, x, depth::Int)
224256
Base.write(io, length(x))
225257
depth += 1
258+
# only Any-typed elements may degrade to the sentinel; in typed vectors it
259+
# would come back as `nothing` and fail the read
260+
elwrite = eltype(x) === Any ? _write_any : _write
226261
for p in x
227-
_write(io, p, depth)
262+
elwrite(io, p, depth)
228263
end
229264
end
230265

@@ -237,10 +272,16 @@ function read(io)
237272
_read_header(io)
238273
return _read(io)
239274
catch err
240-
if err isa EOFError
275+
if err isa CacheCorruptedError || err isa InterruptException || err isa OutOfMemoryError || err isa StackOverflowError
276+
rethrow()
277+
elseif err isa EOFError
241278
throw(CacheCorruptedError("unexpected end of stream"))
279+
else
280+
# crafted tags can type-confuse strictly typed struct fields
281+
# (MethodError/TypeError), produce invalid code points, etc. —
282+
# all of it means the file is bad, not that the process is
283+
throw(CacheCorruptedError("malformed cache data: $(typeof(err))"))
242284
end
243-
rethrow()
244285
end
245286
end
246287

@@ -304,6 +345,8 @@ function _read(io, t = Base.read(io, UInt8), depth::Int = 0)
304345
end
305346
elseif t === UndefHeader
306347
nothing
348+
elseif t === UnserializableHeader
349+
Unserializable()
307350
elseif t === MethodStoreHeader
308351
yield()
309352
name = _read(io, Base.read(io, UInt8), depth)
@@ -357,8 +400,10 @@ function _read(io, t = Base.read(io, UInt8), depth::Int = 0)
357400
sizehint!(vals, n)
358401
for _ = 1:n
359402
k = _read(io, Base.read(io, UInt8), depth)
360-
v = _read(io, Base.read(io, UInt8), depth)
361-
vals[k] = v
403+
vt = Base.read(io, UInt8)
404+
# drop bindings the writer couldn't serialize
405+
vt === UnserializableHeader && continue
406+
vals[k] = _read(io, vt, depth)
362407
end
363408
doc = _read(io, Base.read(io, UInt8), depth)
364409
exported = _read(io, Base.read(io, UInt8), depth)
@@ -372,13 +417,16 @@ function _read(io, t = Base.read(io, UInt8), depth::Int = 0)
372417
elseif t === TupleHeader
373418
N = Base.read(io, Int)
374419
_check_len(io, N)
420+
N > MAX_TUPLE_LEN && throw(CacheCorruptedError("tuple length $N exceeds limit $MAX_TUPLE_LEN"))
375421
ntuple(i->_read(io, Base.read(io, UInt8), depth), N)
376422
elseif t === PackageHeader
377423
yield()
378424
name = _read(io, Base.read(io, UInt8), depth)
379425
val = _read(io, Base.read(io, UInt8), depth)
380426
uuid = Base.UUID(Base.read(io, UInt128))
381-
sha = Base.read(io, 32)
427+
# Base.read(io, n) returns short on EOF; read! errors instead
428+
sha = Vector{UInt8}(undef, 32)
429+
read!(io, sha)
382430
Package(name, val, uuid, all(x == 0x00 for x in sha) ? nothing : sha)
383431
else
384432
throw(CacheCorruptedError("unknown type tag: 0x$(string(t, base=16, pad=2))"))

shared/symbolserver/utils.jl

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -242,6 +242,9 @@ function _doc(binding::Base.Docs.Binding)
242242
end
243243
end
244244

245+
# fallback for type refs that can't name a store entry (FakeTypeVar,
246+
# FakeTypeofBottom, Unserializable, ...)
247+
_lookup(vr, depot::EnvStore, cont=false) = nothing
245248
_lookup(vr::FakeUnion, depot::EnvStore, cont=false) = nothing
246249
_lookup(vr::FakeTypeName, depot::EnvStore, cont=false) = _lookup(vr.name, depot, cont)
247250
_lookup(vr::FakeUnionAll, depot::EnvStore, cont=false) = _lookup(vr.body, depot, cont)

test/test_completions.jl

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
@testitem "Completions: latex completions" begin
2+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions
23
using JuliaWorkspaces.URIs2: URI
34

45
project_toml = """
@@ -51,6 +52,7 @@
5152
end
5253

5354
@testitem "Completions: keyword / snippet completions" begin
55+
using JuliaWorkspaces: JuliaWorkspaces, JuliaWorkspace, add_file!, TextFile, SourceText, get_completions, InsertFormats
5456
using JuliaWorkspaces.URIs2: URI
5557

5658
project_toml = """
@@ -101,6 +103,7 @@ end
101103
end
102104

103105
@testitem "Completions: getfield completions" begin
106+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions
104107
using JuliaWorkspaces.URIs2: URI
105108

106109
project_toml = """
@@ -148,6 +151,7 @@ end
148151
end
149152

150153
@testitem "Completions: getfield partial completions" begin
154+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions
151155
using JuliaWorkspaces.URIs2: URI
152156

153157
project_toml = """
@@ -195,6 +199,7 @@ end
195199
end
196200

197201
@testitem "Completions: token completions" begin
202+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions
198203
using JuliaWorkspaces.URIs2: URI
199204

200205
project_toml = """
@@ -242,6 +247,7 @@ end
242247
end
243248

244249
@testitem "Completions: scope variable completions" begin
250+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions
245251
using JuliaWorkspaces.URIs2: URI
246252

247253
project_toml = """
@@ -290,6 +296,7 @@ end
290296
end
291297

292298
@testitem "Completions: import completions" begin
299+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions, CompletionResult
293300
using JuliaWorkspaces.URIs2: URI
294301

295302
project_toml = """
@@ -339,6 +346,8 @@ end
339346
end
340347

341348
@testitem "Completions: is_completion_match" begin
349+
using JuliaWorkspaces: is_completion_match
350+
342351
# Test the exported fuzzy matching util
343352
@test is_completion_match("rand", "ran")
344353
@test is_completion_match("Base", "Bas")
@@ -351,6 +360,7 @@ end
351360
end
352361

353362
@testitem "Completions: empty result for empty file" begin
363+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions, CompletionResult
354364
using JuliaWorkspaces.URIs2: URI
355365

356366
project_toml = """
@@ -384,6 +394,7 @@ end
384394
end
385395

386396
@testitem "Completions: completion kinds" begin
397+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions, CompletionKinds
387398
using JuliaWorkspaces.URIs2: URI
388399

389400
project_toml = """
@@ -435,6 +446,7 @@ end
435446
end
436447

437448
@testitem "Completions: relative import completions" begin
449+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions
438450
using JuliaWorkspaces.URIs2: URI
439451

440452
project_toml = """
@@ -483,6 +495,7 @@ end
483495
end
484496

485497
@testitem "Completions: standalone file (no project)" begin
498+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions
486499
using JuliaWorkspaces.URIs2: URI
487500

488501
# No Project.toml or Manifest.toml — exercises _stdlib_only_env() path.
@@ -527,6 +540,7 @@ end
527540
end
528541

529542
@testitem "Completions: package without manifest (pre-DJP)" begin
543+
using JuliaWorkspaces: JuliaWorkspace, add_file!, TextFile, SourceText, get_completions
530544
using JuliaWorkspaces.URIs2: URI
531545

532546
# Project.toml present but no Manifest.toml — pre-DJP state.
@@ -578,6 +592,7 @@ end
578592
end
579593

580594
@testitem "Completions: unresolvable VarRef does not truncate module symbols" begin
595+
using JuliaWorkspaces: JuliaWorkspaces, SourceText
581596
using JuliaWorkspaces.URIs2: @uri_str
582597
const SS = JuliaWorkspaces.SymbolServer
583598
const SL = JuliaWorkspaces.StaticLint

0 commit comments

Comments
 (0)