11module CacheStore
2- using .. SymbolServer: VarRef, FakeTypeName, FakeTypeofBottom, FakeTypeVar, FakeUnion, FakeUnionAll
2+ using .. SymbolServer: VarRef, FakeTypeName, FakeTypeofBottom, FakeTypeVar, FakeUnion, FakeUnionAll, Unserializable
33using .. SymbolServer: ModuleStore, Package, FunctionStore, MethodStore, DataTypeStore, GenericStore
44@static if ! (Vararg isa Type)
55 using .. SymbolServer: FakeTypeofVararg
@@ -27,6 +27,7 @@ const FalseHeader = 0x13
2727const TupleHeader = 0x14
2828const FakeTypeofVarargHeader = 0x15
2929const UndefHeader = 0x16
30+ const UnserializableHeader = 0x17
3031
3132# reserve 0x00-0xfe for type headers and indicate that this file is binary by not starting
3233# with ASCII
@@ -50,6 +51,11 @@ function _check_len(io, n)
5051end
5152
5253const MAX_DEPTH = 256
54+ const MAX_TUPLE_LEN = 10_000
55+ # cutoff for degrading over-deep subtrees in Any-typed slots; the margin leaves
56+ # room for typed substructure below the last Any slot (VarRef chains etc.) so
57+ # written files always stay within the reader's MAX_DEPTH
58+ const MAX_ANY_DEPTH = MAX_DEPTH - 64
5359
5460function write (io, x)
5561 _write_header (io)
@@ -61,6 +67,16 @@ function _write_header(io)
6167 Base. write (io, StoreVersion)
6268end
6369
# Writer for values stored in Any-typed slots. Once `depth` has passed
# MAX_ANY_DEPTH the value is not descended into at all: a single
# UnserializableHeader byte is emitted in its place, so cycles and over-deep
# nesting degrade to a sentinel instead of aborting the whole write. Otherwise
# the value is handed to the regular `_write` dispatch unchanged.
function _write_any(io, x, depth::Int)
    depth <= MAX_ANY_DEPTH && return _write(io, x, depth)
    Base.write(io, UnserializableHeader)
    return
end
79+
6480function _write (io, x:: VarRef , depth:: Int )
6581 depth > MAX_DEPTH && throw (ArgumentError (" serialization depth limit exceeded — possible cycle in $(typeof (x)) " ))
6682 depth += 1
@@ -88,12 +104,16 @@ function _write(io, x::Symbol, depth::Int)
88104 Base. write (io, String (x))
89105end
# Serialize a tuple: TupleHeader, the element count `N`, then every element in
# order. Elements go through `_write_any`, so an over-deep element is replaced
# by the sentinel rather than failing the write.
#
# A tuple with more than MAX_TUPLE_LEN elements is not written at all; a single
# UnserializableHeader byte takes its place.
#
# Throws `ArgumentError` when `depth` already exceeds MAX_DEPTH.
function _write(io, x::NTuple{N,Any}, depth::Int) where N
    if N > MAX_TUPLE_LEN
        Base.write(io, UnserializableHeader)
        return
    end
    if depth > MAX_DEPTH
        throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
    end
    child_depth = depth + 1
    Base.write(io, TupleHeader)
    Base.write(io, N)
    for element in x
        _write_any(io, element, child_depth)
    end
    return
end
99119function _write (io, x:: String , depth:: Int )
@@ -114,31 +134,31 @@ function _write(io, x::FakeTypeVar, depth::Int)
114134 depth += 1
115135 Base. write (io, FakeTypeVarHeader)
116136 _write (io, x. name, depth)
117- _write (io, x. lb, depth)
118- _write (io, x. ub, depth)
137+ _write_any (io, x. lb, depth)
138+ _write_any (io, x. ub, depth)
119139end
# Serialize a FakeUnion: FakeUnionHeader followed by its two members `a` and
# `b`, each written through `_write_any` one nesting level deeper.
#
# Throws `ArgumentError` when `depth` already exceeds MAX_DEPTH.
function _write(io, x::FakeUnion, depth::Int)
    if depth > MAX_DEPTH
        throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
    end
    member_depth = depth + 1
    Base.write(io, FakeUnionHeader)
    _write_any(io, x.a, member_depth)
    return _write_any(io, x.b, member_depth)
end
# Serialize a FakeUnionAll: FakeUnionAllHeader, then the type variable `var`
# via the regular `_write` dispatch, then `body` via `_write_any` (so only the
# body may degrade to the sentinel).
#
# Throws `ArgumentError` when `depth` already exceeds MAX_DEPTH.
function _write(io, x::FakeUnionAll, depth::Int)
    if depth > MAX_DEPTH
        throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
    end
    inner_depth = depth + 1
    Base.write(io, FakeUnionAllHeader)
    _write(io, x.var, inner_depth)
    return _write_any(io, x.body, inner_depth)
end
134154
@static if !(Vararg isa Type)
    # Serialize a FakeTypeofVararg: FakeTypeofVarargHeader followed by the `T`
    # and `N` fields. A field that is not defined is written as a single
    # UndefHeader byte; a defined one goes through `_write_any`.
    #
    # Throws `ArgumentError` when `depth` already exceeds MAX_DEPTH.
    function _write(io, x::FakeTypeofVararg, depth::Int)
        if depth > MAX_DEPTH
            throw(ArgumentError("serialization depth limit exceeded — possible cycle in $(typeof(x))"))
        end
        field_depth = depth + 1
        Base.write(io, FakeTypeofVarargHeader)
        if isdefined(x, :T)
            _write_any(io, x.T, field_depth)
        else
            Base.write(io, UndefHeader)
        end
        if isdefined(x, :N)
            _write_any(io, x.N, field_depth)
        else
            Base.write(io, UndefHeader)
        end
    end
end
144164
@@ -152,11 +172,11 @@ function _write(io, x::MethodStore, depth::Int)
152172 Base. write (io, x. line)
153173 Base. write (io, length (x. sig))
154174 for p in x. sig
155- _write (io, p[1 ], depth)
156- _write (io, p[2 ], depth)
175+ _write_any (io, p[1 ], depth)
176+ _write_any (io, p[2 ], depth)
157177 end
158178 _write_vector (io, x. kws, depth)
159- _write (io, x. rt, depth)
179+ _write_any (io, x. rt, depth)
160180end
161181
162182function _write (io, x:: FunctionStore , depth:: Int )
@@ -189,7 +209,7 @@ function _write(io, x::GenericStore, depth::Int)
189209 depth += 1
190210 Base. write (io, GenericStoreHeader)
191211 _write (io, x. name, depth)
192- _write (io, x. typ, depth)
212+ _write_any (io, x. typ, depth)
193213 _write (io, x. doc, depth)
194214 _write (io, x. exported, depth)
195215end
@@ -200,9 +220,21 @@ function _write(io, x::ModuleStore, depth::Int)
200220 Base. write (io, ModuleStoreHeader)
201221 _write (io, x. name, depth)
202222 Base. write (io, length (x. vals))
223+ buf = IOBuffer ()
203224 for p in x. vals
204225 _write (io, p[1 ], depth)
205- _write (io, p[2 ], depth)
226+ # serialize the value out-of-band so one pathological binding (cycle,
227+ # unserializable type) degrades to a sentinel — which the reader drops —
228+ # instead of failing the whole cache file
229+ try
230+ _write_any (buf, p[2 ], depth)
231+ Base. write (io, take! (buf))
232+ catch err
233+ take! (buf)
234+ (err isa InterruptException || err isa OutOfMemoryError || err isa StackOverflowError) && rethrow ()
235+ @debug " skipping unserializable module binding" key = p[1 ] exception = err
236+ Base. write (io, UnserializableHeader)
237+ end
206238 end
207239 _write (io, x. doc, depth)
208240 _write (io, x. exported, depth)
# Write a length-prefixed sequence: `length(x)` first, then every element one
# nesting level deeper.
#
# Only collections whose element type is exactly `Any` write their elements
# through `_write_any`, which may substitute the sentinel. Typed collections
# always use the strict `_write` — presumably because the reader could not
# store a sentinel in a typed vector (confirm against the reader).
function _write_vector(io, x, depth::Int)
    Base.write(io, length(x))
    element_depth = depth + 1
    if eltype(x) === Any
        for item in x
            _write_any(io, item, element_depth)
        end
    else
        for item in x
            _write(io, item, element_depth)
        end
    end
    return
end
230265
@@ -237,10 +272,16 @@ function read(io)
237272 _read_header (io)
238273 return _read (io)
239274 catch err
240- if err isa EOFError
275+ if err isa CacheCorruptedError || err isa InterruptException || err isa OutOfMemoryError || err isa StackOverflowError
276+ rethrow ()
277+ elseif err isa EOFError
241278 throw (CacheCorruptedError (" unexpected end of stream" ))
279+ else
280+ # crafted tags can type-confuse strictly typed struct fields
281+ # (MethodError/TypeError), produce invalid code points, etc. —
282+ # all of it means the file is bad, not that the process is
283+ throw (CacheCorruptedError (" malformed cache data: $(typeof (err)) " ))
242284 end
243- rethrow ()
244285 end
245286end
246287
@@ -304,6 +345,8 @@ function _read(io, t = Base.read(io, UInt8), depth::Int = 0)
304345 end
305346 elseif t === UndefHeader
306347 nothing
348+ elseif t === UnserializableHeader
349+ Unserializable ()
307350 elseif t === MethodStoreHeader
308351 yield ()
309352 name = _read (io, Base. read (io, UInt8), depth)
@@ -357,8 +400,10 @@ function _read(io, t = Base.read(io, UInt8), depth::Int = 0)
357400 sizehint! (vals, n)
358401 for _ = 1 : n
359402 k = _read (io, Base. read (io, UInt8), depth)
360- v = _read (io, Base. read (io, UInt8), depth)
361- vals[k] = v
403+ vt = Base. read (io, UInt8)
404+ # drop bindings the writer couldn't serialize
405+ vt === UnserializableHeader && continue
406+ vals[k] = _read (io, vt, depth)
362407 end
363408 doc = _read (io, Base. read (io, UInt8), depth)
364409 exported = _read (io, Base. read (io, UInt8), depth)
@@ -372,13 +417,16 @@ function _read(io, t = Base.read(io, UInt8), depth::Int = 0)
372417 elseif t === TupleHeader
373418 N = Base. read (io, Int)
374419 _check_len (io, N)
420+ N > MAX_TUPLE_LEN && throw (CacheCorruptedError (" tuple length $N exceeds limit $MAX_TUPLE_LEN " ))
375421 ntuple (i-> _read (io, Base. read (io, UInt8), depth), N)
376422 elseif t === PackageHeader
377423 yield ()
378424 name = _read (io, Base. read (io, UInt8), depth)
379425 val = _read (io, Base. read (io, UInt8), depth)
380426 uuid = Base. UUID (Base. read (io, UInt128))
381- sha = Base. read (io, 32 )
427+ # Base.read(io, n) returns short on EOF; read! errors instead
428+ sha = Vector {UInt8} (undef, 32 )
429+ read! (io, sha)
382430 Package (name, val, uuid, all (x == 0x00 for x in sha) ? nothing : sha)
383431 else
384432 throw (CacheCorruptedError (" unknown type tag: 0x$(string (t, base= 16 , pad= 2 )) " ))
0 commit comments