Skip to content

Commit 12e83a4

Browse files
authored
Add pickle package for generating exploit payloads (#587)
* feat: add pickle package for generating exploit payloads
1 parent d75d8bd commit 12e83a4

6 files changed

Lines changed: 972 additions & 0 deletions

File tree

pickle/disasm.go

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
package pickle
2+
3+
import (
4+
"bytes"
5+
"errors"
6+
"fmt"
7+
"io"
8+
)
9+
10+
// Op is one decoded pickle opcode.
//
// Name is the symbolic opcode name, Code is the raw opcode byte, and Arg
// holds the decoded argument, whose dynamic type depends on the opcode:
//
//   - nil for opcodes that carry no argument;
//   - string for BINUNICODE / SHORT_BINUNICODE / BINUNICODE8;
//   - [2]string{module, attr} for GLOBAL;
//   - []byte for BINBYTES / SHORT_BINBYTES;
//   - int for PROTO, BINPUT, BINGET, BININT1, BININT2, BININT and FRAME.
type Op struct {
	Name string
	Code byte
	Arg  any
}

// String renders the opcode name followed by its argument, if any.
// Strings, byte slices and both halves of a GLOBAL pair are Go-quoted
// (%q); ints are printed in decimal; any other argument type falls back
// to %v.
func (o Op) String() string {
	switch arg := o.Arg.(type) {
	case nil:
		// No argument: the bare name is the whole rendering.
		return o.Name
	case [2]string:
		return fmt.Sprintf("%s %q %q", o.Name, arg[0], arg[1])
	case string, []byte:
		// %q quotes a string and a []byte the same way.
		return fmt.Sprintf("%s %q", o.Name, arg)
	case int:
		return fmt.Sprintf("%s %d", o.Name, arg)
	default:
		return fmt.Sprintf("%s %v", o.Name, arg)
	}
}
38+
39+
// Sentinel errors returned (wrapped) by the disassembler; match them
// with errors.Is rather than by comparing message text.
var (
	// ErrUnknownOpcode marks an opcode byte the disassembler has no
	// table entry for.
	ErrUnknownOpcode = errors.New("pickle: unknown opcode")

	// ErrTruncated marks a stream that ended (or declared a length
	// running past its end) while an opcode argument was being read.
	ErrTruncated = errors.New("pickle: truncated stream")
)
44+
45+
// noArgOps maps opcodes that take no argument to their symbolic name.
46+
var noArgOps = map[byte]string{
47+
OpStop: "STOP",
48+
OpMark: "MARK",
49+
OpReduce: "REDUCE",
50+
OpBuild: "BUILD",
51+
OpEmptyTuple: "EMPTY_TUPLE",
52+
OpTuple: "TUPLE",
53+
OpTuple1: "TUPLE1",
54+
OpTuple2: "TUPLE2",
55+
OpTuple3: "TUPLE3",
56+
OpEmptyList: "EMPTY_LIST",
57+
OpEmptyDict: "EMPTY_DICT",
58+
OpAppend: "APPEND",
59+
OpAppends: "APPENDS",
60+
OpSetItem: "SETITEM",
61+
OpSetItems: "SETITEMS",
62+
OpNewTrue: "NEWTRUE",
63+
OpNewFalse: "NEWFALSE",
64+
OpNone: "NONE",
65+
OpMemoize: "MEMOIZE",
66+
OpDup: "DUP",
67+
OpPop: "POP",
68+
OpPopMark: "POP_MARK",
69+
}
70+
71+
// Disassemble walks a pickle stream into a list of named opcodes.
72+
// Returns an error on truncated input or unknown opcodes rather than
73+
// silently skipping.
74+
func Disassemble(data []byte) ([]Op, error) {
75+
r := bytes.NewReader(data)
76+
ops := []Op{}
77+
78+
for r.Len() > 0 {
79+
code, err := r.ReadByte()
80+
if err != nil {
81+
return ops, fmt.Errorf("read opcode: %w", err)
82+
}
83+
op, err := decodeOp(code, r)
84+
if err != nil {
85+
return ops, fmt.Errorf("at byte %d (opcode 0x%02x): %w",
86+
len(data)-r.Len()-1, code, err)
87+
}
88+
ops = append(ops, op)
89+
if code == OpStop {
90+
break
91+
}
92+
}
93+
94+
return ops, nil
95+
}
96+
97+
// argSpec describes how to decode an opcode's argument. read pulls the
98+
// arg value from r and returns it ready to drop into Op.Arg. Centralising
99+
// the (name, reader) pair as data lets decodeOp stay a one-liner lookup.
100+
type argSpec struct {
101+
name string
102+
read func(*bytes.Reader) (any, error)
103+
}
104+
105+
// argOps maps every opcode that carries an argument to its spec. The
106+
// no-arg opcodes live in noArgOps above.
107+
var argOps = map[byte]argSpec{
108+
OpProto: {"PROTO", readUintArg(1)},
109+
OpBinPut: {"BINPUT", readUintArg(1)},
110+
OpBinGet: {"BINGET", readUintArg(1)},
111+
OpBinInt1: {"BININT1", readUintArg(1)},
112+
OpBinInt2: {"BININT2", readUintArg(2)},
113+
OpBinInt: {"BININT", readSignedInt32Arg},
114+
OpFrame: {"FRAME", readUintArg(8)},
115+
OpGlobal: {"GLOBAL", readGlobalArg},
116+
OpBinUnicode: {"BINUNICODE", readStringArg(4)},
117+
OpShortBinUnicode: {"SHORT_BINUNICODE", readStringArg(1)},
118+
OpBinUnicode8: {"BINUNICODE8", readStringArg(8)},
119+
OpBinBytes: {"BINBYTES", readBytesArg(4)},
120+
OpShortBinBytes: {"SHORT_BINBYTES", readBytesArg(1)},
121+
}
122+
123+
// decodeOp dispatches an opcode to its argument reader.
124+
func decodeOp(code byte, r *bytes.Reader) (Op, error) {
125+
if name, ok := noArgOps[code]; ok {
126+
return Op{Name: name, Code: code}, nil
127+
}
128+
spec, ok := argOps[code]
129+
if !ok {
130+
return Op{}, fmt.Errorf("%w: 0x%02x", ErrUnknownOpcode, code)
131+
}
132+
arg, err := spec.read(r)
133+
if err != nil {
134+
return Op{}, fmt.Errorf("%w reading %s arg: %w", ErrTruncated, spec.name, err)
135+
}
136+
137+
return Op{Name: spec.name, Code: code, Arg: arg}, nil
138+
}
139+
140+
// readUintArg returns a reader that pulls byteCount little-endian bytes
141+
// as an unsigned int.
142+
func readUintArg(byteCount int) func(*bytes.Reader) (any, error) {
143+
return func(r *bytes.Reader) (any, error) {
144+
u, err := readUint(r, byteCount)
145+
146+
return int(u), err
147+
}
148+
}
149+
150+
// readSignedInt32Arg pulls 4 LE bytes as a signed int (BININT).
151+
func readSignedInt32Arg(r *bytes.Reader) (any, error) {
152+
u, err := readUint(r, 4)
153+
154+
return int(int32(u)), err
155+
}
156+
157+
// readStringArg / readBytesArg share readPrefixedBytes; they differ only
158+
// in how the payload is typed.
159+
func readStringArg(lenBytes int) func(*bytes.Reader) (any, error) {
160+
return func(r *bytes.Reader) (any, error) {
161+
body, err := readPrefixedBytes(r, lenBytes)
162+
163+
return string(body), err
164+
}
165+
}
166+
167+
func readBytesArg(lenBytes int) func(*bytes.Reader) (any, error) {
168+
return func(r *bytes.Reader) (any, error) {
169+
body, err := readPrefixedBytes(r, lenBytes)
170+
171+
return body, err
172+
}
173+
}
174+
175+
// readGlobalArg pulls (module, attr) as two newline-terminated strings.
176+
func readGlobalArg(r *bytes.Reader) (any, error) {
177+
mod, err := readLine(r)
178+
if err != nil {
179+
return nil, err
180+
}
181+
attr, err := readLine(r)
182+
if err != nil {
183+
return nil, err
184+
}
185+
186+
return [2]string{mod, attr}, nil
187+
}
188+
189+
// readUint reads byteCount little-endian bytes as a uint64. byteCount
190+
// must be 1, 2, 4 or 8 (the widths used by every pickle integer /
191+
// length opcode).
192+
func readUint(r *bytes.Reader, byteCount int) (uint64, error) {
193+
if byteCount < 1 || byteCount > 8 {
194+
return 0, fmt.Errorf("%w: invalid byte count %d", ErrTruncated, byteCount)
195+
}
196+
buf := make([]byte, byteCount)
197+
if _, err := io.ReadFull(r, buf); err != nil {
198+
return 0, fmt.Errorf("read %d bytes: %w", byteCount, err)
199+
}
200+
var v uint64
201+
for i := byteCount - 1; i >= 0; i-- {
202+
v = v<<8 | uint64(buf[i])
203+
}
204+
205+
return v, nil
206+
}
207+
208+
// readLine consumes bytes from r up to and including the next '\n' and
// returns everything before the newline as a string. If the reader runs
// out before a newline is seen, it returns "" and the wrapped read error;
// the bytes consumed up to that point are not returned.
func readLine(r *bytes.Reader) (string, error) {
	var line []byte
	for {
		c, err := r.ReadByte()
		switch {
		case err != nil:
			return "", fmt.Errorf("read line byte: %w", err)
		case c == '\n':
			return string(line), nil
		}
		line = append(line, c)
	}
}
221+
222+
// readPrefixedBytes reads a 1/4/8-byte LE length then that many bytes.
223+
func readPrefixedBytes(r *bytes.Reader, lenBytes int) ([]byte, error) {
224+
n, err := readUint(r, lenBytes)
225+
if err != nil {
226+
return nil, fmt.Errorf("read length: %w", err)
227+
}
228+
if n > uint64(r.Len()) {
229+
return nil, fmt.Errorf("%w: length %d exceeds remaining %d", ErrTruncated, n, r.Len())
230+
}
231+
buf := make([]byte, n)
232+
if _, err := io.ReadFull(r, buf); err != nil {
233+
return nil, fmt.Errorf("read body: %w", err)
234+
}
235+
236+
return buf, nil
237+
}

pickle/fragments.go

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package pickle
2+
3+
// Fragment is a pickle byte slice that pushes exactly one value onto
4+
// the unpickler stack. Primitives, composites and calls all return
5+
// Fragments, so they nest. Dump wraps a top-level Fragment with PROTO
6+
// + STOP into a complete stream.
7+
type Fragment []byte
8+
9+
// Dump wraps top with PROTO + STOP into a complete pickle stream.
10+
func Dump(top Fragment) []byte {
11+
out := make([]byte, 0, 2+len(top)+1)
12+
out = append(out, OpProto, Protocol)
13+
out = append(out, top...)
14+
out = append(out, OpStop)
15+
16+
return out
17+
}
18+
19+
// concatFragments concatenates parts into a single pre-sized Fragment.
20+
func concatFragments(parts ...Fragment) Fragment {
21+
total := 0
22+
for _, p := range parts {
23+
total += len(p)
24+
}
25+
out := make(Fragment, 0, total)
26+
for _, p := range parts {
27+
out = append(out, p...)
28+
}
29+
30+
return out
31+
}
32+
33+
// Str pushes the Python string s onto the stack. Returns ok=false if s
34+
// exceeds the 4-byte length cap baked into BINUNICODE.
35+
func Str(s string) (Fragment, bool) {
36+
out, ok := BinUnicode(s)
37+
if !ok {
38+
return nil, false
39+
}
40+
41+
return Fragment(out), true
42+
}
43+
44+
// Int pushes a Python int via the smallest BININTx opcode that fits.
45+
// Wider values (LONG1 / LONG4) are not implemented; use the int32
46+
// signature so callers convert at the call site rather than at runtime.
47+
func Int(n int32) Fragment {
48+
switch {
49+
case n >= 0 && n <= 0xff:
50+
return Fragment(BinInt1(uint8(n)))
51+
case n >= 0 && n <= 0xffff:
52+
return Fragment(BinInt2(uint16(n)))
53+
default:
54+
return Fragment(BinInt(n))
55+
}
56+
}
57+
58+
// Bool pushes True or False onto the stack.
59+
func Bool(b bool) Fragment {
60+
if b {
61+
return Fragment{OpNewTrue}
62+
}
63+
64+
return Fragment{OpNewFalse}
65+
}
66+
67+
// None pushes Python None.
68+
func None() Fragment { return Fragment{OpNone} }
69+
70+
// tupleSmallClosers indexes the protocol-2 fast-path opcodes by tuple
71+
// arity: EMPTY_TUPLE for 0, TUPLE1..TUPLE3 for 1..3.
72+
var tupleSmallClosers = [...]byte{OpEmptyTuple, OpTuple1, OpTuple2, OpTuple3}
73+
74+
// TupleOf pushes a Python tuple. Arities 0..3 use the dedicated
75+
// EMPTY_TUPLE / TUPLE1..3 fast-path opcodes; larger tuples fall back
76+
// to MARK + items + TUPLE.
77+
func TupleOf(elems ...Fragment) Fragment {
78+
if n := len(elems); n < len(tupleSmallClosers) {
79+
parts := make([]Fragment, 0, n+1)
80+
parts = append(parts, elems...)
81+
parts = append(parts, Fragment{tupleSmallClosers[n]})
82+
83+
return concatFragments(parts...)
84+
}
85+
parts := make([]Fragment, 0, 2+len(elems))
86+
parts = append(parts, Fragment{OpMark})
87+
parts = append(parts, elems...)
88+
parts = append(parts, Fragment{OpTuple})
89+
90+
return concatFragments(parts...)
91+
}
92+
93+
// collection emits an EMPTY_<X> opener and, if items is non-empty,
94+
// wraps the items in MARK + items + closeOp. Shared body of ListOf
95+
// and DictOf, since the only thing that differs between them is the
96+
// pair of opcodes and how items are flattened from the input shape.
97+
func collection(emptyOp, closeOp byte, items []Fragment) Fragment {
98+
if len(items) == 0 {
99+
return Fragment{emptyOp}
100+
}
101+
parts := make([]Fragment, 0, 3+len(items))
102+
parts = append(parts, Fragment{emptyOp}, Fragment{OpMark})
103+
parts = append(parts, items...)
104+
parts = append(parts, Fragment{closeOp})
105+
106+
return concatFragments(parts...)
107+
}
108+
109+
// ListOf pushes a Python list. Wire form: EMPTY_LIST [+ MARK + items
110+
// + APPENDS].
111+
func ListOf(elems ...Fragment) Fragment {
112+
return collection(OpEmptyList, OpAppends, elems)
113+
}
114+
115+
// DictEntry is a key/value pair for DictOf. Both sides are Fragments
116+
// so they can themselves be calls or other composites.
117+
type DictEntry struct {
118+
Key Fragment
119+
Value Fragment
120+
}
121+
122+
// DictOf pushes a Python dict. Wire form: EMPTY_DICT [+ MARK + pairs
123+
// + SETITEMS].
124+
func DictOf(entries ...DictEntry) Fragment {
125+
items := make([]Fragment, 0, 2*len(entries))
126+
for _, e := range entries {
127+
items = append(items, e.Key, e.Value)
128+
}
129+
130+
return collection(OpEmptyDict, OpSetItems, items)
131+
}
132+
133+
// Call pushes module.attr(*args) using the legacy GLOBAL opcode.
134+
func Call(module, attr string, args ...Fragment) Fragment {
135+
return CallFragment(Fragment(Global(module, attr)), args...)
136+
}
137+
138+
// CallFragment pushes callable(*args) for a callable already on the
139+
// stack (from a previous Call, Method, attribute access, etc.).
140+
func CallFragment(callable Fragment, args ...Fragment) Fragment {
141+
return concatFragments(callable, TupleOf(args...), Fragment{OpReduce})
142+
}
143+
144+
// Method pushes obj.name(*args) via getattr(obj, name)(*args); pickle
145+
// has no dedicated method-call opcode. Returns ok=false if name cannot
146+
// be encoded as a BINUNICODE string (delegated to Str).
147+
func Method(obj Fragment, name string, args ...Fragment) (Fragment, bool) {
148+
nameFrag, ok := Str(name)
149+
if !ok {
150+
return nil, false
151+
}
152+
153+
return CallFragment(Call("builtins", "getattr", obj, nameFrag), args...), true
154+
}
155+
156+
// Build pushes obj with state applied via __setstate__. Wire form:
157+
// obj + state + BUILD.
158+
func Build(obj Fragment, state Fragment) Fragment {
159+
return concatFragments(obj, state, Fragment{OpBuild})
160+
}

0 commit comments

Comments
 (0)