Skip to content

Commit cc5b1ba

Browse files
committed
Add fuzzer for codecs module
1 parent 2ca91df commit cc5b1ba

3 files changed

Lines changed: 140 additions & 1 deletion

File tree

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
all: fuzzer-binascii fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo
1+
all: fuzzer-binascii fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-codecs
22

33
PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config
44
CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags)
@@ -42,3 +42,6 @@ fuzzer-xml:
4242
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"xml.py\"" -ldl $(LDFLAGS) -o fuzzer-xml
4343
fuzzer-zoneinfo:
4444
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zoneinfo.py\"" -ldl $(LDFLAGS) -o fuzzer-zoneinfo
45+
46+
# Builds the fuzzer-codecs binary from fuzzer.cpp, passing codecs.py as PYTHON_HARNESS_PATH.
fuzzer-codecs:
47+
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"codecs.py\"" -ldl $(LDFLAGS) -o fuzzer-codecs

codecs.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
from fuzzeddataprovider import FuzzedDataProvider
2+
import codecs
3+
import io
4+
5+
# CJK multibyte codecs. They appear as one contiguous run, in this order, in
# both DECODERS and ENCODERS.
_CJK_CODECS = [
    "shift_jis",
    "euc-jp",
    "gb2312",
    "big5",
    "iso-2022-jp",
    "euc-kr",
    "gb18030",
    "big5hkscs",
]

# Codec names op_decode() chooses from.
# NOTE(review): entries are presumably selected by position from the fuzz
# input, so reordering would change what existing corpus files exercise.
DECODERS = [
    "utf-7",
    *_CJK_CODECS,
    "charmap",
    "ascii",
    "latin-1",
    "cp1252",
    "unicode_escape",
    "raw_unicode_escape",
    "utf-16",
    "utf-32",
]

# Codec names op_encode() chooses from.
ENCODERS = [
    *_CJK_CODECS,
    "unicode_escape",
    "raw_unicode_escape",
    "utf-7",
    "utf-8",
    "utf-16",
    "utf-16-le",
    "utf-16-be",
    "utf-32",
    "latin-1",
    "ascii",
    "charmap",
]

# Codecs driven through the incremental decoder / encoder interfaces.
INC_DEC_CODECS = ["shift_jis", "gb18030", "utf-16"]
INC_ENC_CODECS = ["shift_jis", "utf-8"]

# Operation selectors: consecutive integers 0..4. FuzzerRunOne draws one value
# in the inclusive range [OP_DECODE, OP_STREAM_READ].
(
    OP_DECODE,
    OP_ENCODE,
    OP_INCREMENTAL_DECODE,
    OP_INCREMENTAL_ENCODE,
    OP_STREAM_READ,
) = range(5)
55+
56+
57+
def op_decode(fdp):
    """Decode all remaining fuzz bytes in one shot with a codec from DECODERS.

    The codec name is picked first, then every remaining byte is consumed and
    passed to codecs.decode() with the "replace" error handler. The decoded
    result is discarded; only the act of decoding matters here.
    """
    encoding = fdp.PickValueInList(DECODERS)
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())
    codecs.decode(payload, encoding, "replace")
61+
62+
63+
def op_encode(fdp):
    """Encode a fuzz-derived string in one shot with a codec from ENCODERS.

    The codec name is picked first. If no input is left afterwards the
    function returns without encoding anything. Otherwise a length between 1
    and min(remaining bytes, 10000) is drawn, a string is consumed with that
    length argument, and it is passed to codecs.encode() with the "replace"
    error handler. The encoded result is discarded.
    """
    encoding = fdp.PickValueInList(ENCODERS)
    if fdp.remaining_bytes() <= 0:
        return
    length = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000))
    if length == 0:
        return
    text = fdp.ConsumeUnicode(length)
    codecs.encode(text, encoding, "replace")
74+
75+
76+
def op_incremental_decode(fdp):
    """Feed fuzz bytes to an incremental decoder in two chunks.

    A codec is picked from INC_DEC_CODECS. The first chunk is requested with a
    fuzz-chosen size in [0, 10000]; the second chunk is whatever input
    remains. The second decode() call is marked final, after which
    getstate() and reset() are called on the decoder as well.
    """
    encoding = fdp.PickValueInList(INC_DEC_CODECS)
    head = fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, 10000))
    tail = fdp.ConsumeBytes(fdp.remaining_bytes())

    decoder_factory = codecs.getincrementaldecoder(encoding)
    dec = decoder_factory("replace")
    dec.decode(head)
    dec.decode(tail, True)  # True == final: no more input follows
    dec.getstate()
    dec.reset()
86+
87+
88+
def op_incremental_encode(fdp):
    """Feed a fuzz-derived string to an incremental encoder in two pieces.

    A codec is picked from INC_ENC_CODECS. If no input is left afterwards the
    function returns. Otherwise a string is consumed (length argument between
    1 and min(remaining bytes, 10000)) and split at a fuzz-chosen index. The
    encoder is reset between the two encode() calls, and getstate() is called
    at the end.
    """
    encoding = fdp.PickValueInList(INC_ENC_CODECS)
    if fdp.remaining_bytes() <= 0:
        return
    length = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000))
    if length == 0:
        return
    text = fdp.ConsumeUnicode(length)
    cut = fdp.ConsumeIntInRange(0, len(text))
    first_part, second_part = text[:cut], text[cut:]

    enc = codecs.getincrementalencoder(encoding)("replace")
    enc.encode(first_part)
    enc.reset()  # reset happens mid-stream, before the second piece
    enc.encode(second_part)
    enc.getstate()
104+
105+
106+
def op_stream(fdp):
    """Read all remaining fuzz bytes through a UTF-8 stream reader.

    The bytes are wrapped in an in-memory io.BytesIO and read in full through
    the reader returned by codecs.getreader("utf-8"), using the "replace"
    error handler. The text that is read is discarded.
    """
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())
    reader_factory = codecs.getreader("utf-8")
    reader = reader_factory(io.BytesIO(payload), "replace")
    reader.read()
111+
112+
113+
# Fuzzes CPython's codec infrastructure (Modules/cjkcodecs/, Python/codecs.c).
# One-shot encode/decode covers the codecs listed in ENCODERS / DECODERS (CJK
# codecs plus Western/Unicode ones such as UTF-7/16/32, charmap,
# unicode_escape and latin-1). Incremental encode/decode covers only the
# codecs in INC_ENC_CODECS / INC_DEC_CODECS. Stream-based reading goes through
# codecs.getreader().
def FuzzerRunOne(FuzzerInput):
    """Run one fuzz iteration: choose an operation and execute it.

    Inputs that are empty or longer than 0x100000 bytes are ignored. The
    operation selector is drawn from the input first; the rest of the input is
    consumed by the selected op_* function. Any selector value without its own
    handler falls through to op_stream.

    Any Exception raised by the selected operation is deliberately discarded,
    so Python-level errors do not stop the fuzzer.
    """
    if not 1 <= len(FuzzerInput) <= 0x100000:
        return
    fdp = FuzzedDataProvider(FuzzerInput)
    op = fdp.ConsumeIntInRange(OP_DECODE, OP_STREAM_READ)
    try:
        handlers = {
            OP_DECODE: op_decode,
            OP_ENCODE: op_encode,
            OP_INCREMENTAL_DECODE: op_incremental_decode,
            OP_INCREMENTAL_ENCODE: op_incremental_encode,
        }
        handlers.get(op, op_stream)(fdp)
    except Exception:
        pass

fuzz_targets.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
ast ast.py
22
binascii binascii.py
3+
codecs codecs.py
34
configparser configparser.py
45
csv csv.py
56
decode decode.py

0 commit comments

Comments
 (0)