|
| 1 | +from fuzzeddataprovider import FuzzedDataProvider |
| 2 | +import codecs |
| 3 | +import io |
| 4 | + |
# Codec names that op_decode() may pick from. The order is significant:
# the fuzz input selects an entry by position, so reordering would change
# which codec an existing corpus entry exercises.
DECODERS = [
    # Modified UTF-7.
    "utf-7",
    # CJK multibyte codecs.
    "shift_jis",
    "euc-jp",
    "gb2312",
    "big5",
    "iso-2022-jp",
    "euc-kr",
    "gb18030",
    "big5hkscs",
    # Charmap and Western codecs.
    "charmap",
    "ascii",
    "latin-1",
    "cp1252",
    # Python escape-sequence codecs.
    "unicode_escape",
    "raw_unicode_escape",
    # Wide Unicode transformation formats.
    "utf-16",
    "utf-32",
]
| 24 | + |
# Codec names that op_encode() may pick from. The order is significant:
# the fuzz input selects an entry by position, so reordering would change
# which codec an existing corpus entry exercises.
ENCODERS = [
    # CJK multibyte codecs.
    "shift_jis",
    "euc-jp",
    "gb2312",
    "big5",
    "iso-2022-jp",
    "euc-kr",
    "gb18030",
    "big5hkscs",
    # Python escape-sequence codecs.
    "unicode_escape",
    "raw_unicode_escape",
    # Unicode transformation formats.
    "utf-7",
    "utf-8",
    "utf-16",
    "utf-16-le",
    "utf-16-be",
    "utf-32",
    # Western codecs and charmap.
    "latin-1",
    "ascii",
    "charmap",
]
| 46 | + |
# Codecs driven through the incremental decoder / encoder interfaces.
INC_DEC_CODECS = ["shift_jis", "gb18030", "utf-16"]
INC_ENC_CODECS = ["shift_jis", "utf-8"]

# Operation selectors; FuzzerRunOne() draws one value in
# [OP_DECODE, OP_STREAM_READ] from the fuzz input.
(
    OP_DECODE,
    OP_ENCODE,
    OP_INCREMENTAL_DECODE,
    OP_INCREMENTAL_ENCODE,
    OP_STREAM_READ,
) = range(5)
| 55 | + |
| 56 | + |
def op_decode(fdp):
    """Decode all remaining fuzz bytes with one codec picked from DECODERS.

    The "replace" error handler is used and the decoded result is discarded.
    """
    encoding = fdp.PickValueInList(DECODERS)
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())
    codecs.decode(payload, encoding, errors="replace")
| 61 | + |
| 62 | + |
def op_encode(fdp):
    """Encode a fuzz-derived string with one codec picked from ENCODERS.

    The codec is chosen first; if no input remains afterwards the function
    returns without encoding anything. The requested string length is capped
    at 10000, and the "replace" error handler is used.
    """
    encoding = fdp.PickValueInList(ENCODERS)

    available = fdp.remaining_bytes()
    if available <= 0:
        return

    length = fdp.ConsumeIntInRange(1, min(available, 10000))
    if length == 0:
        # Kept from the original: bail out on a zero-length request.
        return

    text = fdp.ConsumeUnicode(length)
    codecs.encode(text, encoding, errors="replace")
| 74 | + |
| 75 | + |
def op_incremental_decode(fdp):
    """Feed the fuzz bytes to an incremental decoder in two chunks.

    The first chunk's requested size (0..10000) comes from the fuzz input;
    the second chunk is everything left over and is passed with final=True.
    Afterwards getstate() and reset() are called on the decoder.
    """
    encoding = fdp.PickValueInList(INC_DEC_CODECS)
    head = fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, 10000))
    tail = fdp.ConsumeBytes(fdp.remaining_bytes())

    make_decoder = codecs.getincrementaldecoder(encoding)
    dec = make_decoder("replace")

    dec.decode(head)
    dec.decode(tail, True)  # second positional argument is `final`
    dec.getstate()
    dec.reset()
| 86 | + |
| 87 | + |
def op_incremental_encode(fdp):
    """Encode a fuzz-derived string in two pieces with an incremental encoder.

    The string is split at a fuzz-chosen index. The encoder is reset between
    the two encode() calls, and getstate() is called at the end.
    Returns early when no input remains after the codec has been picked.
    """
    encoding = fdp.PickValueInList(INC_ENC_CODECS)

    available = fdp.remaining_bytes()
    if available <= 0:
        return

    length = fdp.ConsumeIntInRange(1, min(available, 10000))
    if length == 0:
        # Kept from the original: bail out on a zero-length request.
        return

    text = fdp.ConsumeUnicode(length)
    cut = fdp.ConsumeIntInRange(0, len(text))
    make_encoder = codecs.getincrementalencoder(encoding)
    enc = make_encoder("replace")

    head, tail = text[:cut], text[cut:]
    enc.encode(head)
    enc.reset()
    enc.encode(tail)
    enc.getstate()
| 104 | + |
| 105 | + |
def op_stream(fdp):
    """Read all remaining fuzz bytes through a UTF-8 codecs StreamReader.

    The bytes are wrapped in an in-memory stream and read in a single
    read() call with the "replace" error handler; the text is discarded.
    """
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())
    make_reader = codecs.getreader("utf-8")
    make_reader(io.BytesIO(payload), "replace").read()
| 111 | + |
| 112 | + |
# Fuzz entry point for CPython's codec infrastructure (Modules/cjkcodecs/,
# Python/codecs.c).
# One fuzz-selected operation runs per input: one-shot decode, one-shot
# encode, incremental decode, incremental encode, or a stream read via
# codecs.getreader(). The codec lists cover the CJK codecs (Shift-JIS,
# EUC-JP, GB2312, Big5, ISO-2022-JP, EUC-KR, GB18030, Big5-HKSCS) and
# Western/Unicode codecs (UTF-7/16/32, charmap, unicode_escape, latin-1).
def FuzzerRunOne(FuzzerInput):
    # Ignore empty inputs and anything larger than 1 MiB (0x100000 bytes).
    if not 1 <= len(FuzzerInput) <= 0x100000:
        return

    fdp = FuzzedDataProvider(FuzzerInput)
    op = fdp.ConsumeIntInRange(OP_DECODE, OP_STREAM_READ)

    handlers = {
        OP_DECODE: op_decode,
        OP_ENCODE: op_encode,
        OP_INCREMENTAL_DECODE: op_incremental_decode,
        OP_INCREMENTAL_ENCODE: op_incremental_encode,
    }
    try:
        # Any selector without an explicit entry falls back to the stream op.
        handlers.get(op, op_stream)(fdp)
    except Exception:
        # Exceptions raised by the selected operation are deliberately ignored.
        pass
0 commit comments