Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 2c880d4

Browse files
committed
Fix handling of false-positive Unicode escapes in rxing-cli raw output
1 parent f427036 commit 2c880d4

2 files changed

Lines changed: 14 additions & 17 deletions

File tree

test/test_all.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -160,14 +160,14 @@ def test_rxing_parsing():
160160
stdout = (r"""
161161
[Barcode Format] qrcode
162162
[Points] [PointT { x: -0.123, y: 456 }, PointT { x: 1.5, y: 1.5 }, PointT { x: 2.5, y: -2.5 }, PointT { x: -0, y: 0.0 }]
163-
[Data] \u{a1}Atenci\u{f3}n
163+
[Data] \u{a1}Atenci\u{f3}n, \\u{f00} is not a Unicode escape
164164
""")
165165
dec = zxing.BarCode.parse_rxing(stdout, '/tmp/test.png')
166166
assert dec.uri == 'file:///tmp/test.png'
167167
assert dec.path == '/tmp/test.png'
168168
assert dec.format == 'QR_CODE'
169169
assert dec.type == 'TEXT'
170-
assert dec.raw == dec.parsed == '\u00a1Atenci\u00f3n'
170+
assert dec.raw == dec.parsed == '\u00a1Atenci\u00f3n, \\u{f00} is not a Unicode escape'
171171
assert dec.raw_bits is None
172172
assert dec.points == [(-0.123, 456.0), (1.5, 1.5), (2.5, -2.5), (0.0, 0.0)]
173173
r = repr(dec)
@@ -190,15 +190,6 @@ def test_rxing_parsing_not_found():
190190
assert r.startswith('BarCode(') and r.endswith(')')
191191

192192

193-
def test_rxing_parsing_WRONG():
194-
# FIXME: This is wrong, should not be parsed as a Unicode escape
195-
stdout = (r"""
196-
[Data] 00\\u{ff}11
197-
""")
198-
dec = zxing.BarCode.parse_rxing(stdout, '/tmp/test.png')
199-
assert dec.raw == dec.parsed == r'00\u{ff}11'
200-
201-
202193
def test_wrong_formats():
203194
all_test_formats = {fmt for fn, fmt, raw in test_barcodes}
204195
yield from ((_check_decoding, filename, expected_format, None, dict(possible_formats=all_test_formats - {expected_format}))

zxing/__init__.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,17 @@ class CLROutputBlock(Enum):
235235
RAW_BITS = 4
236236

237237

238+
RUST_UNICODE_ESCAPE = re.compile(r'(\\+)u\{([a-f0-9]+)\}')
239+
def _unrustify_repr(rust_repr):
240+
# Rust's repr of a Unicode string can include backslash escapes, which are mostly
241+
# compatible with Python's. Unlike Python, however, its Unicode character escapes
242+
# are not always 4 hex digits, and have braces (e.g. '\u{123}')
243+
# To handle the case of an escaped '\\' followed by 'u{...}', we ignore even
244+
# numbers of backslashes.
245+
unrust = RUST_UNICODE_ESCAPE.sub(lambda m: r'\u' + m.group(2).rjust(4, '0') if len(m.group(1)) % 2 else m.group(0), rust_repr)
246+
return unrust.encode().decode('unicode_escape')
247+
238248
class BarCode(object):
239-
RUST_UNICODE_ESCAPE = re.compile(r'\\u\{([a-f0-9]+)\}')
240249
POINTS = re.compile(r'PointT\s*\{\s*x\:\s*([-\d.]+),\s*y\:\s*([-\d.]+)\s*\}')
241250

242251
RXING_FORMAT_TO_ZXING = {'QRCODE': 'QR_CODE'}
@@ -260,11 +269,8 @@ def parse_rxing(cls, rxing_output, fn):
260269
format = l.removeprefix('[Barcode Format] ').replace(' ', '_').upper()
261270
format = cls.RXING_FORMAT_TO_ZXING.get(format, format)
262271
elif l.startswith('[Data] '):
263-
# This is a Rust repr of a string, include backslash escapes. Unlike Python, its
264-
# unicode character escapes are not always 4 digits, and have braces (e.g. '\u{123}')
265-
# FIXME: This incorrectly handles the case of an escaped '\\' followed by 'u{...}'
266-
raw = l.removeprefix('[Data] ')
267-
raw = cls.RUST_UNICODE_ESCAPE.sub(lambda m: r'\u' + m.group(1).rjust(4, '0'), raw).encode().decode('unicode_escape')
272+
# This is a Rust repr of a string, which is almost-but-not-quite handled by bytes.decode('unicode_escape')
273+
raw = _unrustify_repr(l.removeprefix('[Data] '))
268274
elif l.startswith('[Points] '):
269275
points = [((float(m[0]), float(m[1]))) for m in cls.POINTS.findall(l.removeprefix('[Points] '))]
270276

0 commit comments

Comments
 (0)