Skip to content

Commit 07ec4c3

Browse files
committed
Update _NameSanitizer to try appending the current internal_index before generating an entirely new name. Fix a bug where a generated name could collide with an identifier name. Add tests.
1 parent 3c12083 commit 07ec4c3

3 files changed

Lines changed: 108 additions & 29 deletions

File tree

pyrtl/core.py

Lines changed: 75 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,13 +1193,39 @@ def next_index(self):
11931193

11941194

11951195
class _NameSanitizer(_NameIndexer):
1196-
"""Sanitizes the names so that names can be used in places that don't allow for
1197-
arbitrary names while not mangling valid names.
1196+
"""Sanitizes names so they can be used in contexts that don't allow arbitrary names.
11981197
1199-
Put the values you want to validate into make_valid_string the first time you want
1200-
to sanitize a particular string (or before the first time), and retrieve from the
1201-
_NameSanitizer through indexing directly thereafter eg: sani["__&sfhs"] for
1202-
retrieval after the first time
1198+
For example, ``a.b`` is a valid ``WireVector`` name, but ``a.b`` is not a valid name
1199+
for a Python variable. If we want to generate Python code for ``a.b`` (like
1200+
``FastSimulation``), the name must be sanitized.
1201+
1202+
Sanitization first attempts to replace non-word characters (anything that's not
1203+
alphanumeric or an underscore) with an underscore. If that didn't work, we try
1204+
appending a unique integer value. If that still doesn't work, we generate an
1205+
entirely new name consisting of ``internal_prefix`` followed by a unique integer
1206+
value.
1207+
1208+
``make_valid_string`` must be called once to generate the sanitized version of a
1209+
name.
1210+
1211+
.. doctest only::
1212+
1213+
>>> import pyrtl
1214+
1215+
After ``make_valid_string`` has been called, the sanitized name can be retrieved
1216+
with ``__getitem__`` any number of times. For example::
1217+
1218+
>>> sanitizer = pyrtl.core._NameSanitizer(pyrtl.core._py_regex)
1219+
1220+
>>> sanitizer.make_valid_string("foo.bar")
1221+
'foo_bar'
1222+
>>> sanitizer["foo.bar"]
1223+
'foo_bar'
1224+
>>> sanitizer["foo.bar"]
1225+
'foo_bar'
1226+
1227+
>>> sanitizer.make_valid_string("foo_bar")
1228+
'foo_bar0'
12031229
"""
12041230

12051231
def __init__(
@@ -1211,43 +1237,64 @@ def __init__(
12111237
if identifier_regex_str[-1] != "$":
12121238
identifier_regex_str += "$"
12131239
self.identifier = re.compile(identifier_regex_str)
1240+
# Map from un-sanitized name to sanitized name.
12141241
self.val_map = {}
1242+
# Set of all generated sanitized names.
1243+
self.sanitized_names = set()
12151244
self.extra_checks = extra_checks
12161245
super().__init__(internal_prefix)
12171246

1218-
def __getitem__(self, item):
1219-
"""Get a value from the sanitizer"""
1220-
return self.val_map[item]
1247+
def __getitem__(self, name: str) -> str:
1248+
"""Return the sanitized name for an un-sanitized name that was generated by
1249+
``make_valid_string``.
1250+
"""
1251+
return self.val_map[name]
12211252

1222-
def is_valid_str(self, string):
1253+
def is_valid_str(self, string: str) -> bool:
1254+
"""Return ``True`` iff ``string`` matches ``identifier_regex_str`` and satisfies
1255+
``extra_checks``.
1256+
"""
12231257
return self.identifier.match(string) and self.extra_checks(string)
12241258

1225-
def make_valid_string(self, string=""):
1226-
"""Inputting a value for the first time."""
1227-
if self.is_valid_str(string):
1228-
self.val_map[string] = string
1229-
return string
1230-
1259+
def make_valid_string(self, string: str = "") -> str:
1260+
"""Generate a sanitized name from an un-sanitized name."""
12311261
if string in self.val_map:
12321262
msg = f"Value {string} has already been given to the sanitizer"
12331263
raise IndexError(msg)
1234-
# Try replacing non-word characters with ``_``.
1235-
internal_name = re.sub(r"\W", "_", string)
1236-
1237-
if not self.is_valid_str(internal_name) or internal_name in self.val_map:
1238-
# If that didn't work, try prepending ``_``.
1239-
internal_name = f"_{internal_name}"
1240-
while not self.is_valid_str(internal_name) or internal_name in self.val_map:
1241-
# If that didn't work, generate names starting with ``internal_prefix``
1242-
# until we find something acceptable.
1243-
internal_name = super().make_valid_string()
1264+
1265+
def is_usable(name: str) -> bool:
1266+
"""Return ``True`` iff ``name`` can be used as a sanitized name.
1267+
1268+
A sanitized name is usable if it ``is_valid_str``, and isn't already in use.
1269+
"""
1270+
return self.is_valid_str(name) and name not in self.sanitized_names
1271+
1272+
internal_name = string
1273+
if not is_usable(internal_name):
1274+
# Try replacing non-word characters with ``_``.
1275+
internal_name = re.sub(r"\W", "_", string)
1276+
1277+
if not is_usable(internal_name):
1278+
# If that didn't work, try appending the next ``internal_index``.
1279+
internal_name = f"{internal_name}{self.next_index()}"
1280+
1281+
if not is_usable(internal_name):
1282+
# If that didn't work, generate an entirely new name starting with
1283+
# ``internal_prefix``.
1284+
internal_name = super().make_valid_string()
1285+
1286+
if not is_usable(internal_name):
1287+
msg = f"Could not generate a usable sanitized name for {string}"
1288+
raise PyrtlError(msg)
1289+
12441290
self.val_map[string] = internal_name
1291+
self.sanitized_names.add(internal_name)
12451292
return internal_name
12461293

12471294

12481295
class _PythonSanitizer(_NameSanitizer):
12491296
"""Name Sanitizer specifically built for Python identifers."""
12501297

1251-
def __init__(self, internal_prefix="_sani_temp", map_valid_vals=True):
1252-
super().__init__(_py_regex, internal_prefix, map_valid_vals)
1298+
def __init__(self, internal_prefix="_sani_temp"):
1299+
super().__init__(_py_regex, internal_prefix)
12531300
self.extra_checks = lambda s: not keyword.iskeyword(s)

pyrtl/importexport.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,7 @@ class _VerilogSanitizer(_NameSanitizer):
755755
unsigned use vectored wait wand weak0 weak1 while wire wor xnor xor
756756
"""
757757

758-
def __init__(self, internal_prefix="_sani_temp"):
758+
def __init__(self, internal_prefix):
759759
self._verilog_reserved_set = frozenset(self._verilog_reserved.split())
760760
super().__init__(self._ver_regex, internal_prefix, self._extra_checks)
761761

tests/test_core.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,5 +785,37 @@ def test_wire_not_in_net_connections(self):
785785
dst_nets[w]
786786

787787

788+
class TestNameSanitizer(unittest.TestCase):
789+
def test_name_collision(self):
790+
# Test a sanitized name that collides with an unsanitized name.
791+
sanitizer = pyrtl.core._NameSanitizer(
792+
identifier_regex_str=pyrtl.core._py_regex,
793+
internal_prefix="sanitized",
794+
)
795+
# This name should be sanitized by replacing the dot with an underscore.
796+
sanitized_dot = sanitizer.make_valid_string("foo.bar")
797+
self.assertEqual(sanitized_dot, "foo_bar")
798+
self.assertEqual(sanitizer["foo.bar"], "foo_bar")
799+
800+
# This name collides with the sanitized name we just generated, so the first
801+
# internal_index should be appended.
802+
sanitized_underscore = sanitizer.make_valid_string("foo_bar")
803+
self.assertEqual(sanitized_underscore, "foo_bar0")
804+
self.assertEqual(sanitizer["foo_bar"], "foo_bar0")
805+
806+
# This name does not require sanitization, but it will collide with the next
807+
# foo.bar variant.
808+
sanitized_one = sanitizer.make_valid_string("foo_bar1")
809+
self.assertEqual(sanitized_one, "foo_bar1")
810+
self.assertEqual(sanitizer["foo_bar1"], "foo_bar1")
811+
812+
# Attempting to sanitize foo!bar by appending internal_index collides with the
813+
# name we just registered. _NameSanitizer should give up and use
814+
# internal_prefix.
815+
sanitized_bang = sanitizer.make_valid_string("foo!bar")
816+
self.assertEqual(sanitized_bang, "sanitized2")
817+
self.assertEqual(sanitizer["foo!bar"], "sanitized2")
818+
819+
788820
if __name__ == "__main__":
789821
unittest.main()

0 commit comments

Comments
 (0)