@@ -1193,13 +1193,39 @@ def next_index(self):
11931193
11941194
11951195class _NameSanitizer (_NameIndexer ):
1196- """Sanitizes the names so that names can be used in places that don't allow for
1197- arbitrary names while not mangling valid names.
1196+ """Sanitizes names so they can be used in contexts that don't allow arbitrary names.
11981197
1199- Put the values you want to validate into make_valid_string the first time you want
1200- to sanitize a particular string (or before the first time), and retrieve from the
1201- _NameSanitizer through indexing directly thereafter eg: sani["__&sfhs"] for
1202- retrieval after the first time
1198+ For example, ``a.b`` is a valid ``WireVector`` name, but ``a.b`` is not a valid name
1199+ for a Python variable. If we want to generate Python code for ``a.b`` (like
1200+ ``FastSimulation``), the name must be sanitized.
1201+
1202+ Sanitization first attempts to replace non-word characters (anything that's not
1203+ alphanumeric or an underscore) with an underscore. If that didn't work, we try
1204+ appending a unique integer value. If that still doesn't work, we generate an
1205+ entirely new name consisting of ``internal_prefix`` followed by a unique integer
1206+ value.
1207+
1208+ ``make_valid_string`` must be called once to generate the sanitized version of a
1209+ name.
1210+
1211+ .. doctest only::
1212+
1213+ >>> import pyrtl
1214+
1215+ After ``make_valid_string`` has been called, the sanitized name can be retrieved
1216+ with ``__getitem__`` any number of times. For example::
1217+
1218+ >>> sanitizer = pyrtl.core._NameSanitizer(pyrtl.core._py_regex)
1219+
1220+ >>> sanitizer.make_valid_string("foo.bar")
1221+ 'foo_bar'
1222+ >>> sanitizer["foo.bar"]
1223+ 'foo_bar'
1224+ >>> sanitizer["foo.bar"]
1225+ 'foo_bar'
1226+
1227+ >>> sanitizer.make_valid_string("foo_bar")
1228+ 'foo_bar0'
12031229 """
12041230
12051231 def __init__ (
@@ -1211,43 +1237,64 @@ def __init__(
12111237 if identifier_regex_str [- 1 ] != "$" :
12121238 identifier_regex_str += "$"
12131239 self .identifier = re .compile (identifier_regex_str )
1240+ # Map from un-sanitized name to sanitized name.
12141241 self .val_map = {}
1242+ # Set of all generated sanitized names.
1243+ self .sanitized_names = set ()
12151244 self .extra_checks = extra_checks
12161245 super ().__init__ (internal_prefix )
12171246
1218- def __getitem__ (self , item ):
1219- """Get a value from the sanitizer"""
1220- return self .val_map [item ]
1247+ def __getitem__ (self , name : str ) -> str :
1248+ """Return the sanitized name for an un-sanitized name that was generated by
1249+ ``make_valid_string``.
1250+ """
1251+ return self .val_map [name ]
12211252
1222- def is_valid_str (self , string ):
1253+ def is_valid_str (self , string : str ) -> bool :
1254+ """Return ``True`` iff ``string`` matches ``identifier_regex_str`` and satisfies
1255+ ``extra_checks``.
1256+ """
12231257 return self .identifier .match (string ) and self .extra_checks (string )
12241258
1225- def make_valid_string (self , string = "" ):
1226- """Inputting a value for the first time."""
1227- if self .is_valid_str (string ):
1228- self .val_map [string ] = string
1229- return string
1230-
1259+ def make_valid_string (self , string : str = "" ) -> str :
1260+ """Generate a sanitized name from an un-sanitized name."""
12311261 if string in self .val_map :
12321262 msg = f"Value { string } has already been given to the sanitizer"
12331263 raise IndexError (msg )
1234- # Try replacing non-word characters with ``_``.
1235- internal_name = re .sub (r"\W" , "_" , string )
1236-
1237- if not self .is_valid_str (internal_name ) or internal_name in self .val_map :
1238- # If that didn't work, try prepending ``_``.
1239- internal_name = f"_{ internal_name } "
1240- while not self .is_valid_str (internal_name ) or internal_name in self .val_map :
1241- # If that didn't work, generate names starting with ``internal_prefix``
1242- # until we find something acceptable.
1243- internal_name = super ().make_valid_string ()
1264+
1265+ def is_usable (name : str ) -> bool :
1266+ """Return ``True`` iff ``name`` can be used as a sanitized name.
1267+
1268+ A sanitized name is usable if it ``is_valid_str``, and isn't already in use.
1269+ """
1270+ return self .is_valid_str (name ) and name not in self .sanitized_names
1271+
1272+ internal_name = string
1273+ if not is_usable (internal_name ):
1274+ # Try replacing non-word characters with ``_``.
1275+ internal_name = re .sub (r"\W" , "_" , string )
1276+
1277+ if not is_usable (internal_name ):
1278+ # If that didn't work, try appending the next ``internal_index``.
1279+ internal_name = f"{ internal_name } { self .next_index ()} "
1280+
1281+ if not is_usable (internal_name ):
1282+ # If that didn't work, generate an entirely new name starting with
1283+ # ``internal_prefix``.
1284+ internal_name = super ().make_valid_string ()
1285+
1286+ if not is_usable (internal_name ):
1287+ msg = f"Could not generate a usable sanitized name for { string } "
1288+ raise PyrtlError (msg )
1289+
12441290 self .val_map [string ] = internal_name
1291+ self .sanitized_names .add (internal_name )
12451292 return internal_name
12461293
12471294
12481295class _PythonSanitizer (_NameSanitizer ):
12491296 """Name Sanitizer specifically built for Python identifers."""
12501297
1251- def __init__ (self , internal_prefix = "_sani_temp" , map_valid_vals = True ):
1252- super ().__init__ (_py_regex , internal_prefix , map_valid_vals )
1298+ def __init__ (self , internal_prefix = "_sani_temp" ):
1299+ super ().__init__ (_py_regex , internal_prefix )
12531300 self .extra_checks = lambda s : not keyword .iskeyword (s )
0 commit comments