Skip to content

Commit c821399

Browse files
fix: address PR review feedback (regex optimization, typos, duplicate sigs)
1 parent 26f4111 commit c821399

4 files changed

Lines changed: 13 additions & 26 deletions

File tree

capa/features/extractors/ts/engine.py

Lines changed: 9 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,11 @@
2929
)
3030
from capa.features.extractors.ts.tools import LANGUAGE_TOOLKITS, BaseNamespace, CSharpNamespace, LanguageToolkit
3131

32+
_RE_CSHARP_PAGE = re.compile(r'@ .*Page Language\s*=\s*"C#".*'.encode(), re.IGNORECASE)
33+
_RE_ASPX_IMPORT_DIRECTIVE = re.compile(r"@\s*Import Namespace=".encode(), re.IGNORECASE)
34+
_RE_ASPX_NAMESPACE = re.compile(r'@\s*Import namespace="(.*?)"'.encode(), re.IGNORECASE)
35+
_RE_RUNAT_SERVER = re.compile(r'runat\s*=\s*"server"'.encode())
36+
3237

3338
class TreeSitterBaseEngine:
3439
buf: bytes
@@ -227,29 +232,13 @@ def get_namespaces(self) -> Iterator[BaseNamespace]:
227232
yield from self.get_imported_namespaces()
228233

229234
def is_c_sharp(self, node: Node) -> bool:
230-
return bool(
231-
re.match(
232-
r'@ .*Page Language\s*=\s*"C#".*'.encode(),
233-
self.get_byte_range(node),
234-
re.IGNORECASE,
235-
)
236-
)
235+
return bool(_RE_CSHARP_PAGE.match(self.get_byte_range(node)))
237236

238237
def is_aspx_import_directive(self, node: Node) -> bool:
239-
return bool(
240-
re.match(
241-
r"@\s*Import Namespace=".encode(),
242-
self.get_byte_range(node),
243-
re.IGNORECASE,
244-
)
245-
)
238+
return bool(_RE_ASPX_IMPORT_DIRECTIVE.match(self.get_byte_range(node)))
246239

247240
def get_aspx_namespace(self, node: Node) -> Optional[BaseNamespace]:
248-
match = re.search(
249-
r'@\s*Import namespace="(.*?)"'.encode(),
250-
self.get_byte_range(node),
251-
re.IGNORECASE,
252-
)
241+
match = _RE_ASPX_NAMESPACE.search(self.get_byte_range(node))
253242
return CSharpNamespace(match.group(1).decode("utf-8"), node) if match is not None else None
254243

255244

@@ -295,4 +284,4 @@ def identify_language(self, node: Node) -> str:
295284
return LANG_JS
296285

297286
def is_server_side_c_sharp(self, node: Node) -> bool:
298-
return bool(re.findall(r'runat\s*=\s*"server"'.encode(), self.get_byte_range(node)))
287+
return bool(_RE_RUNAT_SERVER.search(self.get_byte_range(node)))

capa/features/extractors/ts/signatures/cs.json

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,6 @@
3333
"System.IO.File.GetLastAccessTime",
3434
"System.IO.File.GetLastWriteTime",
3535
"System.IO.File.ReadAllBytes",
36-
"System.IO.File.ReadAllBytes",
3736
"System.IO.File.ReadAllBytesAsync",
3837
"System.IO.File.ReadAllLines",
3938
"System.IO.File.ReadAllLinesAsync",
@@ -45,7 +44,6 @@
4544
"System.IO.File.SetLastAccessTime",
4645
"System.IO.File.SetLastWriteTime",
4746
"System.IO.File.WriteAllBytes",
48-
"System.IO.File.WriteAllBytes",
4947
"System.IO.File.WriteAllBytesAsync",
5048
"System.IO.File.WriteAllLines",
5149
"System.IO.File.WriteAllLinesAsync",

capa/features/extractors/ts/tools.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ def join(self, name: str) -> str:
5050
"""import subprocess ; subprocess.Popen => subprocess.Popen
5151
from threading import Timer (threading.Timer) => Timer
5252
"""
53-
toolkit = LANGUAGE_TOOLKITS[LANG_CS]
53+
toolkit = LANGUAGE_TOOLKITS[LANG_PY]
5454
qualified_names = toolkit.split_name(self.name)
5555
if len(qualified_names) < 2:
5656
return name
@@ -224,8 +224,8 @@ class PythonToolkit(LanguageToolkit):
224224
property_query_type: str = "attribute"
225225
string_delimiters: str = "\"'"
226226
integer_prefixes: List[Tuple[Union[str, Tuple[str, ...]], int]] = [
227-
(("0b, 0B"), 2),
228-
(("0o, 0O"), 8),
227+
(("0b", "0B"), 2),
228+
(("0o", "0O"), 8),
229229
(("0x", "0X"), 16),
230230
]
231231
integer_suffixes: Tuple[str, ...] = ()

tests/data

Submodule data updated from 86c79e8 to 413fd28

0 commit comments

Comments (0)