Skip to content

Commit ee913f7

Browse files
Copilot and eleanorjboyd committed
Implement path resolution caching for pytest discovery performance
Co-authored-by: eleanorjboyd <26030610+eleanorjboyd@users.noreply.github.com>
1 parent f83fd3e commit ee913f7

1 file changed

Lines changed: 69 additions & 29 deletions

File tree

python_files/vscode_pytest/__init__.py

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ def __init__(self, message):
8080
SYMLINK_PATH = None
8181
INCLUDE_BRANCHES = False
8282

83+
# Performance optimization caches for path resolution
84+
_path_cache: dict[int, pathlib.Path] = {} # Cache node paths by object id
85+
_path_to_str_cache: dict[pathlib.Path, str] = {} # Cache path-to-string conversions
86+
_CACHED_CWD: pathlib.Path | None = None # Cache cwd once instead of thousands of calls
87+
8388

8489
def pytest_load_initial_conftests(early_config, parser, args): # noqa: ARG001
8590
has_pytest_cov = early_config.pluginmanager.hasplugin("pytest_cov")
@@ -619,20 +624,22 @@ def process_parameterized_test(
619624

620625
class_and_method = second_split[1] + "::" # This has "::" separator at both ends
621626
# construct the parent id, so it is absolute path :: any class and method :: parent_part
622-
parent_id = os.fspath(get_node_path(test_case)) + class_and_method + parent_part
627+
parent_id = cached_fsdecode(get_node_path(test_case)) + class_and_method + parent_part
623628

624629
try:
625630
function_name = test_case.originalname # type: ignore
626-
function_test_node = function_nodes_dict[parent_id]
627631
except AttributeError: # actual error has occurred
628632
ERRORS.append(
629633
f"unable to find original name for {test_case.name} with parameterization detected."
630634
)
631635
raise VSCodePytestError(
632636
"Unable to find original name for parameterized test case"
633637
) from None
634-
except KeyError:
635-
function_test_node: TestNode = create_parameterized_function_node(
638+
639+
# Use dict.get() instead of exception-based control flow
640+
function_test_node = function_nodes_dict.get(parent_id)
641+
if function_test_node is None:
642+
function_test_node = create_parameterized_function_node(
636643
function_name, get_node_path(test_case), parent_id
637644
)
638645
function_nodes_dict[parent_id] = function_test_node
@@ -644,11 +651,11 @@ def process_parameterized_test(
644651
if isinstance(test_case.parent, pytest.File):
645652
# calculate the parent path of the test case
646653
parent_path = get_node_path(test_case.parent)
647-
try:
648-
parent_test_case = file_nodes_dict[os.fspath(parent_path)]
649-
except KeyError:
654+
parent_path_key = cached_fsdecode(parent_path)
655+
parent_test_case = file_nodes_dict.get(parent_path_key)
656+
if parent_test_case is None:
650657
parent_test_case = create_file_node(parent_path)
651-
file_nodes_dict[os.fspath(parent_path)] = parent_test_case
658+
file_nodes_dict[parent_path_key] = parent_test_case
652659
if function_test_node not in parent_test_case["children"]:
653660
parent_test_case["children"].append(function_test_node)
654661

@@ -693,9 +700,9 @@ def build_test_tree(session: pytest.Session) -> TestNode:
693700
USES_PYTEST_DESCRIBE and isinstance(case_iter, DescribeBlock)
694701
):
695702
# While the given node is a class, create a class and nest the previous node as a child.
696-
try:
697-
test_class_node = class_nodes_dict[case_iter.nodeid]
698-
except KeyError:
703+
# Use dict.get() instead of exception-based control flow
704+
test_class_node = class_nodes_dict.get(case_iter.nodeid)
705+
if test_class_node is None:
699706
test_class_node = create_class_node(case_iter)
700707
class_nodes_dict[case_iter.nodeid] = test_class_node
701708
# Check if the class already has the child node. This will occur if the test is parameterized.
@@ -712,11 +719,11 @@ def build_test_tree(session: pytest.Session) -> TestNode:
712719
break
713720
parent_path = get_node_path(parent_module)
714721
# Create a file node that has the last class as a child.
715-
try:
716-
test_file_node: TestNode = file_nodes_dict[os.fspath(parent_path)]
717-
except KeyError:
722+
parent_path_key = cached_fsdecode(parent_path)
723+
test_file_node = file_nodes_dict.get(parent_path_key)
724+
if test_file_node is None:
718725
test_file_node = create_file_node(parent_path)
719-
file_nodes_dict[os.fspath(parent_path)] = test_file_node
726+
file_nodes_dict[parent_path_key] = test_file_node
720727
# Check if the class is already a child of the file node.
721728
if test_class_node is not None and test_class_node not in test_file_node["children"]:
722729
test_file_node["children"].append(test_class_node)
@@ -731,11 +738,11 @@ def build_test_tree(session: pytest.Session) -> TestNode:
731738
test_case.parent,
732739
)
733740
)
734-
try:
735-
parent_test_case = file_nodes_dict[os.fspath(parent_path)]
736-
except KeyError:
741+
parent_path_key = cached_fsdecode(parent_path)
742+
parent_test_case = file_nodes_dict.get(parent_path_key)
743+
if parent_test_case is None:
737744
parent_test_case = create_file_node(parent_path)
738-
file_nodes_dict[os.fspath(parent_path)] = parent_test_case
745+
file_nodes_dict[parent_path_key] = parent_test_case
739746
parent_test_case["children"].append(test_node)
740747
# Process all files and construct them into nested folders
741748
session_children_dict = construct_nested_folders(
@@ -776,11 +783,11 @@ def build_nested_folders(
776783
max_iter = 100
777784
while iterator_path != session_node_path:
778785
curr_folder_name = iterator_path.name
779-
try:
780-
curr_folder_node: TestNode = created_files_folders_dict[os.fspath(iterator_path)]
781-
except KeyError:
782-
curr_folder_node: TestNode = create_folder_node(curr_folder_name, iterator_path)
783-
created_files_folders_dict[os.fspath(iterator_path)] = curr_folder_node
786+
iterator_path_key = cached_fsdecode(iterator_path)
787+
curr_folder_node = created_files_folders_dict.get(iterator_path_key)
788+
if curr_folder_node is None:
789+
curr_folder_node = create_folder_node(curr_folder_name, iterator_path)
790+
created_files_folders_dict[iterator_path_key] = curr_folder_node
784791
if prev_folder_node not in curr_folder_node["children"]:
785792
curr_folder_node["children"].append(prev_folder_node)
786793
iterator_path = iterator_path.parent
@@ -942,6 +949,24 @@ class CoveragePayloadDict(Dict):
942949
error: str | None # Currently unused need to check
943950

944951

952+
def cached_fsdecode(path: pathlib.Path) -> str:
    """Return the string form of ``path``, memoized in ``_path_to_str_cache``.

    Despite the function's name, the conversion is performed with
    ``os.fspath()`` (not ``os.fsdecode()``). The result is stored in the
    module-level ``_path_to_str_cache`` dict keyed by the path object, so
    repeated conversions of an equal path during test tree building reuse
    the stored string instead of converting again.

    Parameters:
        path: The pathlib.Path object to convert to string.

    Returns:
        str: The string representation of the path.
    """
    # A single .get() covers the hit path; the original did an `in` check
    # followed by a second indexing lookup on every call.
    path_str = _path_to_str_cache.get(path)
    if path_str is None:
        # os.fspath() on a pathlib.Path returns a str, so None can only
        # mean "not cached yet".
        path_str = os.fspath(path)
        _path_to_str_cache[path] = path_str
    return path_str
968+
969+
945970
def get_node_path(
946971
node: pytest.Session
947972
| pytest.Item
@@ -953,6 +978,7 @@ def get_node_path(
953978
"""A function that returns the path of a node given the switch to pathlib.Path.
954979
955980
It also evaluates if the node is a symlink and returns the equivalent path.
981+
This function uses caching to avoid redundant path resolution operations.
956982
957983
Parameters:
958984
node: A pytest object or any object that has a path or fspath attribute.
@@ -961,6 +987,11 @@ def get_node_path(
961987
Returns:
962988
pathlib.Path: The resolved path for the node.
963989
"""
990+
# Use object id as cache key for O(1) lookups
991+
cache_key = id(node)
992+
if cache_key in _path_cache:
993+
return _path_cache[cache_key]
994+
964995
node_path = getattr(node, "path", None)
965996
if node_path is None:
966997
fspath = getattr(node, "fspath", None)
@@ -982,19 +1013,28 @@ def get_node_path(
9821013
common_path = os.path.commonpath([symlink_str, node_path_str])
9831014
if common_path == os.fsdecode(SYMLINK_PATH):
9841015
# The node path is already relative to the SYMLINK_PATH root therefore return
985-
return node_path
1016+
result = node_path
9861017
else:
9871018
# If the node path is not a symlink, then we need to calculate the equivalent symlink path
9881019
# get the relative path between the cwd and the node path (as the node path is not a symlink).
989-
rel_path = node_path.relative_to(pathlib.Path.cwd())
1020+
# Use cached cwd to avoid repeated system calls
1021+
global _CACHED_CWD
1022+
if _CACHED_CWD is None:
1023+
_CACHED_CWD = pathlib.Path.cwd()
1024+
rel_path = node_path.relative_to(_CACHED_CWD)
9901025
# combine the difference between the cwd and the node path with the symlink path
991-
return pathlib.Path(SYMLINK_PATH, rel_path)
1026+
result = pathlib.Path(SYMLINK_PATH, rel_path)
9921027
except Exception as e:
9931028
raise VSCodePytestError(
9941029
f"Error occurred while calculating symlink equivalent from node path: {e}"
995-
f"\n SYMLINK_PATH: {SYMLINK_PATH}, \n node path: {node_path}, \n cwd: {pathlib.Path.cwd()}"
1030+
f"\n SYMLINK_PATH: {SYMLINK_PATH}, \n node path: {node_path}, \n cwd: {_CACHED_CWD if _CACHED_CWD else pathlib.Path.cwd()}"
9961031
) from e
997-
return node_path
1032+
else:
1033+
result = node_path
1034+
1035+
# Cache before returning
1036+
_path_cache[cache_key] = result
1037+
return result
9981038

9991039

10001040
__writer = None

0 commit comments

Comments
 (0)