Skip to content

Commit fe0a2e8

Browse files
committed
perf(pet-windows-registry): parallelize HKLM/HKCU walk and reuse cache across find() (Fixes #454)
1 parent 4a45be5 commit fe0a2e8

2 files changed

Lines changed: 231 additions & 54 deletions

File tree

crates/pet-windows-registry/src/environments.rs

Lines changed: 139 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use pet_core::reporter::Reporter;
88
#[cfg(windows)]
99
use pet_core::{
1010
arch::Architecture,
11-
manager::EnvManager,
1211
python_environment::{PythonEnvironmentBuilder, PythonEnvironmentKind},
1312
LocatorResult,
1413
};
@@ -19,60 +18,155 @@ use std::{path::PathBuf, sync::Arc};
1918
#[cfg(windows)]
2019
use winreg::RegKey;
2120

21+
#[cfg(windows)]
22+
fn empty_result() -> LocatorResult {
23+
LocatorResult {
24+
environments: vec![],
25+
managers: vec![],
26+
}
27+
}
28+
29+
/// Logs a warning if a spawned registry-walk thread panicked, then
30+
/// substitutes an empty result so the surviving hive/companies still
31+
/// surface their environments. Without this we'd silently lose the entire
32+
/// hive when one company's walk panics — exactly the kind of regression
33+
/// that's hardest to debug after the fact.
34+
#[cfg(windows)]
35+
fn join_or_warn(join_result: std::thread::Result<LocatorResult>, label: &str) -> LocatorResult {
36+
use log::warn;
37+
match join_result {
38+
Ok(result) => result,
39+
Err(panic_payload) => {
40+
// Try to render the payload for the log; payloads are commonly
41+
// either a `&'static str` or a `String`.
42+
let message = panic_payload
43+
.downcast_ref::<&'static str>()
44+
.map(|s| (*s).to_string())
45+
.or_else(|| panic_payload.downcast_ref::<String>().cloned())
46+
.unwrap_or_else(|| "<non-string panic payload>".to_string());
47+
warn!("Registry walk thread for {} panicked: {}", label, message);
48+
empty_result()
49+
}
50+
}
51+
}
52+
2253
#[cfg(windows)]
2354
pub fn get_registry_pythons(
2455
conda_locator: &Arc<dyn CondaLocator>,
2556
reporter: &Option<&dyn Reporter>,
2657
) -> LocatorResult {
27-
use log::{trace, warn};
58+
use std::thread;
2859

29-
let mut environments = vec![];
30-
let mut managers: Vec<EnvManager> = vec![];
60+
// Walk both hives in parallel. Each hive walks its companies in parallel
61+
// too (see `get_registry_pythons_for_hive`). HKLM and HKCU sit on
62+
// independent registry trees and Defender intercepts every read, so the
63+
// serial baseline was paying for both round-trips back to back; the
64+
// scope-spawn pattern matches `pet-pyenv` / `pet-homebrew` / `pet-conda`.
65+
let (hklm_result, hkcu_result) = thread::scope(|s| {
66+
let hklm = s.spawn(|| {
67+
get_registry_pythons_for_hive(
68+
"HKLM",
69+
RegKey::predef(winreg::enums::HKEY_LOCAL_MACHINE),
70+
conda_locator,
71+
reporter,
72+
)
73+
});
74+
let hkcu = s.spawn(|| {
75+
get_registry_pythons_for_hive(
76+
"HKCU",
77+
RegKey::predef(winreg::enums::HKEY_CURRENT_USER),
78+
conda_locator,
79+
reporter,
80+
)
81+
});
82+
(
83+
join_or_warn(hklm.join(), "HKLM"),
84+
join_or_warn(hkcu.join(), "HKCU"),
85+
)
86+
});
87+
88+
let mut environments = hklm_result.environments;
89+
environments.extend(hkcu_result.environments);
90+
let mut managers = hklm_result.managers;
91+
managers.extend(hkcu_result.managers);
3192

32-
struct RegistryKey {
33-
pub name: &'static str,
34-
pub key: winreg::RegKey,
93+
LocatorResult {
94+
environments,
95+
managers,
3596
}
36-
let search_keys = [
37-
RegistryKey {
38-
name: "HKLM",
39-
key: winreg::RegKey::predef(winreg::enums::HKEY_LOCAL_MACHINE),
40-
},
41-
RegistryKey {
42-
name: "HKCU",
43-
key: winreg::RegKey::predef(winreg::enums::HKEY_CURRENT_USER),
44-
},
45-
];
46-
for (name, key) in search_keys.iter().map(|f| (f.name, &f.key)) {
47-
match key.open_subkey("Software\\Python") {
48-
Ok(python_key) => {
49-
for company in python_key.enum_keys().filter_map(Result::ok) {
50-
trace!("Searching {}\\Software\\Python\\{}", name, company);
51-
match python_key.open_subkey(&company) {
52-
Ok(company_key) => {
53-
let result = get_registry_pythons_from_key_for_company(
54-
name,
55-
&company_key,
56-
&company,
57-
conda_locator,
58-
reporter,
59-
);
60-
managers.extend(result.managers);
61-
environments.extend(result.environments);
62-
}
63-
Err(err) => {
64-
warn!(
65-
"Failed to open {}\\Software\\Python\\{}, {:?}",
66-
name, company, err
67-
);
68-
}
69-
}
70-
}
71-
}
97+
}
98+
99+
/// Walks `<hive>\Software\Python\<company>` for every company in the given
100+
/// hive. Companies are processed in parallel; each spawned thread owns its
101+
/// own `RegKey` handle (which is `Send` but not `Sync` in `winreg`).
102+
#[cfg(windows)]
103+
fn get_registry_pythons_for_hive(
104+
name: &'static str,
105+
hive: RegKey,
106+
conda_locator: &Arc<dyn CondaLocator>,
107+
reporter: &Option<&dyn Reporter>,
108+
) -> LocatorResult {
109+
use log::{trace, warn};
110+
use std::thread;
111+
112+
let python_key = match hive.open_subkey("Software\\Python") {
113+
Ok(k) => k,
114+
Err(err) => {
115+
warn!("Failed to open {}\\Software\\Python, {:?}", name, err);
116+
return empty_result();
117+
}
118+
};
119+
120+
// Open each company subkey serially. Opening a registry handle is cheap
121+
// (no recursive enumeration); the heavy work happens once we start
122+
// pulling values out of `<company>\<install>\InstallPath`. Collecting
123+
// owned `(String, RegKey)` pairs lets us hand each company to its own
124+
// thread without sharing a `RegKey` (which is `Send` but not `Sync`).
125+
let companies: Vec<(String, RegKey)> = python_key
126+
.enum_keys()
127+
.filter_map(Result::ok)
128+
.filter_map(|company| match python_key.open_subkey(&company) {
129+
Ok(company_key) => Some((company, company_key)),
72130
Err(err) => {
73-
warn!("Failed to open {}\\Software\\Python, {:?}", name, err)
131+
warn!(
132+
"Failed to open {}\\Software\\Python\\{}, {:?}",
133+
name, company, err
134+
);
135+
None
74136
}
75-
}
137+
})
138+
.collect();
139+
140+
let results: Vec<LocatorResult> = thread::scope(|s| {
141+
let handles: Vec<_> = companies
142+
.into_iter()
143+
.map(|(company, company_key)| {
144+
s.spawn(move || {
145+
// Trace order is intentionally relaxed: companies are
146+
// walked in parallel, so this line interleaves with the
147+
// others from the same hive.
148+
trace!("Searching {}\\Software\\Python\\{}", name, company);
149+
get_registry_pythons_from_key_for_company(
150+
name,
151+
&company_key,
152+
&company,
153+
conda_locator,
154+
reporter,
155+
)
156+
})
157+
})
158+
.collect();
159+
handles
160+
.into_iter()
161+
.map(|h| join_or_warn(h.join(), "per-company walk"))
162+
.collect()
163+
});
164+
165+
let mut environments = vec![];
166+
let mut managers = vec![];
167+
for r in results {
168+
environments.extend(r.environments);
169+
managers.extend(r.managers);
76170
}
77171
LocatorResult {
78172
environments,

crates/pet-windows-registry/src/lib.rs

Lines changed: 92 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,6 @@ impl WindowsRegistry {
4444

4545
Some(registry_result)
4646
}
47-
#[cfg(windows)]
48-
fn clear(&self) {
49-
let mut search_result = self
50-
.search_result
51-
.lock()
52-
.expect("search_result mutex poisoned");
53-
search_result.take();
54-
}
5547

5648
fn sync_search_result_from(&self, source: &WindowsRegistry) {
5749
let search_result = source
@@ -132,7 +124,12 @@ impl Locator for WindowsRegistry {
132124

133125
#[cfg(windows)]
134126
fn find(&self, reporter: &dyn Reporter) {
135-
self.clear();
127+
// We no longer reset `search_result` here: `find_with_cache` already
128+
// returns the cached result on a hit, and `find()` is invoked on
129+
// transient locators per refresh, so the cache is empty by
130+
// construction the first time. Re-clearing forced every refresh to
131+
// re-walk both registry hives, each of which is intercepted by
132+
// Windows Defender (issue #454).
136133
let _ = self.find_with_cache(Some(reporter));
137134
}
138135
#[cfg(unix)]
@@ -326,4 +323,90 @@ mod tests {
326323

327324
assert!(locator.try_from(&env).is_none());
328325
}
326+
327+
/// `find()` must NOT clear the cache before populating it. The previous
328+
/// implementation called `self.clear()` first, which forced every
329+
/// `refresh` RPC to re-walk both registry hives — a Defender-intercepted
330+
/// hot path tracked in #454. This test pins down the new contract:
331+
/// pre-populate the cache, run `find()`, and assert the original entries
332+
/// survived (i.e. `find_with_cache` short-circuited on the cache hit).
333+
#[cfg(windows)]
334+
#[test]
335+
fn test_find_reuses_cached_results_within_locator_lifetime() {
336+
use pet_core::manager::EnvManager;
337+
use pet_core::python_environment::PythonEnvironment;
338+
use pet_core::reporter::Reporter;
339+
use pet_core::telemetry::TelemetryEvent;
340+
341+
struct CountingReporter;
342+
impl Reporter for CountingReporter {
343+
fn report_manager(&self, _manager: &EnvManager) {}
344+
fn report_environment(&self, _env: &PythonEnvironment) {}
345+
fn report_telemetry(&self, _event: &TelemetryEvent) {}
346+
}
347+
348+
let locator = create_locator();
349+
let cached = LocatorResult {
350+
managers: vec![],
351+
environments: vec![PythonEnvironment {
352+
name: Some("cached".to_string()),
353+
..Default::default()
354+
}],
355+
};
356+
locator
357+
.search_result
358+
.lock()
359+
.unwrap()
360+
.replace(cached.clone());
361+
362+
locator.find(&CountingReporter);
363+
364+
let after = locator
365+
.search_result
366+
.lock()
367+
.unwrap()
368+
.clone()
369+
.expect("cache must remain populated after find()");
370+
assert_eq!(
371+
after.environments.len(),
372+
1,
373+
"find() must not clear the cache before populating",
374+
);
375+
assert_eq!(after.environments[0].name.as_deref(), Some("cached"));
376+
}
377+
378+
/// Smoke test: on a fresh locator (empty cache), `find()` runs the new
379+
/// parallel walk through HKLM and HKCU and never panics or deadlocks.
380+
/// The discovered environment list may legitimately be empty on a CI
381+
/// runner without any Python registry installs — we only assert the
382+
/// cache was populated (i.e. the walk completed and `Some(_)` was
383+
/// stored), not its contents.
384+
#[cfg(windows)]
385+
#[test]
386+
fn test_find_on_fresh_locator_completes_parallel_walk() {
387+
use pet_core::manager::EnvManager;
388+
use pet_core::python_environment::PythonEnvironment;
389+
use pet_core::reporter::Reporter;
390+
use pet_core::telemetry::TelemetryEvent;
391+
392+
struct NoopReporter;
393+
impl Reporter for NoopReporter {
394+
fn report_manager(&self, _manager: &EnvManager) {}
395+
fn report_environment(&self, _env: &PythonEnvironment) {}
396+
fn report_telemetry(&self, _event: &TelemetryEvent) {}
397+
}
398+
399+
let locator = create_locator();
400+
assert!(
401+
locator.search_result.lock().unwrap().is_none(),
402+
"freshly built locator must start with an empty cache",
403+
);
404+
405+
locator.find(&NoopReporter);
406+
407+
assert!(
408+
locator.search_result.lock().unwrap().is_some(),
409+
"find() must populate the cache after walking both hives",
410+
);
411+
}
329412
}

0 commit comments

Comments
 (0)