Skip to content

Commit f602708

Browse files
committed
fix: skip caching registry walk results when worker thread panicked (PR #458)
1 parent 9eec5f5 commit f602708

2 files changed

Lines changed: 76 additions & 19 deletions

File tree

crates/pet-windows-registry/src/environments.rs

Lines changed: 68 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,18 @@ fn empty_result() -> LocatorResult {
3131
/// surface their environments. Without this we'd silently lose the entire
3232
/// hive when one company's walk panics — exactly the kind of regression
3333
/// that's hardest to debug after the fact.
34+
///
35+
/// Returns `(result, had_panic)` so callers can decide not to cache a
36+
/// partial result (otherwise a single transient panic would persist as a
37+
/// stale empty/partial cache across refreshes — see issue #454).
3438
#[cfg(windows)]
35-
fn join_or_warn(join_result: std::thread::Result<LocatorResult>, label: &str) -> LocatorResult {
39+
fn join_or_warn(
40+
join_result: std::thread::Result<LocatorResult>,
41+
label: &str,
42+
) -> (LocatorResult, bool) {
3643
use log::warn;
3744
match join_result {
38-
Ok(result) => result,
45+
Ok(result) => (result, false),
3946
Err(panic_payload) => {
4047
// Try to render the payload for the log; payloads are commonly
4148
// either a `&'static str` or a `String`.
@@ -45,24 +52,34 @@ fn join_or_warn(join_result: std::thread::Result<LocatorResult>, label: &str) ->
4552
.or_else(|| panic_payload.downcast_ref::<String>().cloned())
4653
.unwrap_or_else(|| "<non-string panic payload>".to_string());
4754
warn!("Registry walk thread for {} panicked: {}", label, message);
48-
empty_result()
55+
(empty_result(), true)
4956
}
5057
}
5158
}
5259

60+
/// Outcome of a registry walk: the discovered environments/managers and a
61+
/// flag indicating whether any worker thread panicked. Callers should
62+
/// avoid caching a result with `had_panic = true` so a transient failure
63+
/// can be retried on the next refresh instead of becoming sticky.
64+
#[cfg(windows)]
65+
pub struct RegistryWalkOutcome {
66+
pub result: LocatorResult,
67+
pub had_panic: bool,
68+
}
69+
5370
#[cfg(windows)]
5471
pub fn get_registry_pythons(
5572
conda_locator: &Arc<dyn CondaLocator>,
5673
reporter: &Option<&dyn Reporter>,
57-
) -> LocatorResult {
74+
) -> RegistryWalkOutcome {
5875
use std::thread;
5976

6077
// Walk both hives in parallel. Each hive walks its companies in parallel
6178
// too (see `get_registry_pythons_for_hive`). HKLM and HKCU sit on
6279
// independent registry trees and Defender intercepts every read, so the
6380
// serial baseline was paying for both round-trips back to back; the
6481
// scope-spawn pattern matches `pet-pyenv` / `pet-homebrew` / `pet-conda`.
65-
let (hklm_result, hkcu_result) = thread::scope(|s| {
82+
let ((hklm_result, hklm_panic), (hkcu_result, hkcu_panic)) = thread::scope(|s| {
6683
let hklm = s.spawn(|| {
6784
get_registry_pythons_for_hive(
6885
"HKLM",
@@ -80,8 +97,8 @@ pub fn get_registry_pythons(
8097
)
8198
});
8299
(
83-
join_or_warn(hklm.join(), "HKLM"),
84-
join_or_warn(hkcu.join(), "HKCU"),
100+
join_hive_outcome(hklm.join(), "HKLM"),
101+
join_hive_outcome(hkcu.join(), "HKCU"),
85102
)
86103
});
87104

@@ -90,9 +107,35 @@ pub fn get_registry_pythons(
90107
let mut managers = hklm_result.managers;
91108
managers.extend(hkcu_result.managers);
92109

93-
LocatorResult {
94-
environments,
95-
managers,
110+
RegistryWalkOutcome {
111+
result: LocatorResult {
112+
environments,
113+
managers,
114+
},
115+
had_panic: hklm_panic || hkcu_panic,
116+
}
117+
}
118+
119+
/// Sibling of `join_or_warn` for hive-level threads. Returns the recovered
120+
/// `LocatorResult` plus a `had_panic` flag that already accounts for any
121+
/// company-level panics propagated through `RegistryWalkOutcome`.
122+
#[cfg(windows)]
123+
fn join_hive_outcome(
124+
join_result: std::thread::Result<RegistryWalkOutcome>,
125+
label: &str,
126+
) -> (LocatorResult, bool) {
127+
use log::warn;
128+
match join_result {
129+
Ok(outcome) => (outcome.result, outcome.had_panic),
130+
Err(panic_payload) => {
131+
let message = panic_payload
132+
.downcast_ref::<&'static str>()
133+
.map(|s| (*s).to_string())
134+
.or_else(|| panic_payload.downcast_ref::<String>().cloned())
135+
.unwrap_or_else(|| "<non-string panic payload>".to_string());
136+
warn!("Registry walk thread for {} panicked: {}", label, message);
137+
(empty_result(), true)
138+
}
96139
}
97140
}
98141

@@ -105,15 +148,18 @@ fn get_registry_pythons_for_hive(
105148
hive: RegKey,
106149
conda_locator: &Arc<dyn CondaLocator>,
107150
reporter: &Option<&dyn Reporter>,
108-
) -> LocatorResult {
151+
) -> RegistryWalkOutcome {
109152
use log::{trace, warn};
110153
use std::thread;
111154

112155
let python_key = match hive.open_subkey("Software\\Python") {
113156
Ok(k) => k,
114157
Err(err) => {
115158
warn!("Failed to open {}\\Software\\Python, {:?}", name, err);
116-
return empty_result();
159+
return RegistryWalkOutcome {
160+
result: empty_result(),
161+
had_panic: false,
162+
};
117163
}
118164
};
119165

@@ -137,7 +183,7 @@ fn get_registry_pythons_for_hive(
137183
})
138184
.collect();
139185

140-
let results: Vec<LocatorResult> = thread::scope(|s| {
186+
let results: Vec<(LocatorResult, bool)> = thread::scope(|s| {
141187
let handles: Vec<_> = companies
142188
.into_iter()
143189
.map(|(company, company_key)| {
@@ -168,13 +214,18 @@ fn get_registry_pythons_for_hive(
168214

169215
let mut environments = vec![];
170216
let mut managers = vec![];
171-
for r in results {
217+
let mut had_panic = false;
218+
for (r, panicked) in results {
172219
environments.extend(r.environments);
173220
managers.extend(r.managers);
221+
had_panic |= panicked;
174222
}
175-
LocatorResult {
176-
environments,
177-
managers,
223+
RegistryWalkOutcome {
224+
result: LocatorResult {
225+
environments,
226+
managers,
227+
},
228+
had_panic,
178229
}
179230
}
180231

crates/pet-windows-registry/src/lib.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,14 @@ impl WindowsRegistry {
3939
return Some(Arc::clone(result));
4040
}
4141

42-
let registry_result = Arc::new(get_registry_pythons(&self.conda_locator, &reporter));
43-
result.replace(Arc::clone(&registry_result));
42+
let outcome = get_registry_pythons(&self.conda_locator, &reporter);
43+
let registry_result = Arc::new(outcome.result);
44+
// If any worker thread panicked, the result is potentially partial.
45+
// Skip persisting it so the next refresh can retry the walk instead
46+
// of replaying a stale empty/partial cache forever (issue #454).
47+
if !outcome.had_panic {
48+
result.replace(Arc::clone(&registry_result));
49+
}
4450

4551
Some(registry_result)
4652
}

0 commit comments

Comments
 (0)