Skip to content

Commit a5bdbe7

Browse files
committed
check flaky
1 parent f7444a3 commit a5bdbe7

5 files changed

Lines changed: 79 additions & 83 deletions

File tree

.github/scripts/ci_config.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -174,22 +174,26 @@ def run_group_tests(args):
174174
crates = groups[args.group] or []
175175
failed = []
176176

177+
repeat = getattr(args, "repeat", 1) or 1
178+
177179
for crate in crates:
178180
# Skip dash-fuzz on Windows
179181
if args.os == "windows-latest" and crate == "dash-fuzz":
180182
github_notice(f"Skipping {crate} on Windows (honggfuzz not supported)")
181183
continue
182184

183-
github_group_start(f"Testing {crate}")
185+
for run in range(1, repeat + 1):
186+
label = f"Testing {crate}" if repeat == 1 else f"Testing {crate} (run {run}/{repeat})"
187+
github_group_start(label)
184188

185-
cmd = ["cargo", "test", "-p", crate, "--all-features"]
186-
result = subprocess.run(cmd)
189+
cmd = ["cargo", "test", "-p", crate, "--all-features"]
190+
result = subprocess.run(cmd)
187191

188-
github_group_end()
192+
github_group_end()
189193

190-
if result.returncode != 0:
191-
failed.append(crate)
192-
github_error(f"Test failed for {crate} on {args.os}")
194+
if result.returncode != 0:
195+
failed.append(f"{crate} (run {run})" if repeat > 1 else crate)
196+
github_error(f"Test failed for {crate} on {args.os} (run {run}/{repeat})")
193197

194198
if failed:
195199
print("\n" + "=" * 40)
@@ -225,6 +229,7 @@ def main():
225229
run_group_parser = subparsers.add_parser("run-group", help="Run tests for a group")
226230
run_group_parser.add_argument("group", help="Group name")
227231
run_group_parser.add_argument("--os", default="ubuntu-latest", help="OS name")
232+
run_group_parser.add_argument("--repeat", type=int, default=1, help="Run tests N times")
228233

229234
args = parser.parse_args()
230235

.github/workflows/build-and-test.yml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,15 @@ jobs:
5050
run: python contrib/setup-dashd.py >> "$GITHUB_ENV"
5151

5252
- name: Run tests
53-
run: python .github/scripts/ci_config.py run-group ${{ matrix.group }} --os ${{ inputs.os }}
53+
env:
54+
DASHD_TEST_RETAIN_DIR: ${{ matrix.group == 'spv' && '/tmp/dashd-test-logs' || '' }}
55+
run: python .github/scripts/ci_config.py run-group ${{ matrix.group }} --os ${{ inputs.os }} ${{ matrix.group == 'spv' && '--repeat 50' || '' }}
56+
57+
- name: Upload failed SPV test logs
58+
if: failure() && matrix.group == 'spv'
59+
uses: actions/upload-artifact@v4
60+
with:
61+
name: spv-test-logs-${{ inputs.os }}
62+
path: /tmp/dashd-test-logs/
63+
retention-days: 7
64+
if-no-files-found: ignore

dash-spv/src/sync/filters/manager.rs

Lines changed: 29 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,16 @@ impl<H: BlockHeaderStorage, FH: FilterHeaderStorage, F: FilterStorage, W: Wallet
131131
requests: &RequestSender,
132132
) -> SyncResult<Vec<SyncEvent>> {
133133
self.set_state(SyncState::Syncing);
134+
135+
// Clear all in-memory processing state for a clean start.
136+
// After a peer disconnect, in-flight batches and blocks are lost.
137+
// Stored filters on disk and the wallet's committed height provide recovery.
138+
self.active_batches.clear();
139+
self.blocks_remaining.clear();
140+
self.filters_matched.clear();
141+
self.pending_batches.clear();
142+
self.filter_pipeline = FiltersPipeline::new();
143+
134144
// Get wallet state - use filter_committed_height for restart recovery,
135145
// not synced_height (which advances per-block and may exceed committed scan progress)
136146
let (wallet_birth_height, wallet_committed_height) = {
@@ -177,59 +187,26 @@ impl<H: BlockHeaderStorage, FH: FilterHeaderStorage, F: FilterStorage, W: Wallet
177187
scan_start
178188
};
179189

180-
// Initialize storage tracking
181-
// If we have pending batches from a previous run, continue from their boundaries
182-
// instead of recalculating from storage (which might not reflect in-flight batches)
183-
if !self.pending_batches.is_empty() {
184-
let first_pending = self.pending_batches.first().unwrap().start_height();
185-
tracing::info!(
186-
"Resuming with {} pending batches, next_batch_to_store staying at {} (first pending: {})",
187-
self.pending_batches.len(),
188-
self.next_batch_to_store,
189-
first_pending
190-
);
191-
// Don't reset next_batch_to_store - keep the existing value
192-
} else {
193-
tracing::info!(
194-
"Initializing next_batch_to_store to {} (stored_filters_tip={}, scan_start={})",
195-
download_start,
196-
stored_filters_tip,
197-
scan_start
198-
);
199-
self.next_batch_to_store = download_start;
200-
}
201-
190+
self.next_batch_to_store = download_start;
202191
self.processing_height = scan_start;
203192

204-
// Initialize download pipeline for all remaining filters
205-
if download_start <= self.progress.filter_header_tip_height() {
206-
// Only reinitialize if pipeline is empty - avoid losing in-flight batches
207-
if self.filter_pipeline.active_count() == 0 && self.pending_batches.is_empty() {
208-
self.filter_pipeline.init(download_start, self.progress.filter_header_tip_height());
209-
tracing::info!(
210-
"Starting filter download from {} to {} (batch-based processing)",
211-
download_start,
212-
self.progress.filter_header_tip_height()
213-
);
214-
} else {
215-
// Extend target without resetting state - batches still in flight
216-
self.filter_pipeline.extend_target(self.progress.filter_header_tip_height());
217-
tracing::info!(
218-
"Resuming filter download to {} (active batches: {}, pending: {})",
219-
self.progress.filter_header_tip_height(),
220-
self.filter_pipeline.active_count(),
221-
self.pending_batches.len()
222-
);
223-
}
193+
tracing::info!(
194+
"Starting filter download (scan_start={}, download_start={}, stored_filters_tip={}, target={})",
195+
scan_start,
196+
download_start,
197+
stored_filters_tip,
198+
self.progress.filter_header_tip_height()
199+
);
224200

201+
// Initialize download pipeline for remaining filters
202+
if download_start <= self.progress.filter_header_tip_height() {
203+
self.filter_pipeline.init(download_start, self.progress.filter_header_tip_height());
225204
let header_storage = self.header_storage.read().await;
226205
self.filter_pipeline.send_pending(requests, &*header_storage).await?;
227206
drop(header_storage);
228207
} else {
229-
// No new filters to download - initialize pipeline to a "complete" state
230-
// so it doesn't try to download from its default start height
208+
// No new filters to download, scanning stored filters only
231209
self.filter_pipeline.init(download_start, download_start.saturating_sub(1));
232-
tracing::info!("Rescan mode: no new filters to download, scanning stored filters only");
233210
}
234211

235212
// Initialize the first processing batch
@@ -734,13 +711,19 @@ impl<H: BlockHeaderStorage, FH: FilterHeaderStorage, F: FilterStorage, W: Wallet
734711
SyncState::Syncing | SyncState::Synced
735712
if self.progress.current_height() < self.progress.filter_header_tip_height() =>
736713
{
714+
// Transition back to Syncing so is_synced() returns false
715+
// until all new filters and matched blocks are fully processed.
716+
if self.state() == SyncState::Synced {
717+
self.set_state(SyncState::Syncing);
718+
}
719+
737720
self.filter_pipeline.extend_target(tip_height);
738721
{
739722
let header_storage = self.header_storage.read().await;
740723
self.filter_pipeline.send_pending(requests, &*header_storage).await?;
741724
}
742725

743-
if self.state() == SyncState::Synced && self.active_batches.is_empty() {
726+
if self.active_batches.is_empty() {
744727
tracing::debug!("Processing new filter (target: {})", tip_height);
745728
return self.try_create_lookahead_batches().await;
746729
}

dash-spv/src/sync/filters/pipeline.rs

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,6 @@ impl FiltersPipeline {
8080
}
8181
}
8282

83-
/// Get the number of active batches.
84-
pub(super) fn active_count(&self) -> usize {
85-
self.coordinator.active_count()
86-
}
87-
8883
/// Take completed batches with their buffered filter data for processing.
8984
pub(super) fn take_completed_batches(&mut self) -> BTreeSet<FiltersBatch> {
9085
std::mem::take(&mut self.completed_batches)
@@ -315,7 +310,7 @@ mod tests {
315310
fn test_pipeline_new() {
316311
let pipeline = FiltersPipeline::new();
317312

318-
assert_eq!(pipeline.active_count(), 0);
313+
assert_eq!(pipeline.coordinator.active_count(), 0);
319314
assert!(pipeline.batch_trackers.is_empty());
320315
assert!(pipeline.completed_batches.is_empty());
321316
assert_eq!(pipeline.target_height, 0);
@@ -328,7 +323,7 @@ mod tests {
328323
let default_pipeline = FiltersPipeline::default();
329324
let new_pipeline = FiltersPipeline::new();
330325

331-
assert_eq!(default_pipeline.active_count(), new_pipeline.active_count());
326+
assert_eq!(default_pipeline.coordinator.active_count(), new_pipeline.coordinator.active_count());
332327
assert_eq!(default_pipeline.target_height, new_pipeline.target_height);
333328
}
334329

@@ -360,7 +355,7 @@ mod tests {
360355

361356
assert!(pipeline.batch_trackers.is_empty());
362357
assert!(pipeline.completed_batches.is_empty());
363-
assert_eq!(pipeline.active_count(), 0);
358+
assert_eq!(pipeline.coordinator.active_count(), 0);
364359
assert_eq!(pipeline.filters_received, 0);
365360
// 1 batch queued for heights 200-300
366361
assert_eq!(pipeline.coordinator.pending_count(), 1);
@@ -609,7 +604,7 @@ mod tests {
609604
assert_eq!(timed_out, vec![0]);
610605
// Batch should be re-queued in coordinator's pending queue
611606
assert_eq!(pipeline.coordinator.pending_count(), 1);
612-
assert_eq!(pipeline.active_count(), 0);
607+
assert_eq!(pipeline.coordinator.active_count(), 0);
613608
}
614609

615610
#[test]
@@ -660,7 +655,7 @@ mod tests {
660655
pipeline.batch_trackers.insert(2000, BatchTracker::new(2999));
661656
pipeline.coordinator.mark_sent(&[0, 1000, 2000]);
662657

663-
assert_eq!(pipeline.active_count(), 3);
658+
assert_eq!(pipeline.coordinator.active_count(), 3);
664659
assert_eq!(pipeline.coordinator.pending_count(), 0);
665660

666661
// Wait for timeout
@@ -672,7 +667,7 @@ mod tests {
672667

673668
// All 3 batches should be in the pending queue, not duplicated
674669
assert_eq!(pipeline.coordinator.pending_count(), 3);
675-
assert_eq!(pipeline.active_count(), 0);
670+
assert_eq!(pipeline.coordinator.active_count(), 0);
676671

677672
// Take pending items - should get exactly 3, not more
678673
let pending = pipeline.coordinator.take_pending(10);
@@ -701,7 +696,7 @@ mod tests {
701696
let count = pipeline.send_pending(&sender, &storage).await.unwrap();
702697

703698
assert_eq!(count, 1);
704-
assert_eq!(pipeline.active_count(), 1);
699+
assert_eq!(pipeline.coordinator.active_count(), 1);
705700
assert!(pipeline.batch_trackers.contains_key(&0));
706701
// No more pending since the single batch was sent
707702
assert_eq!(pipeline.coordinator.pending_count(), 0);
@@ -735,7 +730,7 @@ mod tests {
735730
// Should respect MAX_CONCURRENT_FILTER_BATCHES (20)
736731
// 25 batches needed, but only 20 can be in-flight at once
737732
assert_eq!(count, MAX_CONCURRENT_FILTER_BATCHES);
738-
assert_eq!(pipeline.active_count(), MAX_CONCURRENT_FILTER_BATCHES);
733+
assert_eq!(pipeline.coordinator.active_count(), MAX_CONCURRENT_FILTER_BATCHES);
739734
assert_eq!(pipeline.batch_trackers.len(), MAX_CONCURRENT_FILTER_BATCHES);
740735
// 5 batches still pending
741736
assert_eq!(pipeline.coordinator.pending_count(), 5);
@@ -783,7 +778,7 @@ mod tests {
783778

784779
// Should send all 3 batches: 0-999, 1000-1999, 2000-2500
785780
assert_eq!(count, 3);
786-
assert_eq!(pipeline.active_count(), 3);
781+
assert_eq!(pipeline.coordinator.active_count(), 3);
787782
assert_eq!(pipeline.coordinator.pending_count(), 0);
788783
}
789784

@@ -827,7 +822,7 @@ mod tests {
827822
// Send request
828823
let sent = pipeline.send_pending(&sender, &storage).await.unwrap();
829824
assert_eq!(sent, 1);
830-
assert_eq!(pipeline.active_count(), 1);
825+
assert_eq!(pipeline.coordinator.active_count(), 1);
831826

832827
// Receive all filters
833828
for h in 0..=99 {
@@ -836,7 +831,7 @@ mod tests {
836831
}
837832

838833
// Batch should be complete
839-
assert_eq!(pipeline.active_count(), 0);
834+
assert_eq!(pipeline.coordinator.active_count(), 0);
840835
assert_eq!(pipeline.completed_batches.len(), 1);
841836
assert_eq!(pipeline.filters_received, 100);
842837
assert_eq!(pipeline.highest_received, 99);
@@ -861,7 +856,7 @@ mod tests {
861856

862857
// Send initial request
863858
pipeline.send_pending(&sender, &storage).await.unwrap();
864-
assert_eq!(pipeline.active_count(), 1);
859+
assert_eq!(pipeline.coordinator.active_count(), 1);
865860
assert_eq!(pipeline.coordinator.pending_count(), 0);
866861

867862
// Wait for timeout
@@ -871,14 +866,14 @@ mod tests {
871866
let timed_out = pipeline.handle_timeouts();
872867
assert_eq!(timed_out.len(), 1);
873868
assert_eq!(pipeline.coordinator.pending_count(), 1);
874-
assert_eq!(pipeline.active_count(), 0);
869+
assert_eq!(pipeline.coordinator.active_count(), 0);
875870

876871
// Tracker should still exist for late arrivals
877872
assert!(pipeline.batch_trackers.contains_key(&0));
878873

879874
// Can retry by sending again
880875
pipeline.send_pending(&sender, &storage).await.unwrap();
881-
assert_eq!(pipeline.active_count(), 1);
876+
assert_eq!(pipeline.coordinator.active_count(), 1);
882877

883878
// Existing tracker is reused (not replaced)
884879
assert!(pipeline.batch_trackers.contains_key(&0));

dash-spv/tests/dashd_sync.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ const SYNC_TIMEOUT: u64 = 60;
3939
/// SPV-specific test context wrapping the shared dashd infrastructure.
4040
///
4141
/// Storage and blockchain directories are cleaned up on drop.
42-
/// Set `DASHD_TEST_RETAIN_DIR` to a path to copy the test data there instead of deleting it.
42+
/// Set `DASHD_TEST_RETAIN_DIR` to a directory path to retain logs and storage for failed tests.
4343
struct TestContext {
4444
dashd: DashdTestContext,
4545
storage_dir: PathBuf,
@@ -91,17 +91,19 @@ impl TestContext {
9191

9292
impl Drop for TestContext {
9393
fn drop(&mut self) {
94-
// If DASHD_TEST_RETAIN_DIR is set, copy the test data there before cleanup
95-
if let Ok(retain_dir) = std::env::var("DASHD_TEST_RETAIN_DIR") {
96-
let retain_path = PathBuf::from(&retain_dir);
97-
let test_name = std::thread::current().name().unwrap_or("unknown").to_string();
98-
let dest = retain_path.join(&test_name);
99-
if dest.exists() {
100-
let _ = std::fs::remove_dir_all(&dest);
94+
// Retain test data only for failed tests when DASHD_TEST_RETAIN_DIR is set.
95+
if std::thread::panicking() {
96+
if let Ok(retain_dir) = std::env::var("DASHD_TEST_RETAIN_DIR") {
97+
let test_name = std::thread::current().name().unwrap().to_string();
98+
let dest = PathBuf::from(&retain_dir).join(&test_name);
99+
if dest.exists() {
100+
let _ = std::fs::remove_dir_all(&dest);
101+
}
102+
copy_dir(&self.storage_dir, &dest);
103+
eprintln!("Test data retained at: {}", dest.display());
101104
}
102-
copy_dir(&self.storage_dir, &dest);
103-
eprintln!("Test data retained at: {}", dest.display());
104105
}
106+
105107
// Clean up the storage directory
106108
let _ = std::fs::remove_dir_all(&self.storage_dir);
107109
}

0 commit comments

Comments
 (0)