Skip to content

Commit 841c130

Browse files
feat(t-wise): make sampling deterministic between runs
Introduces IntSet and IntMap for deterministic iteration order. Actually deterministic sampling only happens when compiled with the `deterministic` feature.
1 parent 2497604 commit 841c130

9 files changed

Lines changed: 182 additions & 35 deletions

File tree

ddnnife/src/ddnnf/anomalies/t_wise_sampling/sample.rs

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use super::Config;
22
use super::t_iterator::TInteractionIter;
3+
use crate::int_hash::IntSet;
34
use std::cmp::{Ordering, min};
4-
use std::collections::HashSet;
55
use std::fmt::{Display, Formatter};
66
use std::iter;
77
use streaming_iterator::StreamingIterator;
@@ -17,7 +17,7 @@ pub struct Sample {
1717
/// Configs that do not contain all variables of this sample
1818
pub partial_configs: Vec<Config>,
1919
/// The variables that Configs of this sample may contain
20-
pub vars: HashSet<u32>,
20+
pub vars: IntSet<u32>,
2121
/// The literals that actually occur in this sample, this is not a HashSet because we want
2222
/// a stable iteration order.
2323
pub literals: Vec<i32>,
@@ -61,7 +61,7 @@ impl Display for Sample {
6161

6262
impl Sample {
6363
/// Create an empty sample that may contain the given variables
64-
pub fn new(vars: HashSet<u32>) -> Self {
64+
pub fn new(vars: IntSet<u32>) -> Self {
6565
Self {
6666
complete_configs: vec![],
6767
partial_configs: vec![],
@@ -94,7 +94,7 @@ impl Sample {
9494
literals.sort_unstable();
9595
literals.dedup();
9696

97-
let vars: HashSet<u32> = literals.iter().map(|x| x.unsigned_abs()).collect();
97+
let vars: IntSet<u32> = literals.iter().map(|x| x.unsigned_abs()).collect();
9898

9999
let mut sample = Self {
100100
complete_configs: vec![],
@@ -108,13 +108,13 @@ impl Sample {
108108
}
109109

110110
pub fn new_from_samples(samples: &[&Self]) -> Self {
111-
let vars: HashSet<u32> = samples
111+
let vars: IntSet<u32> = samples
112112
.iter()
113113
.flat_map(|sample| sample.vars.iter())
114114
.cloned()
115115
.collect();
116116

117-
let literals: HashSet<i32> = samples
117+
let literals: IntSet<i32> = samples
118118
.iter()
119119
.flat_map(|sample| sample.get_literals().iter().copied())
120120
.collect();
@@ -127,7 +127,7 @@ impl Sample {
127127

128128
/// Create a sample that only contains a single configuration with a single literal
129129
pub fn from_literal(literal: i32, number_of_variables: usize) -> Self {
130-
let mut sample = Self::new(HashSet::from([literal.unsigned_abs()]));
130+
let mut sample = Self::new([literal.unsigned_abs()].into_iter().collect());
131131
sample.literals = vec![literal];
132132
sample.add_complete(Config::from(&[literal], number_of_variables));
133133
sample
@@ -137,7 +137,7 @@ impl Sample {
137137
&self.literals
138138
}
139139

140-
pub fn get_vars(&self) -> &HashSet<u32> {
140+
pub fn get_vars(&self) -> &IntSet<u32> {
141141
&self.vars
142142
}
143143

@@ -169,10 +169,9 @@ impl Sample {
169169
///
170170
/// # Examples
171171
/// ```
172-
/// use std::collections::HashSet;
173172
/// use ddnnife::ddnnf::anomalies::t_wise_sampling::{Config, Sample};
174173
///
175-
/// let sample = Sample::new(HashSet::from([1,2,3]));
174+
/// let sample = Sample::new([1,2,3].into_iter().collect());
176175
///
177176
/// assert!(sample.is_config_complete(&Config::from(&[1,2,3], 3)));
178177
/// assert!(!sample.is_config_complete(&Config::from(&[1,2], 3)));
@@ -211,9 +210,8 @@ impl Sample {
211210
///
212211
/// # Examples
213212
/// ```
214-
/// use std::collections::HashSet;
215213
/// use ddnnife::ddnnf::anomalies::t_wise_sampling::{Config, Sample};
216-
/// let mut s = Sample::new(HashSet::from([1,2,3]));
214+
/// let mut s = Sample::new([1,2,3].into_iter().collect());
217215
///
218216
/// assert!(s.is_empty());
219217
/// s.add_partial(Config::from(&[1,3], 3));
@@ -247,7 +245,7 @@ mod test {
247245
let sample = Sample {
248246
complete_configs: vec![Config::from(&[1, 2, 3, -4, -5], 5)],
249247
partial_configs: vec![],
250-
vars: HashSet::from([1, 2, 3, 4, 5]),
248+
vars: [1, 2, 3, 4, 5].into_iter().collect(),
251249
literals: vec![1, 2, 3, -4, -5],
252250
};
253251

ddnnife/src/ddnnf/anomalies/t_wise_sampling/sample_merger/similarity_merger.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ use super::super::SamplingResult;
22
use super::super::t_iterator::TInteractionIter;
33
use super::{Config, Sample};
44
use super::{OrMerger, SampleMerger};
5+
use crate::int_hash::IntSet;
56
use crate::util::rng;
67
use rand::prelude::SliceRandom;
78
use std::cmp::{Ordering, min};
8-
use std::collections::HashSet;
99
use streaming_iterator::StreamingIterator;
1010

1111
#[derive(Debug, Copy, Clone)]
@@ -74,7 +74,7 @@ fn snd<'a>((_, candidate): &(usize, &'a Candidate<'_>)) -> &'a Candidate<'a> {
7474
#[derive(Debug, Clone, PartialEq, Eq)]
7575
struct Candidate<'a> {
7676
config: &'a Config,
77-
literals: HashSet<i32>,
77+
literals: IntSet<i32>,
7878
max_intersect: usize,
7979
total_intersect: usize,
8080
}
@@ -101,7 +101,7 @@ impl Ord for Candidate<'_> {
101101

102102
impl<'a> Candidate<'a> {
103103
fn new(config: &'a Config) -> Self {
104-
let literals: HashSet<i32> = config.get_decided_literals().collect();
104+
let literals: IntSet<i32> = config.get_decided_literals().collect();
105105
debug_assert!(!literals.contains(&0));
106106
debug_assert!(!literals.is_empty());
107107
Self {
@@ -112,7 +112,7 @@ impl<'a> Candidate<'a> {
112112
}
113113
}
114114

115-
fn update(&mut self, other_literals: &HashSet<i32>) {
115+
fn update(&mut self, other_literals: &IntSet<i32>) {
116116
let intersect = self.literals.intersection(other_literals).count();
117117

118118
self.total_intersect += intersect;
@@ -180,7 +180,7 @@ mod test {
180180

181181
sample
182182
.iter()
183-
.for_each(|c| candidate.update(&c.get_decided_literals().collect()));
183+
.for_each(|c| candidate.update(&c.get_decided_literals().collect::<IntSet<_>>()));
184184

185185
assert!(candidate.is_t_wise_covered_by(&sample, 2));
186186

@@ -193,7 +193,7 @@ mod test {
193193

194194
sample
195195
.iter()
196-
.for_each(|c| candidate.update(&c.get_decided_literals().collect()));
196+
.for_each(|c| candidate.update(&c.get_decided_literals().collect::<IntSet<_>>()));
197197

198198
assert!(!candidate.is_t_wise_covered_by(&sample, 2));
199199
}

ddnnife/src/ddnnf/anomalies/t_wise_sampling/sample_merger/zipping_merger.rs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ impl SampleMerger for ZippingMerger<'_> {
7070

7171
impl ZippingMerger<'_> {
7272
/// Generates all t-wise interactions between two samples.
73-
fn interactions(left: &Sample, right: &Sample, t: usize) -> HashSet<Vec<i32>> {
73+
fn interactions(left: &Sample, right: &Sample, t: usize) -> Vec<Vec<i32>> {
7474
Self::generate_interactions(
7575
Self::generate_self_interactions(left, t),
7676
Self::generate_self_interactions(right, t),
@@ -83,7 +83,7 @@ impl ZippingMerger<'_> {
8383
fn generate_interactions(
8484
left: impl Iterator<Item = HashSet<Vec<i32>>>,
8585
right: impl DoubleEndedIterator<Item = HashSet<Vec<i32>>>,
86-
) -> HashSet<Vec<i32>> {
86+
) -> Vec<Vec<i32>> {
8787
let mut interactions = HashSet::new();
8888

8989
// Both sets are sorted from 1 to t-1.
@@ -94,12 +94,29 @@ impl ZippingMerger<'_> {
9494
for right in &right {
9595
let mut interaction = left.clone();
9696
interaction.extend_from_slice(right);
97+
interaction.sort_unstable();
9798
interactions.insert(interaction);
9899
}
99100
}
100101
});
101102

102-
interactions
103+
// Deterministic sampling requires the same iteration order between runs.
104+
// As the HashSets used for the rest of the algorithm do not provide such a deterministic order,
105+
// we collect, sort and (deterministically) shuffle the generated interactions.
106+
#[cfg(feature = "deterministic")]
107+
{
108+
use crate::util::rng;
109+
use rand::seq::SliceRandom;
110+
111+
let mut interactions: Vec<Vec<i32>> = interactions.into_iter().collect();
112+
interactions.sort_unstable();
113+
interactions.shuffle(&mut rng());
114+
115+
interactions
116+
}
117+
118+
#[cfg(not(feature = "deterministic"))]
119+
interactions.into_iter().collect()
103120
}
104121

105122
/// Generates a set of interactions inside a sample ordered by interactions size.
@@ -177,7 +194,7 @@ mod test {
177194
use crate::parser::build_ddnnf;
178195

179196
use super::*;
180-
use std::collections::HashSet;
197+
use crate::int_hash::IntSet;
181198
use std::path::Path;
182199

183200
#[test]
@@ -198,7 +215,7 @@ mod test {
198215
/// Create an empty sample that may contain the given variables and will certainly contain
199216
/// the given literals. Only use this if you know that the configs you are going to add to
200217
/// this sample contain the given literals.
201-
fn new_with_literals(vars: HashSet<u32>, mut literals: Vec<i32>) -> Sample {
218+
fn new_with_literals(vars: IntSet<u32>, mut literals: Vec<i32>) -> Sample {
202219
literals.sort_unstable();
203220
literals.dedup();
204221
Sample {
@@ -221,7 +238,7 @@ mod test {
221238
ddnnf: &ddnnf,
222239
};
223240

224-
let mut left_sample = new_with_literals(HashSet::from([2, 3]), vec![-2, 3]);
241+
let mut left_sample = new_with_literals([2, 3].into_iter().collect(), vec![-2, 3]);
225242
left_sample.add_partial(Config::from(&[3, -2], 4));
226243
let right_sample = Sample::new_from_configs(vec![Config::from(&[1, 4], 4)]);
227244

ddnnife/src/ddnnf/anomalies/t_wise_sampling/t_wise_sampler.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,19 @@ use super::t_iterator::TInteractionIter;
44
use super::{Sample, SamplingResult, SatWrapper};
55
use crate::NodeType;
66
use crate::ddnnf::extended_ddnnf::ExtendedDdnnf;
7+
use crate::int_hash::{self, IntMap, IntSet};
78
use crate::util::rng;
89
use crate::{Ddnnf, DdnnfKind};
910
use itertools::Itertools;
1011
use rand::prelude::SliceRandom;
1112
use std::cmp::min;
12-
use std::collections::{HashMap, HashSet};
1313
use streaming_iterator::StreamingIterator;
1414

1515
pub struct TWiseSampler<'a, A: AndMerger, O: OrMerger> {
1616
/// The d-DNNF to sample.
1717
pub(crate) ddnnf: &'a Ddnnf,
1818
/// Map that holds the [SamplingResult]s for the nodes.
19-
pub(crate) partial_samples: HashMap<usize, SamplingResult>,
19+
pub(crate) partial_samples: IntMap<usize, SamplingResult>,
2020
/// The merger for and nodes.
2121
and_merger: A,
2222
/// The merger for or nodes.
@@ -28,7 +28,7 @@ impl<'a, A: AndMerger, O: OrMerger> TWiseSampler<'a, A, O> {
2828
pub fn new(ddnnf: &'a Ddnnf, and_merger: A, or_merger: O) -> Self {
2929
Self {
3030
ddnnf,
31-
partial_samples: HashMap::with_capacity(ddnnf.nodes.len()),
31+
partial_samples: int_hash::map_with_capacity(ddnnf.nodes.len()),
3232
and_merger,
3333
or_merger,
3434
}
@@ -259,8 +259,8 @@ pub fn trim_and_resample(
259259
}
260260

261261
#[inline]
262-
fn trim_sample(sample: &Sample, ranks: &[f64], avg_rank: f64) -> (Sample, HashSet<i32>) {
263-
let mut literals_to_resample: HashSet<i32> = HashSet::new();
262+
fn trim_sample(sample: &Sample, ranks: &[f64], avg_rank: f64) -> (Sample, IntSet<i32>) {
263+
let mut literals_to_resample: IntSet<i32> = IntSet::default();
264264
let mut new_sample = Sample::new_from_samples(&[sample]);
265265
let complete_len = sample.complete_configs.len();
266266

0 commit comments

Comments
 (0)