Skip to content

Commit 5e44fcc

Browse files
committed
Implements reservoir sampler randomly sampling stream of features, closes #7
1 parent 03bc25c commit 5e44fcc

1 file changed

Lines changed: 72 additions & 0 deletions

File tree

robosat/osm/sampler.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import random
2+
3+
4+
class ReservoirSampler:
    '''Randomly samples k items from a stream of unknown n items.

    Implements reservoir sampling ("Algorithm R"): after any number of pushes
    n >= k, each of the n items seen so far is held in the reservoir with the
    same probability k / n. Memory use is bounded by the capacity k.
    '''

    def __init__(self, capacity):
        '''Creates a new `ReservoirSampler` instance.

        Args:
          capacity: the number of items to randomly sample from a stream of unknown size.
                    Must be greater than zero.
        '''

        # Kept as an assertion so the exception type seen by callers does not change.
        assert capacity > 0

        self.capacity = capacity
        self.reservoir = []
        # Total number of items pushed so far, whether or not they were kept.
        self.pushed = 0

    def push(self, v):
        '''Adds an item to the reservoir.

        The first `capacity` items are always stored. Every later item replaces
        a uniformly chosen stored item with probability capacity / n, where n
        is the number of items pushed so far including this one.

        Args:
          v: the item from the stream to add to the reservoir.
        '''

        # Count the current item first: the acceptance probability for the
        # n-th item is capacity / n, not capacity / (n - 1). Using the count
        # of previously seen items would always accept the first item after
        # the reservoir fills up and bias the sample towards late items.
        self.pushed += 1

        if len(self.reservoir) < self.capacity:
            self.reservoir.append(v)
            return

        # Draw one uniform slot in [0, n). It falls inside the reservoir with
        # probability capacity / n, and in that case it is also a uniformly
        # chosen reservoir index to overwrite.
        slot = random.randrange(self.pushed)

        if slot < self.capacity:
            self.reservoir[slot] = v

    def __len__(self):
        '''Returns the number of randomly sampled items.

        Returns:
          The number of randomly sampled items in the reservoir.
        '''

        return len(self.reservoir)

    def __getitem__(self, k):
        '''Returns a randomly sampled item in the reservoir.

        Args:
          k: the index for the kth item from the reservoir to return.

        Returns:
          The kth item in the reservoir of randomly sampled items.
        '''

        return self.reservoir[k]

    def __repr__(self):
        '''Returns the representation for this class.

        Returns:
          The string representation for this class.
        '''

        return '<{}: {}>'.format(self.__class__.__name__, list(self))

0 commit comments

Comments
 (0)