Skip to content

Commit c6e05f8

Browse files
committed
Add code samples for presentation
1 parent 18566eb commit c6e05f8

File tree

13 files changed

+868
-0
lines changed

13 files changed

+868
-0
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Iterators
2+
An iterator is a useful concept since it allows the creation of lazy sequences,
3+
i.e., sequences that have elements that are computed only when they are
4+
requested by retrieving the next element.
5+
6+
Python allows for two basic ways to implement iterators, either as
7+
functions that use `yield` as a statement, or as classes that implement
8+
an `__iter__` and a `__next__` method.
9+
10+
However, often it is not necessary to implement iterators from scratch,
11+
often they can be constructed by using the Python standard library's
12+
`itertools` functionality.
13+
14+
## What is it?
15+
1. `accumulator.py`: illustrates the use of `itertools`'s `accumulate`
16+
for various types of data and operators.
17+
1. `count_down.py`: simple illustration of a class that implements an
18+
iterable.
19+
1. `event_generate.py`: a sequence of `Event` objects is generated by
20+
an instance of the `EventIter` class. Events have a type, a start time,
21+
and a duration. Events of the same type can not overlap. The
22+
`EventIter` constructor takes an event type and a start time.
23+
It generates a sequence of events with random start time and duration, until an
24+
event is generated that lasts beyond the stop time.
25+
1. `generators.ipynb`: Jupyter notebook illustrating generators.
26+
1. `people.py`: illustration of `itertools`'s `groupby`, and `operator`'s
27+
`attrgetter` methods. Note that `groupby` does not reorder the
28+
original iterator's elements, but only groups consecutive elements that
29+
have the same key.
30+
1. `primes.py`: this script will generate the sequence of prime numbers
31+
until it is interrupted. The iterator is implemented by a function with
32+
a `yield` statement.
33+
1. `primes_multiple_calls.py`: illustrates that a function with `yield`
34+
instantiates a generator when called, and hence "starts over" for
35+
each `for`-loop.
36+
1. `primes_itertools.py`: this script also generates a potentially
37+
infinite sequence of prime numbers, but it is implemented using
38+
the `count` function of the `itertools` module in Python's standard
39+
library, as well as the `filter` function.
40+
1. `dataset.py`: illustrates the `__iter__` and `__next__` methods, as well
41+
as utilities of the `operator` module.
42+
1. `generating_data.py`: a retake of the data generation script in
43+
Fundamentals, now using `itertools` and built-in Python functional
44+
features.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env python
2+
3+
from itertools import accumulate
4+
from operator import add, mul, concat
5+
6+
if __name__ == '__main__':
    # Demonstrates itertools.accumulate with different binary operators
    # and element types; every intermediate result is printed.
    data = range(10)

    # running sums of 0..9
    for partial_sum in accumulate(data, add):
        print(partial_sum)

    # running products; the leading 0 is sliced off
    positives = data[1:]
    for partial_product in accumulate(positives, mul):
        print(partial_product)

    # each number wrapped in its own single-element list
    singletons = [[value] for value in data]
    for singleton in singletons:
        print(singleton)

    # concatenating those single-element lists yields growing prefixes
    for prefix in accumulate(([value] for value in data), concat):
        print(prefix)

    # a string is a sequence of characters, so concat builds it up
    # character by character
    for fragment in accumulate('hello world!', concat):
        print(fragment)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env python
2+
3+
4+
class CountDown(object):
    '''Class implementing a counter that goes from a start value to 0

    The object is its own iterator: `__iter__` resets the counter and
    returns `self`, so all loops over the same object share one state.
    '''

    def __init__(self, n):
        '''Constructor setting the value to count down from'''
        self._n = n
        self._current = n

    @property
    def n(self):
        '''Returns value that this counter will count down from'''
        return self._n

    def __iter__(self):
        '''Initialize and return the iterator, this method is called
        each time the object is used as an iterator'''
        self._current = self.n
        return self

    def __next__(self):
        '''Returns the next value, and changes state, called in each
        iteration; raises StopIteration once 0 has been returned'''
        if self._current < 0:
            raise StopIteration()
        value = self._current
        self._current -= 1
        return value

    def __str__(self):
        '''Returns a description of the current and the start value'''
        # the template uses named fields, so the values have to be
        # passed as keyword arguments
        return 'count down at {c:d} from {n:d}'.format(c=self._current,
                                                       n=self.n)
36+
37+
38+
if __name__ == '__main__':
    # Demo script: iterates over CountDown objects in several ways and
    # prints the values produced.
    # NOTE(review): indentation was lost in this listing; the loops
    # below are assumed to be sequential, not nested -- confirm.
    count_down = CountDown(5)
    print('first iteration')
    for i in count_down:
        print(i)
    # the same object is iterated again; __iter__ resets the counter
    print('second iteration')
    for i in count_down:
        print(i)
    for i in count_down:
        print(i)
    # two distinct objects have independent state
    count_down1 = CountDown(10)
    count_down2 = CountDown(8)
    print('zip for 10, 8')
    for i, j in zip(count_down1, count_down2):
        print(i, j)
    # both zip arguments are the same object, and __iter__ returns
    # self, so the two "iterators" share a single counter
    print('zip for same iterator, will not work')
    for i, j in zip(count_down, count_down):
        print(i, j)
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
#!/usr/bin/env python
2+
'''dataset implements a poor man's version of pandas data frames, it is
3+
only intended to illustrate a number of concepts about iterators
4+
and can be used when dependencies on third party libraries should
5+
be avoided'''
6+
7+
import collections
8+
import operator
9+
10+
ColumnDef = collections.namedtuple('ColumnSpecs', ['name', 'type'])
11+
12+
13+
class DatasetError(Exception):
    '''Root of the exception hierarchy used by Dataset'''
17+
18+
19+
class DataLenError(DatasetError):
    '''Raised when a row is appended whose number of items differs
    from the number of headers of the data set'''
24+
25+
26+
class ConversionError(DatasetError):
    '''Raised when an appended value can not be converted to the
    type of its column'''
32+
33+
34+
class ColumnOverwriteError(DataLenError):
    '''Raised when a new column would replace a column that already
    exists; derives from DataLenError'''
39+
40+
41+
class UndefinedColumnError(DataLenError):
    '''Raised when a column is referred to that is not part of the
    dataset; derives from DataLenError'''
45+
46+
47+
class ComputeError(DataLenError):
    '''Raised when computing new column values fails; derives from
    DataLenError'''
51+
52+
53+
class Dataset(object):
    '''Class representing data sets

    Data is stored per column in lists, keyed by header name.  Each
    column has a type that is called to convert appended values.  The
    object is its own iterator (`__iter__` returns `self`), so there is
    a single iteration cursor per data set; nested loops over the same
    object are not supported.
    '''

    def __init__(self, col_defs):
        '''Constructor that takes the column definitions of the data set

        col_defs: iterable of objects with a `name` and a `type`
                  attribute, e.g., ColumnDef instances
        '''
        # materialize, the definitions are traversed more than once
        col_defs = list(col_defs)
        self._headers = [col_def.name for col_def in col_defs]
        self._type_map = {col_def.name: col_def.type
                          for col_def in col_defs}
        self._data = {header: [] for header in self._headers}
        self._nr_data = 0
        self._next = 0

    @property
    def headers(self):
        '''get the list of headers for the data set, the list is a copy'''
        return list(self._headers)

    @property
    def nr_columns(self):
        '''returns number of columns in the dataset'''
        return len(self._headers)

    @property
    def column_defs(self):
        '''retrieve the column definitions of the dataset as a list of
        ColumnDef, in header order'''
        return [ColumnDef(header, self._type_map[header])
                for header in self._headers]

    def __len__(self):
        '''retrieve the length of the data set, i.e., its number of rows'''
        return self._nr_data

    def _convert(self, header, value):
        '''convert the value to the appropriate data type by calling the
        type registered for the given header'''
        return self._type_map[header](value)

    def append(self, data):
        '''append a row of data to the set

        data: sequence with one item per header, in header order

        Raises DataLenError when the number of items differs from the
        number of headers, and ConversionError when an item can not be
        converted to its column's type.  The data set is left unchanged
        when an exception is raised.
        '''
        if len(data) != len(self._headers):
            msg = '{0:d} headers, {1:d} items'.format(len(self._headers),
                                                      len(data))
            raise DataLenError(msg)
        # convert the complete row before storing anything, so that a
        # failure halfway can not leave columns of unequal length
        row = []
        for header, item in zip(self._headers, data):
            try:
                row.append(self._convert(header, item))
            except (TypeError, ValueError) as error:
                msg = 'type conversion failed: {0}'.format(str(error))
                raise ConversionError(msg) from error
        for header, value in zip(self._headers, row):
            self._data[header].append(value)
        self._nr_data += 1

    def __iter__(self):
        '''iterator over the data values in the data set, each returning
        a named tuple ordered according to the headers of the data set;
        resets the iteration cursor to the first row'''
        # rebuilt on each call since compute() can add headers
        self._RowTuple = collections.namedtuple('RowTuple', self._headers)
        self._next = 0
        return self

    def __next__(self):
        '''return next data value when dataset is used as an iterator,
        raises StopIteration when all rows have been returned'''
        # the cursor is not reset here: an exhausted iterator has to
        # keep raising StopIteration, __iter__ takes care of the reset
        if self._next >= self._nr_data:
            raise StopIteration
        values = self._RowTuple._make(self._data[header][self._next]
                                      for header in self._headers)
        self._next += 1
        return values

    def compute(self, col_defs, args, function):
        '''perform a computation producing extra columns by applying a
        function using the specified argument names

        col_defs: definitions (`name`, `type`) of the columns to add
        args:     names of the existing columns passed to the function,
                  in that order
        function: called once per row, has to return an iterable with
                  one value per new column

        Raises ColumnOverwriteError when a new column name is already
        in use, UndefinedColumnError when an argument name is not a
        column, and ComputeError when the function fails or returns a
        wrong number of values.  The data set is only modified when all
        rows were computed successfully.  Iterates over the data set,
        and hence resets its iteration cursor.
        '''
        col_defs = list(col_defs)
        names = [col_def.name for col_def in col_defs]
        # validate everything before any state is touched
        taken = set(self._headers)
        for name in names:
            if name in taken:
                msg = 'column {0} already exists'.format(name)
                raise ColumnOverwriteError(msg)
            taken.add(name)
        for name in args:
            if name not in self._headers:
                msg = 'no column {0} in dataset'.format(name)
                raise UndefinedColumnError(msg)
        arg_idx = tuple(self._headers.index(name) for name in args)
        if len(arg_idx) > 1:
            selector = operator.itemgetter(*arg_idx)
        else:
            # itemgetter returns a bare item for a single index, and
            # does not accept zero indices; always produce a tuple
            def selector(row):
                return tuple(row[idx] for idx in arg_idx)
        new_columns = {name: [] for name in names}
        for row in self:
            arg_values = selector(row)
            try:
                values = tuple(function(*arg_values))
            except Exception as error:
                args_str = ', '.join([str(arg) for arg in arg_values])
                msg = "computation for '{0}' failed: {1}".format(args_str,
                                                                 str(error))
                raise ComputeError(msg) from error
            if len(values) != len(names):
                args_str = ', '.join([str(arg) for arg in arg_values])
                msg = ("computation for '{0}' failed: {1:d} values "
                       "returned, {2:d} expected").format(args_str,
                                                          len(values),
                                                          len(names))
                raise ComputeError(msg)
            for name, value in zip(names, values):
                new_columns[name].append(value)
        # commit only now that all rows were computed
        self._data.update(new_columns)
        for col_def in col_defs:
            self._headers.append(col_def.name)
            self._type_map[col_def.name] = col_def.type

    def __str__(self):
        '''create string representation of the data set: a line with
        the headers, followed by one line per row'''
        # rows are read by index rather than by iterating over self, so
        # that printing does not disturb a loop that is in progress
        lines = [', '.join(self._headers)]
        for row_nr in range(self._nr_data):
            lines.append(', '.join([str(self._data[header][row_nr])
                                    for header in self._headers]))
        return '\n'.join(lines)
160+
161+
162+
if __name__ == '__main__':
    # Demo: build a data set with x, x^2 and x^3, derive extra columns,
    # and show what happens when a row can not be converted.
    data = Dataset([ColumnDef(header, int) for header in ('x', 'y', 'z')])
    for base in range(10):
        data.append((base, base**2, base**3))
    print(data)

    # add two columns computed from x and y in one go
    sum_and_prod = [ColumnDef('sum', int), ColumnDef('prod', int)]
    data.compute(sum_and_prod, ['x', 'y'], lambda x, y: (x + y, x*y))
    print(data)
    line_fmt = '{0:d} + {1:d} = {2:d}'
    for record in data:
        print(line_fmt.format(record.x, record.y, record.sum))
    print('{0:d} data items'.format(len(data)))

    # a single new column still requires the function to return a tuple
    data.compute([ColumnDef('substr', int)], ['x', 'y'],
                 lambda x, y: (y - x, ))
    print(data)

    # 'bla' can not be converted to int, so the append is expected to fail
    bad_row = ['bla'] * data.nr_columns
    try:
        data.append(bad_row)
    except Exception as error:
        print('### error: {0}'.format(error))
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#!/usr/bin/env python
2+
3+
from datetime import datetime, timedelta
4+
from random import uniform
5+
6+
7+
class Event(object):
    '''Named event with a start and a duration; the stop is derived
    as start + duration'''

    def __init__(self, name, start, duration):
        '''Store the name, start and duration of the event'''
        self._name, self._start, self._duration = name, start, duration

    @property
    def name(self):
        '''Name of the event'''
        return self._name

    @property
    def start(self):
        '''Start of the event'''
        return self._start

    @property
    def duration(self):
        '''Duration of the event'''
        return self._duration

    @property
    def stop(self):
        '''End of the event, i.e., start + duration'''
        return self.start + self.duration

    def begin(self):
        '''Return the tuple (start as string, name, 'on')'''
        return (str(self.start), self.name, 'on')

    def end(self):
        '''Return the tuple (stop as string, name, 'off')'''
        return (str(self.stop), self.name, 'off')
35+
36+
37+
class EventIter(object):
    '''Endless iterator producing consecutive, non-overlapping Event
    objects of a single name with random delays and durations'''

    def __init__(self, name, start, min_delay=10, max_delay=3600,
                 min_duraion=60, max_duration=5*3600):
        '''Set the event name, the time to start from, and the bounds
        (passed to timedelta as seconds) for the random delay before
        and the random duration of each event

        The keyword `min_duraion` keeps its original spelling, since
        it is part of the constructor's signature.
        '''
        self._name = name
        self._start = start
        self._min_delay, self._max_delay = min_delay, max_delay
        self._min_duration, self._max_duration = min_duraion, max_duration

    def __iter__(self):
        '''The object is its own iterator'''
        return self

    def __next__(self):
        '''Create the next event: it starts a random delay after the
        previous one stopped, and lasts for a random duration'''
        # the delay is drawn before the duration
        delay = timedelta(
            seconds=int(uniform(self._min_delay, self._max_delay))
        )
        length = timedelta(
            seconds=int(uniform(self._min_duration, self._max_duration))
        )
        begin = self._start + delay
        # the following event can not start before this one has stopped
        self._start = begin + length
        return Event(self._name, begin, length)
58+
59+
60+
def event_key(event):
    '''Sort key for (time, name, state) tuples: orders by time, then
    name, and puts 'on' before any other state'''
    rank = 0 if event[2] == 'on' else 1
    return (event[0], event[1], rank)
65+
66+
if __name__ == '__main__':
    from argparse import ArgumentParser
    import sys

    def main():
        '''Generate on/off log entries for each requested event type
        between the start and stop date, and print them in sorted
        order'''
        parser = ArgumentParser(description='event log generator')
        parser.add_argument('--events', default=['heating'], nargs='+',
                            help='event types')
        parser.add_argument('--start', default='2014-01-01',
                            help='start date')
        parser.add_argument('--stop', default='2014-01-02',
                            help='stop date')
        options = parser.parse_args()
        date_format = '%Y-%m-%d'
        start = datetime.strptime(options.start, date_format)
        stop = datetime.strptime(options.stop, date_format)
        event_list = []
        for event_type in options.events:
            # EventIter never stops by itself, so leave the loop at the
            # first event that starts at or after the stop date
            for event in EventIter(event_type, start):
                if event.start >= stop:
                    break
                event_list.append(event.begin())
                # an event still running at the stop date gets no 'off'
                if event.stop < stop:
                    event_list.append(event.end())
        for entry in sorted(event_list, key=event_key):
            print('{0};{1} {2}'.format(str(entry[0]), entry[1], entry[2]))

    status = main()
    sys.exit(status)

0 commit comments

Comments
 (0)