-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy path__init__.py
More file actions
312 lines (250 loc) · 11 KB
/
__init__.py
File metadata and controls
312 lines (250 loc) · 11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
from IPython.core.magic import Magics, magics_class, line_magic
from IPython import get_ipython, start_ipython
import pickle
import os
import time
import hashlib
import datetime
import shutil
import ast
import astunparse
from tabulate import tabulate
from IPython.display import HTML, display
# Module-wide verbosity flag. When True, extra diagnostic messages are printed
# (e.g. while listing cached variables or when a version mismatch resets a cache entry).
# It is switched by the "-d"/"--debug" magic option and by CacheCall's `set_debug` argument.
debug = False
# ########################### #
# ######## CacheCall ######## #
# ########################### #
class CacheCallException(Exception):
    """
    Error raised for every user-facing failure of the cache magic.

    The magic entry point catches this exception type and prints its message instead of showing a traceback.
    """
class CacheCall:
    """
    The CacheCall class handles a single call to the cache-magic.

    Its attributes are all derived from or related to the line for which the magic is called, and its methods
    handle the execution of the call.

    On-disk layout (below ``shell.starting_dir``): every cached variable gets its own folder
    ``.cache_magic/<var_name>/`` holding ``data.txt`` (the pickled value) and ``info.txt`` (a pickled dict with
    the keys ``expression_hash``, ``store_date``, ``version`` and ``compute_time``).

    NOTE(security): cache files are read with pickle, which can execute arbitrary code while loading.
    Only point this at cache directories you trust.
    """

    def __init__(self, shell):
        """Keep a reference to the shell; its ``user_ns``, ``starting_dir`` and ``run_cell`` are used later."""
        self.shell = shell

    def __call__(self, version="*", reset=False, var_name="", var_value="", show_all=False, set_debug=None):
        """
        Execute one cache operation.

        version:   "*" (derive the version from the expression), a string of digits, or the name of a variable
                   in the user namespace whose content is used as the version.
        reset:     drop the cache of ``var_name``, or the entire cache when no variable is given.
        var_name:  name of the variable to load or store.
        var_value: source text of the expression assigned to ``var_name``; empty when only loading.
        show_all:  display a table of all cached variables and return.
        set_debug: when not None, overwrites the module-level ``debug`` flag.

        Raises CacheCallException when a value is requested that is not in the cache, when the version is
        invalid or cannot be satisfied, or when executing the expression fails.
        """
        if set_debug is not None:
            global debug
            debug = set_debug

        user_ns = self.shell.user_ns
        base_dir = self.shell.starting_dir + "/.cache_magic/"

        if show_all:
            self._show_all(base_dir)
            return

        var_folder_path = os.path.join(base_dir, var_name)
        var_data_path = os.path.join(var_folder_path, "data.txt")
        var_info_path = os.path.join(var_folder_path, "info.txt")

        if reset:
            if var_name:
                print("resetting cached values for " + var_name)
                self._reset_var(var_folder_path)
                # no return, because it might be a forced recalculation
            else:
                print("resetting entire cache")
                self._reset_all(base_dir)
                return

        if not var_name:
            print("Warning: nothing todo: no variable defined, no reset requested, no show_all requested. ")
            return

        version = self._get_cache_version(version, var_value, user_ns)

        # Track "a value was loaded" explicitly. Testing the loaded value against None would treat a
        # cached None as a miss and recompute it on every call.
        loaded = False
        try:
            info = self.get_from_file(var_info_path)
            self._handle_cache_hit(info, var_value, var_folder_path, version)
            try:
                stored_value = self.get_from_file(var_data_path)
                print('loading cached value for variable \'{0}\'. Time since pickling {1}'
                      .format(str(var_name), str(datetime.datetime.now() - info["store_date"])))
                user_ns[var_name] = stored_value
                loaded = True
            except IOError:
                pass  # this happens, when there was a cache hit, but it was dirty
        except IOError:
            # No info file: only an error if there is also no expression to compute the value from.
            if not var_value and not reset:
                raise CacheCallException("variable '" + str(var_name) + "' not in cache")

        if var_value and not loaded:
            print('creating new value for variable \'' + str(var_name) + '\'')
            self._create_new_value(
                self.shell,
                var_folder_path,
                var_data_path,
                var_info_path,
                version,
                var_name,
                var_value)

    @staticmethod
    def hash_line(line):
        """
        Return the normalized form of an expression that is used to detect changes.

        Despite the name no digest is computed: the result is the string form of ``line`` without
        surrounding whitespace.
        """
        return str(line).strip()

    @staticmethod
    def reset_folder(path, make_new=True):
        """Delete ``path`` recursively if it exists and, when ``make_new`` is true, recreate it empty."""
        if os.path.exists(path):
            shutil.rmtree(path)
        if make_new:
            os.makedirs(path)

    @staticmethod
    def get_from_file(path):
        """
        Unpickle and return the object stored in the file at ``path``.

        NOTE(security): pickle can execute arbitrary code while loading; only read trusted files.
        """
        with open(path, 'rb') as fp:
            return pickle.load(fp)

    def _create_new_value(self, shell, var_folder_path, var_data_path, var_info_path, version, var_name, var_value):
        """
        Run ``var_name = var_value`` in the shell and store the resulting value together with its metadata.

        Raises CacheCallException when the shell reports that the execution was not successful; in that
        case nothing is written.
        """
        # make sure there is a clean state for this var
        self.reset_folder(var_folder_path)

        # calculate the new value in user-context and time the shell command
        cmd = self._reconstruct_expression(var_name, var_value)
        start_time = time.time()
        result = shell.run_cell(cmd)
        compute_time = time.time() - start_time

        if not result.success:
            raise CacheCallException(
                "There was an error during the execution of the expression. "
                "No value will be stored. The Expression was: \n" + str(cmd))

        # store the result
        with open(var_data_path, 'wb') as fp:
            pickle.dump(shell.user_ns[var_name], fp)

        info = dict(expression_hash=self.hash_line(var_value),
                    store_date=datetime.datetime.now(),
                    version=version,
                    compute_time=compute_time)
        with open(var_info_path, 'wb') as fp:
            pickle.dump(info, fp)

    @staticmethod
    def _show_all(base_dir):
        """
        Display an HTML table with one row per cached variable found in ``base_dir``.

        Entries whose data or info file cannot be read are reported with a warning and skipped.
        Raises CacheCallException when ``base_dir`` is not a directory.
        """
        if not os.path.isdir(base_dir):
            raise CacheCallException("Base-Directory " + base_dir + " not found. ")

        rows = []
        for var_name in os.listdir(base_dir):
            if debug:
                print("found subdir: " + var_name)
            var_folder_path = os.path.join(base_dir, var_name)
            data_path = os.path.join(var_folder_path, "data.txt")
            var_info_path = os.path.join(var_folder_path, "info.txt")
            try:
                # The size lookup lives inside the try block: a folder without data file (e.g. after a
                # failed computation) must not abort the listing of all other variables.
                size = os.path.getsize(data_path)
                info = CacheCall.get_from_file(var_info_path)
            except (IOError, OSError):
                print("Warning: failed to read info variable '" + var_name + "'")
                continue
            rows.append([var_name, size, info["store_date"],
                         "%.1f" % info.get("compute_time", 0.0),
                         info["version"], info["expression_hash"]])

        display(HTML(tabulate(rows, headers=["var name", "size(byte)", "stored at date",
                                             "time(s)", "version", "expression(hash)"],
                              tablefmt="html")))

    @staticmethod
    def _reset_all(base_dir):
        """Remove the whole cache directory and recreate it empty."""
        CacheCall.reset_folder(base_dir)

    @staticmethod
    def _reset_var(var_folder_path):
        """Remove the folder of a single cached variable without recreating it."""
        CacheCall.reset_folder(var_folder_path, False)

    @staticmethod
    def _handle_cache_hit(info, var_value, var_folder_path, version):
        """
        If there was a cache hit, this handles the invalidation of the cache, if needed.

        With an expression (``var_value``): a version mismatch empties the variable folder so the value gets
        recomputed; a changed expression with an unchanged version only prints a warning.
        Without an expression: a non-empty requested version that differs from the stored one raises
        CacheCallException.

        Versions are compared by their string form in both cases.
        """
        requested = str(version)
        stored = str(info["version"])

        if var_value:
            # if there is an expression and no info-file -> a new variable and nothing needs to be checked up front
            if stored != requested:
                # Note: Version can be a string, a number or the content of a variable (which can be anything)
                if debug:
                    print("resetting because version mismatch")
                CacheCall.reset_folder(var_folder_path)
            elif info["expression_hash"] != CacheCall.hash_line(var_value):
                print("Warning! Expression has changed since last save, which was at " + str(info["store_date"]))
                print("To store a new value, change the version ('-v' or '--version') ")
        else:
            if requested != '' and stored != requested:
                # force a version
                raise CacheCallException(
                    "Forced version '" + str(version)
                    + "' could not be found, instead found version '"
                    + str(info['version']) + "'."
                    + "If you don't care about a specific version, leave out the version parameter. ")

    @staticmethod
    def _get_cache_version(version_param, var_value, user_ns):
        """
        Resolve the version parameter of the magic to the version value that is stored and compared.

        Checked in this order: the name of a variable in ``user_ns`` yields that variable's content,
        "*" yields the normalized expression, a string of digits yields the corresponding int.
        Anything else prints two diagnostic lines and raises CacheCallException.
        """
        if version_param in user_ns:
            return user_ns[version_param]
        if version_param == "*":
            return CacheCall.hash_line(var_value)
        if version_param.isdigit():
            return int(version_param)

        print("Version: " + str(version_param))
        print("version_param.isdigit(): " + str(version_param.isdigit()))
        raise CacheCallException("Invalid version. It must either be an Integer, *, or the name of a variable")

    @staticmethod
    def _reconstruct_expression(var_name, var_value):
        """Build the assignment statement ``<var_name> = <var_value>`` that is run in the shell."""
        return str(var_name) + " = " + str(var_value)
@magics_class
class CacheMagic(Magics):
    """Container class that provides the ``%cache`` line magic."""

    @line_magic
    def cache(self, line):
        """
        This ipython-magic caches the result of statements.

        The line consists of optional parameters followed by either an assignment (``name = expression``)
        or a bare variable name:

        -v / --version <value>   version of the cached value
        -r / --reset             reset the cache of the given variable, or the whole cache
        -d / --debug             switch on debug output

        Without any input a table of all cached variables is shown.
        Errors of type CacheCallException are printed instead of raised.
        """
        try:
            parameter = self.parse_input(line)
            CacheCall(self.shell)(**parameter)
        except CacheCallException as e:
            print("Error: " + str(e))

    @staticmethod
    def parse_input(_input):
        """
        Split the magic line into keyword arguments for CacheCall.

        Returns a dict that may contain the keys ``version``, ``reset``, ``show_all``, ``var_name`` and
        ``var_value``. As a side effect the module-level ``debug`` flag is set when "-d"/"--debug" is given.

        Raises CacheCallException for unknown parameters, for a version parameter without value and for a
        statement that is neither a simple assignment nor a variable name.
        """
        global debug
        result = {}

        # Split on single spaces (and join the same way below) so that the expression is rebuilt verbatim.
        params = _input.strip().split(" ")
        reading_version = False
        expression_starts_at = len(params)

        for index, p in enumerate(params):
            if not p:
                # Empty token caused by repeated spaces between parameters. Skipping it keeps it from
                # being taken as version value and from becoming a leading space of the expression.
                continue
            if p == "-v" or p == "--version":
                reading_version = True
                continue
            if reading_version:
                reading_version = False
                result["version"] = p
                continue
            if p == "-r" or p == "--reset":
                result["reset"] = True
                continue
            if p == "-d" or p == "--debug":
                debug = True
                continue
            if p.startswith("-"):
                raise CacheCallException("unknown parameter \"" + p + "\"")
            # if parameters are done the rest is part of the expression
            expression_starts_at = index
            break

        if reading_version:
            raise CacheCallException("parameter \"-v\" / \"--version\" requires a value")

        # Everything after the parameters is the assignment getting cached
        cmd_str = " ".join(params[expression_starts_at:])

        if "version" not in result and "reset" not in result and not cmd_str:
            # no input (except debug) --> show all cached variables
            result["show_all"] = True

        try:
            cmd = ast.parse(cmd_str)
        except Exception as e:
            raise CacheCallException("statement is no valid python: " + cmd_str + "\n Error: " + str(e))

        if not cmd_str:
            return result

        if not isinstance(cmd, ast.Module) or len(cmd.body) != 1:
            raise CacheCallException("statement must be an assignment or variable name. Line: " + cmd_str)

        statement = cmd.body[0]
        if isinstance(statement, ast.Expr) and isinstance(statement.value, ast.Name):
            # bare variable name: load only
            result["var_name"] = statement.value.id
        elif isinstance(statement, ast.Assign):
            # only "name = expression" is supported, no chained or destructuring assignments
            if len(statement.targets) != 1 \
                    or not isinstance(statement.targets[0], ast.Name):
                raise CacheCallException("statement must be an assignment or variable name. Line: " + cmd_str)
            result["var_name"] = statement.targets[0].id
            result["var_value"] = astunparse.unparse(statement.value)
        else:
            raise CacheCallException("statement must be an assignment or variable name. Line: " + cmd_str)

        return result
# ########################### #
# ### ipython boilerplate ### #
# ########################### #
# Register the magic as a side effect of importing this module. Outside of a running
# IPython session the registration fails and only an error message is printed.
try:
    ip = get_ipython()
    ip.register_magics(CacheMagic)
    print("%cache magic is now registered in ipython")
except Exception:
    # Not a bare "except:", so KeyboardInterrupt and SystemExit are not swallowed.
    print("Error! Couldn't register magic in ipython!!!")