Skip to content

Commit b2a1f30

Browse files
#21: ds.model() and ds.add_model(), tests thereof, and docs.
1 parent 4c4989b commit b2a1f30

4 files changed

Lines changed: 121 additions & 1 deletion

File tree

README.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,50 @@ ds.add_parameters(prefilter=True, smooth=True, smoothing_factor=12)
329329
ds.publish('Did some awesome data science!')
330330
```
331331

332+
### Models
333+
334+
If your run has generated a TensorFlow model, you can declare it as such. This will load the model into the Model Library on the Dotscience Hub, and will enable automated deployment and model tracking features.
335+
336+
This can be done with `model()` or `add_model()`:
337+
338+
```python
339+
import dotscience as ds
340+
import tensorflow as tf
341+
342+
ds.script()
343+
ds.start()
344+
345+
...
346+
347+
tf.saved_model.simple_save(
348+
tf.keras.backend.get_session(),
349+
ds.model(tf, "potatoes", "./model"), # <---
350+
inputs={'input_image_bytes': model.input},
351+
outputs={t.name:t for t in model.outputs})
352+
353+
...or...
354+
355+
tf.saved_model.simple_save(
356+
tf.keras.backend.get_session(),
357+
"./model",
358+
inputs={'input_image_bytes': model.input},
359+
outputs={t.name:t for t in model.outputs})
360+
361+
ds.add_model(tf, "potatoes", "./model") # <---
362+
363+
ds.publish('Trained the potato classifier')
364+
```
365+
366+
The first argument to `model` or `add_model` should be the TensorFlow module itself, as imported by `import tensorflow as tf` in our example. This is used to identify it as a TensorFlow model (support for other model types is planned), and to record the TensorFlow version used to generate it.
367+
368+
The second argument is the model name for the Model Library. In this case, we called it `potatoes`, as our model is a potato classifier.
369+
370+
The third argument is the path to the directory we're saving the TensorFlow model in, in this case `./model`. If called as `model()` rather than `add_model()`, this path is returned, so that it can be used to wrap the output path argument to `simple_save` in our example.
371+
372+
For classifier models, an optional keyword argument is supported in both `model()` and `add_model()`: `classes` can be provided as a path to a CSV file listing your classes, to enable automatic model metric tracking in deployment.
373+
374+
Note that we don't need to call `output()` for the paths passed to `model()` and `add_model()`; they automatically declare the files as outputs from this run.
375+
332376
## Multiple runs
333377

334378
There's nothing to stop you from doing more than one "run" in one go; just call `start()` at the beginning and `publish()` at the end of each.

dotscience/__init__.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,43 @@ def label(self, label, value):
124124
self.add_label(label, value)
125125
return value
126126

127+
def model(self, kind, name, *args, **kwargs):
    """Declare a model produced by this run and return its path.

    The model's files are registered through ``self.add_output`` and a
    label named ``"artefact:" + name`` is added whose value is a compact,
    key-sorted JSON document describing the artefact (type, framework
    version, and file paths relative to ``self._root``).

    Args:
        kind: The framework module that produced the model. Only a module
            whose ``__name__`` is ``"tensorflow"`` is recognised; its
            ``__version__`` is recorded in the artefact description.
        name: Model name; used to build the label key.
        *args: Exactly one positional argument: the path to the saved
            model.
        **kwargs: ``classes`` may be given as a path to a classes file;
            it is registered as an output alongside the model. Other
            keyword arguments are ignored.

    Returns:
        The model path exactly as passed in, so the call can wrap the
        path argument of the function that saves the model.

    Raises:
        RuntimeError: If ``kind`` is not a recognised framework module,
            or if the model path is missing or extra positional
            arguments are supplied.
    """
    # Identify the framework by its module name only; objects without a
    # ``__name__`` are simply "unknown" rather than an attribute error.
    if getattr(kind, "__name__", None) != "tensorflow":
        raise RuntimeError('Unknown model type %r' % (kind,))

    artefact = {"type": "tensorflow-model", "version": kind.__version__}

    if len(args) != 1:
        raise RuntimeError('Tensorflow models require a path to the model as the third argument')
    model_path = args[0]

    files = {"model": model_path}
    if "classes" in kwargs:
        files["classes"] = kwargs["classes"]

    # Register every file as a run output, but describe it in the label
    # by its path relative to the run root.
    relative_files = {}
    for key, path in files.items():
        self.add_output(path)
        relative_files[key] = os.path.relpath(str(path), start=self._root)
    artefact["files"] = relative_files

    self.add_label(
        "artefact:" + name,
        json.dumps(artefact, sort_keys=True, separators=(',', ':')),
    )

    return model_path
159+
160+
def add_model(self, kind, name, *args, **kwargs):
    """Declare a model produced by this run, without returning its path.

    Takes the same arguments as ``model`` and forwards them unchanged;
    the only difference is that the model path is not returned.

    Returns:
        None.
    """
    # Forward the keyword arguments under their real name; the previous
    # spelling referenced an undefined variable and raised NameError.
    self.model(kind, name, *args, **kwargs)
    return None
163+
127164
def add_summary(self, label, value):
128165
self._summary[str(label)] = str(value)
129166

dotscience/test_dotscience.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,45 @@ def test_run_output_recursive():
210210
finally:
211211
shutil.rmtree("test_run_output_recursive.tmp")
212212

213+
class MockTensorflow:
    """Minimal stand-in for the ``tensorflow`` module used by the model tests.

    Instances carry only a ``__name__`` and a ``__version__`` attribute,
    so the tests do not need a real TensorFlow installation.
    """

    def __init__(self):
        # Names with leading *and* trailing double underscores are not
        # name-mangled, so these are readable as plain attributes.
        self.__name__ = "tensorflow"
        self.__version__ = "1.2.3.4"
217+
218+
@given(text(), sampled_from(output_files))
def test_run_tensorflow_model(name, x):
    """model() returns the path, records it as an output and adds the artefact label."""
    r = dotscience.Run("/workspace-root")
    xpath = tidy_path("/workspace-root/" + x)
    relxpath = os.path.relpath(xpath, start="/workspace-root")
    assert r.model(MockTensorflow(), name, xpath) == xpath

    # Build the expected label with json.dumps rather than by string
    # concatenation, so paths containing characters that need JSON
    # escaping are encoded the same way the implementation encodes them.
    expected_label = json.dumps(
        {
            "files": {"model": relxpath},
            "type": "tensorflow-model",
            "version": "1.2.3.4",
        },
        sort_keys=True,
        separators=(',', ':'),
    )
    expected_body = json.dumps(
        {
            "version": "1",
            "summary": {},
            "parameters": {},
            "input": [],
            "output": [relxpath],
            "labels": {"artefact:" + name: expected_label},
        },
        sort_keys=True,
        indent=4,
    )
    assert str(r) == """[[DOTSCIENCE-RUN:%s]]%s[[/DOTSCIENCE-RUN:%s]]""" % \
        (r._id, expected_body, r._id)
232+
233+
@given(text(), sampled_from(output_files), sampled_from(output_files))
def test_run_tensorflow_model_with_classes(name, x, c):
    """model() with ``classes=`` records both files as outputs and in the label."""
    # The model and the classes file must be distinct paths.
    assume(x != c)
    r = dotscience.Run("/workspace-root")
    xpath = tidy_path("/workspace-root/" + x)
    relxpath = os.path.relpath(xpath, start="/workspace-root")
    cpath = tidy_path("/workspace-root/" + c)
    relcpath = os.path.relpath(cpath, start="/workspace-root")

    assert r.model(MockTensorflow(), name, xpath, classes=cpath) == xpath

    # Build the expected label with json.dumps rather than by string
    # concatenation, so paths containing characters that need JSON
    # escaping are encoded the same way the implementation encodes them.
    expected_label = json.dumps(
        {
            "files": {"classes": relcpath, "model": relxpath},
            "type": "tensorflow-model",
            "version": "1.2.3.4",
        },
        sort_keys=True,
        separators=(',', ':'),
    )
    expected_body = json.dumps(
        {
            "version": "1",
            "summary": {},
            "parameters": {},
            "input": [],
            "output": sorted([relxpath, relcpath]),
            "labels": {"artefact:" + name: expected_label},
        },
        sort_keys=True,
        indent=4,
    )
    assert str(r) == """[[DOTSCIENCE-RUN:%s]]%s[[/DOTSCIENCE-RUN:%s]]""" % \
        (r._id, expected_body, r._id)
251+
213252
@given(text())
214253
def test_run_labels_1(x):
215254
r = dotscience.Run("/workspace-root")

test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@ BASE=dotscience-python-test:$CI_DOCKER_TAG
1414

1515
docker build -t $BASE -f Dockerfile.test .
1616

17-
docker run -v dotscience-python-test-examples:/dsbuild/.hypothesis/examples $BASE /bin/bash -c "cd dsbuild ; pytest dotscience"
17+
# Use $* (not $@) inside the quoted command string: "$@" embedded in a larger
# double-quoted word splits into several words when more than one argument is
# given, so only the first would reach pytest and the rest would become
# positional parameters of `bash -c`.
docker run -v dotscience-python-test-examples:/dsbuild/.hypothesis/examples $BASE /bin/bash -c "cd dsbuild ; pytest dotscience $*"

0 commit comments

Comments
 (0)