|
| 1 | +from dataclasses import asdict |
| 2 | + |
| 3 | +from steamship.data.plugin_instance import PluginInstance |
| 4 | +from steamship.data.plugin import TrainingPlatform |
| 5 | +from steamship.extension.file import File |
| 6 | +from steamship.plugin.inputs.export_plugin_input import ExportPluginInput |
| 7 | +from steamship.plugin.inputs.training_parameter_plugin_input import TrainingParameterPluginInput |
| 8 | +import time |
| 9 | + |
| 10 | +from ..client.helpers import deploy_plugin, upload_file, _steamship |
| 11 | + |
| 12 | +__copyright__ = "Steamship" |
| 13 | +__license__ = "MIT" |
| 14 | + |
| 15 | +EXPORTER_HANDLE = "signed-url-exporter" |
| 16 | + |
def test_e2e_corpus_export():
    """End-to-end check of corpus export.

    Creates (or upserts) the exporter plugin instance, deploys a CSV
    blockifier, blockifies an uploaded CSV file, and then asks the exporter
    plugin instance to export the ``default`` corpus.
    """
    client = _steamship()

    # Schema of the blockifier's configuration, and the values used for it.
    version_config_template = {
        "textColumn": {"type": "string"},
        "tagColumns": {"type": "string"},
        "tagKind": {"type": "string"},
    }
    instance_config = {
        "textColumn": "Message",
        "tagColumns": "Category",
        "tagKind": "Intent",
    }

    exporter_response = PluginInstance.create(
        client=client,
        handle=EXPORTER_HANDLE,
        pluginHandle=EXPORTER_HANDLE,
        upsert=True,
    )
    assert exporter_response.data is not None
    exporter = exporter_response.data
    assert exporter.handle is not None

    export_request = ExportPluginInput(handle='default', type="corpus")
    print(asdict(export_request))

    # Deploy a blockifier that will generate the training corpus.
    with deploy_plugin(
        "plugin_blockifier_csv.py",
        "blockifier",
        versionConfigTemplate=version_config_template,
        instanceConfig=instance_config,
    ) as (_plugin, _version, instance):
        with upload_file("utterances.csv") as uploaded:
            # A freshly uploaded file has no blocks yet.
            assert len(uploaded.query().data.blocks) == 0

            # Blockify with the plugin instance registered just above.
            uploaded.blockify(pluginInstance=instance.handle).wait()
            assert len(uploaded.query().data.blocks) == 5

            # NOTE(review): indentation was lost in the captured source; the
            # export is assumed to run while the uploaded file still exists.
            # Confirm against the original file.
            export_response = exporter.export(export_request)
            assert export_response is not None

            # The original comment states that corpus exporter results are
            # "MD5 encoded". NOTE(review): not verifiable from this test;
            # confirm the actual encoding.
            raw_data = export_response.data
0 commit comments