Skip to content

Commit 7c19072

Browse files
Remove the custom Workflow transform execution for serving (#354)
* Remove the custom `Workflow` transform execution for serving. This also reworks `match_representation` to handle some cases that start to crop up when switching to the executors from Core re: ragged list features and whether values/offsets are expected or not. * Formatting changes * Add validation for schema and data not agreeing on raggedness * Disable custom C++ implementations of NVT operators (for now) * Update workflow test assertions to match the new `Categorify` encoding --------- Co-authored-by: Oliver Holworthy <oholworthy@nvidia.com>
1 parent f307dc2 commit 7c19072

3 files changed

Lines changed: 64 additions & 105 deletions

File tree

merlin/systems/triton/conversions.py

Lines changed: 39 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
import numpy as np
3131
import pandas as pd
3232

33+
import merlin.dtypes as md
3334
from merlin.core.compat import cudf
3435
from merlin.core.compat import cupy as cp
3536
from merlin.core.dispatch import build_cudf_list_column, is_list_dtype
@@ -85,10 +86,12 @@ def match_representations(schema: Schema, dict_array: Dict[str, Any]) -> Dict[st
8586
"""
8687
aligned = {}
8788
for col_name, col_schema in schema.column_schemas.items():
88-
if col_schema.is_ragged:
89-
vals_name = f"{col_name}__values"
90-
offs_name = f"{col_name}__offsets"
89+
dtype = col_schema.dtype
90+
91+
vals_name = f"{col_name}__values"
92+
offs_name = f"{col_name}__offsets"
9193

94+
if col_schema.is_ragged:
9295
try:
9396
# Look for values and offsets that already exist
9497
aligned[vals_name] = dict_array[vals_name]
@@ -98,12 +101,40 @@ def match_representations(schema: Schema, dict_array: Dict[str, Any]) -> Dict[st
98101
values, offsets = _to_values_offsets(dict_array[col_name])
99102
aligned[vals_name] = values
100103
aligned[offs_name] = offsets
104+
105+
if dtype != md.unknown:
106+
aligned[vals_name] = aligned[vals_name].astype(dtype.to_numpy)
101107
else:
102-
aligned[col_name] = dict_array[col_name]
108+
try:
109+
# Look for values and offsets that already exist,
110+
# then reshape accordingly
111+
aligned[col_name] = _from_values_offsets(
112+
dict_array[vals_name], dict_array[offs_name], col_schema.shape
113+
)
114+
except KeyError:
115+
# If you don't find them, just use the values
116+
aligned[col_name] = dict_array[col_name]
117+
118+
if dtype != md.unknown:
119+
aligned[col_name] = aligned[col_name].astype(dtype.to_numpy)
103120

104121
return aligned
105122

106123

124+
def _from_values_offsets(values, offsets, shape):
125+
new_shape = [-1]
126+
new_shape.extend(shape.as_tuple[1:])
127+
128+
row_lengths = offsets[1:] - offsets[:-1]
129+
if not all(row_lengths == row_lengths[0]):
130+
raise ValueError(
131+
"Attempted to convert values/offsets representation of list column "
132+
"to values-only representation when row lengths were not equal."
133+
)
134+
135+
return values.reshape(new_shape)
136+
137+
107138
def _to_values_offsets(array):
108139
"""Convert array to values/offsets representation
109140
@@ -306,7 +337,10 @@ def convert_format(tensors, kind, target_kind):
306337
elif kind == Supports.CPU_DICT_ARRAY:
307338
return _array_to_pandas(tensors), Supports.CPU_DATAFRAME
308339
elif kind == Supports.GPU_DICT_ARRAY:
309-
return _array_to_pandas(_convert_array(tensors, cp.asnumpy)), Supports.CPU_DATAFRAME
340+
return (
341+
_array_to_pandas(_convert_array(tensors, cp.asnumpy)),
342+
Supports.CPU_DATAFRAME,
343+
)
310344

311345
raise ValueError("unsupported target for converting tensors", target_kind)
312346

merlin/systems/workflow/base.py

Lines changed: 12 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,10 @@
2828
import json
2929
import logging
3030

31-
from merlin.core.dispatch import concat_columns
32-
from merlin.dag import ColumnSelector, Supports
31+
from merlin.dag import ColumnSelector, DataFormats, Supports
32+
from merlin.dag.executors import LocalExecutor, _convert_format, _data_format
3333
from merlin.schema import Tags
34-
from merlin.systems.triton.conversions import convert_format, match_representations
34+
from merlin.systems.triton.conversions import match_representations
3535
from merlin.table import TensorTable
3636

3737
LOG = logging.getLogger("merlin-systems")
@@ -65,8 +65,10 @@ def __init__(self, workflow, output_dtypes, model_config, model_device):
6565
f"The following columns were not found in the workflow's output: {missing_cols}"
6666
)
6767

68-
# recurse over all column groups, initializing operators for inference pipeline
69-
self._initialize_ops(self.workflow.output_node)
68+
# recurse over all column groups, initializing operators for inference pipeline.
69+
# (disabled for now while we sort out whether and how we want to use C++ implementations
70+
# of NVTabular operators for performance optimization)
71+
# self._initialize_ops(self.workflow.output_node)
7072

7173
def _initialize_ops(self, workflow_node, visited=None):
7274
if visited is None:
@@ -97,98 +99,13 @@ def _initialize_ops(self, workflow_node, visited=None):
9799
self._initialize_ops(parent, visited)
98100

99101
def run_workflow(self, input_tensors):
100-
# use our NVTabular workflow to transform the dataset
101-
transformed, kind = self._transform_tensors(input_tensors, self.workflow.output_node)
102-
103-
# if we don't have tensors in numpy format, convert back so that the we can return
104-
# to triton
105-
if kind != Supports.CPU_DICT_ARRAY:
106-
transformed, kind = convert_format(transformed, kind, Supports.CPU_DICT_ARRAY)
107-
108-
transformed = TensorTable(transformed).to_dict()
109-
output_dict = match_representations(self.workflow.output_schema, transformed)
110-
111-
for key, value in output_dict.items():
112-
output_dict[key] = value.astype(self.output_dtypes[key])
113-
114-
return output_dict
115-
116-
def _transform_tensors(self, input_tensors, workflow_node):
117-
upstream_inputs = []
118-
119-
# Gather inputs from the parents and dependency nodes
120-
if workflow_node.parents_with_dependencies:
121-
for parent in workflow_node.parents_with_dependencies:
122-
upstream_tensors, upstream_kind = self._transform_tensors(input_tensors, parent)
123-
if upstream_tensors is not None and upstream_kind:
124-
upstream_inputs.append((upstream_tensors, upstream_kind))
125-
126-
# Gather additional input columns from the original input tensors
127-
if workflow_node.selector:
128-
selector_columns = workflow_node.selector.names
129-
to_remove = []
130-
for upstream_tensors, upstream_kind in upstream_inputs:
131-
for col in selector_columns:
132-
if col in upstream_tensors:
133-
to_remove.append(col)
134-
for col in set(to_remove):
135-
selector_columns.remove(col)
136-
137-
if selector_columns:
138-
selected_tensors = {c: input_tensors[c] for c in selector_columns}
139-
selected_kinds = Supports.CPU_DICT_ARRAY
140-
upstream_inputs.append((selected_tensors, selected_kinds))
141-
142-
# Standardize the formats
143-
tensors, kind = None, None
144-
for upstream_tensors, upstream_kind in upstream_inputs:
145-
if tensors is None:
146-
tensors, kind = upstream_tensors, upstream_kind
147-
else:
148-
if kind != upstream_kind:
149-
# we have multiple different kinds of data here (dataframe/array on cpu/gpu)
150-
# we need to convert to a common format here first before concatenating.
151-
op = workflow_node.op
152-
if op and hasattr(op, "inference_supports"):
153-
target_kind = op.inference_supports
154-
else:
155-
target_kind = Supports.CPU_DICT_ARRAY
156-
# note : the 2nd convert_format call needs to be stricter in what the kind is
157-
# (exact match rather than a bitmask of values)
158-
tensors, kind = convert_format(tensors, kind, target_kind)
159-
upstream_tensors, _ = convert_format(upstream_tensors, upstream_kind, kind)
160-
161-
tensors = self.concat_tensors([tensors, upstream_tensors], kind)
162-
163-
# Run the transform
164-
if tensors is not None and kind and workflow_node.op:
165-
try:
166-
# if the op doesn't support the current kind - we need to convert
167-
if (
168-
hasattr(workflow_node, "inference_supports")
169-
and not workflow_node.inference_supports & kind
170-
):
171-
tensors, kind = convert_format(tensors, kind, workflow_node.inference_supports)
172-
173-
tensors = workflow_node.op.transform(
174-
workflow_node.input_columns,
175-
tensors,
176-
)
177-
178-
except Exception:
179-
LOG.exception("Failed to transform operator %s", workflow_node.op)
180-
raise
102+
transformable = TensorTable(input_tensors).to_df()
103+
transformed = LocalExecutor().transform(transformable, self.workflow.graph)
181104

182-
return tensors, kind
105+
if _data_format(transformed) != DataFormats.NUMPY_DICT_ARRAY:
106+
transformed = _convert_format(transformed, DataFormats.NUMPY_DICT_ARRAY)
183107

184-
def concat_tensors(self, tensors, kind):
185-
if kind & (Supports.GPU_DATAFRAME | Supports.CPU_DATAFRAME):
186-
return concat_columns(tensors)
187-
else:
188-
output = tensors[0]
189-
for tensor in tensors[1:]:
190-
output.update(tensor)
191-
return output
108+
return match_representations(self.workflow.output_schema, transformed)
192109

193110
def _get_param(self, config, *args, default=None):
194111
config_element = config["parameters"]

tests/unit/systems/dag/runtimes/triton/ops/workflow/test_ensemble.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -269,7 +269,7 @@ def test_workflow_with_ragged_input_and_output(tmpdir):
269269
"x__offsets": np.array([0, 1], dtype="int32"),
270270
},
271271
{
272-
"x__values": np.array([1], dtype="int64"),
272+
"x__values": np.array([3], dtype="int64"),
273273
"x__offsets": np.array([0, 1], dtype="int32"),
274274
},
275275
),
@@ -279,7 +279,7 @@ def test_workflow_with_ragged_input_and_output(tmpdir):
279279
"x__offsets": np.array([0, 1, 2], dtype="int32"),
280280
},
281281
{
282-
"x__values": np.array([1, 2], dtype="int64"),
282+
"x__values": np.array([3, 4], dtype="int64"),
283283
"x__offsets": np.array([0, 1, 2], dtype="int32"),
284284
},
285285
),
@@ -289,7 +289,7 @@ def test_workflow_with_ragged_input_and_output(tmpdir):
289289
"x__offsets": np.array([0, 2, 3], dtype="int32"),
290290
},
291291
{
292-
"x__values": np.array([1, 2, 3], dtype="int64"),
292+
"x__values": np.array([3, 4, 5], dtype="int64"),
293293
"x__offsets": np.array([0, 2, 3], dtype="int32"),
294294
},
295295
),
@@ -298,7 +298,11 @@ def test_workflow_with_ragged_input_and_output(tmpdir):
298298
input_table = TensorTable(request_dict)
299299
output_names = ["x__values", "x__offsets"]
300300
response = send_triton_request(
301-
schema, input_table, output_names, client=client, triton_model=model_name
301+
schema,
302+
input_table,
303+
output_names,
304+
client=client,
305+
triton_model=model_name,
302306
)
303307
for key, value in expected_response.items():
304308
np.testing.assert_array_equal(response[key], value)
@@ -362,7 +366,11 @@ def test_workflow_dtypes(tmpdir):
362366
input_table = TensorTable(request_dict)
363367
output_names = ["a__values", "a__offsets", "b"]
364368
response = send_triton_request(
365-
schema, input_table, output_names, client=client, triton_model=model_name
369+
schema,
370+
input_table,
371+
output_names,
372+
client=client,
373+
triton_model=model_name,
366374
)
367375
for key, value in expected_response.items():
368376
np.testing.assert_array_equal(response[key], value)

0 commit comments

Comments (0)