Skip to content

Commit 817ef36

Browse files
authored
added MLFlow model converter (#16)
1 parent c043852 commit 817ef36

21 files changed

Lines changed: 9985 additions & 7 deletions
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
"""Hands-on demo: MLflow model -> FNNX package -> inspect -> run.
2+
3+
Builds a tiny MLflow model for one framework, converts it with
4+
``fnnx.extras.mlflow.package_mlflow_model``, prints the package contents +
5+
manifest/env/provenance, then runs it through ``fnnx.runtime.Runtime`` and
6+
compares against the original model.
7+
8+
Usage (from src/python, with the deps for the chosen framework installed):
9+
10+
python examples/mlflow_to_fnnx.py sklearn
python examples/mlflow_to_fnnx.py sklearn-array
11+
python examples/mlflow_to_fnnx.py torch
12+
python examples/mlflow_to_fnnx.py langchain
13+
python examples/mlflow_to_fnnx.py inspect _fnnx_demo/sklearn.fnnx # inspect only
14+
15+
Each build writes <framework>.fnnx and the source MLflow dir under ./_fnnx_demo/
16+
so you can poke at them afterwards (e.g. `tar -tf _fnnx_demo/sklearn.fnnx`).
17+
18+
The key thing the demo illustrates is how the runtime ``inputs`` dict is keyed,
19+
which depends on the converter's chosen ``input_mode``:
20+
* columns mode -> {column_name: 1-D array, ...} (one key per signature column)
21+
* tensor mode -> {"input": ndarray} (single unnamed tensor)
22+
or {name: ndarray, ...} (named tensors)
23+
* json mode -> {"data": <any JSON-able object>} (passthrough / nested signatures)
24+
"""
25+
26+
from __future__ import annotations
27+
28+
import json
29+
import os
30+
import shutil
31+
import sys
32+
import tarfile
33+
34+
from fnnx.extras.mlflow import package_mlflow_model
35+
from fnnx.extras.reader import Reader
36+
from fnnx.runtime import Runtime
37+
38+
39+
# Every demo writes its MLflow model directory, generated helper modules and
# the resulting .fnnx package under this folder, resolved against the working
# directory at import time.
OUT_DIR = os.path.join(
    os.getcwd(),
    "_fnnx_demo",
)
40+
41+
42+
def _reset(model_dir: str) -> None:
43+
"""mlflow.save_model refuses a non-empty target; clear it for re-runs."""
44+
shutil.rmtree(model_dir, ignore_errors=True)
45+
46+
47+
def inspect(fnnx_path: str) -> None:
    """Print the package file tree + manifest/env/provenance via Reader.

    Sections are printed in this order: archive member names, the manifest as
    indented JSON, the ``fnnx_mlflow`` entry of ``variant_config.json``, the
    pip dependencies (only when the reader exposes a ``pyenv``), and one line
    per provenance/metadata entry.

    Args:
        fnnx_path: Path to the ``.fnnx`` archive; it is opened with
            ``tarfile`` and with ``Reader``.

    A package that has no ``variant_config.json`` member, or whose config has
    no ``fnnx_mlflow`` entry (e.g. one not produced by the MLflow converter),
    gets a placeholder line in that section instead of a crash.
    """
    print(f"\n=== inspect {fnnx_path} ===")

    # Read everything needed from the archive in a single pass instead of
    # opening it once per section.
    with tarfile.open(fnnx_path, "r") as tar:
        names = sorted(tar.getnames())
        try:
            # extractfile raises KeyError for a missing member and returns
            # None for members that are not regular files or links.
            member = tar.extractfile("variant_config.json")
        except KeyError:
            member = None
        raw_config = member.read() if member is not None else None

    print("-- files --")
    for name in names:
        print(" ", name)

    reader = Reader(fnnx_path)
    print("-- manifest --")
    print(reader.manifest.model_dump_json(indent=2))

    print("-- input_mode (variant_config) --")
    if raw_config is None:
        print(" ", "(no variant_config.json in this package)")
    else:
        vc = json.loads(raw_config.decode())
        extra_values = vc.get("extra_values") or {}
        print(" ", extra_values.get("fnnx_mlflow", "(no fnnx_mlflow entry in variant_config.json)"))

    if reader.pyenv is not None:
        print("-- pip dependencies --")
        for dep in reader.pyenv.dependencies:
            print(" ", dep.package)

    print("-- provenance (meta) --")
    for entry in reader.metadata:
        print(" id:", entry.id, "| tags:", entry.producer_tags)
69+
70+
71+
def demo_sklearn() -> None:
    """Fit a RandomForest on a 2-column DataFrame and run it in columns mode.

    The signature is inferred from the DataFrame, the model is saved with
    MLflow, converted with ``package_mlflow_model`` and inspected. The
    runtime is then called with one key per column ("a" and "b"), each a 1-D
    float64 array, and its predictions are printed next to sklearn's own.
    """
    import mlflow
    import numpy as np
    import pandas as pd
    from mlflow.models.signature import infer_signature
    from sklearn.ensemble import RandomForestClassifier

    mlflow_dir = os.path.join(OUT_DIR, "sklearn_model")
    package_path = os.path.join(OUT_DIR, "sklearn.fnnx")

    # Tiny training set: two float columns, alternating labels.
    frame = pd.DataFrame({"a": [0.0, 1.0, 2.0, 3.0], "b": [3.0, 2.0, 1.0, 0.0]})
    labels = [0, 1, 0, 1]
    forest = RandomForestClassifier(n_estimators=10, random_state=0)
    forest.fit(frame, labels)
    signature = infer_signature(frame, forest.predict(frame))

    _reset(mlflow_dir)
    mlflow.sklearn.save_model(forest, mlflow_dir, signature=signature)  # type: ignore[attr-defined]

    package_mlflow_model(mlflow_dir, package_path, name="sklearn-rf")
    inspect(package_path)

    # columns mode: one runtime input per signature column.
    runtime = Runtime(package_path)
    column_inputs = {col: frame[col].to_numpy(np.float64) for col in ("a", "b")}
    result = runtime.compute(column_inputs, {})
    print("\nfnnx predictions :", result["predictions"])
    print("sklearn predict :", forest.predict(frame).tolist())
100+
101+
102+
def demo_sklearn_array() -> None:
    """Fit a RandomForest on a bare numpy array and run it in tensor mode.

    Because the signature is inferred from an ndarray rather than a
    DataFrame, it records a single unnamed TensorSpec and the converter picks
    input_mode="tensor": the manifest has ONE input named "input" that takes
    the whole array. The runtime is therefore called with {"input": X} -- a
    single key holding the raw array -- instead of one key per column. (The
    model still just sees model.predict(X).)
    """
    import mlflow
    import numpy as np
    from mlflow.models.signature import infer_signature
    from sklearn.ensemble import RandomForestClassifier

    mlflow_dir = os.path.join(OUT_DIR, "sklearn_array_model")
    package_path = os.path.join(OUT_DIR, "sklearn_array.fnnx")

    features = np.array(
        [[0.0, 3.0], [1.0, 2.0], [2.0, 1.0], [3.0, 0.0]],
        dtype=np.float64,
    )
    labels = [0, 1, 0, 1]
    forest = RandomForestClassifier(n_estimators=10, random_state=0)
    forest.fit(features, labels)
    # ndarray in -> TensorSpec in the inferred signature.
    signature = infer_signature(features, forest.predict(features))

    _reset(mlflow_dir)
    mlflow.sklearn.save_model(forest, mlflow_dir, signature=signature)  # type: ignore[attr-defined]

    package_mlflow_model(mlflow_dir, package_path, name="sklearn-rf-array")
    inspect(package_path)

    # Single "input" key holding the whole array.
    runtime = Runtime(package_path)
    tensor_inputs = {"input": features}
    result = runtime.compute(tensor_inputs, {})
    print("\nfnnx predictions :", result["predictions"])
    print("sklearn predict :", forest.predict(features).tolist())
133+
134+
135+
def demo_torch() -> None:
    """nn.Module in a separate module + code_paths -> tensor input_mode.

    Writes a small ``tinynet_mod.py`` into ``OUT_DIR``, imports ``TinyNet``
    from it, saves the model with ``mlflow.pytorch.save_model`` (passing the
    module through ``code_paths``), converts and inspects the package, then
    runs it through ``Runtime`` with a single ``"input"`` tensor and prints
    the maximum absolute difference against a direct torch forward pass.

    Side effects: creates/overwrites ``tinynet_mod.py`` under ``OUT_DIR`` and
    prepends ``OUT_DIR`` to ``sys.path`` if it is not already there.
    """
    import mlflow
    import numpy as np
    import torch
    from mlflow.models import ModelSignature
    from mlflow.types.schema import Schema, TensorSpec

    # The module must live outside __main__ so torch's by-reference pickle can
    # resolve it; code_paths then embeds the source into the package so it
    # reloads on a clean machine too.
    mod_path = os.path.join(OUT_DIR, "tinynet_mod.py")
    with open(mod_path, "w", encoding="utf-8") as f:
        # The generated source needs real nesting: method bodies must be
        # indented deeper than their ``def`` lines or the import below fails
        # with IndentationError.
        f.write(
            "import torch\n"
            "from torch import nn\n"
            "N_FEATURES = 4\n"
            "class TinyNet(nn.Module):\n"
            "    def __init__(self):\n"
            "        super().__init__()\n"
            "        self.linear = nn.Linear(N_FEATURES, 2)\n"
            "    def forward(self, x):\n"
            "        return self.linear(x)\n"
        )
    if OUT_DIR not in sys.path:
        sys.path.insert(0, OUT_DIR)
    from tinynet_mod import N_FEATURES, TinyNet  # type: ignore[import-not-found]

    # Fixed seeds keep the weights and the sample batch reproducible.
    torch.manual_seed(0)
    model = TinyNet().eval()
    sample = np.random.default_rng(0).random((5, N_FEATURES)).astype(np.float32)

    model_dir = os.path.join(OUT_DIR, "torch_model")
    out = os.path.join(OUT_DIR, "torch.fnnx")
    signature = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float32"), [-1, N_FEATURES])]),
        outputs=Schema([TensorSpec(np.dtype("float32"), [-1, 2])]),
    )
    _reset(model_dir)
    # torch.save pickles the module by reference (its class must be importable);
    # code_paths embeds the source so the package reloads on a clean machine.
    mlflow.pytorch.save_model(  # type: ignore[attr-defined]
        model,
        model_dir,
        signature=signature,
        code_paths=[mod_path],
    )

    package_mlflow_model(model_dir, out, name="torch-tiny")
    inspect(out)

    rt = Runtime(out)
    res = rt.compute({"input": sample}, {})  # single unnamed tensor -> "input"
    with torch.no_grad():
        expected = model(torch.from_numpy(sample)).numpy()
    actual = np.asarray(res["predictions"], dtype=np.float32)
    print("\nmax abs diff vs torch forward:", float(np.abs(actual - expected).max()))
192+
193+
194+
def demo_langchain() -> None:
    """Package a models-from-code RunnableLambda and run it in json mode.

    A small script that registers a ``RunnableLambda`` through
    ``mlflow.models.set_model`` is written under ``OUT_DIR`` and saved with
    ``mlflow.langchain.save_model`` using an Object-typed input signature.
    After converting and inspecting the package, the runtime is called with
    the whole request under the single ``"data"`` key, and its output is
    printed next to the result of loading the MLflow model directly.
    """
    import mlflow
    from mlflow.models import ModelSignature
    from mlflow.types import DataType
    from mlflow.types.schema import ColSpec, Object, Property, Schema

    # models-from-code: a script that builds the runnable and registers it via
    # mlflow.models.set_model. The whole script is embedded in the package, so
    # it reloads without code_paths.
    script_source = (
        "import mlflow\n"
        "from langchain_core.runnables import RunnableLambda\n"
        "def _shout(record):\n"
        " payload = record.get('payload', record) if isinstance(record, dict) else record\n"
        " text = payload.get('text', '') if isinstance(payload, dict) else str(payload)\n"
        " return {'out': str(text).upper() + '!'}\n"
        "mlflow.models.set_model(RunnableLambda(_shout))\n"
    )
    script_path = os.path.join(OUT_DIR, "langchain_model.py")
    with open(script_path, "w") as handle:
        handle.write(script_source)

    # One "payload" column holding an object with a string "text" property.
    payload_type = Object(properties=[Property("text", DataType.string)])
    input_schema = Schema([ColSpec(name="payload", type=payload_type)])
    output_schema = Schema([ColSpec(type=DataType.string)])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    mlflow_dir = os.path.join(OUT_DIR, "langchain_model")
    package_path = os.path.join(OUT_DIR, "langchain.fnnx")
    _reset(mlflow_dir)
    mlflow.langchain.save_model(  # type: ignore[attr-defined]
        lc_model=script_path,
        path=mlflow_dir,
        signature=signature,
    )

    package_mlflow_model(mlflow_dir, package_path, name="langchain-shout")
    inspect(package_path)

    # json mode -> the request travels under the "data" key.
    runtime = Runtime(package_path)
    result = runtime.compute({"data": {"payload": {"text": "hello"}}}, {})
    reference_model = mlflow.pyfunc.load_model(mlflow_dir)
    direct = reference_model.predict({"payload": {"text": "hello"}})
    print("\nfnnx predictions :", result["predictions"])
    print("direct predict :", direct)
238+
239+
240+
# Command-line name -> demo function. The keys double as the choices listed
# in the usage message, in this order.
DEMOS = {
    "sklearn": demo_sklearn,
    "sklearn-array": demo_sklearn_array,
    "torch": demo_torch,
    "langchain": demo_langchain,
}
246+
247+
248+
def main() -> None:
    """Dispatch on the first command-line argument.

    ``sys.argv[1]`` must be one of the ``DEMOS`` keys or ``inspect``;
    ``inspect`` additionally requires the package path as ``sys.argv[2]``.

    Raises:
        SystemExit: With status 2, after printing the usage line, when the
            command is missing or unknown, or when ``inspect`` is given
            without a path.
    """
    args = sys.argv
    command = args[1] if len(args) > 1 else None

    unknown_command = command not in (*DEMOS, "inspect")
    # "inspect" with no path used to fall through to sys.argv[2] and die with
    # an IndexError; treat it as a usage error instead.
    missing_path = command == "inspect" and len(args) < 3
    if unknown_command or missing_path:
        print(f"usage: {sys.argv[0]} {{{'|'.join(DEMOS)}|inspect <path>}}")
        raise SystemExit(2)

    os.makedirs(OUT_DIR, exist_ok=True)
    if command == "inspect":
        inspect(args[2])
        return
    DEMOS[command]()


if __name__ == "__main__":
    main()

src/python/fnnx/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.11"
1+
__version__ = "0.0.12"

0 commit comments

Comments
 (0)