Skip to content

Commit 85e2534

Browse files
Add structured logging to inference compiler (#2248)
* Add structured logging to inference compiler and model loading

  Migrate compiler logging from f-string interpolation to structured logger calls with lazy formatting. Remove the preflight auth check that sent a mutation POST to verify API key permissions -- auth errors already surface during package registration. Update TRT compilation docs to reference inference-cli installation directly.

* Retract auto_negotiation.py logger changes
* Bump version and add changelog
* Bump inference dependencies

---------

Co-authored-by: Paweł Pęczek <pawel@roboflow.com>
1 parent 26841f7 commit 85e2534

17 files changed

Lines changed: 207 additions & 118 deletions

File tree

inference_cli/lib/enterprise/inference_compiler/adapters/models_service.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ def init(
117117
api_key = ROBOFLOW_API_KEY
118118
if api_key is None:
119119
raise RuntimeConfigurationError(
120-
"Could not initialise Models Service client without Roboflow API key. "
121-
"Set the key explicitly or use environment variable `ROBOFLOW_API_KEY`. If you need help getting "
120+
"Could not initialize Models Service client without a Roboflow API key. "
121+
"Set the key explicitly or use the environment variable `ROBOFLOW_API_KEY`. If you need help finding "
122122
"your Roboflow API key, "
123123
"visit: https://docs.roboflow.com/developer/authentication/find-your-roboflow-api-key"
124124
)
@@ -162,7 +162,7 @@ def register_model_package(
162162
headers=self._add_auth_headers(),
163163
)
164164
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
165-
raise RetryError(f"Connectivity error")
165+
raise RetryError("Connectivity error")
166166
handle_response_errors(response=response)
167167
return ModelPackageRegistrationResponse.model_validate(response.json())
168168

@@ -193,7 +193,7 @@ def confirm_model_package_artefacts(
193193
headers=self._add_auth_headers(),
194194
)
195195
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
196-
raise RetryError(f"Connectivity error")
196+
raise RetryError("Connectivity error")
197197
handle_response_errors(response=response)
198198
return None
199199

@@ -221,7 +221,7 @@ def add_model_package_artefacts(
221221
headers=self._add_auth_headers(),
222222
)
223223
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
224-
raise RetryError(f"Connectivity error")
224+
raise RetryError("Connectivity error")
225225
handle_response_errors(response=response)
226226
return ModelPackageRegistrationResponse.model_validate(response.json()) # type: ignore
227227

@@ -249,7 +249,7 @@ def remove_model_package_artefacts(
249249
headers=self._add_auth_headers(),
250250
)
251251
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
252-
raise RetryError(f"Connectivity error")
252+
raise RetryError("Connectivity error")
253253
handle_response_errors(response=response)
254254
return None
255255

@@ -268,7 +268,7 @@ def seal_model_package(self, model_id: str, package_id: str) -> None:
268268
headers=self._add_auth_headers(),
269269
)
270270
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
271-
raise RetryError(f"Connectivity error")
271+
raise RetryError("Connectivity error")
272272
handle_response_errors(response=response)
273273
return None
274274

@@ -290,7 +290,7 @@ def un_seal_model_package(self, model_id: str, model_package_id: str) -> None:
290290
headers=self._add_auth_headers(),
291291
)
292292
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
293-
raise RetryError(f"Connectivity error")
293+
raise RetryError("Connectivity error")
294294
handle_response_errors(response=response)
295295
return None
296296

@@ -312,7 +312,7 @@ def delete_model_package(self, model_id: str, model_package_id: str) -> None:
312312
headers=self._add_auth_headers(),
313313
)
314314
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
315-
raise RetryError(f"Connectivity error")
315+
raise RetryError("Connectivity error")
316316
handle_response_errors(response=response)
317317
return None
318318

@@ -334,7 +334,7 @@ def un_delete_model_package(self, model_id: str, model_package_id: str) -> None:
334334
headers=self._add_auth_headers(),
335335
)
336336
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
337-
raise RetryError(f"Connectivity error")
337+
raise RetryError("Connectivity error")
338338
handle_response_errors(response=response)
339339
return None
340340

@@ -354,7 +354,7 @@ def get_public_trt_timing_cache(
354354
headers=self._add_auth_headers(),
355355
)
356356
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
357-
raise RetryError(f"Connectivity error")
357+
raise RetryError("Connectivity error")
358358
handle_response_errors(response=response)
359359
return ExternalPublicTRTTimingCompilationEntryV1.model_validate( # type: ignore
360360
response.json()["cacheEntry"]
@@ -376,7 +376,7 @@ def get_private_trt_timing_cache(
376376
headers=self._add_auth_headers(),
377377
)
378378
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
379-
raise RetryError(f"Connectivity error")
379+
raise RetryError("Connectivity error")
380380
handle_response_errors(response=response)
381381
return ExternalPrivateTRTTimingCompilationEntryV1.model_validate( # type: ignore
382382
response.json()["cacheEntry"]
@@ -399,7 +399,7 @@ def register_private_trt_timing_cache(
399399
headers=self._add_auth_headers(),
400400
)
401401
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
402-
raise RetryError(f"Connectivity error")
402+
raise RetryError("Connectivity error")
403403
handle_response_errors(response=response)
404404
return PrivateTRTTimingCacheEntryRegistrationResults.model_validate( # type: ignore
405405
response.json()
@@ -425,7 +425,7 @@ def confirm_private_trt_timing_cache_upload(
425425
headers=self._add_auth_headers(),
426426
)
427427
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
428-
raise RetryError(f"Connectivity error")
428+
raise RetryError("Connectivity error")
429429
handle_response_errors(response=response)
430430
return None
431431

@@ -452,7 +452,7 @@ def list_private_timing_cache_entries(
452452
headers=self._add_auth_headers(),
453453
)
454454
except (ConnectionError, Timeout, requests.exceptions.ConnectionError):
455-
raise RetryError(f"Connectivity error")
455+
raise RetryError("Connectivity error")
456456
handle_response_errors(response=response)
457457
return PrivateTRTTimingCacheEntriesList.model_validate(response.json()) # type: ignore
458458

inference_cli/lib/enterprise/inference_compiler/cli/core.py

Lines changed: 47 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
from enum import Enum
23
from typing import Annotated, Optional
34

@@ -8,7 +9,12 @@
89
from inference_cli.lib.env import ROBOFLOW_API_KEY
910
from inference_cli.lib.utils import read_env_file
1011

11-
inference_compiler_app = typer.Typer(name="Inference compiler")
12+
logger = logging.getLogger("inference_cli.inference_compiler")
13+
14+
inference_compiler_app = typer.Typer(
15+
name="Inference compiler",
16+
help="Compile Roboflow models into optimized TensorRT engines for GPU-accelerated inference on NVIDIA GPUs and Jetson devices.",
17+
)
1218

1319

1420
class CompilationMode(str, Enum):
@@ -22,7 +28,13 @@ def compiler_callback():
2228
pass
2329

2430

25-
@inference_compiler_app.command(name="compile-model")
31+
@inference_compiler_app.command(
32+
name="compile-model",
33+
help="Compile an ONNX model from Roboflow into a TensorRT engine optimized for your GPU. "
34+
"The compiled engine is registered back to the Roboflow platform so it can be served to "
35+
"matching devices automatically. Compilation can run in-process (requires TensorRT and "
36+
"inference-models) or inside a Docker container.",
37+
)
2638
def compile_model(
2739
model_id: Annotated[
2840
str,
@@ -37,68 +49,67 @@ def compile_model(
3749
typer.Option(
3850
"--api-key",
3951
"-a",
40-
help="Roboflow API key for your workspace. If not given - env variable `ROBOFLOW_API_KEY` will be used",
52+
help="Roboflow API key for your workspace. If not provided, the `ROBOFLOW_API_KEY` environment variable is used.",
4153
),
4254
] = None,
4355
debug_mode: Annotated[
4456
bool,
4557
typer.Option(
4658
"--debug-mode/--no-debug-mode",
47-
help="Flag enabling errors stack traces to be displayed (helpful for debugging)",
59+
help="Display full stack traces on errors.",
4860
),
4961
] = False,
5062
trt_forward_compatible: Annotated[
5163
bool,
5264
typer.Option(
5365
"--trt-forward-compatible/--no-trt-forward-compatible",
54-
help="Flag to decide if forward-compatibility mode in TRT compilation should be enabled",
66+
help="Enable TensorRT forward-compatibility mode, allowing engines to run on newer TRT versions.",
5567
),
5668
] = False,
5769
trt_same_cc_compatible: Annotated[
5870
bool,
5971
typer.Option(
6072
"--trt-same-cc-compatible/--no-trt-same-cc-compatible",
61-
help="Flag to decide if engine should be compiled to be compatible with devices sharing the same CUDA CC "
62-
"to the one running compilation procedure",
73+
help="Compile the engine to be portable across GPUs with the same CUDA compute capability.",
6374
),
6475
] = False,
6576
compilation_mode: Annotated[
6677
CompilationMode,
6778
typer.Option(
6879
"--compilation-mode",
69-
help="Selection of compilation mode - `container` runs the procedure inside `inference` server, "
70-
"`python` runs in-process. `auto` (default) inspect environment dependencies to verify if "
71-
"the procedure can be run in-process, if not - offloading to the server.",
80+
help="Selection of compilation mode. `container` runs the procedure inside an Inference server container, "
81+
"`python` runs in-process. `auto` (default) inspects environment dependencies to verify if "
82+
"the procedure can run in-process; if not, it offloads to the container.",
7283
),
7384
] = CompilationMode.AUTO,
7485
image: Annotated[
7586
Optional[str],
7687
typer.Option(
7788
"--image",
78-
help="Point specific docker image you would like to run with command (useful for development of custom "
79-
"builds of inference server)",
89+
help="Specify a Docker image to use for compilation (useful for custom builds of the Inference server).",
8090
),
8191
] = None,
8292
use_local_images: Annotated[
8393
bool,
8494
typer.Option(
8595
"--use-local-images/--not-use-local-images",
86-
help="Flag to allow using local images (if set False image is always attempted to be pulled)",
96+
help="Allow using local Docker images. If false, the image is always pulled from the registry.",
8797
),
8898
] = False,
8999
env_file_path: Annotated[
90100
Optional[str],
91101
typer.Option(
92102
"--env-file-path",
93-
help="Path to key-value .env file to inject into compilation container (if you run in Python package, "
94-
"just export variables to env)",
103+
help="Path to a key-value .env file to inject into the compilation container. "
104+
"For Python mode, export the variables to your environment instead.",
95105
),
96106
] = None,
97107
) -> None:
98108
console = Console()
99109
console.print(
100-
"You are running component licensed under Roboflow Enterprise License - please acknowledge the "
101-
"terms of use: https://github.com/roboflow/inference/blob/main/inference/enterprise/LICENSE.txt",
110+
"Inference Compiler is licensed under the Roboflow Enterprise License. "
111+
"By continuing, you acknowledge the terms of use: "
112+
"https://github.com/roboflow/inference/blob/main/inference/enterprise/LICENSE.txt",
102113
)
103114
if api_key is None:
104115
api_key = ROBOFLOW_API_KEY
@@ -151,9 +162,12 @@ def compilation_to_run_in_container(
151162
import inference_models
152163

153164
except Exception as error:
165+
logger.info(
166+
"Could not import inference-models, offloading to container: %s", error
167+
)
154168
console.print(
155-
"Inference compiler running in `auto` mode could not import `inference-models`, which is required "
156-
f"to compile package in process - offloading to container. Error: {error}",
169+
"Compiler running in `auto` mode could not import `inference-models`, which is required "
170+
f"to compile in-process. Offloading to container. Error: {error}",
157171
)
158172
return True
159173
try:
@@ -168,9 +182,10 @@ def compilation_to_run_in_container(
168182
x_ray_result.trt_python_package_available
169183
), "TensorRT Python package not detected"
170184
except Exception as error:
185+
logger.info("TensorRT not available, offloading to container: %s", error)
171186
console.print(
172-
"Inference compiler running in `auto` mode could not import `tensorrt`, which is required "
173-
f"to compile package in process - offloading to container. Error: {error}",
187+
"Compiler running in `auto` mode could not import `tensorrt`, which is required "
188+
f"to compile in-process. Offloading to container. Error: {error}",
174189
)
175190
return True
176191
return False
@@ -192,9 +207,9 @@ def run_compilation_in_container(
192207
image = get_image()
193208
if "-cpu" in image:
194209
raise ValueError(
195-
"Attempted to run compilation using `inference-server` CPU image, which does not support TRT compilation. "
196-
"This error may be result of pointing invalid docker image with `--image` parameter or image "
197-
"auto-selection choice, due to lack of GPU detected."
210+
"Attempted to run compilation using an Inference server CPU image, which does not support TRT compilation. "
211+
"This may be caused by specifying an invalid Docker image with the `--image` parameter, or by "
212+
"automatic image selection when no GPU is detected."
198213
)
199214
is_gpu = "gpu" in image and "jetson" not in image
200215
is_jetson = "jetson" in image
@@ -209,7 +224,13 @@ def run_compilation_in_container(
209224
privileged = True
210225
docker_run_kwargs = {"runtime": "nvidia"}
211226
pull_image(image, use_local_images=use_local_images)
212-
console.print("Starting model compilation inside docker container")
227+
logger.info(
228+
"Starting container compilation: image=%s, is_gpu=%s, is_jetson=%s",
229+
image,
230+
is_gpu,
231+
is_jetson,
232+
)
233+
console.print("Starting model compilation inside Docker container")
213234
command = build_container_command(
214235
model_id=model_id,
215236
api_key=api_key,
@@ -286,6 +307,7 @@ def run_compilation_in_python(
286307
trt_same_cc_compatible: bool = False,
287308
console: Optional[Console] = None,
288309
) -> None:
310+
logger.info("Running compilation in-process (Python mode)")
289311
from inference_cli.lib.enterprise.inference_compiler.core import compiler
290312

291313
compiler.compile_model(

inference_cli/lib/enterprise/inference_compiler/core/compilation_handlers/default.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
)
4646
from inference_models.weights_providers.entities import ModelMetadata
4747

48+
logger = logging.getLogger("inference_cli.inference_compiler")
49+
4850

4951
def compile_and_register_default_model(
5052
model_metadata: ModelMetadata,
@@ -68,7 +70,7 @@ def compile_and_register_default_model(
6870
expected_files.append(KEYPOINTS_METADATA_FILE)
6971
if package_with_dynamic_batch_size is not None:
7072
print_to_console(
71-
message="Detected model package with dynamic input dimensions - downloading...",
73+
message="Found model package with dynamic input dimensions, downloading...",
7274
console=console,
7375
)
7476
package_files = download_model_package(
@@ -81,7 +83,7 @@ def compile_and_register_default_model(
8183
)
8284
else:
8385
print_to_console(
84-
message="Detected model package with static input dimensions - downloading...",
86+
message="Found model package with static input dimensions, downloading...",
8587
console=console,
8688
)
8789
package_files = download_model_package(
@@ -226,7 +228,7 @@ def compile_and_register_default_model_trt_variant(
226228
)
227229
return None
228230
if verify_model is not None:
229-
print_to_console(message="Verification of the artefacts...", console=console)
231+
print_to_console(message="Verifying compiled artefacts...", console=console)
230232
verify_model_package(
231233
model_metadata=model_metadata,
232234
model_package_id=registration_response.model_package_id,
@@ -248,7 +250,7 @@ def compile_and_register_default_model_trt_variant(
248250
models_service_client=models_service_client,
249251
)
250252
print_to_console(
251-
message="Successfully trained and registered model package", console=console
253+
message="Successfully compiled and registered model package", console=console
252254
)
253255

254256

@@ -264,7 +266,7 @@ def verify_model_package(
264266
) -> None:
265267
try:
266268
with tempfile.TemporaryDirectory() as tmp_dir:
267-
logging.info(f"Verifying model package {model_package_id}...")
269+
logger.info("Verifying model package %s", model_package_id)
268270
adjusted_inference_config_path = os.path.join(
269271
tmp_dir, INFERENCE_CONFIG_FILE
270272
)
@@ -291,12 +293,12 @@ def verify_model_package(
291293
)
292294
os.symlink(keypoints_metadata_path, local_keypoints_metadata_path)
293295
verify_model(tmp_dir)
294-
logging.info(f"Model package {model_package_id} verified.")
296+
logger.info("Model package %s verified", model_package_id)
295297
except ModelVerificationError as error:
296298
raise error
297299
except Exception as error:
298300
raise ModelVerificationError(
299-
"Could not successfully verify correctness of model compilation"
301+
"Could not verify compiled model correctness"
300302
) from error
301303

302304

@@ -344,8 +346,9 @@ def register_default_model_package_artefacts(
344346
calculate_local_file_md5(file_path=keypoints_metadata_path),
345347
)
346348
except Exception as error:
347-
logging.exception(
348-
f"Could not register artefacts for package {registration_response.model_package_id}"
349+
logger.exception(
350+
"Could not register artefacts for package %s",
351+
registration_response.model_package_id,
349352
)
350353
raise CompiledPackageRegistrationError(
351354
f"Could not register artefacts for package {registration_response.model_package_id}"

0 commit comments

Comments
 (0)