NVIDIA
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
Lines changed: 6 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 7 additions & 4 deletions
diff --git a/cpp/include/tensorrt_llm/batch_manager/cacheTransceiver.h b/cpp/include/tensorrt_llm/batch_manager/cacheTransceiver.h
Lines changed: 7 additions & 0 deletions
@@ -240,6 +240,7 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
 /cpp/tensorrt_llm/batch_manager/allocateKvCache.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tests/unit_tests/batch_manager/kvCacheManagerTest.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tests/unit_tests/batch_manager/kvCacheUtilsTest.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
+/tensorrt_llm/_torch/pyexecutor/kv_cache_manager_v2.py @NVIDIA/trt-llm-kv-cache-manager-devs
 /tensorrt_llm/_torch/pyexecutor/resource_manager.py @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
@@ -297,3 +298,8 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
 # of the NVIDIA/trt-llm-release-branch-approval team, regardless of who else approves the PR.
 # Without approval from a member of this team, PRs cannot be merged to release branches.
 # * @NVIDIA/trt-llm-release-branch-approval
+
+### Telemetry / privacy review
+# Golden manifest is the privacy-review artifact; route it and the usage package to the privacy owner.
+/tensorrt_llm/usage/llm_args_golden_manifest.json @NVIDIA/trt-llm-oss-compliance
+/tensorrt_llm/usage/ @NVIDIA/trt-llm-oss-compliance
@@ -298,9 +298,10 @@ Deprecation is used to inform developers that some APIs and tools are no longer
 TensorRT-LLM collects anonymous telemetry data by default. This data is used
 in aggregate to understand usage patterns and prioritize engineering efforts.
 **This data cannot be traced back to any individual user.** No prompts,
-user-identifying information, or persistent identifiers are collected. Any
-deployment identifiers are ephemeral, randomly generated per deployment, and
-not linked to users. The data we collect includes:
+outputs, model weights, model paths, tokenizer paths, user-identifying
+information, raw free-form configuration strings, or persistent identifiers are
+collected. Any deployment identifiers are ephemeral, randomly generated per
+deployment, and not linked to users. The data we collect includes:
 
 - Ingress point (e.g., LLM API, CLI, serve command)
 - Deployment duration (via periodic heartbeats)
@@ -309,8 +310,10 @@ not linked to users. The data we collect includes:
 - Parallelism configuration (TP/PP/CP/MoE-EP/MoE-TP sizes), quantization algorithm, dtype, KV cache dtype
 - System information (OS platform, Python version, CPU architecture, CPU count)
 - TRT-LLM version and backend
-- Feature flags (LoRA, speculative decoding, prefix caching, CUDA graphs, chunked context, data parallelism)
+- Feature summary flags (LoRA, speculative decoding, prefix caching, CUDA graphs, chunked context, data parallelism)
 - Disaggregated serving metadata (role and deployment ID)
+- Selected LLM API configuration values: parallelism, dtype, KV cache, scheduler, CUDA graph, and compile settings
+- Capture diagnostics for that payload: a schema checksum (for provenance), the count of captured fields, and whether any free-form value was skipped
 
 Telemetry is automatically disabled in CI and test environments.
 
 
@@ -35,6 +35,7 @@
 #include <torch/custom_class.h>
 #include <torch/python.h>
 #include <type_traits>
+#include <unordered_map>
 #include <unordered_set>
 #include <vector>
 
@@ -287,6 +288,12 @@ class CacheTransceiver : public BaseCacheTransceiver
     // Dedup sets so observe-only timeout WARN logs fire at most once per stuck request.
     std::unordered_set<LlmRequest::RequestIdType> mTimedOutSenderIds;
     std::unordered_set<LlmRequest::RequestIdType> mTimedOutRequesterIds;
+    std::unordered_set<LlmRequest::RequestIdType> mCompletedSenderRequestIds;
+    std::unordered_set<LlmRequest::RequestIdType> mFailedSenderRequestIds;
+    std::unordered_map<LlmRequest::RequestIdType, std::shared_ptr<LlmRequest>> mSenderRequestsAwaitingConsensus;
+    std::unordered_set<LlmRequest::RequestIdType> mCompletedRequesterRequestIds;
+    std::unordered_set<LlmRequest::RequestIdType> mFailedRequesterRequestIds;
+    std::unordered_map<LlmRequest::RequestIdType, std::shared_ptr<LlmRequest>> mRequesterRequestsAwaitingConsensus;
     mpi::MpiComm const* mMpiWorldComm{nullptr};
 
     std::shared_ptr<CacheTransceiverComm> mGroupComm;