Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions ucm/observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,16 @@ def __init__(self, model_name, worker_id, config_path):
Load metrics config from YAML file (config_path),
register metrics using prometheus_client, and start a thread to get updated metrics.
"""
+ # Always initialize instance attributes so that shutdown() and
+ # __del__() work even when we skip metric registration below.
+ self.config = self._load_config(config_path)
+ self.log_interval = self.config.get("log_interval", 10)
+ self.is_running = False
+ self.thread = None
+
if _metric_mappings:
logger.warning("Metrics are already registered, skipping re-registration.")
return
- # Load metrics config
- self.config = self._load_config(config_path)
- self.log_interval = self.config.get("log_interval", 10)

# Set up histogram max length
histogram_max_length = self.config.get("histogram_max_length", 10000)
Expand All @@ -97,9 +101,10 @@ def __init__(self, model_name, worker_id, config_path):
# Initialize metrics based on config
self._init_metrics_from_config()

- # Start thread to update metrics
+ # Start daemon thread to update metrics so it won't block
+ # process exit if shutdown() is not explicitly called.
self.is_running = True
- self.thread = threading.Thread(target=self.update_stats_loop)
+ self.thread = threading.Thread(target=self.update_stats_loop, daemon=True)
self.thread.start()

def _register_metrics_by_type(self, metric_type):
Expand Down Expand Up @@ -190,7 +195,8 @@ def update_stats_loop(self):

def shutdown(self):
self.is_running = False
- self.thread.join()
+ if self.thread is not None:
+     self.thread.join()

def __del__(self):
try:
Expand Down
Loading