@@ -311,13 +311,10 @@ def _resolve_feature_service_name(self, feature_refs: List[str]) -> Optional[str
311311 _logger .debug ("Failed to resolve feature service name: %s" , e )
312312 return None
313313
314- def _auto_log_entity_df_info (self , entity_df , start_date = None , end_date = None ):
315- """Log entity_df info to MLflow for reproducibility .
314+ def _log_entity_df_metadata (self , entity_df , start_date = None , end_date = None ):
315+ """Log lightweight entity_df metadata to MLflow (type, row count, columns, query, dates) .
316316
317- Handles three entity_df types:
318- - pd.DataFrame: saves metadata + full parquet artifact (within configured limit)
319- - str (SQL query): logs the query as a param
320- - None (range-based): logs start_date/end_date
317+ Always called during historical retrieval regardless of auto_log_entity_df.
321318 """
322319 try :
323320 import mlflow
@@ -350,15 +347,6 @@ def _auto_log_entity_df_info(self, entity_df, start_date=None, end_date=None):
350347 cols = cols [:MLFLOW_PARAM_TRUNCATION_SLICE ] + "..."
351348 client .log_param (run_id , "feast.entity_df_columns" , cols )
352349
353- max_rows = mlflow_cfg .entity_df_max_rows
354- if len (entity_df ) <= max_rows :
355- import tempfile
356-
357- with tempfile .TemporaryDirectory () as tmp_dir :
358- path = os .path .join (tmp_dir , "entity_df.parquet" )
359- entity_df .to_parquet (path , index = False )
360- client .log_artifact (run_id , path )
361-
362350 elif entity_df is None and (start_date or end_date ):
363351 client .set_tag (run_id , "feast.entity_df_type" , "range" )
364352 if start_date :
@@ -367,7 +355,38 @@ def _auto_log_entity_df_info(self, entity_df, start_date=None, end_date=None):
367355 client .log_param (run_id , "feast.end_date" , str (end_date ))
368356
369357 except Exception as e :
370- _logger .debug ("Failed to log entity_df info to MLflow: %s" , e )
358+ _logger .debug ("Failed to log entity_df metadata to MLflow: %s" , e )
359+
def _log_entity_df_artifact(self, entity_df):
    """Upload the entity DataFrame to MLflow as a parquet artifact.

    Only called when ``auto_log_entity_df`` is enabled. The upload is
    skipped when *entity_df* is not a ``pd.DataFrame`` (SQL-string and
    range-based entity definitions have nothing to upload), when no
    MLflow run is active, or when the row count exceeds the configured
    ``entity_df_max_rows`` limit.

    Args:
        entity_df: The entity dataframe passed to historical retrieval;
            may be a DataFrame, a SQL string, or None.

    Best-effort: any failure is logged at debug level and swallowed so
    artifact logging can never break feature retrieval.
    """
    try:
        import mlflow
        import tempfile

        if mlflow.active_run() is None:
            return
        if not isinstance(entity_df, pd.DataFrame):
            return

        mlflow_cfg = self.config.mlflow
        client = mlflow.MlflowClient(tracking_uri=mlflow_cfg.get_tracking_uri())
        run_id = mlflow.active_run().info.run_id

        max_rows = mlflow_cfg.entity_df_max_rows
        if len(entity_df) > max_rows:
            # Previously a silent no-op; log why the artifact was not
            # uploaded so the size-based skip is observable.
            _logger.debug(
                "Skipping entity_df artifact upload: %d rows exceeds "
                "entity_df_max_rows limit of %d",
                len(entity_df),
                max_rows,
            )
            return

        # Write to a throwaway directory; MLflow copies the file into the
        # run's artifact store before the directory is cleaned up.
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, "entity_df.parquet")
            entity_df.to_parquet(path, index=False)
            client.log_artifact(run_id, path)

    except Exception as e:
        _logger.debug("Failed to log entity_df artifact to MLflow: %s", e)
371390
372391 def _init_openlineage_emitter (self ) -> Optional [Any ]:
373392 """Initialize OpenLineage emitter if configured and enabled."""
@@ -1750,10 +1769,12 @@ def get_historical_features(
17501769 tracking_uri = self .config .mlflow .get_tracking_uri (),
17511770 )
17521771
1772+ self ._log_entity_df_metadata (
1773+ entity_df , start_date = start_date , end_date = end_date
1774+ )
1775+
17531776 if self .config .mlflow .auto_log_entity_df :
1754- self ._auto_log_entity_df_info (
1755- entity_df , start_date = start_date , end_date = end_date
1756- )
1777+ self ._log_entity_df_artifact (entity_df )
17571778 except Exception as e :
17581779 _logger .debug ("MLflow auto-log failed for historical retrieval: %s" , e )
17591780
@@ -3036,7 +3057,9 @@ async def get_online_features_async(
30363057 """
30373058 provider = self ._get_provider ()
30383059
3039- return await provider .get_online_features_async (
3060+ _retrieval_start = time .monotonic ()
3061+
3062+ response = await provider .get_online_features_async (
30403063 config = self .config ,
30413064 features = features ,
30423065 entity_rows = entity_rows ,
@@ -3046,6 +3069,52 @@ async def get_online_features_async(
30463069 include_feature_view_version_metadata = include_feature_view_version_metadata ,
30473070 )
30483071
3072+ try :
3073+ if (
3074+ self .config .mlflow is not None
3075+ and self .config .mlflow .enabled
3076+ and self .config .mlflow .auto_log
3077+ ):
3078+ _log_fn = _get_mlflow_log_fn ()
3079+ if _log_fn is not None :
3080+ _duration = time .monotonic () - _retrieval_start
3081+ _feature_refs = utils ._get_features (
3082+ self .registry , self .project , features , allow_cache = True
3083+ )
3084+ if isinstance (entity_rows , list ):
3085+ _entity_count = len (entity_rows )
3086+ elif isinstance (entity_rows , Mapping ):
3087+ try :
3088+ _first_col = next (iter (entity_rows .values ()))
3089+ if isinstance (_first_col , RepeatedValue ):
3090+ _entity_count = len (_first_col .val )
3091+ else :
3092+ _entity_count = len (_first_col )
3093+ except Exception :
3094+ _entity_count = 0
3095+ else :
3096+ _entity_count = 0
3097+ _fs = features if isinstance (features , FeatureService ) else None
3098+ _fs_name = (
3099+ features .name
3100+ if isinstance (features , FeatureService )
3101+ else self ._resolve_feature_service_name (_feature_refs )
3102+ )
3103+ _log_fn (
3104+ feature_refs = _feature_refs ,
3105+ entity_count = _entity_count ,
3106+ duration_seconds = _duration ,
3107+ retrieval_type = "online" ,
3108+ feature_service = _fs ,
3109+ feature_service_name = _fs_name ,
3110+ project = self .project ,
3111+ tracking_uri = self .config .mlflow .get_tracking_uri (),
3112+ )
3113+ except Exception as e :
3114+ _logger .debug ("MLflow auto-log failed for online retrieval: %s" , e )
3115+
3116+ return response
3117+
30493118 def retrieve_online_documents (
30503119 self ,
30513120 query : Union [str , List [float ]],
0 commit comments