@@ -1514,9 +1514,40 @@ def add_alias(self, name, expression, dtype=None, is_constant=False):
15141514 # Check for cycles (catches indirect cycles like A -> B -> A)
15151515 self ._check_for_cycles ()
15161516
1517- def _eval_in_namespace (self , expr , warn_missing_keys = True , alias_name = None ):
1517+ def _eval_in_namespace (self , expr , warn_missing_keys = True , alias_name = None , context_override = None ):
1518+ """
1519+ Evaluate an expression in a namespace containing DataFrame columns and functions.
1520+
1521+ Parameters
1522+ ----------
1523+ expr : str
1524+ Expression to evaluate
1525+ warn_missing_keys : bool, default=True
1526+ If True, warn when subframe join has missing keys
1527+ alias_name : str, optional
1528+ Name of alias being evaluated (for error messages)
1529+ context_override : dict, optional
1530+ Additional variables to include in evaluation namespace.
1531+ Used by materialize_aliases() to provide already-computed results
1532+ so that later aliases can reference earlier ones without requiring
1533+ them to be in self.df yet (enables batch materialization).
1534+
1535+ Returns
1536+ -------
1537+ pandas.Series or scalar
1538+ Result of evaluating the expression
1539+ """
15181540 expr = self ._prepare_subframe_joins (expr , warn_missing_keys = warn_missing_keys , alias_name = alias_name )
1541+
1542+ # Build namespace: DataFrame columns first
15191543 local_env = {col : self .df [col ] for col in self .df .columns }
1544+
1545+ # Add context_override (previously computed aliases in batch mode)
1546+ # This allows alias B to reference alias A even if A isn't in self.df yet
1547+ if context_override :
1548+ local_env .update (context_override )
1549+
1550+ # Add functions last (so they don't get shadowed by columns)
15201551 local_env .update (self ._default_functions ())
15211552
15221553 try :
@@ -2062,12 +2093,21 @@ def materialize_aliases(self, pattern=None, names=None, with_dependencies=True,
20622093 list
20632094 Names of aliases that were materialized
20642095
2096+ Notes
2097+ -----
2098+ Performance optimization: Uses batch pd.concat instead of sequential
2099+ column insertion to avoid O(n²) DataFrame fragmentation.
2100+ See BUG-2025-11-27-002 for details.
2101+
20652102 Examples
20662103 --------
20672104 >>> adf.materialize_aliases(pattern=r'is.*') # All 'is*' aliases
20682105 >>> adf.materialize_aliases(names=['r', 'phi', 'cosPhi']) # Specific names
20692106 >>> adf.materialize_aliases(pattern=r'dy.*|dz.*') # dy and dz aliases
20702107 """
2108+ import time
2109+ t_start = time .time () if verbose else None
2110+
20712111 # Get primary targets first (without dependencies)
20722112 targets = self .select_aliases (
20732113 pattern = pattern ,
@@ -2090,29 +2130,139 @@ def materialize_aliases(self, pattern=None, names=None, with_dependencies=True,
20902130 with_dependencies = True
20912131 )
20922132 if verbose :
2093- print (f"[materialize_aliases] With dependencies: { to_materialize } " )
2133+ print (f"[materialize_aliases] With dependencies: { len ( to_materialize ) } aliases " )
20942134 else :
20952135 to_materialize = targets
20962136
2097- # Materialize in order
2098- added = []
2137+ # =====================================================================
2138+ # BATCH MATERIALIZATION — Performance optimization (BUG-2025-11-27-002)
2139+ #
2140+ # Instead of sequential self.df[name] = result (which causes O(n²)
2141+ # DataFrame fragmentation), we:
2142+ # 1. Compute all results into a dict
2143+ # 2. Use context_override so later aliases can reference earlier ones
2144+ # 3. Single pd.concat at the end
2145+ # =====================================================================
2146+
2147+ results = {} # Collect all computed results
2148+ added = [] # Track which aliases we computed
2149+
20992150 for name in to_materialize :
2100- if name not in self .df .columns :
2151+ if name in self .df .columns :
2152+ # Already materialized (either existed or subframe join added it)
2153+ continue
2154+
2155+ if name not in self .aliases :
21012156 if verbose :
2102- print (f"[materialize_aliases] Materializing: { name } " )
2103- self .materialize_alias (name , cleanTemporary = False )
2104- added .append (name )
2157+ print (f"[materialize_aliases] Warning: '{ name } ' not in aliases, skipping" )
2158+ continue
2159+
2160+ expr = self .aliases [name ]
2161+
2162+ # Handle subframe dependencies before evaluation
2163+ # This is necessary because _prepare_subframe_joins needs index columns
2164+ # and subframe attributes to exist
2165+ self ._ensure_subframe_dependencies (name , expr , results , verbose )
2166+
2167+ if verbose :
2168+ print (f"[materialize_aliases] Computing: { name } " )
2169+
2170+ # Evaluate with context_override containing previously computed results
2171+ # This allows alias B to reference alias A without A being in self.df yet
2172+ result = self ._eval_in_namespace (
2173+ expr ,
2174+ warn_missing_keys = True ,
2175+ alias_name = name ,
2176+ context_override = results
2177+ )
2178+
2179+ # Apply dtype if specified
2180+ result_dtype = self .alias_dtypes .get (name )
2181+ if result_dtype is not None :
2182+ try :
2183+ result = result .astype (result_dtype )
2184+ except AttributeError :
2185+ result = result_dtype (result )
2186+
2187+ results [name ] = result
2188+ added .append (name )
2189+
2190+ # =====================================================================
2191+ # BATCH ASSIGNMENT — Single DataFrame operation
2192+ # This avoids the O(n²) fragmentation from sequential column insertion
2193+ # =====================================================================
2194+ if results :
2195+ new_cols_df = pd .DataFrame (results , index = self .df .index )
2196+ self .df = pd .concat ([self .df , new_cols_df ], axis = 1 )
2197+ if verbose :
2198+ print (f"[materialize_aliases] Batch-added { len (results )} columns" )
21052199
2106- # Clean temporary dependencies if requested
2200+ # =====================================================================
2201+ # BATCH CLEANUP — Single drop operation (also avoids fragmentation)
2202+ # =====================================================================
21072203 if cleanTemporary and with_dependencies :
21082204 targets_set = set (targets )
2109- for col in added :
2110- if col not in targets_set and col in self .df .columns :
2111- self .df .drop (columns = [col ], inplace = True )
2112- if verbose :
2113- print (f"[materialize_aliases] Cleaned temporary: { col } " )
2205+ cols_to_drop = [col for col in added if col not in targets_set and col in self .df .columns ]
2206+ if cols_to_drop :
2207+ self .df .drop (columns = cols_to_drop , inplace = True )
2208+ if verbose :
2209+ print (f"[materialize_aliases] Batch-dropped { len (cols_to_drop )} temporary columns" )
2210+
2211+ if verbose :
2212+ elapsed = time .time () - t_start
2213+ print (f"[materialize_aliases] Completed in { elapsed :.2f} s ({ len (added )} aliases)" )
21142214
21152215 return added
2216+
def _ensure_subframe_dependencies(self, alias_name, expr, context_override, verbose=False):
    """
    Ensure subframe dependencies are available before evaluating an alias.

    Scans ``expr`` for ``name.attr`` tokens. For every token whose ``name``
    resolves to a registered subframe (``self.get_subframe(name)`` is not
    None) this:

    1. Materializes the subframe's index columns in this frame when they
       are aliases that are neither existing columns of ``self.df`` nor
       already computed in the current batch (``context_override``).
    2. Materializes ``attr`` in the subframe's own DataFrame when it is an
       alias of the subframe that is not yet a column there.

    Each distinct token, and each subframe's index columns, is handled only
    once per call even if the expression mentions it several times.

    Parameters
    ----------
    alias_name : str
        Name of the alias being evaluated. Not used by this method; kept so
        the call signature stays stable.
    expr : str
        Expression of the alias.
    context_override : dict or None
        Already-computed results of the current batch, keyed by alias name.
        Only key membership is checked; the dict is not modified. ``None``
        is treated as an empty dict.
    verbose : bool
        If True, print progress.

    Returns
    -------
    None
    """
    # Tolerate a missing batch dict instead of failing on the `in` test below.
    computed = context_override if context_override is not None else {}

    # Find candidate subframe references (pattern: word.word). The pattern
    # also matches things like "np.sqrt" or "1.5"; those are filtered out
    # below because get_subframe() returns None for them.
    # dict.fromkeys() drops duplicates while keeping first-seen order.
    tokens = dict.fromkeys(re.findall(r'\w+\.\w+', expr))

    # Subframes whose index columns were already checked during this call.
    index_checked = set()

    for token in tokens:
        sf_name, sf_attr = token.split('.', 1)
        sf = self.get_subframe(sf_name)
        if sf is None:
            continue

        if sf_name not in index_checked:
            index_checked.add(sf_name)
            entry = self._subframes.get_entry(sf_name)
            if entry:
                index_cols = entry['index']
                if isinstance(index_cols, str):
                    index_cols = [index_cols]

                for idx_col in index_cols:
                    # Only aliases that are not yet columns need work here.
                    if idx_col not in self.aliases or idx_col in self.df.columns:
                        continue
                    # Already computed in this batch.
                    # NOTE(review): such a column exists only in the batch
                    # dict, not in self.df. If the subframe join reads index
                    # columns from self.df this case may still fail -
                    # confirm against _prepare_subframe_joins.
                    if idx_col in computed:
                        continue
                    if verbose:
                        print(f"[materialize_aliases] Materializing index column: {idx_col}")
                    self.materialize_alias(idx_col, warn_missing_keys=True)

        # Materialize the subframe attribute itself (in subframe's DataFrame)
        if sf_attr in sf.aliases and sf_attr not in sf.df.columns:
            if verbose:
                print(f"[materialize_aliases] Materializing subframe attr: {sf_name}.{sf_attr}")
            sf.materialize_alias(sf_attr)
21162266
21172267 def materialize_pattern (self , pattern , cleanTemporary = True , verbose = False ,
21182268 only_unmaterialized = True ):
0 commit comments