@@ -72,18 +72,38 @@ def ts_median(f: Factor, window: int) -> Factor:
7272
7373
def ts_rank(f: Factor, window: int, constant: float = 0) -> Factor:
    """Percentile rank of the current value within a rolling window.

    For every full window of ``window`` rows (evaluated per ``symbol``) the
    result is ``rank / window`` where ``rank`` is the number of values in the
    window that are less than or equal to the latest value, so the output
    lies in ``(0, 1]`` before ``constant`` is added.

    Parameters
    ----------
    f : Factor
        Input factor.
    window : int
        Rolling window length; rows with fewer than ``window`` samples
        produce no value.
    constant : float, default 0
        Offset added to the rank after it is computed.

    Returns
    -------
    Factor
        Rolling rank factor named ``ts_rank(<name>,<window>)``.
    """
    def rank_window(s):
        # A rank is undefined when the window holds missing data (null or
        # NaN) and is uninformative when every value is identical.
        if s.null_count() > 0 or s.is_nan().any() or s.n_unique() == 1:
            return None
        latest = s[-1]
        # Counting values <= latest gives the same rank a stable double
        # argsort assigns to the last element, but deterministically for
        # ties and without sorting the window twice.
        return float((s <= latest).sum()) / s.len()

    rank_expr = f.expr.rolling_map(rank_window, window_size=window, min_periods=window)
    return Factor(
        rank_expr.over("symbol") + constant,
        f"ts_rank({f.name},{window})"
    )
8189
8290
def ts_skewness(f: Factor, window: int) -> Factor:
    """Rolling skewness over N periods (sample skewness with bias correction).

    Parameters
    ----------
    f : Factor
        Input factor.
    window : int
        Rolling window length N.

    Returns
    -------
    Factor
        Rolling skewness factor named ``ts_skewness(<name>,<window>)``,
        evaluated per ``symbol``.
    """
    # Use the built-in rolling skew with bias correction instead of a
    # hand-rolled formula: deviations must be taken from the mean of the
    # *same* window (subtracting each row's own rolling mean and then summing
    # mixes different means), and the correction factor has to be applied to
    # the standardized third moment, not to raw sums of squares.
    skew_expr = f.expr.rolling_skew(window_size=window, bias=False)

    return Factor(
        skew_expr.over("symbol"),
        f"ts_skewness({f.name},{window})"
    )
89109
@@ -198,14 +218,9 @@ def ts_cv(f: Factor, window: int) -> Factor:
def ts_autocorr(a: Factor, window: int, lag: int = 1) -> Factor:
    """Rolling correlation between a factor and a lagged copy of itself.

    Parameters
    ----------
    a : Factor
        Input factor.
    window : int
        Rolling window length; a full window is required for a value.
    lag : int, default 1
        Number of rows the comparison series is shifted by.

    Returns
    -------
    Factor
        Autocorrelation factor named ``ts_autocorr(<name>,<window>,<lag>)``,
        evaluated per ``symbol``.
    """
    series = a.expr
    rolling = pl.rolling_corr(
        series,
        series.shift(lag),
        window_size=window,
        min_periods=window,
        ddof=1,
    )
    label = f"ts_autocorr({a.name},{window},{lag})"
    return Factor(rolling.over("symbol"), label)
211226
@@ -216,3 +231,31 @@ def ts_count_nans(f: Factor, window: int) -> Factor:
216231 f .expr .is_null ().cast (pl .Int32 ).rolling_sum (window_size = window , min_periods = 1 ).over ("symbol" ),
217232 f"ts_count_nans({ f .name } ,{ window } )"
218233 )
234+
235+
def ts_backfill(f: Factor, window: int, k: int = 1) -> Factor:
    """Fill missing values with the k-th most recent observation in window.

    Observed (non-null) values are always kept as they are; only missing
    (null) rows are filled, and only from earlier rows of the same
    ``symbol``.

    Parameters
    ----------
    f : Factor
        Input factor
    window : int
        Maximum lookback for filling: the observation used as the fill
        value may be at most ``window`` rows before the row being filled
    k : int, default 1
        Which recent observation to use (1=most recent, 2=the one before
        it, ...)

    Returns
    -------
    Factor
        Factor with missing values filled, named
        ``ts_backfill(<name>,<window>,<k>)``

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 (that would read values from the future).
    """
    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")

    label = f"ts_backfill({f.name},{window},{k})"
    if k == 1:
        return Factor(f.expr.forward_fill(limit=window).over("symbol"), label)

    value = f.expr
    observed = value.is_not_null()
    row = pl.int_range(pl.len())

    # At observed rows, src_val / src_row hold the m-th most recent
    # observation (counting the row itself as the 1st) and the row it came
    # from; they are null elsewhere. Start with m = 1.
    src_val = value
    src_row = pl.when(observed).then(row)
    for _ in range(k - 1):
        # The (m+1)-th most recent observation as of an observed row is the
        # m-th most recent observation as of the row just before it.
        src_val = pl.when(observed).then(src_val.forward_fill().shift(1))
        src_row = pl.when(observed).then(src_row.forward_fill().shift(1))

    # Carry the k-th observation forward onto the missing rows.
    candidate = src_val.forward_fill()
    candidate_row = src_row.forward_fill()

    # Never overwrite observed data; fill a gap only when the source
    # observation lies within the lookback window, otherwise leave it null.
    filled_expr = (
        pl.when(observed)
        .then(value)
        .when(row - candidate_row <= window)
        .then(candidate)
        .over("symbol")
    )

    return Factor(filled_expr, label)
0 commit comments