diff --git a/CHANGELOG.md b/CHANGELOG.md index d8291bf29e..1c2df014ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co ### For users of the library: +- Added `ReversoModel`, a new foundation model for zero-shot time series forecasting. Reverso is a highly parameter-efficient model (200K-2.6M params) that matches accuracy of models 100x its size. [#3034](https://github.com/unit8co/darts/issues/3034) by [Xinghong Fu](https://github.com/shinfxh). - Added native multi-quantile support for `CatBoostModel` by using CatBoost’s `MultiQuantile` loss for faster training and inference. Set `likelihood="multiquantile"` to enable this feature. [#3032](https://github.com/unit8co/darts/pull/3032) by [Zhihao Dai](https://github.com/daidahao) **Fixed** diff --git a/README.md b/README.md index 6dfc227e3f..6a09f1631f 100644 --- a/README.md +++ b/README.md @@ -277,6 +277,7 @@ Here's a breakdown of the forecasting models currently implemented in Darts. Our | **Foundation Models**
([GlobalForecastingModel](https://unit8co.github.io/darts/userguide/covariates.html#global-forecasting-models-gfms)): No training required | | | | | | | [Chronos2Model](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.chronos2_model.html#darts.models.forecasting.chronos2_model.Chronos2Model) | [Chronos-2 report](https://arxiv.org/abs/2510.15821), [Amazon blog post](https://www.amazon.science/blog/introducing-chronos-2-from-univariate-to-universal-forecasting) | ✅ ✅ | ✅ ✅ 🔴 | ✅ ✅ | ✅ | | [TimesFM2p5Model](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.timesfm2p5_model.html#darts.models.forecasting.timesfm2p5_model.TimesFM2p5Model) | [TimesFM 1.0 paper](https://arxiv.org/abs/2310.10688), [Google blog post](https://research.google/blog/a-decoder-only-foundation-model-for-time-series-forecasting) | ✅ ✅ | 🔴 🔴 🔴 | ✅ ✅ | ✅ | +| [ReversoModel](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.reverso_model.html#darts.models.forecasting.reverso_model.ReversoModel) | [Reverso paper](https://arxiv.org/abs/2602.17634), [GitHub](https://github.com/shinfxh/reverso) | ✅ 🔴 | 🔴 🔴 🔴 | 🔴 🔴 | ✅ | | **Ensemble Models**
([GlobalForecastingModel](https://unit8co.github.io/darts/userguide/covariates.html#global-forecasting-models-gfms)): Model support is dependent on ensembled forecasting models and the ensemble model itself | | | | | | | [NaiveEnsembleModel](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.baselines.html#darts.models.forecasting.baselines.NaiveEnsembleModel) | | ✅ ✅ | ✅ ✅ ✅ | ✅ ✅ | ✅ | | [RegressionEnsembleModel](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.regression_ensemble_model.html#darts.models.forecasting.regression_ensemble_model.RegressionEnsembleModel) | | ✅ ✅ | ✅ ✅ ✅ | ✅ ✅ | ✅ | diff --git a/darts/models/__init__.py b/darts/models/__init__.py index ef933b8876..763c224cfd 100644 --- a/darts/models/__init__.py +++ b/darts/models/__init__.py @@ -5,7 +5,7 @@ A comprehensive collection of forecasting and filtering models, including baseline models (NaiveSeasonal, NaiveMovingAverage, ...), statistical models (ARIMA, exponential smoothing, ...), machine learning models (LightGBM, CatBoost, sklearn-based, ...), neural network models (RNN, -N-BEATS, TiDE...), and foundation models (Chronos-2, TimesFM 2.5). +N-BEATS, TiDE...), and foundation models (Chronos-2, Reverso, TimesFM 2.5). 
""" from darts.logging import get_logger @@ -89,9 +89,11 @@ try: from darts.models.forecasting.chronos2_model import Chronos2Model + from darts.models.forecasting.reverso_model import ReversoModel from darts.models.forecasting.timesfm2p5_model import TimesFM2p5Model except ModuleNotFoundError: Chronos2Model = NotImportedModule(module_name="(Py)Torch", warn=False) + ReversoModel = NotImportedModule(module_name="(Py)Torch", warn=False) TimesFM2p5Model = NotImportedModule(module_name="(Py)Torch", warn=False) try: @@ -210,6 +212,7 @@ "ConformalNaiveModel", "ConformalQRModel", "Chronos2Model", + "ReversoModel", "TimesFM2p5Model", "NeuralForecastModel", ] diff --git a/darts/models/components/reverso_submodels.py b/darts/models/components/reverso_submodels.py new file mode 100644 index 0000000000..5cb7081a1f --- /dev/null +++ b/darts/models/components/reverso_submodels.py @@ -0,0 +1,434 @@ +""" +Reverso Submodels +----------------- + +--- +title: Reverso Submodels +summary: This module contains the submodules used in the Reverso model. +--- + +# License and Attribution + +MIT License from https://github.com/shinfxh/reverso/blob/main/LICENSE + +Copyright (c) 2026 Xinghong Fu, Yanhong Li, Georgios Papaioannou, Yoon Kim + +Ported from https://github.com/shinfxh/reverso (reverso_torch/model.py and reverso/model.py). + +# Modifications for Darts + +Adapted for Darts with custom `PLForecastingModule` and `FoundationModel` integration: +- Remove autoregressive rollout logic (handled by `PLForecastingModule`). +- Replace FlashFFTConv with FFT-based circular convolution (pure PyTorch). +- Replace fla.layers.DeltaNet with pure-PyTorch delta-rule linear attention. +- Prefix all class names with `_` for internal use. 
+""" + +import math + +import torch +import torch.nn.functional as F +from torch import nn + +from darts.logging import get_logger + +logger = get_logger(__name__) + + +class _RMSNorm(nn.Module): + """RMS normalization matching fla.modules.layernorm.RMSNorm weight layout.""" + + def __init__(self, hidden_size: int, eps: float = 1e-5): + super().__init__() + self.eps = eps + self.weight = nn.Parameter(torch.ones(hidden_size)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + dtype = x.dtype + x = x.float() + rms = x.pow(2).mean(-1, keepdim=True).add(self.eps).rsqrt() + return (x * rms * self.weight.float()).to(dtype) + + +class _PositionalEmbedding(nn.Module): + """Sinusoidal positional embedding for the output decoder head.""" + + def __init__(self, d_model: int, max_len: int = 6500): + super().__init__() + pe = torch.zeros(max_len, d_model).float() + position = torch.arange(0, max_len).float().unsqueeze(1) + div_term = ( + torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model) + ).exp() + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0) + self.register_buffer("pe", pe) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.pe[:, : x.size(1)] + + +class _Gating(nn.Module): + """Gated short convolution block.""" + + def __init__(self, channels: int, temporal_kernel: int = 3): + super().__init__() + self.net = nn.Sequential( + nn.Conv1d( + channels, + channels, + kernel_size=temporal_kernel, + padding=temporal_kernel // 2, + groups=channels, + ), + nn.SiLU(), + nn.Conv1d(channels, channels, kernel_size=1), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.sigmoid(self.net(x)) + + +class _MLPBlock(nn.Module): + """Feed-forward block with skip connection and LayerNorm.""" + + def __init__(self, d_in: int, d_out: int, d_intermediate: int = 0): + super().__init__() + self.norm = nn.LayerNorm(d_out) + if d_intermediate and d_intermediate > 0: + 
self.linear = nn.Linear(d_in, d_intermediate) + self.linear_final = nn.Linear(d_intermediate, d_out) + else: + self.linear = nn.Linear(d_in, d_out) + self.linear_final = nn.Identity() + self.activation = nn.ReLU() + self.skip_linear = nn.Linear(d_in, d_out) if d_in != d_out else nn.Identity() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.ndim == 3: + x = x.permute(0, 2, 1) + residual = self.skip_linear(x) + y = self.linear(x) + y = self.activation(y) + y = self.linear_final(y) + y = self.norm(y) + y = residual + y + if y.ndim == 3: + y = y.permute(0, 2, 1) + return y + + +class _CNNBlock(nn.Module): + """Long convolution via FFT (replaces FlashFFTConv).""" + + def __init__(self, channels: int, seq_len: int, gating_kernel_size: int = 3): + super().__init__() + self.seq_len = seq_len + self.k = nn.Parameter(torch.randn(channels, seq_len, dtype=torch.float32)) + self.pregate = _Gating(channels, gating_kernel_size) + self.activation = nn.ReLU() + self.norm = nn.LayerNorm(channels) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + residual = x + x_conv = x.contiguous().to(torch.bfloat16) + pregate = self.pregate(x_conv.float()).to(x_conv.dtype) + x_gated = (pregate * x_conv).float() + + # Circular convolution via FFT (matches FlashFFTConv behaviour) + X = torch.fft.rfft(x_gated, n=self.seq_len, dim=-1) + K = torch.fft.rfft(self.k.float(), n=self.seq_len, dim=-1) + out = torch.fft.irfft(X * K.unsqueeze(0), n=self.seq_len, dim=-1) + + out = self.activation(out) + out = out.transpose(1, 2) + out = self.norm(out) + out = out.transpose(1, 2) + out = out + residual + return out + + +class _TorchDeltaNet(nn.Module): + """Pure-PyTorch delta-rule linear attention. + + Weight-compatible with ``fla.layers.DeltaNet`` (same parameter names and shapes) + so that pre-trained checkpoints load directly. 
+ """ + + def __init__( + self, + d_model: int | None = None, + hidden_size: int = 1024, + mode: str = "chunk", + expand_k: float = 1.0, + expand_v: float = 1.0, + num_heads: int = 4, + use_beta: bool = True, + use_gate: bool = False, + use_short_conv: bool = True, + conv_size: int = 4, + conv_bias: bool = False, + allow_neg_eigval: bool = False, + qk_activation: str = "silu", + qk_norm: str = "l2", + norm_eps: float = 1e-5, + **kwargs, + ): + super().__init__() + + if d_model is not None: + hidden_size = d_model + self.hidden_size = hidden_size + self.num_heads = num_heads + self.key_dim = int(hidden_size * expand_k) + self.value_dim = int(hidden_size * expand_v) + self.head_k_dim = self.key_dim // num_heads + self.head_v_dim = self.value_dim // num_heads + self.use_beta = use_beta + self.use_gate = use_gate + self.use_short_conv = use_short_conv + self.allow_neg_eigval = allow_neg_eigval + self.qk_activation = qk_activation + self.qk_norm = qk_norm + + # projections (match fla naming) + self.q_proj = nn.Linear(hidden_size, self.key_dim, bias=False) + self.k_proj = nn.Linear(hidden_size, self.key_dim, bias=False) + self.v_proj = nn.Linear(hidden_size, self.value_dim, bias=False) + + if use_short_conv: + self.q_conv1d = nn.Conv1d( + self.key_dim, + self.key_dim, + conv_size, + padding=conv_size - 1, + groups=self.key_dim, + bias=conv_bias, + ) + self.k_conv1d = nn.Conv1d( + self.key_dim, + self.key_dim, + conv_size, + padding=conv_size - 1, + groups=self.key_dim, + bias=conv_bias, + ) + self.v_conv1d = nn.Conv1d( + self.value_dim, + self.value_dim, + conv_size, + padding=conv_size - 1, + groups=self.value_dim, + bias=conv_bias, + ) + + if use_beta: + self.b_proj = nn.Linear(hidden_size, num_heads, bias=False) + + self.o_norm = _RMSNorm(self.head_v_dim, eps=norm_eps) + self.o_proj = nn.Linear(self.value_dim, hidden_size, bias=False) + + def _causal_conv1d( + self, x: torch.Tensor, conv: nn.Conv1d, apply_silu: bool = True + ) -> torch.Tensor: + """Causal depthwise 
conv1d: (B, L, D) -> (B, L, D).""" + y = conv(x.transpose(1, 2)) # (B, D, L + pad) + y = y[..., : x.shape[1]].transpose(1, 2) # truncate to causal + if apply_silu: + y = F.silu(y) + return y + + @staticmethod + def _delta_rule_recurrent( + q: torch.Tensor, # (B, H, L, K) + k: torch.Tensor, # (B, H, L, K) + v: torch.Tensor, # (B, H, L, V) + beta: torch.Tensor, # (B, H, L) + ) -> torch.Tensor: + """Chunked parallel-scan delta rule. + + Replaces the naive step-by-step recurrence with a Hillis-Steele + parallel prefix scan *within* fixed-size chunks, while propagating + the recurrent state sequentially *across* chunks. This is + numerically equivalent (max diff ~4e-7 in float32) but 2-9x faster + on CPU because each scan step is a single batched matmul instead of + a Python for-loop over time steps. + + The chunk size is chosen automatically based on K (head dimension) + to balance the O(K^2) scan matmul cost against Python-loop overhead. + """ + B, H, L, K = q.shape + V = v.shape[-1] + device, dtype = q.device, q.dtype + + # Larger K → smaller optimal chunk (scan matmuls are O(K^2) each). + # Values determined empirically across reverso-nano/small/full. 
+ if K <= 8: + chunk_size = 512 + elif K <= 16: + chunk_size = 128 + else: + chunk_size = 64 + + eye = torch.eye(K, device=device, dtype=dtype) + o = torch.empty(B, H, L, V, device=device, dtype=dtype) + h = q.new_zeros(B, H, K, V) # inter-chunk recurrent state + + num_chunks = (L + chunk_size - 1) // chunk_size + for c in range(num_chunks): + start = c * chunk_size + end = min(start + chunk_size, L) + clen = end - start + + q_c = q[:, :, start:end] + k_c = k[:, :, start:end] + v_c = v[:, :, start:end] + b_c = beta[:, :, start:end] + + # Build per-step transition matrices and bias vectors: + # A_t = I - β_t k_t k_t^T (B, H, clen, K, K) + # b_t = β_t k_t v_t^T (B, H, clen, K, V) + beta_exp = b_c.unsqueeze(-1).unsqueeze(-1) + As = eye - beta_exp * (k_c.unsqueeze(-1) * k_c.unsqueeze(-2)) + bs = beta_exp * (k_c.unsqueeze(-1) * v_c.unsqueeze(-2)) + + # Hillis-Steele inclusive prefix scan within chunk. + # After ceil(log2(clen)) steps of batched matmul: + # As[t] = A_t @ A_{t-1} @ ... @ A_0 (cumulative transition) + # bs[t] = cumulative bias such that state = As[t] @ h_prev + bs[t] + scan_steps = int(math.ceil(math.log2(clen))) + for d in range(scan_steps): + stride = 2**d + if stride >= clen: + break + new_A = torch.matmul(As[:, :, stride:], As[:, :, : clen - stride]) + new_b = ( + torch.matmul(As[:, :, stride:], bs[:, :, : clen - stride]) + + bs[:, :, stride:] + ) + As = torch.cat([As[:, :, :stride], new_A], dim=2) + bs = torch.cat([bs[:, :, :stride], new_b], dim=2) + + # Materialize all states: h_t = As[t] @ h_prev + bs[t] + states = torch.matmul(As, h.unsqueeze(2).expand(-1, -1, clen, -1, -1)) + bs + + # Readout: o_t = q_t^T @ h_t + o[:, :, start:end] = torch.einsum("bhlk,bhlkv->bhlv", q_c, states) + + # Propagate state to next chunk + h = As[:, :, -1] @ h + bs[:, :, -1] + + return o # (B, H, L, V) + + def forward( + self, hidden_states: torch.Tensor, attention_mask=None, **kwargs + ) -> tuple[torch.Tensor, None, None]: + B, L, _ = hidden_states.shape + + if 
self.use_short_conv: + q = self._causal_conv1d( + self.q_proj(hidden_states), + self.q_conv1d, + apply_silu=(self.qk_activation == "silu"), + ) + k = self._causal_conv1d( + self.k_proj(hidden_states), + self.k_conv1d, + apply_silu=(self.qk_activation == "silu"), + ) + v = self._causal_conv1d( + self.v_proj(hidden_states), self.v_conv1d, apply_silu=True + ) + else: + q = self.q_proj(hidden_states) + k = self.k_proj(hidden_states) + v = self.v_proj(hidden_states) + if self.qk_activation == "silu": + q = F.silu(q) + k = F.silu(k) + v = F.silu(v) + + # reshape to multi-head: (B, L, H, D) + q = q.view(B, L, self.num_heads, self.head_k_dim) + k = k.view(B, L, self.num_heads, self.head_k_dim) + v = v.view(B, L, self.num_heads, self.head_v_dim) + + # L2 normalization per head + if self.qk_norm == "l2": + q = q / (q.norm(2, dim=-1, keepdim=True).pow(2).add(1e-6)).sqrt() + k = k / (k.norm(2, dim=-1, keepdim=True).pow(2).add(1e-6)).sqrt() + + # beta + if self.use_beta: + beta = self.b_proj(hidden_states).sigmoid() # (B, L, H) + else: + beta = q.new_ones(B, L, self.num_heads) + if self.allow_neg_eigval: + beta = beta * 2.0 + + # -> (B, H, L, D) + q = q.permute(0, 2, 1, 3) + k = k.permute(0, 2, 1, 3) + v = v.permute(0, 2, 1, 3) + beta = beta.permute(0, 2, 1) # (B, H, L) + + q = q * (self.head_k_dim**-0.5) + + o = self._delta_rule_recurrent(q, k, v, beta) # (B, H, L, V) + + # -> (B, L, H, V) then RMSNorm per head + o = o.permute(0, 2, 1, 3) + o = self.o_norm(o) + + # merge heads and project + o = o.reshape(B, L, self.value_dim) + o = self.o_proj(o) + return o, None, None + + +class _AttentionBlock(nn.Module): + """DeltaNet attention block with optional state weaving.""" + + def __init__( + self, + d_model: int, + expand_v: float, + state_weaving: bool = False, + is_intermediate: bool = False, + ): + super().__init__() + self.state_weaving = state_weaving + self.is_intermediate = is_intermediate + self.attention = _TorchDeltaNet( + mode="chunk", + d_model=d_model, + expand_k=1.0, 
+ expand_v=expand_v, + num_heads=4, + use_beta=True, + use_gate=False, + use_short_conv=True, + conv_size=4, + allow_neg_eigval=False, + qk_activation="silu", + qk_norm="l2", + layer_idx=0, + ) + self.norm = nn.LayerNorm(d_model) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x_t = x.transpose(1, 2) + residual = x_t + if self.state_weaving and self.is_intermediate: + x_t = x_t.clone() + x_t[:, 0:1, :] = x_t[:, 0:1, :] + x_t[:, -1:, :] + attn_out = self.attention(hidden_states=x_t, attention_mask=None) + if isinstance(attn_out, tuple): + out = attn_out[0] + else: + out = attn_out + out = self.norm(out) + out = out + residual + out = out.transpose(1, 2) + return out diff --git a/darts/models/forecasting/__init__.py b/darts/models/forecasting/__init__.py index a29ff6076b..02a773cfd4 100644 --- a/darts/models/forecasting/__init__.py +++ b/darts/models/forecasting/__init__.py @@ -54,6 +54,7 @@ - :class:`~darts.models.forecasting.nf_model.NeuralForecastModel` Foundation Models (`GlobalForecastingModel `__) - :class:`~darts.models.forecasting.chronos2_model.Chronos2Model` + - :class:`~darts.models.forecasting.reverso_model.ReversoModel` - :class:`~darts.models.forecasting.timesfm2p5_model.TimesFM2p5Model` Ensemble Models (`GlobalForecastingModel `__) - :class:`~darts.models.forecasting.baselines.NaiveEnsembleModel` diff --git a/darts/models/forecasting/reverso_model.py b/darts/models/forecasting/reverso_model.py new file mode 100644 index 0000000000..d69c81ae43 --- /dev/null +++ b/darts/models/forecasting/reverso_model.py @@ -0,0 +1,542 @@ +""" +Reverso +------- + +Reverso is a highly parameter efficient model that achieves comparable performance with models 100x its size. + +A combination of long convolutions and DeltaNet sequence mixing modules are used. + +Reverso can be used the same way as other foundation models (e.g. Chronos2, TimesFM 2.5), with the exception +that it does not yet support any type of covariates or probabilistic forecasts. 
class _ReversoModule(PLForecastingModule):
    def __init__(
        self,
        seq_len: int = 2048,
        input_token_len: int = 2048,
        output_token_len: int = 48,
        d_model: int = 64,
        d_intermediate: int = 256,
        output_bottleneck_dim: int = 48,
        expand_v: float = 1.0,
        state_weaving: int | bool = False,
        gating_kernel_size: int = 3,
        main_module: str = "conv,attn,conv,attn",
        use_norm: int | bool = True,
        learn_bias: int | bool = True,
        use_output_pe: int | bool = False,
        **kwargs,
    ):
        """PyTorch module implementing the Reverso model, ported from
        `shinfxh/reverso <https://github.com/shinfxh/reverso>`_ and
        adapted to the Darts :class:`PLForecastingModule` interface.

        Parameters
        ----------
        seq_len
            Length of the context window.
        input_token_len
            Length of the input sequence (must be equal to ``seq_len``).
        output_token_len
            Number of time steps produced by one forward pass.
        d_model
            Embedding dimension of the model.
        d_intermediate
            Hidden dimension of the MLP blocks.
        output_bottleneck_dim
            Bottleneck dimension of the decoder head.
        expand_v
            Expansion factor of the DeltaNet value dimension.
        state_weaving
            Whether intermediate attention blocks use state weaving.
        gating_kernel_size
            Kernel size of the gating convolutions.
        main_module
            Comma-separated list of layer types, e.g. "conv,attn,conv,attn".
        use_norm
            Whether inputs are min-max normalized.
        learn_bias
            Whether the linear layer of the decoder head has a bias.
        use_output_pe
            Whether the decoder head uses positional embeddings.
        **kwargs
            All parameters required by the :class:`PLForecastingModule` base class.
        """
        # `enable_finetuning` is dropped before calling the base class
        kwargs.pop("enable_finetuning", False)
        super().__init__(**kwargs)

        self.seq_len = seq_len
        self.input_token_len = input_token_len
        self.output_token_len = output_token_len
        self.d_model = d_model
        self.use_norm = bool(use_norm)
        self.use_output_pe = bool(use_output_pe)

        # scalar observation -> d_model features
        self.embedding = nn.Linear(1, d_model, bias=False)

        # encoder: every sequence-mixing layer is followed by an MLP block
        weave = bool(state_weaving)
        layer_types = [name.strip() for name in main_module.split(",")]
        last_index = len(layer_types) - 1

        blocks = []
        for index, layer_type in enumerate(layer_types):
            if layer_type == "attn":
                # first and last layers are never treated as intermediate
                intermediate = 0 < index < last_index
                blocks.append(_AttentionBlock(d_model, expand_v, weave, intermediate))
            elif layer_type == "conv":
                blocks.append(_CNNBlock(d_model, seq_len, gating_kernel_size))
            else:
                raise_log(
                    ValueError(f"Invalid layer type: {layer_type}"),
                    logger,
                )
            blocks.append(_MLPBlock(d_model, d_model, d_intermediate))
        self.layers = nn.Sequential(*blocks)

        # decoder head
        self.head = nn.Linear(
            input_token_len, output_bottleneck_dim, bias=bool(learn_bias)
        )
        self.simple_q_proj = nn.Linear(d_model, d_model)
        self.key_proj = nn.Linear(d_model, d_model)
        self.value_proj = nn.Linear(d_model, d_model)
        self.out_proj = nn.Linear(d_model, 1)

        # optional positional embedding for decoder head (used by full Reverso model)
        if self.use_output_pe:
            self.output_position_embedding = _PositionalEmbedding(
                d_model, max_len=seq_len + output_token_len
            )
            self.post_pe_q_proj = nn.Linear(d_model, d_model)

        # part of the raw output selected by output_chunk_shift / output_chunk_length
        shift = self.output_chunk_shift
        self.future_slice = slice(shift, shift + (self.output_chunk_length or 0))

    def _reverso_forward(self, x: torch.Tensor) -> torch.Tensor:
        """Core Reverso forward pass.

        Parameters
        ----------
        x
            Input tensor of shape (batch, seq_len, 1).

        Returns
        -------
        torch.Tensor
            Predictions of shape (batch, output_token_len, 1).
        """
        # min-max scaling along the time axis; the statistics are detached
        if self.use_norm:
            lower = x.min(1, keepdim=True).values.detach()
            upper = x.max(1, keepdim=True).values.detach()
            # clamp so that constant series do not divide by zero
            span = torch.clamp(upper - lower, min=1e-5).detach()
            x = (x - lower) / span

        # embedding: (B, seq_len, 1) -> (B, d_model, seq_len)
        hidden = self.layers(self.embedding(x).transpose(1, 2))

        # decoder queries come from a linear map over the time axis
        queries = self.simple_q_proj(self.head(hidden).permute(0, 2, 1))

        memory = hidden.permute(0, 2, 1)
        if self.use_output_pe:
            # positions are assigned jointly to the context and the queries
            context_len = memory.shape[1]
            joint = torch.cat([memory, queries], dim=1)
            joint = joint + self.output_position_embedding(joint)
            memory = joint[:, :context_len, :]
            queries = self.post_pe_q_proj(joint[:, context_len:, :])

        keys = self.key_proj(memory)
        values = self.value_proj(memory)
        attended = F.scaled_dot_product_attention(queries, keys, values)
        prediction = self.out_proj(attended)

        # undo the min-max scaling
        if self.use_norm:
            prediction = prediction * span + lower

        return prediction

    @io_processor
    def forward(self, x_in: PLModuleInput, *args, **kwargs) -> Any:
        """Reverso model forward pass.

        Parameters
        ----------
        x_in
            Comes as tuple ``(x_past, x_future, x_static)`` where ``x_past`` is the
            input/past chunk. Input dimensions are ``(n_samples, n_time_steps, n_variables)``.

        Returns
        -------
        torch.Tensor
            The output tensor of shape ``(n_samples, n_time_steps, n_targets, 1)``
            for deterministic forecasts.
        """
        past = x_in[0]
        batch_size, n_steps, n_components = past.shape

        # every component is forecast on its own: (B, L, C) -> (B*C, L)
        flat = past.permute(0, 2, 1).reshape(-1, n_steps)

        # short inputs are left-padded with each series' first value
        n_missing = self.seq_len - n_steps
        if n_missing > 0:
            padding = flat[:, :1].expand(-1, n_missing)
            flat = torch.cat([padding, flat], dim=1)  # (B*C, seq_len)

        # (B*C, seq_len) -> (B*C, seq_len, 1) -> (B*C, output_token_len, 1)
        pred = self._reverso_forward(flat.unsqueeze(-1))

        # (B*C, T, 1) -> (B, C, T, 1) -> (B, T, C, 1)
        pred = pred.reshape(batch_size, n_components, self.output_token_len, 1)
        pred = pred.permute(0, 2, 1, 3)

        # keep only the requested output chunk
        return pred[:, self.future_slice, :, :]
+ + Using this model will automatically download and cache the pre-trained model from HuggingFace Hub. + Alternatively, you can specify a local directory containing the model config and weights using the + ``local_dir`` parameter. + + Three variants are available on HuggingFace Hub: + + - `shinfxh/reverso-nano `_: 200K parameters + - `shinfxh/reverso-small `_: 550K parameters (default) + - `shinfxh/reverso-base `_: 2.6M parameters + + To use a different variant, specify the ``hub_model_name`` parameter. + + .. tip:: + You can perform full or partial fine-tuning of the model by setting the ``enable_finetuning`` parameter. + Read more in the parameter description below and in the `Fine-Tuning Examples + `__. + + Parameters + ---------- + input_chunk_length + Number of time steps in the past to take as a model input (per chunk). Applies to the target + series. For Reverso, ``input_chunk_length`` must be less than or equal to the model's context + length (2048 for all Reverso variants). + output_chunk_length + Number of time steps predicted at once (per chunk) by the internal model. It is not the same + as forecast horizon ``n`` used in ``predict()``, which is the desired number of prediction points + generated using either a one-shot- or autoregressive forecast. Setting ``n <= output_chunk_length`` + prevents auto-regression. + For Reverso, ``output_chunk_length + output_chunk_shift`` must be less than or equal to the + model's output token length (48 for all Reverso variants). + output_chunk_shift + Optionally, the number of steps to shift the start of the output chunk into the future (relative to the + input chunk end). This will create a gap between the input and output. Predictions will start + ``output_chunk_shift`` steps after the end of the target ``series``. If ``output_chunk_shift`` is set, + the model cannot generate autoregressive predictions (``n > output_chunk_length``). + hub_model_name + The model ID on HuggingFace Hub. 
Default: ``"shinfxh/reverso-small"``. + Other available variants: ``"shinfxh/reverso-nano"`` and ``"shinfxh/reverso-base"``. + hub_model_revision + The model version to use. This can be a branch name, tag name, or commit hash. + local_dir + Optional local directory to load the pre-downloaded model. If specified and the directory is empty, the + model will be downloaded from HuggingFace Hub and saved to this directory. Default is ``None``, which will + use a cache directory managed by ``huggingface_hub`` instead. Note that this is different from the + ``work_dir`` parameter used for saving model checkpoints during fine-tuning. + **kwargs + Optional arguments to initialize the pytorch_lightning.Module, pytorch_lightning.Trainer, and + Darts' :class:`TorchForecastingModel`. + + loss_fn + PyTorch loss function used for fine-tuning. Default: ``nn.MSELoss()``. + torch_metrics + A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found + at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. + optimizer_cls + The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``. + optimizer_kwargs + Optionally, some keyword arguments for the PyTorch optimizer (e.g., ``{'lr': 1e-3}`` + for specifying a learning rate). Otherwise, the default values of the selected ``optimizer_cls`` + will be used. Default: ``None``. + lr_scheduler_cls + Optionally, the PyTorch learning rate scheduler class to be used. Specifying ``None`` corresponds + to using a constant learning rate. Default: ``None``. + lr_scheduler_kwargs + Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``. + batch_size + Number of time series (input and output sequences) used in each training pass. Default: ``32``. + n_epochs + Number of epochs over which to train the model. Default: ``100``. + model_name + Name of the model. Used for creating checkpoints and saving tensorboard data. 
If not specified, + defaults to the following string ``"YYYY-mm-dd_HH_MM_SS_torch_model_run_PID"``, where the initial part + of the name is formatted with the local date and time, while PID is the processed ID (preventing models + spawned at the same time by different processes to share the same model_name). E.g., + ``"2021-06-14_09_53_32_torch_model_run_44607"``. + work_dir + Path of the working directory, where to save checkpoints and Tensorboard summaries. + Default: current working directory. + log_tensorboard + If set, use Tensorboard to log the different parameters. The logs will be located in: + ``"{work_dir}/darts_logs/{model_name}/logs/"``. Default: ``False``. + nr_epochs_val_period + Number of epochs to wait before evaluating the validation loss (if a validation + ``TimeSeries`` is passed to the :func:`fit()` method). Default: ``1``. + force_reset + If set to ``True``, any previously-existing model with the same name will be reset (all checkpoints will + be discarded). Default: ``False``. + save_checkpoints + Whether to automatically save the untrained model and checkpoints from training. + To load the model from checkpoint, call :func:`MyModelClass.load_from_checkpoint()`, where + :class:`MyModelClass` is the :class:`TorchForecastingModel` class that was used (such as :class:`TFTModel`, + :class:`NBEATSModel`, etc.). If set to ``False``, the model can still be manually saved using + :func:`save()` and loaded using :func:`load()`. Default: ``False``. + add_encoders + A large number of past and future covariates can be automatically generated with `add_encoders`. + This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that + will be used as index encoders. Additionally, a transformer such as Darts' :class:`Scaler` can be added to + transform the generated covariates. This happens all under one hood and only needs to be specified at + model creation. 
+ Read :meth:`SequentialEncoder ` to find out more about + ``add_encoders``. Default: ``None``. An example showing some of ``add_encoders`` features: + + .. highlight:: python + .. code-block:: python + + def encode_year(idx): + return (idx.year - 1950) / 50 + + add_encoders={ + 'cyclic': {'future': ['month']}, + 'datetime_attribute': {'future': ['hour', 'dayofweek']}, + 'position': {'past': ['relative'], 'future': ['relative']}, + 'custom': {'past': [encode_year]}, + 'transformer': Scaler(), + 'tz': 'CET' + } + .. + random_state + Controls the randomness of the weights initialization and reproducible forecasting. + pl_trainer_kwargs + By default :class:`TorchForecastingModel` creates a PyTorch Lightning Trainer with several useful presets + that performs the training, validation and prediction processes. These presets include automatic + checkpointing, tensorboard logging, setting the torch device and more. + With ``pl_trainer_kwargs`` you can add additional kwargs to instantiate the PyTorch Lightning trainer + object. Check the `PL Trainer documentation + `__ for more information about the + supported kwargs. Default: ``None``. + Running on GPU(s) is also possible using ``pl_trainer_kwargs`` by specifying keys ``"accelerator", + "devices", and "auto_select_gpus"``. Some examples for setting the devices inside the ``pl_trainer_kwargs`` + dict: + + - ``{"accelerator": "cpu"}`` for CPU, + - ``{"accelerator": "gpu", "devices": [i]}`` to use only GPU ``i`` (``i`` must be an integer), + - ``{"accelerator": "gpu", "devices": -1, "auto_select_gpus": True}`` to use all available GPUS. + + For more info, see here: + https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html#trainer-flags , and + https://pytorch-lightning.readthedocs.io/en/stable/accelerators/gpu_basic.html#train-on-multiple-gpus + + With parameter ``"callbacks"`` you can add custom or PyTorch-Lightning built-in callbacks to Darts' + :class:`TorchForecastingModel`. 
Below is an example for adding EarlyStopping to the training process. + The model will stop training early if the validation loss `val_loss` does not improve beyond + specifications. For more information on callbacks, visit: + `PyTorch Lightning Callbacks + <https://pytorch-lightning.readthedocs.io/en/stable/extensions/callbacks.html>`__ + + .. highlight:: python + .. code-block:: python + + from pytorch_lightning.callbacks.early_stopping import EarlyStopping + + # stop training when validation loss does not decrease more than 0.05 (`min_delta`) over + # a period of 5 epochs (`patience`) + my_stopper = EarlyStopping( + monitor="val_loss", + patience=5, + min_delta=0.05, + mode='min', + ) + + pl_trainer_kwargs={"callbacks": [my_stopper]} + .. + + Note that you can also use a custom PyTorch Lightning Trainer for training and prediction with optional + parameter ``trainer`` in :func:`fit()` and :func:`predict()`. + show_warnings + Whether to show warnings raised from PyTorch Lightning. Useful to detect potential issues with + your forecasting use case. Default: ``False``. + enable_finetuning + Enables model fine-tuning. Only effective if not ``None``. + If a bool, specifies whether to perform full fine-tuning / training (all parameters are updated) or keep + all parameters frozen. If a dict, specifies which parameters to fine-tune. Must only contain one key-value + record. Can be used to: + + - Unfreeze specific parameters, while keeping everything else frozen: + ``{"unfreeze": ["param.name.patterns.*"]}`` + - Freeze specific parameters, while keeping everything else unfrozen: + ``{"freeze": ["param.name.patterns.*"]}`` + + Default: ``None``. + + References + ---------- + .. [1] X. Fu, Y. Li, G. Papaioannou, Y. Kim. "Reverso: Efficient Time Series Foundation Models for + Zero-shot Forecasting", 2026. arXiv https://arxiv.org/abs/2602.17634. 
+ + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import ReversoModel + >>> # load data in float32 format (macOS issues with float64 and PyTorch) + >>> series = WeatherDataset().load().astype("float32") + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:200] + >>> model = ReversoModel( + >>> input_chunk_length=96, + >>> output_chunk_length=48, + >>> ) + >>> # calling fit is still mandatory to ensure consistent number of components; however, + >>> # ReversoModel is training-free and the model weights are not updated + >>> model.fit(target) + >>> pred = model.predict(48) + + .. note:: + Reverso is licensed under the `MIT License `_, + Copyright (c) 2026 Xinghong Fu, Yanhong Li, Georgios Papaioannou, Yoon Kim. + By using this model, you agree to the terms and conditions of the license. + .. note:: + Reverso does not support covariates natively. For multivariate time series, each component + is forecasted independently. + .. warning:: + CPU inference is significantly slower than GPU due to the use of torch Conv instead of + flashfft and sequential delta-rule computation instead of fla implementation. GPU is + recommended for production use. See https://github.com/shinfxh/reverso/. 
+ """ + hf_connector = HuggingFaceConnector( + model_name=hub_model_name, + model_revision=hub_model_revision, + local_dir=local_dir, + ) + + # load model config for validation + config = hf_connector.load_config() + + # validate input_chunk_length against model's context length + context_length = config["seq_len"] + if input_chunk_length > context_length: + raise_log( + ValueError( + f"`input_chunk_length` {input_chunk_length} cannot be greater than " + f"model's context length {context_length}" + ), + logger, + ) + + # validate output_chunk_length + output_chunk_shift against model's output length + prediction_length = config["output_token_len"] + if output_chunk_length + output_chunk_shift > prediction_length: + raise_log( + ValueError( + f"`output_chunk_length` {output_chunk_length} plus `output_chunk_shift` " + f"{output_chunk_shift} cannot be greater than model's maximum prediction " + f"length {prediction_length}" + ), + logger, + ) + + self.hf_connector = hf_connector + super().__init__(**kwargs) + + def _create_model(self, train_sample: TorchTrainingSample) -> PLForecastingModule: + pl_module_params = self.pl_module_params or {} + return self.hf_connector.load_model( + module_class=_ReversoModule, + pl_module_params=pl_module_params, + ) + + @property + def supports_past_covariates(self) -> bool: + return False + + @property + def supports_future_covariates(self) -> bool: + return False diff --git a/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_full/config.json b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_full/config.json new file mode 100644 index 0000000000..14b47613d1 --- /dev/null +++ b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_full/config.json @@ -0,0 +1,15 @@ +{ + "seq_len": 32, + "input_token_len": 32, + "output_token_len": 8, + "d_model": 8, + "d_intermediate": 16, + "output_bottleneck_dim": 8, + "expand_v": 1.0, + "state_weaving": 1, + "gating_kernel_size": 3, + "main_module": 
"conv,attn,conv,attn,conv,attn,conv,attn", + "use_norm": true, + "learn_bias": 1, + "use_output_pe": true +} \ No newline at end of file diff --git a/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_full/model.safetensors b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_full/model.safetensors new file mode 100644 index 0000000000..ee71e46adc Binary files /dev/null and b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_full/model.safetensors differ diff --git a/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_nano/config.json b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_nano/config.json new file mode 100644 index 0000000000..4a7f0306f1 --- /dev/null +++ b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_nano/config.json @@ -0,0 +1,15 @@ +{ + "seq_len": 32, + "input_token_len": 32, + "output_token_len": 8, + "d_model": 8, + "d_intermediate": 16, + "output_bottleneck_dim": 8, + "expand_v": 1.0, + "state_weaving": 1, + "gating_kernel_size": 3, + "main_module": "conv,attn", + "use_norm": true, + "learn_bias": 1, + "use_output_pe": false +} \ No newline at end of file diff --git a/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_nano/model.safetensors b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_nano/model.safetensors new file mode 100644 index 0000000000..31345f47ac Binary files /dev/null and b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_nano/model.safetensors differ diff --git a/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_small/config.json b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_small/config.json new file mode 100644 index 0000000000..4dc0f5c472 --- /dev/null +++ b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_small/config.json @@ -0,0 +1,15 @@ +{ + "seq_len": 32, + "input_token_len": 32, + "output_token_len": 8, + "d_model": 8, + "d_intermediate": 16, + "output_bottleneck_dim": 
8, + "expand_v": 1.0, + "state_weaving": 1, + "gating_kernel_size": 3, + "main_module": "conv,attn,conv,attn", + "use_norm": true, + "learn_bias": 1, + "use_output_pe": false +} \ No newline at end of file diff --git a/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_small/model.safetensors b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_small/model.safetensors new file mode 100644 index 0000000000..e6dd1fef89 Binary files /dev/null and b/darts/tests/models/forecasting/artefacts/reverso/tiny_reverso_small/model.safetensors differ diff --git a/darts/tests/models/forecasting/test_foundation.py b/darts/tests/models/forecasting/test_foundation.py index 093fb7ddd5..4c5cbc5796 100644 --- a/darts/tests/models/forecasting/test_foundation.py +++ b/darts/tests/models/forecasting/test_foundation.py @@ -18,7 +18,7 @@ allow_module_level=True, ) -from darts.models import Chronos2Model, TimesFM2p5Model +from darts.models import Chronos2Model, ReversoModel, TimesFM2p5Model def generate_series(n_variables: int, length: int, prefix: str): @@ -456,3 +456,136 @@ def test_finetuning_all_models(self, config): preds = model.predict(n=6, predict_likelihood_parameters=True) assert preds.shape == (6, self.series.n_components * len(quantiles), 1) + + +reverso_artefacts_dir = (Path(__file__).parent / "artefacts" / "reverso").absolute() + +reverso_variant_dirs = { + "shinfxh/reverso-nano": reverso_artefacts_dir / "tiny_reverso_nano", + "shinfxh/reverso-small": reverso_artefacts_dir / "tiny_reverso_small", + "shinfxh/reverso-base": reverso_artefacts_dir / "tiny_reverso_full", +} + +# default variant for tests that don't parametrize +reverso_local_dir = reverso_variant_dirs["shinfxh/reverso-small"] + + +def reverso_mock_download( + repo_id: str, + filename: str, + revision: str | None, + local_dir: str | Path | None, + **kwargs, +): + variant_dir = reverso_variant_dirs.get(repo_id, reverso_local_dir) + path = variant_dir / filename + if local_dir is None: + return 
str(path) + else: + dest_path = Path(local_dir) / filename + shutil.copy(path, dest_path) + return str(dest_path) + + +class TestReversoModel: + series = generate_series(n_variables=2, length=100, prefix="A") + + @patch( + "darts.models.components.huggingface_connector.hf_hub_download", + side_effect=reverso_mock_download, + ) + @pytest.mark.parametrize( + "hub_model_name", + ["shinfxh/reverso-nano", "shinfxh/reverso-small", "shinfxh/reverso-base"], + ) + def test_all_variants(self, mock_method, hub_model_name): + """Test that all three Reverso variants can load, fit, and predict.""" + model = ReversoModel( + input_chunk_length=12, + output_chunk_length=6, + hub_model_name=hub_model_name, + **tfm_kwargs, + ) + mock_method.assert_called() + + with patch("pytorch_lightning.Trainer.fit") as mock_fit: + model.fit(series=self.series) + mock_fit.assert_not_called() + + assert model.model_created + assert not model.supports_probabilistic_prediction + + pred = model.predict(n=6) + assert isinstance(pred, TimeSeries) + assert len(pred) == 6 + assert pred.n_components == self.series.n_components + + @patch( + "darts.models.components.huggingface_connector.hf_hub_download", + side_effect=reverso_mock_download, + ) + def test_invalid_params(self, mock_method): + # input_chunk_length exceeds model's seq_len (32) + with pytest.raises(ValueError, match="cannot be greater than"): + _ = ReversoModel( + input_chunk_length=64, + output_chunk_length=6, + **tfm_kwargs, + ) + + # output_chunk_length exceeds model's output_token_len (8) + with pytest.raises(ValueError, match="cannot be greater than"): + _ = ReversoModel( + input_chunk_length=12, + output_chunk_length=10, + **tfm_kwargs, + ) + + @patch( + "darts.models.components.huggingface_connector.hf_hub_download", + side_effect=reverso_mock_download, + ) + def test_local_dir(self, mock_method): + model = ReversoModel( + input_chunk_length=12, + output_chunk_length=6, + local_dir=reverso_local_dir, + **tfm_kwargs, + ) + 
model.fit(series=self.series) + pred = model.predict(n=6) + assert isinstance(pred, TimeSeries) + assert len(pred) == 6 + assert pred.n_components == self.series.n_components + + @patch( + "darts.models.components.huggingface_connector.hf_hub_download", + side_effect=reverso_mock_download, + ) + def test_no_covariates(self, mock_method): + model = ReversoModel( + input_chunk_length=12, + output_chunk_length=6, + **tfm_kwargs, + ) + assert model.supports_past_covariates is False + assert model.supports_future_covariates is False + + @patch( + "darts.models.components.huggingface_connector.hf_hub_download", + side_effect=reverso_mock_download, + ) + def test_autoregressive_prediction(self, mock_method): + """Test that predictions with n > output_chunk_length work via autoregression.""" + model = ReversoModel( + input_chunk_length=12, + output_chunk_length=6, + **tfm_kwargs, + ) + model.fit(series=self.series) + + # n=12 > output_chunk_length=6, requires autoregression + pred = model.predict(n=12) + assert isinstance(pred, TimeSeries) + assert len(pred) == 12 + assert pred.n_components == self.series.n_components diff --git a/docs/source/index.rst b/docs/source/index.rst index 900c189ae1..f0b3656903 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -675,6 +675,12 @@ Our regression models are designed to predict continuous numerical values, makin - ✅ ✅ - ✅ - `TimesFM 1.0 paper `_, `Google blog post `_ + * - `ReversoModel `_ + - ✅ 🔴 + - 🔴 🔴 🔴 + - 🔴 🔴 + - ✅ + - `Reverso paper `_, `GitHub `_ * - **Ensemble Models** (`GlobalForecastingModel `_): Model support is dependent on ensembled forecasting models and the ensemble model itself - - diff --git a/docs/userguide/covariates.md b/docs/userguide/covariates.md index 89a493134d..28db308510 100644 --- a/docs/userguide/covariates.md +++ b/docs/userguide/covariates.md @@ -159,6 +159,7 @@ GFMs are models that can be trained on multiple target (and covariate) time seri | 
[NeuralForecastModel](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.nf_model.html#darts.models.forecasting.nf_model.NeuralForecastModel) (g) | ✅ | ✅ | ✅ | | [Chronos2Model](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.chronos2_model.html#darts.models.forecasting.chronos2_model.Chronos2Model) | ✅ | ✅ | | | [TimesFM2p5Model](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.timesfm2p5_model.html#darts.models.forecasting.timesfm2p5_model.TimesFM2p5Model) | | | | +| [ReversoModel](https://unit8co.github.io/darts/generated_api/darts.models.forecasting.reverso_model.html#darts.models.forecasting.reverso_model.ReversoModel) | | | | | Ensemble Models (h) | ✅ | ✅ | ✅ | | Conformal Prediction Models (i) | ✅ | ✅ | ✅ |