diff --git a/docs/changelogs/v0.0.25.md b/docs/changelogs/v0.0.25.md index 3565fc5..e43674d 100644 --- a/docs/changelogs/v0.0.25.md +++ b/docs/changelogs/v0.0.25.md @@ -1,5 +1,6 @@ ### Features + * **TimeGPT finetuning**: Finetuning is now supported for TimeGPT. You can adapt the pre-trained model to your data before forecasting via `TimeGPTFinetuningConfig`, with options for loss function and finetuning depth. See [#332](https://github.com/TimeCopilot/timecopilot/pull/332) and the [Finetuning Foundation Models](https://timecopilot.dev/examples/finetuning/) example for a full walkthrough. ```python diff --git a/docs/changelogs/v0.0.26.md b/docs/changelogs/v0.0.26.md index 44d9633..891b1c2 100644 --- a/docs/changelogs/v0.0.26.md +++ b/docs/changelogs/v0.0.26.md @@ -1,6 +1,29 @@ ### Features -* **Prediction intervals for AutoLGBM, AutoNHITS, and AutoTFT**: These models now support quantile forecasts via the `quantiles` parameter. Pass a list of floats between 0 and 1 to receive additional output columns named `model-q-{percentile}`. Note that `level` is not supported for these models; use `quantiles` instead. +* **New ML models**: Added 7 new auto ML models powered by `mlforecast`'s hyperparameter optimization: `AutoLinearRegression`, `AutoXGBoost`, `AutoRidge`, `AutoLasso`, `AutoElasticNet`, `AutoRandomForest`, and `AutoCatboost`. All models support `quantiles` for probabilistic forecasts via conformal prediction and follow the same interface as the existing `AutoLGBM`. + + ```python + import pandas as pd + from timecopilot.models.ml import ( + AutoLinearRegression, + AutoXGBoost, + AutoRidge, + AutoLasso, + AutoElasticNet, + AutoRandomForest, + AutoCatboost, + ) + + df = pd.read_csv( + "https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv", + parse_dates=["ds"], + ) + + model = AutoRidge() + fcst_df = model.forecast(df, h=12, quantiles=[0.1, 0.5, 0.9]) + ``` + +* **Quantile forecasts for AutoLGBM, AutoNHITS, and AutoTFT**: These models now support quantile forecasts via the `quantiles` parameter. Pass a list of floats between 0 and 1 to receive additional output columns named `model-q-{percentile}`. Note that `level` is not supported for these models; use `quantiles` instead. - `AutoLGBM` computes prediction intervals via conformal prediction using cross-validation residuals. - `AutoNHITS` and `AutoTFT` are trained with [`MQLoss`](https://nixtla.github.io/neuralforecast/losses.pytorch.html) when quantiles are requested. @@ -10,8 +33,10 @@ from timecopilot.models.ml import AutoLGBM from timecopilot.models.neural import AutoNHITS, AutoTFT - df = pd.read_csv("AirPassengers.csv", parse_dates=["ds"]) - df.insert(0, "unique_id", "AirPassengers") + df = pd.read_csv( + "https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv", + parse_dates=["ds"], + ) model = AutoLGBM() fcst_df = model.forecast(df, h=12, quantiles=[0.1, 0.5, 0.9]) diff --git a/pyproject.toml b/pyproject.toml index cca0155..b6309e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ dependencies = [ "accelerate>=1.10.1", "arch>=7.2.0", "black>=25.9.0", + "catboost>=1.2.10", "datasets>=4.1.1", "fire", "fsspec>=2025.9.0", @@ -103,6 +104,7 @@ dependencies = [ "tsfeatures>=0.4.5", "utilsforecast[plotting]>=0.2.15", "wandb==0.22.1", + "xgboost>=3.2.0", ] description = "The GenAI Forecasting Agent · LLMs × Time Series Foundation Models" license = "MIT" diff --git a/tests/models/conftest.py b/tests/models/conftest.py index 6ed9735..7b9033b 100644 --- a/tests/models/conftest.py +++ b/tests/models/conftest.py @@ -9,7 +9,12 @@ from timecopilot.models.foundation.patchtst_fm import PatchTSTFM from timecopilot.models.foundation.timesfm import TimesFM from timecopilot.models.foundation.toto import Toto -from timecopilot.models.ml import AutoLGBM +from timecopilot.models.ml import ( + AutoElasticNet, + AutoLGBM, + AutoLinearRegression, + AutoXGBoost, +) from timecopilot.models.neural import AutoNHITS, AutoTFT from timecopilot.models.prophet import Prophet from timecopilot.models.stats import ( @@ -39,6 +44,9 @@ def disable_mps_session(monkeypatch): models = [ AutoLGBM(num_samples=2, cv_n_windows=2), + AutoLinearRegression(num_samples=2, cv_n_windows=2), + AutoXGBoost(num_samples=2, cv_n_windows=2), + AutoElasticNet(num_samples=2, cv_n_windows=2), AutoNHITS( num_samples=2, config=dict( diff --git a/tests/models/test_models.py b/tests/models/test_models.py index f120241..821bc87 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -99,8 +99,18 @@ def test_freq_inferred_correctly(model, freq): ) @pytest.mark.parametrize("h", [1, 12]) def test_correct_forecast_dates(model, freq, h): - if model.alias in ["AutoLGBM", "AutoNHITS", "AutoTFT"]: - # AutoLGBM requires a certain minimum length + _ml_auto_aliases = { + "AutoLGBM", + "AutoLinearRegression", + "AutoXGBoost", + "AutoRidge", + "AutoLasso", + "AutoElasticNet", + "AutoRandomForest", + "AutoCatboost", + } + if model.alias in _ml_auto_aliases | {"AutoNHITS", "AutoTFT"}: + # These auto ML and neural models require a longer minimum series length sizes_per_freq = { freq: 1_000 for freq in ["10S", "10T", "15T", "5T", "H", "Q-DEC"] } @@ -231,11 +241,19 @@ def test_using_quantiles(model): def test_using_level(model): level = [0, 20, 40, 60, 80] # corresponds to qs [0.1, 0.2, ..., 0.9] df = generate_series(n_series=2, freq="D") - if model.alias in [ + _level_unsupported = { "AutoLGBM", + "AutoLinearRegression", + "AutoXGBoost", + "AutoRidge", + "AutoLasso", + "AutoElasticNet", + "AutoRandomForest", + "AutoCatboost", "AutoNHITS", "AutoTFT", - ]: + } + if model.alias in _level_unsupported: # These models do not support levels yet with pytest.raises(ValueError) as excinfo: model.forecast( diff --git a/timecopilot/models/__init__.py b/timecopilot/models/__init__.py index 02bd57a..b9d819c 100644 --- a/timecopilot/models/__init__.py +++ b/timecopilot/models/__init__.py @@ -1,3 +1,13 @@ +from .ml import ( + AutoCatboost, + AutoElasticNet, + AutoLasso, + AutoLGBM, + AutoLinearRegression, + AutoRandomForest, + AutoRidge, + AutoXGBoost, +) from .stats import ( ADIDA, IMAPA, @@ -14,10 +24,18 @@ __all__ = [ "ADIDA", + "AutoCatboost", + "AutoElasticNet", "IMAPA", "AutoARIMA", "AutoCES", "AutoETS", + "AutoLasso", + "AutoLGBM", + "AutoLinearRegression", + "AutoRandomForest", + "AutoRidge", + "AutoXGBoost", "CrostonClassic", "DynamicOptimizedTheta", "HistoricAverage", diff --git a/timecopilot/models/ml.py b/timecopilot/models/ml.py index 6981ab4..1ad21fc 100644 --- a/timecopilot/models/ml.py +++ b/timecopilot/models/ml.py @@ -1,7 +1,33 @@ import os import pandas as pd -from mlforecast.auto import AutoLightGBM, AutoMLForecast +from mlforecast.auto import ( + AutoCatboost as _AutoCatboost, +) +from mlforecast.auto import ( + AutoElasticNet as _AutoElasticNet, +) +from mlforecast.auto import ( + AutoLasso as _AutoLasso, +) +from mlforecast.auto import ( + AutoLightGBM as _AutoLightGBM, +) +from mlforecast.auto import ( + AutoLinearRegression as _AutoLinearRegression, +) +from mlforecast.auto import ( + AutoMLForecast, +) +from mlforecast.auto import ( + AutoRandomForest as _AutoRandomForest, +) +from mlforecast.auto import ( + AutoRidge as _AutoRidge, +) +from mlforecast.auto import ( + AutoXGBoost as _AutoXGBoost, +) from mlforecast.utils import PredictionIntervals from .utils.forecaster import Forecaster, QuantileConverter, get_seasonality @@ -9,12 +35,54 @@ os.environ["NIXTLA_ID_AS_COL"] = "true" +def run_automlforecast_model( + model, + model_name: str, + df: pd.DataFrame, + h: int, + freq: str, + alias: str, + num_samples: int, + cv_n_windows: int, + level: list[int | float] | None, + quantiles: list[float] | None, +) -> pd.DataFrame: + if level is not None and quantiles is not None: + raise ValueError( + "You must not provide both `level` and `quantiles` simultaneously." + ) + if level is not None: + raise ValueError( + f"Level is not supported for {alias}. " "Please use `quantiles` instead." + ) + qc = QuantileConverter(level=None, quantiles=quantiles) + mf = AutoMLForecast( + models=[model], + freq=freq, + season_length=get_seasonality(freq), + num_threads=-1, + ) + prediction_intervals = ( + PredictionIntervals(n_windows=cv_n_windows) if qc.level is not None else None + ) + mf.fit( + df=df, + n_windows=cv_n_windows, + h=h, + num_samples=num_samples, + prediction_intervals=prediction_intervals, + ) + fcst_df = mf.predict(h=h, level=qc.level) + fcst_df.columns = [c.replace(model_name, alias) for c in fcst_df.columns] + fcst_df = qc.maybe_convert_level_to_quantiles(fcst_df, [alias]) + return fcst_df + + class AutoLGBM(Forecaster): """AutoLGBM forecaster using AutoMLForecast with LightGBM. Notes: - - Level and quantiles are not supported for AutoLGBM yet. Please open - an issue if you need this feature. + - Level is not supported. Use `quantiles` for probabilistic forecasts. - AutoLGBM requires a minimum length for some frequencies. """ @@ -79,38 +147,606 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ - if level is not None and quantiles is not None: - raise ValueError( - "You must not provide both `level` and `quantiles` simultaneously." - ) - if level is not None: - raise ValueError( - "Level is not supported for AutoLGBM. Please use `quantiles` instead." - ) + freq = self._maybe_infer_freq(df, freq) + return run_automlforecast_model( + model=_AutoLightGBM(), + model_name="AutoLightGBM", + df=df, + h=h, + freq=freq, + alias=self.alias, + num_samples=self.num_samples, + cv_n_windows=self.cv_n_windows, + level=level, + quantiles=quantiles, + ) + + +class AutoLinearRegression(Forecaster): + """AutoLinearRegression forecaster using AutoMLForecast with LinearRegression. + + Notes: + - Level is not supported. Use `quantiles` for probabilistic forecasts. + - AutoLinearRegression requires a minimum length for some frequencies. + """ + + def __init__( + self, + alias: str = "AutoLinearRegression", + num_samples: int = 10, + cv_n_windows: int = 5, + ): + self.alias = alias + self.num_samples = num_samples + self.cv_n_windows = cv_n_windows + + def forecast( + self, + df: pd.DataFrame, + h: int, + freq: str | None = None, + level: list[int | float] | None = None, + quantiles: list[float] | None = None, + ) -> pd.DataFrame: + """Generate forecasts for time series data using the model. + + This method produces point forecasts and, optionally, quantile + forecasts. The input DataFrame can contain one or multiple time series + in stacked (long) format. + + Args: + df (pd.DataFrame): + DataFrame containing the time series to forecast. It must + include as columns: + + - "unique_id": an ID column to distinguish multiple series. + - "ds": a time column indicating timestamps or periods. + - "y": a target column with the observed values. + + h (int): + Forecast horizon specifying how many future steps to predict. + freq (str, optional): + Frequency of the time series (e.g. "D" for daily, "M" for + monthly). See [Pandas frequency aliases](https://pandas.pydata.org/ + pandas-docs/stable/user_guide/timeseries.html#offset-aliases) for + valid values. If not provided, the frequency will be inferred + from the data. + level (list[int | float], optional): + Not supported for AutoLinearRegression. Use `quantiles` instead. + quantiles (list[float], optional): + List of quantiles to forecast, expressed as floats between 0 + and 1. Should not be used simultaneously with `level`. When + provided, the output DataFrame will contain additional columns + named in the format "model-q-{percentile}", where {percentile} + = 100 × quantile value. Prediction intervals are computed via + conformal prediction using cross-validation residuals. + + Returns: + pd.DataFrame: + DataFrame containing forecast results. Includes: + + - point forecasts for each timestamp and series. + - quantile forecasts if `quantiles` is specified. + + For multi-series data, the output retains the same unique + identifiers as the input DataFrame. + """ + freq = self._maybe_infer_freq(df, freq) + return run_automlforecast_model( + model=_AutoLinearRegression(), + model_name="AutoLinearRegression", + df=df, + h=h, + freq=freq, + alias=self.alias, + num_samples=self.num_samples, + cv_n_windows=self.cv_n_windows, + level=level, + quantiles=quantiles, + ) + + +class AutoXGBoost(Forecaster): + """AutoXGBoost forecaster using AutoMLForecast with XGBoost. + + Notes: + - Level is not supported. Use `quantiles` for probabilistic forecasts. + - AutoXGBoost requires a minimum length for some frequencies. + - Requires the `xgboost` package to be installed. + """ + + def __init__( + self, + alias: str = "AutoXGBoost", + num_samples: int = 10, + cv_n_windows: int = 5, + ): + self.alias = alias + self.num_samples = num_samples + self.cv_n_windows = cv_n_windows + + def forecast( + self, + df: pd.DataFrame, + h: int, + freq: str | None = None, + level: list[int | float] | None = None, + quantiles: list[float] | None = None, + ) -> pd.DataFrame: + """Generate forecasts for time series data using the model. + + This method produces point forecasts and, optionally, quantile + forecasts. The input DataFrame can contain one or multiple time series + in stacked (long) format. + + Args: + df (pd.DataFrame): + DataFrame containing the time series to forecast. It must + include as columns: + + - "unique_id": an ID column to distinguish multiple series. + - "ds": a time column indicating timestamps or periods. + - "y": a target column with the observed values. + + h (int): + Forecast horizon specifying how many future steps to predict. + freq (str, optional): + Frequency of the time series (e.g. "D" for daily, "M" for + monthly). See [Pandas frequency aliases](https://pandas.pydata.org/ + pandas-docs/stable/user_guide/timeseries.html#offset-aliases) for + valid values. If not provided, the frequency will be inferred + from the data. + level (list[int | float], optional): + Not supported for AutoXGBoost. Use `quantiles` instead. + quantiles (list[float], optional): + List of quantiles to forecast, expressed as floats between 0 + and 1. Should not be used simultaneously with `level`. When + provided, the output DataFrame will contain additional columns + named in the format "model-q-{percentile}", where {percentile} + = 100 × quantile value. Prediction intervals are computed via + conformal prediction using cross-validation residuals. + + Returns: + pd.DataFrame: + DataFrame containing forecast results. Includes: + + - point forecasts for each timestamp and series. + - quantile forecasts if `quantiles` is specified. + + For multi-series data, the output retains the same unique + identifiers as the input DataFrame. + """ + freq = self._maybe_infer_freq(df, freq) + return run_automlforecast_model( + model=_AutoXGBoost(), + model_name="AutoXGBoost", + df=df, + h=h, + freq=freq, + alias=self.alias, + num_samples=self.num_samples, + cv_n_windows=self.cv_n_windows, + level=level, + quantiles=quantiles, + ) + + +class AutoRidge(Forecaster): + """AutoRidge forecaster using AutoMLForecast with Ridge regression. + + Notes: + - Level is not supported. Use `quantiles` for probabilistic forecasts. + - AutoRidge requires a minimum length for some frequencies. + """ + + def __init__( + self, + alias: str = "AutoRidge", + num_samples: int = 10, + cv_n_windows: int = 5, + ): + self.alias = alias + self.num_samples = num_samples + self.cv_n_windows = cv_n_windows + + def forecast( + self, + df: pd.DataFrame, + h: int, + freq: str | None = None, + level: list[int | float] | None = None, + quantiles: list[float] | None = None, + ) -> pd.DataFrame: + """Generate forecasts for time series data using the model. + + This method produces point forecasts and, optionally, quantile + forecasts. The input DataFrame can contain one or multiple time series + in stacked (long) format. + + Args: + df (pd.DataFrame): + DataFrame containing the time series to forecast. It must + include as columns: + + - "unique_id": an ID column to distinguish multiple series. + - "ds": a time column indicating timestamps or periods. + - "y": a target column with the observed values. + + h (int): + Forecast horizon specifying how many future steps to predict. + freq (str, optional): + Frequency of the time series (e.g. "D" for daily, "M" for + monthly). See [Pandas frequency aliases](https://pandas.pydata.org/ + pandas-docs/stable/user_guide/timeseries.html#offset-aliases) for + valid values. If not provided, the frequency will be inferred + from the data. + level (list[int | float], optional): + Not supported for AutoRidge. Use `quantiles` instead. + quantiles (list[float], optional): + List of quantiles to forecast, expressed as floats between 0 + and 1. Should not be used simultaneously with `level`. When + provided, the output DataFrame will contain additional columns + named in the format "model-q-{percentile}", where {percentile} + = 100 × quantile value. Prediction intervals are computed via + conformal prediction using cross-validation residuals. + + Returns: + pd.DataFrame: + DataFrame containing forecast results. Includes: + + - point forecasts for each timestamp and series. + - quantile forecasts if `quantiles` is specified. + + For multi-series data, the output retains the same unique + identifiers as the input DataFrame. + """ + freq = self._maybe_infer_freq(df, freq) + return run_automlforecast_model( + model=_AutoRidge(), + model_name="AutoRidge", + df=df, + h=h, + freq=freq, + alias=self.alias, + num_samples=self.num_samples, + cv_n_windows=self.cv_n_windows, + level=level, + quantiles=quantiles, + ) + + +class AutoLasso(Forecaster): + """AutoLasso forecaster using AutoMLForecast with Lasso regression. + + Notes: + - Level is not supported. Use `quantiles` for probabilistic forecasts. + - AutoLasso requires a minimum length for some frequencies. + """ + + def __init__( + self, + alias: str = "AutoLasso", + num_samples: int = 10, + cv_n_windows: int = 5, + ): + self.alias = alias + self.num_samples = num_samples + self.cv_n_windows = cv_n_windows + + def forecast( + self, + df: pd.DataFrame, + h: int, + freq: str | None = None, + level: list[int | float] | None = None, + quantiles: list[float] | None = None, + ) -> pd.DataFrame: + """Generate forecasts for time series data using the model. + + This method produces point forecasts and, optionally, quantile + forecasts. The input DataFrame can contain one or multiple time series + in stacked (long) format. + + Args: + df (pd.DataFrame): + DataFrame containing the time series to forecast. It must + include as columns: + + - "unique_id": an ID column to distinguish multiple series. + - "ds": a time column indicating timestamps or periods. + - "y": a target column with the observed values. + + h (int): + Forecast horizon specifying how many future steps to predict. + freq (str, optional): + Frequency of the time series (e.g. "D" for daily, "M" for + monthly). See [Pandas frequency aliases](https://pandas.pydata.org/ + pandas-docs/stable/user_guide/timeseries.html#offset-aliases) for + valid values. If not provided, the frequency will be inferred + from the data. + level (list[int | float], optional): + Not supported for AutoLasso. Use `quantiles` instead. + quantiles (list[float], optional): + List of quantiles to forecast, expressed as floats between 0 + and 1. Should not be used simultaneously with `level`. When + provided, the output DataFrame will contain additional columns + named in the format "model-q-{percentile}", where {percentile} + = 100 × quantile value. Prediction intervals are computed via + conformal prediction using cross-validation residuals. + + Returns: + pd.DataFrame: + DataFrame containing forecast results. Includes: + + - point forecasts for each timestamp and series. + - quantile forecasts if `quantiles` is specified. + + For multi-series data, the output retains the same unique + identifiers as the input DataFrame. + """ + freq = self._maybe_infer_freq(df, freq) + return run_automlforecast_model( + model=_AutoLasso(), + model_name="AutoLasso", + df=df, + h=h, + freq=freq, + alias=self.alias, + num_samples=self.num_samples, + cv_n_windows=self.cv_n_windows, + level=level, + quantiles=quantiles, + ) + +class AutoElasticNet(Forecaster): + """AutoElasticNet forecaster using AutoMLForecast with ElasticNet. + + Notes: + - Level is not supported. Use `quantiles` for probabilistic forecasts. + - AutoElasticNet requires a minimum length for some frequencies. + """ + + def __init__( + self, + alias: str = "AutoElasticNet", + num_samples: int = 10, + cv_n_windows: int = 5, + ): + self.alias = alias + self.num_samples = num_samples + self.cv_n_windows = cv_n_windows + + def forecast( + self, + df: pd.DataFrame, + h: int, + freq: str | None = None, + level: list[int | float] | None = None, + quantiles: list[float] | None = None, + ) -> pd.DataFrame: + """Generate forecasts for time series data using the model. + + This method produces point forecasts and, optionally, quantile + forecasts. The input DataFrame can contain one or multiple time series + in stacked (long) format. + + Args: + df (pd.DataFrame): + DataFrame containing the time series to forecast. It must + include as columns: + + - "unique_id": an ID column to distinguish multiple series. + - "ds": a time column indicating timestamps or periods. + - "y": a target column with the observed values. + + h (int): + Forecast horizon specifying how many future steps to predict. + freq (str, optional): + Frequency of the time series (e.g. "D" for daily, "M" for + monthly). See [Pandas frequency aliases](https://pandas.pydata.org/ + pandas-docs/stable/user_guide/timeseries.html#offset-aliases) for + valid values. If not provided, the frequency will be inferred + from the data. + level (list[int | float], optional): + Not supported for AutoElasticNet. Use `quantiles` instead. + quantiles (list[float], optional): + List of quantiles to forecast, expressed as floats between 0 + and 1. Should not be used simultaneously with `level`. When + provided, the output DataFrame will contain additional columns + named in the format "model-q-{percentile}", where {percentile} + = 100 × quantile value. Prediction intervals are computed via + conformal prediction using cross-validation residuals. + + Returns: + pd.DataFrame: + DataFrame containing forecast results. Includes: + + - point forecasts for each timestamp and series. + - quantile forecasts if `quantiles` is specified. + + For multi-series data, the output retains the same unique + identifiers as the input DataFrame. + """ freq = self._maybe_infer_freq(df, freq) - qc = QuantileConverter(level=None, quantiles=quantiles) - mf = AutoMLForecast( - models=[AutoLightGBM()], + return run_automlforecast_model( + model=_AutoElasticNet(), + model_name="AutoElasticNet", + df=df, + h=h, freq=freq, - season_length=get_seasonality(freq), - num_threads=-1, + alias=self.alias, + num_samples=self.num_samples, + cv_n_windows=self.cv_n_windows, + level=level, + quantiles=quantiles, ) - prediction_intervals = ( - PredictionIntervals(n_windows=self.cv_n_windows) - if qc.level is not None - else None + + +class AutoRandomForest(Forecaster): + """AutoRandomForest forecaster using AutoMLForecast with RandomForest. + + Notes: + - Level is not supported. Use `quantiles` for probabilistic forecasts. + - AutoRandomForest requires a minimum length for some frequencies. + """ + + def __init__( + self, + alias: str = "AutoRandomForest", + num_samples: int = 10, + cv_n_windows: int = 5, + ): + self.alias = alias + self.num_samples = num_samples + self.cv_n_windows = cv_n_windows + + def forecast( + self, + df: pd.DataFrame, + h: int, + freq: str | None = None, + level: list[int | float] | None = None, + quantiles: list[float] | None = None, + ) -> pd.DataFrame: + """Generate forecasts for time series data using the model. + + This method produces point forecasts and, optionally, quantile + forecasts. The input DataFrame can contain one or multiple time series + in stacked (long) format. + + Args: + df (pd.DataFrame): + DataFrame containing the time series to forecast. It must + include as columns: + + - "unique_id": an ID column to distinguish multiple series. + - "ds": a time column indicating timestamps or periods. + - "y": a target column with the observed values. + + h (int): + Forecast horizon specifying how many future steps to predict. + freq (str, optional): + Frequency of the time series (e.g. "D" for daily, "M" for + monthly). See [Pandas frequency aliases](https://pandas.pydata.org/ + pandas-docs/stable/user_guide/timeseries.html#offset-aliases) for + valid values. If not provided, the frequency will be inferred + from the data. + level (list[int | float], optional): + Not supported for AutoRandomForest. Use `quantiles` instead. + quantiles (list[float], optional): + List of quantiles to forecast, expressed as floats between 0 + and 1. Should not be used simultaneously with `level`. When + provided, the output DataFrame will contain additional columns + named in the format "model-q-{percentile}", where {percentile} + = 100 × quantile value. Prediction intervals are computed via + conformal prediction using cross-validation residuals. + + Returns: + pd.DataFrame: + DataFrame containing forecast results. Includes: + + - point forecasts for each timestamp and series. + - quantile forecasts if `quantiles` is specified. + + For multi-series data, the output retains the same unique + identifiers as the input DataFrame. + """ + freq = self._maybe_infer_freq(df, freq) + return run_automlforecast_model( + model=_AutoRandomForest(), + model_name="AutoRandomForest", + df=df, + h=h, + freq=freq, + alias=self.alias, + num_samples=self.num_samples, + cv_n_windows=self.cv_n_windows, + level=level, + quantiles=quantiles, ) - mf.fit( + + +class AutoCatboost(Forecaster): + """AutoCatboost forecaster using AutoMLForecast with CatBoost. + + Notes: + - Level is not supported. Use `quantiles` for probabilistic forecasts. + - AutoCatboost requires a minimum length for some frequencies. + - Requires the `catboost` package to be installed. + """ + + def __init__( + self, + alias: str = "AutoCatboost", + num_samples: int = 10, + cv_n_windows: int = 5, + ): + self.alias = alias + self.num_samples = num_samples + self.cv_n_windows = cv_n_windows + + def forecast( + self, + df: pd.DataFrame, + h: int, + freq: str | None = None, + level: list[int | float] | None = None, + quantiles: list[float] | None = None, + ) -> pd.DataFrame: + """Generate forecasts for time series data using the model. + + This method produces point forecasts and, optionally, quantile + forecasts. The input DataFrame can contain one or multiple time series + in stacked (long) format. + + Args: + df (pd.DataFrame): + DataFrame containing the time series to forecast. It must + include as columns: + + - "unique_id": an ID column to distinguish multiple series. + - "ds": a time column indicating timestamps or periods. + - "y": a target column with the observed values. + + h (int): + Forecast horizon specifying how many future steps to predict. + freq (str, optional): + Frequency of the time series (e.g. "D" for daily, "M" for + monthly). See [Pandas frequency aliases](https://pandas.pydata.org/ + pandas-docs/stable/user_guide/timeseries.html#offset-aliases) for + valid values. If not provided, the frequency will be inferred + from the data. + level (list[int | float], optional): + Not supported for AutoCatboost. Use `quantiles` instead. + quantiles (list[float], optional): + List of quantiles to forecast, expressed as floats between 0 + and 1. Should not be used simultaneously with `level`. When + provided, the output DataFrame will contain additional columns + named in the format "model-q-{percentile}", where {percentile} + = 100 × quantile value. Prediction intervals are computed via + conformal prediction using cross-validation residuals. + + Returns: + pd.DataFrame: + DataFrame containing forecast results. Includes: + + - point forecasts for each timestamp and series. + - quantile forecasts if `quantiles` is specified. + + For multi-series data, the output retains the same unique + identifiers as the input DataFrame. + """ + freq = self._maybe_infer_freq(df, freq) + return run_automlforecast_model( + model=_AutoCatboost(), + model_name="AutoCatboost", df=df, - n_windows=self.cv_n_windows, h=h, + freq=freq, + alias=self.alias, num_samples=self.num_samples, - prediction_intervals=prediction_intervals, + cv_n_windows=self.cv_n_windows, + level=level, + quantiles=quantiles, ) - fcst_df = mf.predict(h=h, level=qc.level) - fcst_df.columns = [ - c.replace("AutoLightGBM", self.alias) for c in fcst_df.columns - ] - fcst_df = qc.maybe_convert_level_to_quantiles(fcst_df, [self.alias]) - return fcst_df diff --git a/uv.lock b/uv.lock index 56f293b..32136c9 100644 --- a/uv.lock +++ b/uv.lock @@ -542,6 +542,45 @@ wheels = [ {hash = "sha256:1c76a8960c0041fcc21097e357f882197c79da0dbff766e7317890a65d7d8ba6", size = 11276, upload-time = "2025-08-25T18:57:29.684Z", url = "https://files.pythonhosted.org/packages/6c/56/3124f61d37a7a4e7cc96afc5492c78ba0cb551151e530b54669ddd1436ef/cachetools-6.2.0-py3-none-any.whl"}, ] +[[package]] +dependencies = [ + {marker = "python_full_version < '3.13'", name = "numpy", source = {registry = "https://pypi.org/simple"}, version = "1.26.4"}, + {marker = "python_full_version < '3.13'", name = "pandas", source = {registry = "https://pypi.org/simple"}, version = "2.1.4"}, + {marker = "python_full_version >= '3.13'", name = "numpy", source = {registry = "https://pypi.org/simple"}, version = "2.1.3"}, + {marker = "python_full_version >= '3.13'", name = "pandas", source = {registry = "https://pypi.org/simple"}, version = "2.3.3"}, + {name = "graphviz"}, + {name = "matplotlib"}, + {name = "plotly"}, + {name = "scipy"}, + {name = "six"}, +] +name = "catboost" +sdist = {hash = "sha256:26ae6d423acaf0e9d8160f2477a990431057ed04522d993c2f42dac62743b4f7", size = 39925863, upload-time = "2026-02-18T16:13:29.092Z", url = "https://files.pythonhosted.org/packages/e9/0e/09e8fa0858570fda88090bc3f441b69c18ea3d6f4a02fd41aa5426c157bf/catboost-1.2.10.tar.gz"} +source = {registry = "https://pypi.org/simple"} +version = "1.2.10" +wheels = [ + {hash = "sha256:19de3cb267be3ddb8fd667a87f9e7d3c9ee31783c61ea9e6e6f036f666bddcc3", size = 100245782, upload-time = "2026-02-18T16:11:44.756Z", url = "https://files.pythonhosted.org/packages/e6/4f/7134bf2cfdfe46bbb059fac4ac562ce91586a4eb31ca33cb1b4a3ca298bd/catboost-1.2.10-cp310-cp310-win_amd64.whl"}, + {hash = "sha256:25c9b0dd9afb464efe7ccabf7567241aa566f70e7f77893218cb9fa21663e5d5", size = 96702284, upload-time = "2026-02-18T16:11:34.511Z", url = "https://files.pythonhosted.org/packages/37/5e/4fe404306a4839358e4d196a834765a137be163d2a29b316d01233c3a1e2/catboost-1.2.10-cp310-cp310-manylinux2014_aarch64.whl"}, + {hash = "sha256:39234b3692b6c9002b4a2ac529025fc210dd72feb9b621b27d17c65b7d3e9f92", size = 96704178, upload-time = "2026-02-18T16:12:11.229Z", url = "https://files.pythonhosted.org/packages/33/ae/d33a8feba68fa810b30d70c660e4a2c62299472c2e1aa34406ccce306d13/catboost-1.2.10-cp312-cp312-manylinux2014_aarch64.whl"}, + {hash = "sha256:3efc5e4d414b7c13bff6dd0d6c938cf09bb1445097283c7790e54b8ee461820b", size = 28840256, upload-time = "2026-02-18T16:12:40.153Z", url = "https://files.pythonhosted.org/packages/56/58/f370f6c64db5e7da92e3b88ab62e2df72f113cf5a1eee35b48f69d54accd/catboost-1.2.10-cp314-cp314-macosx_11_0_universal2.whl"}, + {hash = "sha256:41bbe16cab0695978c325a20fa300f92831ed78e9cc8c5fe8047538b4055e98e", size = 100244500, upload-time = "2026-02-18T16:12:04.065Z", url = "https://files.pythonhosted.org/packages/a9/af/36048fdd08eca7876716176c30acf0e7ff1dfb1f53d0b93a021537e26601/catboost-1.2.10-cp311-cp311-win_amd64.whl"}, + {hash = "sha256:42c1b6c7ae5c18cdbe00c8b9493987cc13338fe328baaf1a0b98ddaf58db96a2", size = 97111368, upload-time = "2026-02-18T16:12:32.456Z", url = "https://files.pythonhosted.org/packages/fe/2c/fa0479bd79226f037b495a30696b70741beb198f65227c975005e213aa8e/catboost-1.2.10-cp313-cp313-manylinux2014_x86_64.whl"}, + {hash = "sha256:5319c7f9a7764d7dba04c218fd28383b7267553f83232e8ce8737d6b8d38534d", size = 97152239, upload-time = "2026-02-18T16:11:40.027Z", url = "https://files.pythonhosted.org/packages/bd/c4/2db4b19e21b0620ba8cb706120aeb2649694f96f4b4de7b4678f07a79873/catboost-1.2.10-cp310-cp310-manylinux2014_x86_64.whl"}, + {hash = "sha256:5819a880af6b314f4980e6c26ad0f7552eafcf247d521bc884fe726347fdd87d", size = 97159502, upload-time = "2026-02-18T16:11:59.55Z", url = "https://files.pythonhosted.org/packages/7c/62/5839abf95f9ee4bc2beb1be4a45f1a912859362477a4eb0f4c9d81298f53/catboost-1.2.10-cp311-cp311-manylinux2014_x86_64.whl"}, + {hash = "sha256:59aa166f075f0a5ea57b0ba46e5060bd6a22e849e91e4142f16c2df11295b184", size = 96680675, upload-time = "2026-02-18T16:12:28.407Z", url = "https://files.pythonhosted.org/packages/98/fd/63be2ff7aa9f6a7d63e342f42948259a028bfa50203d5ff687c84804ffb7/catboost-1.2.10-cp313-cp313-manylinux2014_aarch64.whl"}, + {hash = "sha256:5ede858e634d6d0f521bf6dd6fad9374f23d37049ee48e0779ccd2a372632cb1", size = 100201430, upload-time = "2026-02-18T16:12:36.731Z", url = "https://files.pythonhosted.org/packages/69/71/a9e9a06418832fbea9d7cefda585d53395358d498537b6bdd3cf7364cd29/catboost-1.2.10-cp313-cp313-win_amd64.whl"}, + {hash = "sha256:5ffe85f53092219cf65c73c2946426a289ef6f62c119c2bfda52815250d9bcef", size = 96708112, upload-time = "2026-02-18T16:11:53.607Z", url = "https://files.pythonhosted.org/packages/c4/5e/4cb6a2f896b34aaa4afe70491c595f77bef7f9b948d719eda99678847d3d/catboost-1.2.10-cp311-cp311-manylinux2014_aarch64.whl"}, + {hash = "sha256:6b8a7ef11d7a89fc547760cfafeee895011a4b92cc1f60d00235ef80a71158ed", size = 100214655, upload-time = "2026-02-18T16:12:20.046Z", url = "https://files.pythonhosted.org/packages/93/e2/f467a133b37eef2b3d8697d46a6e7f0da24bd3643f5475817c473ffc41dc/catboost-1.2.10-cp312-cp312-win_amd64.whl"}, + {hash = "sha256:7b8cc4ea3a6ac4a8d05f3a79c8ee5454360a0a710fa12444963865ad3f0ddfec", size = 97119495, upload-time = "2026-02-18T16:12:47.557Z", url = "https://files.pythonhosted.org/packages/6b/ac/7effae0e47fd9586e46a796f5af61b730c572570cedee333ee9ba8db85a8/catboost-1.2.10-cp314-cp314-manylinux2014_x86_64.whl"}, + {hash = "sha256:951c5bdf27b8edb6ca624f41134888c666ae68275488803d3c91ce83e154f0c5", size = 101749687, upload-time = "2026-02-18T16:12:51.736Z", url = "https://files.pythonhosted.org/packages/da/b7/8f9e284a9cdd034f01f017dc5dab0da03dc3eac171a2be205745da3becb6/catboost-1.2.10-cp314-cp314-win_amd64.whl"}, + {hash = "sha256:ab2e84237308d62bae236b1ecba2e3867697f96bdbaf0ca68dafc2c886946406", size = 28850304, upload-time = "2026-02-18T16:11:49.153Z", url = "https://files.pythonhosted.org/packages/90/52/b961328a61a31a474c61fdcc1a19a086f7e672fd1becc0f70697a0ccccdb/catboost-1.2.10-cp311-cp311-macosx_11_0_universal2.whl"}, + {hash = "sha256:b27115d5b443048f710001c8ac666892dfe03498492310b00466203c91cc30a5", size = 28884278, upload-time = "2026-02-18T16:12:07.659Z", url = "https://files.pythonhosted.org/packages/bb/52/f5cd568800c87576012d715481730da93bcc34e609c5c204550a9ad0c067/catboost-1.2.10-cp312-cp312-macosx_11_0_universal2.whl"}, + {hash = "sha256:b28f763776e62f50da90dddf73b36399583295032667a7e46fc5c1f2593eb80f", size = 97136498, upload-time = "2026-02-18T16:12:15.339Z", url = "https://files.pythonhosted.org/packages/15/6c/08eabe522ac5cefc605ef81f273d77602130739ec7bcdc0ef192aa0a1f07/catboost-1.2.10-cp312-cp312-manylinux2014_x86_64.whl"}, + {hash = "sha256:bad9a70890cdc591080a908d54a3cd70002ab1e48b2017adff84726da0b3e16d", size = 96688527, upload-time = "2026-02-18T16:12:43.534Z", url = "https://files.pythonhosted.org/packages/9d/74/18597f0b2923e3660cd44f942fe9e7cddaa99afc252bc745c48f79566330/catboost-1.2.10-cp314-cp314-manylinux2014_aarch64.whl"}, + {hash = "sha256:bd3d3b344894f61b5f70124658f302148bb9a51c41d0d5b6c453a72e9dfefc49", size = 28829400, upload-time = "2026-02-18T16:12:23.682Z", url = "https://files.pythonhosted.org/packages/2d/02/3c5f08a7c7969eaa2509d804461db26752fe1c7ecb8ad8510cab51a95fd2/catboost-1.2.10-cp313-cp313-macosx_11_0_universal2.whl"}, + {hash = "sha256:cf54c216f6b3b102e06a5fc42deeb7a2497d622e6bc2e222f586e7e357a942f1", size = 28849868, upload-time = "2026-02-18T16:11:29.502Z", url = "https://files.pythonhosted.org/packages/ca/5b/9086b4183bc3ad17daf4d38489c0c9d4e7e89cd327978e7379aedcd918eb/catboost-1.2.10-cp310-cp310-macosx_11_0_universal2.whl"}, +] + [[package]] name = "certifi" sdist = {hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519, upload-time = "2025-10-05T04:12:15.808Z", url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz"} @@ -1908,6 +1947,15 @@ wheels = [ {hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload-time = "2025-04-14T10:17:01.271Z", url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl"}, ] +[[package]] +name = "graphviz" +sdist = {hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78", size = 200434, upload-time = "2025-06-15T09:35:05.824Z", url = "https://files.pythonhosted.org/packages/f8/b3/3ac91e9be6b761a4b30d66ff165e54439dcd48b83f4e20d644867215f6ca/graphviz-0.21.tar.gz"} +source = {registry = "https://pypi.org/simple"} +version = "0.21" +wheels = [ + {hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z", url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl"}, +] + [[package]] name = "greenlet" sdist = {hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d", size = 188260, upload-time = "2025-08-07T13:24:33.51Z", url = "https://files.pythonhosted.org/packages/03/b8/704d753a5a45507a7aab61f18db9509302ed3d0a27ac7e0359ec2905b1a6/greenlet-3.2.4.tar.gz"} @@ -4337,6 +4385,7 @@ name = "nvidia-nccl-cu12" source = {registry = "https://pypi.org/simple"} version = "2.27.3" wheels = [ + {hash = "sha256:9ddf1a245abc36c550870f26d537a9b6087fb2e2e3d6e0ef03374c6fd19d984f", size = 322397768, upload-time = "2025-06-03T21:57:30.234Z", url = "https://files.pythonhosted.org/packages/4b/7b/8354b784cf73b0ba51e566b4baba3ddd44fe8288a3d39ef1e06cd5417226/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl"}, {hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z", url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"}, ] @@ -7164,6 +7213,7 @@ dependencies = [ {name = "accelerate"}, {name = "arch"}, {name = "black"}, + {name = "catboost"}, {name = "datasets"}, {name = "fire"}, {name = "fsspec"}, @@ -7199,6 +7249,7 @@ dependencies = [ {name = "transformers"}, {name = "tsfeatures"}, {name = "wandb"}, + {name = "xgboost"}, ] name = "timecopilot" source = {editable = "."} @@ -7248,6 +7299,7 @@ requires-dist = [ {name = "accelerate", specifier = ">=1.10.1"}, {name = "arch", specifier = ">=7.2.0"}, {name = "black", specifier = ">=25.9.0"}, + {name = "catboost", specifier = ">=1.2.10"}, {name = "datasets", specifier = ">=4.1.1"}, {name = "fire"}, {name = "fsspec", specifier = ">=2025.9.0"}, @@ -7282,6 +7334,7 @@ requires-dist = [ {name = "torchmetrics", specifier = ">=1.8.2"}, {name = "tsfeatures", specifier = ">=0.4.5"}, {name = "wandb", specifier = "==0.22.1"}, + {name = "xgboost", specifier = ">=3.2.0"}, ] [package.metadata.requires-dev] @@ -8358,6 +8411,25 @@ wheels = [ {hash = "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c", size = 38957, upload-time = "2025-08-12T05:51:54.655Z", url = "https://files.pythonhosted.org/packages/d1/71/e7e7f5670c1eafd9e990438e69d8fb46fa91a50785332e06b560c869454f/wrapt-1.17.3-cp310-cp310-macosx_11_0_arm64.whl"}, ] +[[package]] +dependencies = [ + {marker = "python_full_version < '3.13'", name = "numpy", source = {registry = "https://pypi.org/simple"}, version = "1.26.4"}, + {marker = "python_full_version >= '3.13'", name = "numpy", source = {registry = "https://pypi.org/simple"}, version = "2.1.3"}, + {marker = "sys_platform == 'linux'", name = "nvidia-nccl-cu12"}, + {name = "scipy"}, +] +name = "xgboost" +sdist = {hash = "sha256:99b0e9a2a64896cdaf509c5e46372d336c692406646d20f2af505003c0c5d70d", size = 1263936, upload-time = "2026-02-10T11:03:05.542Z", url = "https://files.pythonhosted.org/packages/91/bb/1eb0242409d22db725d7a88088e6cfd6556829fb0736f9ff69aa9f1e9455/xgboost-3.2.0.tar.gz"} +source = {registry = "https://pypi.org/simple"} +version = "3.2.0" +wheels = [ + {hash = "sha256:0d169736fd836fc13646c7ab787167b3a8110351c2c6bc770c755ee1618f0442", size = 101681668, upload-time = "2026-02-10T10:59:31.202Z", url = "https://files.pythonhosted.org/packages/1f/3d/1661dd114a914a67e3f7ab66fa1382e7599c2a8c340f314ad30a3e2b4d08/xgboost-3.2.0-py3-none-win_amd64.whl"}, + {hash = "sha256:2f661966d3e322536d9c448090a870fcba1e32ee5760c10b7c46bac7a342079a", size = 2507014, upload-time = "2026-02-10T10:50:57.44Z", url = "https://files.pythonhosted.org/packages/2d/49/6e4cdd877c24adf56cb3586bc96d93d4dcd780b5ea1efb32e1ee0de08bae/xgboost-3.2.0-py3-none-macosx_10_15_x86_64.whl"}, + {hash = "sha256:852eabc6d3b3702a59bf78dbfdcd1cb9c4d3a3b6e5ed1f8781d8b9512354fdd2", size = 131100954, upload-time = "2026-02-10T11:02:42.704Z", url = "https://files.pythonhosted.org/packages/96/9f/d9914a7b8df842832850b1a18e5f47aaa071c217cdd1da2ae9deb291018b/xgboost-3.2.0-py3-none-manylinux_2_28_aarch64.whl"}, + {hash = "sha256:99b4a6bbcb47212fec5cf5fbe12347215f073c08967431b0122cfbd1ee70312c", size = 131748579, upload-time = "2026-02-10T10:54:40.424Z", url = "https://files.pythonhosted.org/packages/79/98/679de17c2caa4fd3b0b4386ecf7377301702cb0afb22930a07c142fcb1d8/xgboost-3.2.0-py3-none-manylinux_2_28_x86_64.whl"}, + {hash = "sha256:eabbd40d474b8dbf6cb3536325f9150b9e6f0db32d18de9914fb3227d0bef5b7", size = 2328527, upload-time = "2026-02-10T10:51:17.502Z", url = "https://files.pythonhosted.org/packages/93/f1/c09ef1add609453aa3ba5bafcd0d1c1a805c1263c0b60138ec968f8ec296/xgboost-3.2.0-py3-none-macosx_12_0_arm64.whl"}, +] + [[package]] dependencies = [ {marker = "python_full_version >= '3.11' and python_full_version < '3.13'", name = "numpy", source = {registry = "https://pypi.org/simple"}, version = "1.26.4"},