Source code for causalexplain.estimators.cam.train_GAMboost

"""
This Python version attempts to replicate the functionality of the R function. 
Here are some key points:

1. We use pandas for data manipulation and sklearn for machine learning components.
2. The bbs function in R is approximated using SplineTransformer from scikit-learn.
3. Instead of mboost_fit, we use GradientBoostingRegressor from scikit-learn.
4. The function returns a dictionary with the same keys as the R version.

Note that this is an approximation, as the exact behavior of bbs and mboost_fit in R 
might differ from the Python implementations. You may need to fine-tune parameters 
or use different libraries for a more exact replication of the R function's behavior.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import SplineTransformer



[docs]
def train_GAMboost(X, y, pars=None):
    """_summary_

    Args:
        X (_type_): _description_
        y (_type_): _description_
        pars (_type_, optional): _description_. Defaults to None.

    Returns:
        _type_: _description_
    """
    # Convert X to DataFrame if it's not already
    dat = pd.DataFrame(X)

    # Create spline features for each column
    spline_features = []
    for column in dat.columns:
        spline = SplineTransformer(n_knots=5, degree=3)
        spline_features.append(spline.fit_transform(dat[[column]]))

    # Concatenate all spline features
    X_spline = np.hstack(spline_features)

    # Fit Gradient Boosting model
    gb = GradientBoostingRegressor()
    gb.fit(X_spline, y)

    # Prepare results
    result = {
        'Yfit': gb.predict(X_spline),
        'residuals': y - gb.predict(X_spline),
        'model': gb
    }

    return result