# Source code for causalexplain.estimators.cam.selGam

"""
This Python version aims to replicate the functionality of the R function.
Here are some key points about the translation:

1. We use NumPy for array operations.
2. Instead of `gam` from R, we use `pygam` library which provides similar
    functionality in Python.
3. The p-values are extracted from the fitted GAM model's statistics.
4. The logic for creating and updating `selVec` is adjusted to work with Python's
    0-based indexing.

Note that this translation assumes that the `pygam` library is installed and imported.
You may need to install it using `pip install pygam`.

Also, be aware that there might be some differences in the exact implementation
details between R's `gam` and Python's `pygam`. You may need to fine-tune the GAM
model creation and fitting process to match the exact behavior of the R version.
"""
import numpy as np

from causalexplain.estimators.cam.train_gam import train_gam



# [docs]
def selGam(X, pars=None, verbose=False, k=None):
    """
    Flag which columns of ``X`` are significant predictors of one target
    column, based on the p-values of a GAM fitted by ``train_gam``.

    The target is column ``k - 1`` of ``X``; it is regressed on all the
    remaining columns. A predictor is flagged when its p-value is strictly
    smaller than ``pars['cutOffPVal']``.

    Args:
        X (array-like): Data matrix. It is converted with ``np.asarray`` and
            must be two-dimensional (``X.shape[1]`` is read).
        pars (dict, optional): Options forwarded unchanged to ``train_gam``;
            the key ``'cutOffPVal'`` is also read here as the significance
            threshold. Defaults to
            ``{'cutOffPVal': 0.001, 'numBasisFcts': 10}``.
        verbose (bool, optional): Forwarded to ``train_gam``; when true the
            p-values and the per-predictor flags are also printed.
            Defaults to False.
        k (int, optional): Position of the target column, used as ``k - 1``
            when indexing ``X``. Must be given whenever ``X`` has more than
            one column (``None`` fails on ``k - 1``). Defaults to None.

    Returns:
        list: An empty list when ``X`` has at most one column. Otherwise a
        list of booleans assembled from the per-predictor flags (see the
        review note in the body for how they are positioned).
    """
    if pars is None:
        pars = {'cutOffPVal': 0.001, 'numBasisFcts': 10}

    X = np.asarray(X)
    n_cols = X.shape[1]

    # Guard clause: with a single column there are no predictors to test.
    if n_cols <= 1:
        return []

    # Split the data into the response (target column) and its predictors.
    target = k - 1
    predictors = np.delete(X, target, axis=1)
    response = X[:, target]

    fit = train_gam(predictors, response, pars=pars, verbose=verbose)

    # The last entry of 'p_values' is dropped: per the original comment,
    # PyGAM lists the predictors' p-values followed by the intercept's.
    p_values = fit['p_values'][:-1]

    # Sanity check: exactly one p-value per predictor column is expected.
    if len(p_values) != n_cols - 1:
        print("This should never happen (function selGam).")

    flags = [p_val < pars['cutOffPVal'] for p_val in p_values]

    # NOTE(review): the target column is taken at index ``k - 1`` above, but
    # the two slice assignments below splice the flags around index ``k``.
    # When ``k`` equals the number of columns, the first assignment replaces
    # the whole list with ``flags`` (one entry shorter). Kept exactly as in
    # the original because callers outside this file may depend on it --
    # confirm the intended index base before changing.
    selection = [False] * n_cols
    selection[:k] = flags[:k]
    selection[k + 1:] = flags[k:]

    if verbose:
        print(f". . . . . SelGAM(k={k})")
        print(f". . . . . . Vector of p-values: {p_values}")
        print(f". . . . . . Selected indices: {flags}")

    return selection