Source code for causalexplain.estimators.cam.selGam

"""
This Python version aims to replicate the functionality of the R function.
Here are some key points about the translation:

1. We use NumPy for array operations.
2. Instead of `gam` from R, we use the `pygam` library (through the
    `train_gam` helper), which provides similar functionality in Python.
3. The p-values are extracted from the fitted GAM model's statistics.
4. The logic for creating and updating `selVec` is adjusted to work with Python's
    0-based indexing.

Note that this translation assumes that the `pygam` library is installed and imported.
You may need to install it using `pip install pygam`.

Also, be aware that there might be some differences in the exact implementation
details between R's `gam` and Python's `pygam`. You may need to fine-tune the GAM
model creation and fitting process to match the exact behavior of the R version.
"""
import numpy as np

from causalexplain.estimators.cam.train_gam import train_gam


def selGam(X, pars=None, verbose=False, k=None):
    """
    Select predictors of one column of ``X`` based on GAM p-values.

    Column ``k - 1`` of ``X`` is used as the response and fitted against the
    remaining columns with ``train_gam``. A predictor is flagged as selected
    when its p-value is strictly below ``pars['cutOffPVal']``.

    Args:
        X (array-like): Data matrix, converted with ``np.asarray``. Input
            with fewer than two dimensions is treated as a single column.
        pars (dict, optional): Options. ``'cutOffPVal'`` is read here and the
            whole dict is forwarded to ``train_gam``. Defaults to
            ``{'cutOffPVal': 0.001, 'numBasisFcts': 10}``.
        verbose (bool, optional): If True, print the p-values and the
            selection flags; also forwarded to ``train_gam``.
            Defaults to False.
        k (int, optional): Position of the response column; ``k - 1`` is
            used as the 0-based column index. Required whenever ``X`` has
            more than one column. Defaults to None.

    Returns:
        list: Boolean selection flags, or an empty list when ``X`` has at
        most one column (in which case no model is fitted).

    Raises:
        TypeError: If ``X`` has more than one column and ``k`` is None.
    """
    if pars is None:
        pars = {'cutOffPVal': 0.001, 'numBasisFcts': 10}

    X = np.asarray(X)
    if X.ndim < 2:
        # A scalar or plain vector has no second axis; treat it as a single
        # column so it takes the "nothing to select from" path below instead
        # of failing on shape[1].
        X = X.reshape(-1, 1)

    n_cols = X.shape[1]
    if n_cols <= 1:
        return []

    if k is None:
        # Same exception type the arithmetic on None would raise, but with a
        # message that names the actual problem.
        raise TypeError(
            "selGam requires an integer k (position of the response column) "
            "when X has more than one column."
        )

    selVec = [False] * n_cols
    predictors = np.delete(X, k - 1, axis=1)
    response = X[:, k - 1]

    # Fitting is delegated to train_gam rather than building the GAM here.
    gam_result = train_gam(predictors, response, pars=pars, verbose=verbose)

    # The last entry is dropped: per the original author's note it is the
    # intercept's p-value, which follows the predictors' p-values.
    pValVec = gam_result['p_values']
    pValVec = pValVec[:-1]

    if len(pValVec) != len(selVec) - 1:
        print("This should never happen (function selGam).")

    cutoff = pars['cutOffPVal']
    selVec_without_k = [p_value < cutoff for p_value in pValVec]

    # NOTE(review): the response column is removed at index k - 1 above, but
    # the flags are re-expanded around index k here. When k equals the number
    # of columns (and the p-value count matches), the result ends up with one
    # entry fewer than X has columns. Kept as is because callers may rely on
    # the current output -- confirm against the R original before changing.
    selVec[:k] = selVec_without_k[:k]
    selVec[k + 1:] = selVec_without_k[k:]

    if verbose:
        print(f". . . . . SelGAM(k={k})")
        print(f". . . . . . Vector of p-values: {pValVec}")
        print(f". . . . . . Selected indices: {selVec_without_k}")

    return selVec