Source code for causalexplain.independence.regressors
from typing import List
import numpy as np
import pandas as pd
from pandas import DataFrame
from pygam import LinearGAM
from sklearn.gaussian_process import GaussianProcessRegressor as gpr
from causalexplain.independence.hsic import HSIC
def _fit_GPR(x, y, x_test, y_test):
    """Fit a Gaussian-process regressor on (x, y) and return test residuals.

    A scikit-learn ``GaussianProcessRegressor`` is created with
    ``normalize_y=False``, trained on ``x``/``y`` and evaluated on ``x_test``.

    Args:
        x: Training inputs handed to ``fit``.
        y: Training targets handed to ``fit``.
        x_test: Inputs for which predictions are computed.
        y_test: Observed targets the predictions are subtracted from.

    Returns:
        ``y_test`` minus the predictions, the predictions being reshaped
        into a single column before the subtraction.
    """
    regressor = gpr(normalize_y=False)
    regressor.fit(x, y)
    # Predictions are forced into one column so the subtraction lines up
    # with a column-shaped ``y_test``.
    predicted_column = regressor.predict(x_test).reshape(-1, 1)
    return y_test - predicted_column
def _fit_GAM(x, y, x_test, y_test):
    """Fit a linear GAM on (x, y) and return the residuals on the test set.

    A pyGAM ``LinearGAM`` is tuned with ``gridsearch`` (progress output
    disabled) on ``x``/``y`` and then evaluated on ``x_test``.

    Args:
        x: Training inputs handed to ``gridsearch``.
        y: Training targets handed to ``gridsearch``.
        x_test: Inputs for which predictions are computed.
        y_test: Observed targets the predictions are subtracted from.

    Returns:
        ``y_test`` minus the predictions, the predictions being reshaped
        into a single column before the subtraction (same convention as
        ``_fit_GPR``).
    """
    model = LinearGAM()
    model.gridsearch(x, y, progress=False)
    prediction = model.predict(x_test).reshape(-1, 1)
    # Residuals are observed target minus predicted target. The previous
    # implementation subtracted the prediction from the *inputs* (x_test),
    # leaving y_test unused.
    residuals = y_test - prediction
    return residuals
[docs]
def fit_and_get_residuals(
    X: np.ndarray,
    Y: np.ndarray,
    X_test: np.ndarray = None,
    Y_test: np.ndarray = None,
    method="gpr"
):
    """
    Regress Y on X with the selected method and return the residuals.

    One-dimensional inputs are reshaped into single-column arrays. Gaussian
    noise (mean 0, standard deviation 0.1, one draw per row of X) is added
    to Y before fitting. When either test array is missing, the training
    data (including the noisy Y) is used as the test set.

    Args:
        X: (np.ndarray) Input feature used to predict Y.
        Y: (np.ndarray) Feature to be predicted.
        X_test: (np.ndarray) Inputs on which the residuals are evaluated.
            Replaced by X when X_test or Y_test is None.
        Y_test: (np.ndarray) Targets on which the residuals are evaluated.
            Replaced by the noisy Y when X_test or Y_test is None.
        method: (str) Either "gpr" or "gam".

    Returns:
        The residuals computed by the selected regressor.

    Raises:
        ValueError: If ``method`` is neither "gpr" nor "gam".
    """
    def _as_column(values):
        # Only 1-D inputs are reshaped; anything else (including None,
        # whose ndim is 0) is passed through untouched.
        if np.ndim(values) == 1:
            return values.reshape(-1, 1)
        return values

    X = _as_column(X)
    Y = _as_column(Y)
    X_test = _as_column(X_test)
    Y_test = _as_column(Y_test)

    jitter = np.random.normal(0, .1, X.shape[0]).reshape(-1, 1)
    Y = Y + jitter

    # Fall back to the training data when no complete test set is given.
    test_set_missing = X_test is None or Y_test is None
    if test_set_missing:
        X_test, Y_test = X, Y

    if method == "gpr":
        return _fit_GPR(X, Y, X_test, Y_test)
    if method == "gam":
        return _fit_GAM(X, Y, X_test, Y_test)
    raise ValueError(f"Invalid method: {method}")
[docs]
def run_feature_selection(X: DataFrame, y: str) -> List:
    """
    Score the columns of "X" other than "y" against "y" using HSIC.

    The column named 'y' is separated from the remaining columns, both parts
    are transposed so that each variable becomes a row, and an ``HSIC``
    object is fitted on them.

    Args:
        X: Dataframe with ALL continuous variables, including the target.
        y: the name of the variable in X to be used as target.

    Return:
        The ``stat`` attribute of the fitted ``HSIC`` object (presumably one
        score per remaining variable -- confirm against the HSIC class).

    Raises:
        ValueError: If 'y' is not one of the columns of "X".
    """
    predictor_names = list(X.columns.values)
    predictor_names.remove(y)

    target_frame = pd.DataFrame(X[y], columns=[y])
    predictor_frame = X[predictor_names]

    # HSIC is given variables as rows, hence the transposes.
    target_rows = target_frame.values.T
    predictor_rows = predictor_frame.values.T

    fitted = HSIC().fit(predictor_rows, target_rows)
    return fitted.stat