Source code for causalexplain.estimators.fci.fci

"""
FCI algorithm.

A graph learner which wraps the implementation of the FCI algorithm
in the (c) causallearn library (https://github.com/py-why/causal-learn).

"""

# pylint: disable=E1101:no-member
# pylint: disable=W0201:attribute-defined-outside-init, W0511:fixme
# pylint: disable=W0106:expression-not-assigned
# pylint: disable=C0103:invalid-name, C0116:missing-function-docstring
# pylint: disable=R0913:too-many-arguments, R0902:too-many-instance-attributes
# pylint: disable=R0914:too-many-locals, R0915:too-many-statements
# pylint: disable=R1702:too-many-branches

import networkx as nx
import numpy as np
import pandas as pd
from causallearn.search.ConstraintBased.FCI import fci

from causalexplain.common import utils
from causalexplain.metrics.compare_graphs import evaluate_graph


class FCI:
    """
    A graph learner which wraps the implementation of the FCI algorithm
    in the (c) causallearn library (https://github.com/py-why/causal-learn).
    """
    def __init__(
            self,
            name: str,
            independence_test_method: str = "fisherz",
            alpha: float = 0.05,
            depth: int = -1,
            max_path_length: int = -1,
            verbose: bool = False,
            background_knowledge=None,
            show_progress: bool = True,
            node_names=None,
            causes_only=True):
        """
        Initialize the FCI algorithm, creating an FCI learner.

        Parameters
        ----------
        name: str, name of the experiment
        independence_test_method: str, name of the independence test method
            to use: [fisherz, chisq, gsq, kci]
            - fisherz: Fisher's Z conditional independence test
            - chisq: Chi-squared conditional independence test
            - gsq: G-squared conditional independence test
            - kci: Kernel-based conditional independence test
        alpha: float, desired significance level (p-value) of the
            independence tests, in (0, 1)
        depth: the depth for the fast adjacency search, or -1 if unlimited
        max_path_length: the maximum length of any discriminating path,
            or -1 if unlimited
        verbose: True if verbose output should be printed or logged
        background_knowledge: background knowledge
        show_progress: True if a progress bar should be shown
        node_names: names of the variables (columns) in the dataset
        causes_only: bool, if True (default), only causes are returned, by
            filtering the PAG and keeping only the edges that indicate a
            causal relationship
        """
        super().__init__()
        self.name = name
        self.independence_test_method = independence_test_method
        self.alpha = alpha
        self.depth = depth
        self.max_path_length = max_path_length
        self.verbose = verbose
        self.background_knowledge = background_knowledge
        self.prog_bar = show_progress
        self.node_names = node_names
        self.causes_only = causes_only

    def fit_predict(self, X: pd.DataFrame, X_test: pd.DataFrame = None,
                    ref_graph: nx.DiGraph = None):
        """Fit the model, store the estimated graph and return the fitted estimator."""
        graph, edges = fci(
            X.values,
            indep_test_method=self.independence_test_method,
            alpha=self.alpha,
            depth=self.depth,
            max_path_length=self.max_path_length,
            verbose=False,
            show_progress=False,
            background_knowledge=self.background_knowledge,
            node_names=self.node_names
        )

        if self.causes_only:
            adjacency_matrix = self.filter_causes_only(adj_matrix=graph.graph)
            self.dag = utils.graph_from_adjacency(
                adjacency_matrix, node_labels=list(X.columns))
        else:
            self.dag = utils.graph_from_adjacency(
                graph.graph, node_labels=list(X.columns))

        self.metrics = evaluate_graph(ref_graph, self.dag)
        return self
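
    # Note: after fit_predict() the estimated graph is available in
    # `self.dag` (a networkx DiGraph) and the comparison against `ref_graph`
    # in `self.metrics`. With `causes_only=True` (the default) the
    # causal-learn matrix is first reduced by `filter_causes_only` below;
    # otherwise `graph.graph` is converted directly.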

    def filter_causes_only(self, adj_matrix):
        """
        Filter the adjacency matrix to include only causal relationships.

        This method processes the input adjacency matrix and creates a new
        matrix that only includes direct causal relationships (i.e., where
        i causes j).

        Parameters
        ----------
        adj_matrix : numpy.ndarray
            The input adjacency matrix to be filtered.

        Returns
        -------
        numpy.ndarray
            A new adjacency matrix with only causal relationships preserved.
        """
        result_matrix = np.zeros_like(adj_matrix)

        # Iterate over each pair (i, j) in the matrix
        for i in range(adj_matrix.shape[0]):
            for j in range(adj_matrix.shape[1]):
                # If G[i, j] = -1 and G[j, i] = 1 (i --> j), or G[i, j] = 2
                # and G[j, i] = 1 (i o-> j), set result_matrix[i, j] to 1
                if (adj_matrix[i, j] == -1 and adj_matrix[j, i] == 1) \
                        or (adj_matrix[i, j] == 2 and adj_matrix[j, i] == 1):
                    result_matrix[i, j] = 1

        return result_matrix
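
# Illustrative example of filter_causes_only (a sketch, assuming causal-learn's
# endpoint encoding where -1 = tail, 1 = arrowhead and 2 = circle): for two
# variables with X0 --> X1 the matrix is [[0, -1], [1, 0]], and only the entry
# meaning "X0 causes X1" survives the filter.
#
#   import numpy as np
#   pag = np.array([[0, -1],
#                   [1,  0]])
#   FCI(name="demo").filter_causes_only(pag)
#   # -> array([[0, 1],
#   #           [0, 0]])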


def main(dataset_name,
         input_path="/Users/renero/phd/data/sachs/",
         output_path="/Users/renero/phd/output/RC4/sachs/compared/",
         save=False,
         **kwargs):
    """
    Create a call to FCI with a sample dataset.
    """
    data = pd.read_csv(f"{input_path}{dataset_name}.csv")
    ref_graph = utils.graph_from_dot_file(f"{input_path}{dataset_name}.dot")

    fci = FCI(name=dataset_name, **kwargs)
    fci.fit_predict(X=data, ref_graph=ref_graph)

    if fci.dag:
        for edge in fci.dag.edges():
            print(edge)
    else:
        for edge in fci.pag.edges():
            print(edge)

    if fci.metrics:
        print(fci.metrics)
    else:
        print("No metrics available")
    # if save:
    #     where_to = utils.save_experiment(rex.name, output_path, rex)
    #     print(f"Saved '{rex.name}' to '{where_to}'")


# Create a call to FCI with a sample dataset.
if __name__ == "__main__":
    main("sachs", alpha=0.75, depth=9)
    # main("rex_generated_linear_1", njobs=1)