adding docs

Signed-off-by: Nicholas Parente <[email protected]>
py-why · Jan 3, 2025 · e275326 · e275326
1 parent ff50aba
commit e275326
Show file tree

Hide file tree

Showing 10 changed files with 431 additions and 23 deletions.
diff --git a/docs/source/example_notebooks/dowhy_efficient_backdoor_example.ipynb b/docs/source/example_notebooks/dowhy_efficient_backdoor_example.ipynb
@@ -570,7 +570,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.10"
+   "version": "3.11.7"
   },
   "varInspector": {
    "cols": {

diff --git a/docs/source/example_notebooks/dowhy_generalized_covariate_adjustment_example.ipynb b/docs/source/example_notebooks/dowhy_generalized_covariate_adjustment_example.ipynb
diff --git a/docs/source/example_notebooks/identifying_effects_using_id_algorithm.ipynb b/docs/source/example_notebooks/identifying_effects_using_id_algorithm.ipynb
diff --git a/dowhy/causal_identifier/auto_identifier.py b/dowhy/causal_identifier/auto_identifier.py
@@ -81,7 +81,7 @@ class GeneralizedAdjustment(Enum):
 class AutoIdentifier:
     """Class that implements different identification methods.
 
-    Currently supports backdoor and instrumental variable identification methods. The identification is based on the causal graph provided.
+    Currently supports backdoor, general adjustment, and instrumental variable identification methods. The identification is based on the causal graph provided.
 
     This class is for backwards compatibility with CausalModel
     Will be deprecated in the future in favor of function call auto_identify_effect()
@@ -174,6 +174,9 @@ def identify_effect_auto(
     are assumed to be equal to one for all variables in the graph.
     :param conditional_node_names: variables that are used to determine treatment. If none are
     provided, it is assumed that the intervention is static.
+    :param generalized_adjustment: specify whether to return a single minimal adjustment set which
+    matches the general adjustment criterion ("default"), or to exhaustively compute all such adjustment sets ("exhaustive-search"). For now
+    only minimal adjustment sets are supported.
     :returns:  target estimand, an instance of the IdentifiedEstimand class
     """
 
@@ -844,7 +847,10 @@ def identify_frontdoor(
         raise ValueError(f"d-separation algorithm {dseparation_algo} is not supported")
 
     eligible_variables = (
-        get_descendants(graph, action_nodes) - set(outcome_nodes) - set(get_descendants(graph, outcome_nodes))
+        get_descendants(graph, action_nodes)
+        - set(action_nodes)
+        - set(outcome_nodes)
+        - set(get_descendants(graph, outcome_nodes))
     )
     eligible_variables = eligible_variables.intersection(set(observed_nodes))
     set_sizes = range(1, len(eligible_variables) + 1, 1)
@@ -914,6 +920,7 @@ def identify_generalized_adjustment_set(
 
     graph_pbd = get_proper_backdoor_graph(graph, action_nodes, outcome_nodes)
     pcp_nodes = get_proper_causal_path_nodes(graph, action_nodes, outcome_nodes)
+    dpcp_nodes = get_descendants(graph, pcp_nodes).union(pcp_nodes)
 
     if generalized_adjustment == GeneralizedAdjustment.GENERALIZED_ADJUSTMENT_DEFAULT:
         # In default case, we don't exhaustively find all adjustment sets
@@ -922,7 +929,7 @@ def identify_generalized_adjustment_set(
             set(action_nodes),
             set(outcome_nodes),
             # Require the adjustment set to consist only of observed nodes
-            restricted=((set(graph_pbd.nodes) - set(pcp_nodes)) & set(observed_nodes)),
+            restricted=((set(graph_pbd.nodes) - set(dpcp_nodes)) & set(observed_nodes)),
         )
         if adjustment_set is None:
             logger.info("No adjustment sets found.")

diff --git a/dowhy/causal_identifier/identified_estimand.py b/dowhy/causal_identifier/identified_estimand.py
@@ -125,6 +125,9 @@ def __str__(self, only_target_estimand: bool = False, show_all_backdoor_sets: bo
                 # Just show the default backdoor set
                 if k.startswith("backdoor") and k != "backdoor":
                     continue
+            # Just show the default generalized adjustment set
+            if k.startswith("general") and k != "general_adjustment":
+                continue
             if only_target_estimand and k != self.identifier_method:
                 continue
             s += "\n### Estimand : {0}\n".format(i)

diff --git a/dowhy/graph.py b/dowhy/graph.py
@@ -129,6 +129,7 @@ def do_surgery(
 
     node_names = parse_state(node_names)
     new_graph = graph.copy()
+    # new_graph = copy.deepcopy(graph)
     for node_name in node_names:
         if remove_outgoing_edges:
             if remove_only_direct_edges_to_target:
@@ -203,10 +204,15 @@ def get_descendants(graph: nx.DiGraph, nodes):
 
 
 def get_proper_causal_path_nodes(graph: nx.DiGraph, action_nodes, outcome_nodes):
-    # Process is described in van der Zander et al. "Constructing Separators and
-    # Adjustment Sets in Ancestral Graphs", Section 4.1.
+    """Method to get the proper causal path nodes, as described in van der Zander et al. "Constructing Separators and
+    Adjustment Sets in Ancestral Graphs", Section 4.1. We cannot use do_surgery() since we require deep copies of the given graph.
 
-    # We cannot use do_surgery() since we require deep copies of the given graph.
+    :param graph: the causal graph in question
+    :param action_nodes: the action nodes
+    :param outcome_nodes: the outcome nodes
+
+    :returns: the set of nodes that lie on proper causal paths from the action nodes to the outcome nodes
+    """
 
     # 1) Create a pair of modified graphs by removing inbound and outbound arrows from the action nodes, respectively.
     graph_post_interv = copy.deepcopy(graph)  # remove incoming arrows to our action nodes
@@ -224,8 +230,15 @@ def get_proper_causal_path_nodes(graph: nx.DiGraph, action_nodes, outcome_nodes)
 
 
 def get_proper_backdoor_graph(graph: nx.DiGraph, action_nodes, outcome_nodes):
-    # Process is described in van der Zander et al. "Constructing Separators and
-    # Adjustment Sets in Ancestral Graphs", Section 4.1.
+    """Method to get the proper backdoor graph from a causal graph, as described in van der Zander et al. "Constructing Separators and
+    Adjustment Sets in Ancestral Graphs", Section 4.1. We cannot use do_surgery() since we require deep copies of the given graph.
+
+    :param graph: the causal graph in question
+    :param action_nodes: the action nodes
+    :param outcome_nodes: the outcome nodes
+
+    :returns: a new graph which is the proper backdoor graph of the original
+    """
 
     # First we can just call get_proper_causal_path_nodes, then
     # we remove edges from the action_nodes to the proper causal path nodes.

diff --git a/tests/causal_identifiers/base.py b/tests/causal_identifiers/base.py
@@ -38,10 +38,16 @@ def __init__(
         observed_variables,
         valid_frontdoor_sets,
         invalid_frontdoor_sets,
+        action_nodes=None,
+        outcome_nodes=None,
     ):
+        if outcome_nodes is None:
+            outcome_nodes = ["Y"]
+        if action_nodes is None:
+            action_nodes = ["X"]
         self.graph = build_graph_from_str(graph_str)
-        self.action_nodes = ["X"]
-        self.outcome_nodes = ["Y"]
+        self.action_nodes = action_nodes
+        self.outcome_nodes = outcome_nodes
         self.observed_nodes = observed_variables
         self.valid_frontdoor_sets = valid_frontdoor_sets
         self.invalid_frontdoor_sets = invalid_frontdoor_sets
@@ -55,7 +61,7 @@ def __init__(
         action_nodes,
         outcome_nodes,
         minimal_adjustment_sets,
-        exhaustive_adjustment_sets,
+        exhaustive_adjustment_sets=None,
     ):
         self.graph = build_graph_from_str(graph_str)
         self.action_nodes = action_nodes

diff --git a/tests/causal_identifiers/example_graphs.py b/tests/causal_identifiers/example_graphs.py
@@ -410,8 +410,36 @@
         exhaustive_adjustment_sets=[{"Z1", "Z2"}],
     ),
     # Example is selected from Perković et al. "Complete Graphical Characterization and Construction of
+    # Adjustment Sets in Markov Equivalence Classes of Ancestral Graphs", Example 8 (in Section 5).
+    "perkovic_example_8": dict(
+        graph_str="""graph[directed 1 node[id "X1" label "X1"]
+                node[id "X2" label "X2"]
+                node[id "Y" label "Y"]
+                node[id "V1" label "V1"]
+                node[id "V2" label "V2"]
+                node[id "V3" label "V3"]
+                node[id "V4" label "V4"]
+                node[id "V5" label "V5"]
+                node[id "L" label "L"]
+                edge[source "V5" target "X1"]
+                edge[source "V4" target "X1"]
+                edge[source "X1" target "V1"]
+                edge[source "V1" target "V2"]
+                edge[source "V2" target "X2"]
+                edge[source "X2" target "Y"]
+                edge[source "X1" target "V3"]
+                edge[source "V3" target "Y"]
+                edge[source "L" target "V3"]
+                edge[source "L" target "V2"]]
+                """,
+        observed_variables=["V1", "V2", "V3", "V4", "V5", "X1", "X2", "Y"],
+        action_nodes=["X1", "X2"],
+        outcome_nodes=["Y"],
+        minimal_adjustment_sets=[{"V1", "V2"}],
+    ),
+    # Example is selected from Perković et al. "Complete Graphical Characterization and Construction of
     # Adjustment Sets in Markov Equivalence Classes of Ancestral Graphs", Example 9 (in Section 5).
-    "perkovic_example_9_non_backdoor_adjustment_set": dict(
+    "perkovic_example_9": dict(
         graph_str="digraph{V1;V2;V3;X1;X2;Y; X1->Y;V1->X1;V2->X1;V3->V2;V3->Y;X2->V1;X2->Y}",
         observed_variables=["V1", "V2", "V3", "X1", "X2", "Y"],
         action_nodes=["X1", "X2"],
@@ -455,4 +483,15 @@
         valid_frontdoor_sets=[],
         invalid_frontdoor_sets=[{"Z"}, {"M1"}, {"M2"}, {"M1", "M2"}],
     ),
+    # This example is reproduced from the generalized_adjustment examples, and is
+    # added to test that the frontdoor criterion successfully filters out all the action
+    # nodes as ineligible variables for the frontdoor adjustment set.
+    "perkovic_example_9_multiple_action_nodes_no_frontdoor": dict(
+        graph_str="digraph{V1;V2;V3;X1;X2;Y; X1->Y;V1->X1;V2->X1;V3->V2;V3->Y;X2->V1;X2->Y}",
+        observed_variables=["V1", "V2", "V3", "X1", "X2", "Y"],
+        action_nodes=["X1", "X2"],
+        outcome_nodes=["Y"],
+        valid_frontdoor_sets=[],
+        invalid_frontdoor_sets=[{"V1"}, {"V2"}, {"V3"}],
+    ),
 }
diff --git a/tests/causal_identifiers/test_complete_adjustment_identifier.py b/tests/causal_identifiers/test_complete_adjustment_identifier.py
@@ -2,9 +2,8 @@
 
 import pytest
 
-from dowhy.causal_identifier import AutoIdentifier, GeneralizedAdjustment
+from dowhy.causal_identifier import GeneralizedAdjustment
 from dowhy.causal_identifier.auto_identifier import identify_generalized_adjustment_set
-from dowhy.causal_identifier.identify_effect import EstimandType
 
 from .base import IdentificationTestGeneralCovariateAdjustmentGraphSolution, example_complete_adjustment_graph_solution
 

diff --git a/tests/causal_identifiers/test_frontdoor_identifier.py b/tests/causal_identifiers/test_frontdoor_identifier.py
@@ -20,8 +20,8 @@ def test_identify_frontdoor_functional_api(
         frontdoor_set = identify_frontdoor(
             graph,
             observed_nodes=example_frontdoor_graph_solution.observed_nodes,
-            action_nodes=["X"],
-            outcome_nodes=["Y"],
+            action_nodes=example_frontdoor_graph_solution.action_nodes,
+            outcome_nodes=example_frontdoor_graph_solution.outcome_nodes,
         )
 
         assert (
@@ -40,7 +40,12 @@ def test_identify_frontdoor_causal_model(
         # Building the causal model
         num_samples = 10
         df = pd.DataFrame(np.random.random((num_samples, len(observed_nodes))), columns=observed_nodes)
-        model = CausalModel(data=df, treatment="X", outcome="Y", graph=graph)
+        model = CausalModel(
+            data=df,
+            treatment=example_frontdoor_graph_solution.action_nodes,
+            outcome=example_frontdoor_graph_solution.outcome_nodes,
+            graph=graph,
+        )
         estimand = model.identify_effect()
         frontdoor_set = estimand.frontdoor_variables
         assert (