Theory NotebookMath for LLMs

Causal Discovery

Causal Inference / Causal Discovery

Notesnotes.md Theory Notebooktheory.ipynb Exercises Notebookexercises.ipynb

Theory Notebook

Theory Notebook

Converted from theory.ipynb for web reading.

Causal Discovery

Causal discovery studies when causal graph structure can be inferred from observational, interventional, or multi-environment data under explicit assumptions.

This notebook is the executable companion to notes.md. It uses small synthetic causal systems so every cell runs without external data.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)
print("Plot setup complete.")

Code cell 3


import itertools
import math

COLORS = {
    "primary":   "#0077BB",
    "secondary": "#EE7733",
    "tertiary":  "#009988",
    "error":     "#CC3311",
    "neutral":   "#555555",
    "highlight": "#EE3377",
}

def header(title):
    print("\n" + "=" * 72)
    print(title)
    print("=" * 72)

def check_true(condition, name):
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    assert ok, name

def check_close(value, target, tol=1e-8, name="value"):
    ok = abs(float(value) - float(target)) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {float(value):.6f}, expected {float(target):.6f}")
    assert ok, name

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def simulate_confounding(n=1000):
    z = np.random.binomial(1, 0.5, size=n)
    x = np.random.binomial(1, 0.2 + 0.6 * z)
    y = 1.0 + 2.0 * x + 3.0 * z + np.random.normal(0, 0.2, size=n)
    return z, x, y

def backdoor_adjustment(z, x, y):
    effect = 0.0
    for val in [0, 1]:
        mask_z = z == val
        p_z = np.mean(mask_z)
        y1 = np.mean(y[mask_z & (x == 1)])
        y0 = np.mean(y[mask_z & (x == 0)])
        effect += (y1 - y0) * p_z
    return float(effect)

def ate(y1, y0):
    return float(np.mean(y1 - y0))

def shd(adj_true, adj_hat):
    return int(np.sum(np.asarray(adj_true) != np.asarray(adj_hat)))

def acyclicity_value(w):
    eigvals = np.linalg.eigvals(np.asarray(w) * np.asarray(w))
    return float(np.sum(np.exp(eigvals)).real - w.shape[0])

print("Helper functions ready.")

Demo 1: learning causal graphs from data

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 5

header("Demo 1 - learning causal graphs from data: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Demo 2: why discovery is impossible without assumptions

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 7

header("Demo 2 - why discovery is impossible without assumptions: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")

Demo 3: Markov equivalence

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 9

header("Demo 3 - Markov equivalence: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")

Demo 4: interventions break equivalence

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 11

header("Demo 4 - interventions break equivalence: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")

Demo 5: discovery as hypothesis generation

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 13

header("Demo 5 - discovery as hypothesis generation: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")

Demo 6: DAG and adjacency matrix $A$

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 15

header("Demo 6 - DAG and adjacency matrix $A$: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")

Demo 7: conditional independence oracle

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 17

header("Demo 7 - conditional independence oracle: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Demo 8: Markov equivalence class

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 19

header("Demo 8 - Markov equivalence class: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")

Demo 9: CPDAG

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 21

header("Demo 9 - CPDAG: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")

Demo 10: acyclicity constraint

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 23

header("Demo 10 - acyclicity constraint: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")

Demo 11: PC algorithm skeleton

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 25

header("Demo 11 - PC algorithm skeleton: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")

Demo 12: conditional independence tests

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 27

header("Demo 12 - conditional independence tests: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")

Demo 13: v-structures

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 29

header("Demo 13 - v-structures: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Demo 14: orientation rules

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 31

header("Demo 14 - orientation rules: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")

Demo 15: faithfulness and finite-sample failure

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 33

header("Demo 15 - faithfulness and finite-sample failure: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")

Demo 16: likelihood and BIC scores

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 35

header("Demo 16 - likelihood and BIC scores: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")

Demo 17: greedy equivalence search

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 37

header("Demo 17 - greedy equivalence search: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")

Demo 18: super-exponential DAG search

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 39

header("Demo 18 - super-exponential DAG search: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")

Demo 19: NOTEARS continuous optimization

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 41

header("Demo 19 - NOTEARS continuous optimization: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Demo 20: regularization and sparsity

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 43

header("Demo 20 - regularization and sparsity: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")

Demo 21: LiNGAM

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 45

header("Demo 21 - LiNGAM: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")

Demo 22: additive noise models

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 47

header("Demo 22 - additive noise models: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")

Demo 23: invariant causal prediction

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 49

header("Demo 23 - invariant causal prediction: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")

Demo 24: causal discovery across environments

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 51

header("Demo 24 - causal discovery across environments: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")

Demo 25: time-series causal discovery preview

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 53

header("Demo 25 - time-series causal discovery preview: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Previous lesson Next lesson