Theory NotebookMath for LLMs

Counterfactuals

Causal Inference / Counterfactuals

Notesnotes.md Theory Notebooktheory.ipynb Exercises Notebookexercises.ipynb

Theory Notebook

Theory Notebook

Converted from theory.ipynb for web reading.

Counterfactuals

Counterfactual reasoning studies unit-level alternatives by combining factual evidence with a causal model of interventions.

This notebook is the executable companion to notes.md. It uses small synthetic causal systems so every cell runs without external data.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)
print("Plot setup complete.")

Code cell 3


import itertools
import math

COLORS = {
    "primary":   "#0077BB",
    "secondary": "#EE7733",
    "tertiary":  "#009988",
    "error":     "#CC3311",
    "neutral":   "#555555",
    "highlight": "#EE3377",
}

def header(title):
    print("\n" + "=" * 72)
    print(title)
    print("=" * 72)

def check_true(condition, name):
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    assert ok, name

def check_close(value, target, tol=1e-8, name="value"):
    ok = abs(float(value) - float(target)) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {float(value):.6f}, expected {float(target):.6f}")
    assert ok, name

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def simulate_confounding(n=1000):
    z = np.random.binomial(1, 0.5, size=n)
    x = np.random.binomial(1, 0.2 + 0.6 * z)
    y = 1.0 + 2.0 * x + 3.0 * z + np.random.normal(0, 0.2, size=n)
    return z, x, y

def backdoor_adjustment(z, x, y):
    effect = 0.0
    for val in [0, 1]:
        mask_z = z == val
        p_z = np.mean(mask_z)
        y1 = np.mean(y[mask_z & (x == 1)])
        y0 = np.mean(y[mask_z & (x == 0)])
        effect += (y1 - y0) * p_z
    return float(effect)

def ate(y1, y0):
    return float(np.mean(y1 - y0))

def shd(adj_true, adj_hat):
    return int(np.sum(np.asarray(adj_true) != np.asarray(adj_hat)))

def acyclicity_value(w):
    eigvals = np.linalg.eigvals(np.asarray(w) * np.asarray(w))
    return float(np.sum(np.exp(eigvals)).real - w.shape[0])

print("Helper functions ready.")

Demo 1: what-if reasoning

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 5

header("Demo 1 - what-if reasoning: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Demo 2: Pearl's ladder of causation

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 7

header("Demo 2 - Pearl's ladder of causation: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")

Demo 3: unit-level vs population-level questions

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 9

header("Demo 3 - unit-level vs population-level questions: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")

Demo 4: why counterfactuals need a model

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 11

header("Demo 4 - why counterfactuals need a model: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")

Demo 5: counterfactuals in decisions and explanations

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 13

header("Demo 5 - counterfactuals in decisions and explanations: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")

Demo 6: potential outcomes $Y(1),Y(0)$

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 15

header("Demo 6 - potential outcomes $Y(1),Y(0)$: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")

Demo 7: SCM counterfactual $Y_x(\mathbf{u})$

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 17

header("Demo 7 - SCM counterfactual $Y_x(\\mathbf{u})$: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Demo 8: factual evidence $E=e$

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 19

header("Demo 8 - factual evidence $E=e$: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")

Demo 9: consistency

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 21

header("Demo 9 - consistency: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")

Demo 10: SUTVA and interference limits

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 23

header("Demo 10 - SUTVA and interference limits: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")

Demo 11: treatment assignment $A$

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 25

header("Demo 11 - treatment assignment $A$: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")

Demo 12: ATE ATT and CATE

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 27

header("Demo 12 - ATE ATT and CATE: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")

Demo 13: ignorability and exchangeability

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 29

header("Demo 13 - ignorability and exchangeability: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Demo 14: positivity

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 31

header("Demo 14 - positivity: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")

Demo 15: observed vs missing potential outcomes

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 33

header("Demo 15 - observed vs missing potential outcomes: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")

Demo 16: abduction

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 35

header("Demo 16 - abduction: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")

Demo 17: action

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 37

header("Demo 17 - action: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")

Demo 18: prediction

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 39

header("Demo 18 - prediction: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")

Demo 19: twin networks

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 41

header("Demo 19 - twin networks: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Demo 20: linear counterfactual examples

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 43

header("Demo 20 - linear counterfactual examples: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")

Demo 21: randomized experiments

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 45

header("Demo 21 - randomized experiments: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")

Demo 22: observational identification assumptions

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 47

header("Demo 22 - observational identification assumptions: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")

Demo 23: effect of treatment on treated

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 49

header("Demo 23 - effect of treatment on treated: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")

Demo 24: mediation and natural effects preview

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 51

header("Demo 24 - mediation and natural effects preview: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")

Demo 25: sensitivity to unobserved confounding

This demo turns the approved TOC concept into a small causal object or calculation.

Code cell 53

header("Demo 25 - sensitivity to unobserved confounding: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")

Previous lesson Next lesson