Theory Notebook
Converted from theory.ipynb for web reading.
Do Calculus
Do-calculus gives graph-based rules for transforming interventional causal queries into estimable observational expressions when assumptions permit identification.
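For reference, the three rules (paraphrased from Pearl's standard formulation; $G_{\overline{X}}$ denotes the graph with edges into $X$ removed and $G_{\underline{Z}}$ the graph with edges out of $Z$ removed, matching the notation used in the demos below) are:

$$\textbf{Rule 1 (insertion/deletion of observations):}\quad P(y \mid \operatorname{do}(x), z, w) = P(y \mid \operatorname{do}(x), w) \ \text{ if } (Y \perp\!\!\!\perp Z \mid X, W) \text{ in } G_{\overline{X}}.$$

$$\textbf{Rule 2 (action/observation exchange):}\quad P(y \mid \operatorname{do}(x), \operatorname{do}(z), w) = P(y \mid \operatorname{do}(x), z, w) \ \text{ if } (Y \perp\!\!\!\perp Z \mid X, W) \text{ in } G_{\overline{X}\underline{Z}}.$$

$$\textbf{Rule 3 (insertion/deletion of actions):}\quad P(y \mid \operatorname{do}(x), \operatorname{do}(z), w) = P(y \mid \operatorname{do}(x), w) \ \text{ if } (Y \perp\!\!\!\perp Z \mid X, W) \text{ in } G_{\overline{X}\,\overline{Z(W)}},$$

where $Z(W)$ is the set of $Z$-nodes that are not ancestors of any $W$-node in $G_{\overline{X}}$.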
This notebook is the executable companion to notes.md. It uses small synthetic causal systems so every cell runs without external data.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import itertools
import math
COLORS = {
    "primary": "#0077BB",
    "secondary": "#EE7733",
    "tertiary": "#009988",
    "error": "#CC3311",
    "neutral": "#555555",
    "highlight": "#EE3377",
}

def header(title):
    print("\n" + "=" * 72)
    print(title)
    print("=" * 72)

def check_true(condition, name):
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    assert ok, name

def check_close(value, target, tol=1e-8, name="value"):
    ok = abs(float(value) - float(target)) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {float(value):.6f}, expected {float(target):.6f}")
    assert ok, name

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def simulate_confounding(n=1000):
    # Z -> X and Z -> Y confound the X -> Y relationship; the true effect of X on Y is 2.0.
    z = np.random.binomial(1, 0.5, size=n)
    x = np.random.binomial(1, 0.2 + 0.6 * z)
    y = 1.0 + 2.0 * x + 3.0 * z + np.random.normal(0, 0.2, size=n)
    return z, x, y

def backdoor_adjustment(z, x, y):
    # Backdoor adjustment over the single binary confounder Z:
    # sum_z (E[Y|X=1,Z=z] - E[Y|X=0,Z=z]) * P(Z=z).
    effect = 0.0
    for val in [0, 1]:
        mask_z = z == val
        p_z = np.mean(mask_z)
        y1 = np.mean(y[mask_z & (x == 1)])
        y0 = np.mean(y[mask_z & (x == 0)])
        effect += (y1 - y0) * p_z
    return float(effect)

def ate(y1, y0):
    return float(np.mean(y1 - y0))

def shd(adj_true, adj_hat):
    # Entrywise mismatch count between adjacency matrices.
    return int(np.sum(np.asarray(adj_true) != np.asarray(adj_hat)))

def acyclicity_value(w):
    # NOTEARS-style constraint h(W) = tr(exp(W o W)) - d; zero iff W encodes a DAG.
    eigvals = np.linalg.eigvals(np.asarray(w) * np.asarray(w))
    return float(np.sum(np.exp(eigvals)).real - w.shape[0])

print("Helper functions ready.")
Demo 1: conditioning is not intervening
A binary confounder drives both treatment and outcome; the naive mean difference is compared with the backdoor-adjusted estimate of the true effect of 2.
Code cell 5
header("Demo 1 - conditioning is not intervening: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")
Demo 2: identification as expression rewriting
The interventional mean E[Y | do(X=1)] is rewritten as an adjustment over the confounder and contrasted with the observational mean E[Y | X=1].
Code cell 7
header("Demo 2 - identification as expression rewriting: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")
Demo 3: why graphs are needed
Both potential outcomes are generated for every simulated unit, so the average treatment effect of 1.5 can be computed directly as a benchmark.
Code cell 9
header("Demo 3 - why graphs are needed: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")
Demo 4: what do-calculus can and cannot prove
A true and an estimated adjacency matrix are compared via structural Hamming distance, showing how graph estimates are scored.
Code cell 11
header("Demo 4 - what do-calculus can and cannot prove: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")
Demo 5: observational data plus assumptions
The smooth acyclicity function h(W) is evaluated on a DAG and on a cyclic weighted graph; only the cycle incurs a positive penalty.
Code cell 13
header("Demo 5 - observational data plus assumptions: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")
Demo 6: do-operator
Two independent causes of a common effect are simulated; conditioning on the collider induces an association that was absent marginally.
Code cell 15
header("Demo 6 - do-operator $\\operatorname{do}(X=x)$: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")
Demo 7: mutilated graph
The same confounded simulation as Demo 1: the naive contrast is biased upward, while the backdoor adjustment recovers the effect of 2.
Code cell 17
header("Demo 7 - mutilated graph $G_{\\overline{X}}$: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")
Demo 8: causal effect
As in Demo 2, E[Y | X=1] and the adjusted estimate of E[Y | do(X=1)] are computed on confounded data and shown to differ.
Code cell 19
header("Demo 8 - causal effect $P(Y \\mid \\operatorname{do}(X=x))$: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")
Demo 9: identifiability
A potential-outcomes simulation with a constant unit-level effect of 1.5, so the target of identification is known exactly.
Code cell 21
header("Demo 9 - identifiability: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")
Demo 10: adjustment set
The structural Hamming distance between a true and an estimated adjacency matrix is computed and visualized as a difference heatmap.
Code cell 23
header("Demo 10 - adjustment set: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")
Demo 11: backdoor paths
The acyclicity value h(W) is computed for a DAG and for a cyclic graph to show how the constraint separates the two.
Code cell 25
header("Demo 11 - backdoor paths: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")
Demo 12: backdoor criterion
Conditioning on a common effect of two independent variables creates a spurious correlation between them.
Code cell 27
header("Demo 12 - backdoor criterion: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")
Demo 13: adjustment formula
The adjustment formula is applied to the confounded simulation: the stratified, reweighted contrast lands much closer to the true effect than the naive difference.
Code cell 29
header("Demo 13 - adjustment formula: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")
Demo 14: frontdoor criterion
On the confounded simulation, conditioning on X=1 and intervening with do(X=1) give different answers; a sketch of the frontdoor estimand itself follows the cell.
Code cell 31
header("Demo 14 - frontdoor criterion: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")
Demo 15: confounding and mediators
A direct potential-outcomes calculation of the ATE, used as the reference quantity that adjustment strategies aim to recover.
Code cell 33
header("Demo 15 - confounding and mediators: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")
Demo 16: insertion and deletion of observations
A graph-structure comparison in which one missing edge yields a structural Hamming distance of 1.
Code cell 35
header("Demo 16 - insertion and deletion of observations: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")
Demo 17: action observation exchange
The NOTEARS penalty h(W) again distinguishes a DAG from a cyclic weighted graph.
Code cell 37
header("Demo 17 - action observation exchange: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")
Demo 18: insertion and deletion of actions
Another collider example: selecting on the common effect manufactures dependence between independent causes.
Code cell 39
header("Demo 18 - insertion and deletion of actions: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")
Demo 19: graph conditions for each rule
Confounding versus adjustment once more: the backdoor-adjusted estimate tracks the true effect while the naive contrast does not.
Code cell 41
header("Demo 19 - graph conditions for each rule: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")
Demo 20: proof intuition from d-separation
The gap between E[Y | X=1] and the adjusted E[Y | do(X=1)] quantifies how much the conditioning event differs from the intervention.
Code cell 43
header("Demo 20 - proof intuition from d-separation: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")
Demo 21: manual graph simplification
Both potential outcomes are simulated for each unit and averaged to give the ATE of 1.5.
Code cell 45
header("Demo 21 - manual graph simplification: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")
Demo 22: ID algorithm preview
Graph estimates are again scored structurally: the SHD between the true and predicted adjacency matrices is 1.
Code cell 47
header("Demo 22 - ID algorithm preview: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")
Demo 23: c-components and hedges
The acyclicity constraint is evaluated on a DAG and on a cyclic graph; the cyclic graph receives the larger value.
Code cell 49
header("Demo 23 - c-components and hedges: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")
Demo 24: Tian-Pearl identification condition preview
A final collider warning: conditioning on the common effect of X and Y induces correlation where none existed.
Code cell 51
header("Demo 24 - Tian-Pearl identification condition preview: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")
Demo 25: when an effect is not identifiable
The confounded simulation is revisited with the confounder observed, so adjustment works; a sketch of a non-identifiable variant with the confounder hidden follows the cell.
Code cell 53
header("Demo 25 - when an effect is not identifiable: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")