Theory Notebook
Converted from
theory.ipynbfor web reading.
Causal Discovery
Causal discovery studies when causal graph structure can be inferred from observational, interventional, or multi-environment data under explicit assumptions.
This notebook is the executable companion to notes.md. It uses small synthetic causal systems so every cell runs without external data.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style="whitegrid", palette="colorblind")
HAS_SNS = True
except ImportError:
plt.style.use("seaborn-v0_8-whitegrid")
HAS_SNS = False
mpl.rcParams.update({
"figure.figsize": (10, 6),
"figure.dpi": 120,
"font.size": 13,
"axes.titlesize": 15,
"axes.labelsize": 13,
"xtick.labelsize": 11,
"ytick.labelsize": 11,
"legend.fontsize": 11,
"legend.framealpha": 0.85,
"lines.linewidth": 2.0,
"axes.spines.top": False,
"axes.spines.right": False,
"savefig.bbox": "tight",
"savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import itertools
import math
COLORS = {
"primary": "#0077BB",
"secondary": "#EE7733",
"tertiary": "#009988",
"error": "#CC3311",
"neutral": "#555555",
"highlight": "#EE3377",
}
def header(title):
print("\n" + "=" * 72)
print(title)
print("=" * 72)
def check_true(condition, name):
ok = bool(condition)
print(f"{'PASS' if ok else 'FAIL'} - {name}")
assert ok, name
def check_close(value, target, tol=1e-8, name="value"):
ok = abs(float(value) - float(target)) <= tol
print(f"{'PASS' if ok else 'FAIL'} - {name}: got {float(value):.6f}, expected {float(target):.6f}")
assert ok, name
def sigmoid(z):
return 1.0 / (1.0 + np.exp(-z))
def simulate_confounding(n=1000):
z = np.random.binomial(1, 0.5, size=n)
x = np.random.binomial(1, 0.2 + 0.6 * z)
y = 1.0 + 2.0 * x + 3.0 * z + np.random.normal(0, 0.2, size=n)
return z, x, y
def backdoor_adjustment(z, x, y):
effect = 0.0
for val in [0, 1]:
mask_z = z == val
p_z = np.mean(mask_z)
y1 = np.mean(y[mask_z & (x == 1)])
y0 = np.mean(y[mask_z & (x == 0)])
effect += (y1 - y0) * p_z
return float(effect)
def ate(y1, y0):
return float(np.mean(y1 - y0))
def shd(adj_true, adj_hat):
return int(np.sum(np.asarray(adj_true) != np.asarray(adj_hat)))
def acyclicity_value(w):
eigvals = np.linalg.eigvals(np.asarray(w) * np.asarray(w))
return float(np.sum(np.exp(eigvals)).real - w.shape[0])
print("Helper functions ready.")
Demo 1: learning causal graphs from data
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 5
header("Demo 1 - learning causal graphs from data: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")
Demo 2: why discovery is impossible without assumptions
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 7
header("Demo 2 - why discovery is impossible without assumptions: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")
Demo 3: Markov equivalence
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 9
header("Demo 3 - Markov equivalence: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")
Demo 4: interventions break equivalence
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 11
header("Demo 4 - interventions break equivalence: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")
Demo 5: discovery as hypothesis generation
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 13
header("Demo 5 - discovery as hypothesis generation: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")
Demo 6: DAG and adjacency matrix
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 15
header("Demo 6 - DAG and adjacency matrix $A$: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")
Demo 7: conditional independence oracle
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 17
header("Demo 7 - conditional independence oracle: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")
Demo 8: Markov equivalence class
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 19
header("Demo 8 - Markov equivalence class: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")
Demo 9: CPDAG
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 21
header("Demo 9 - CPDAG: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")
Demo 10: acyclicity constraint
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 23
header("Demo 10 - acyclicity constraint: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")
Demo 11: PC algorithm skeleton
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 25
header("Demo 11 - PC algorithm skeleton: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")
Demo 12: conditional independence tests
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 27
header("Demo 12 - conditional independence tests: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")
Demo 13: v-structures
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 29
header("Demo 13 - v-structures: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")
Demo 14: orientation rules
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 31
header("Demo 14 - orientation rules: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")
Demo 15: faithfulness and finite-sample failure
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 33
header("Demo 15 - faithfulness and finite-sample failure: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")
Demo 16: likelihood and BIC scores
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 35
header("Demo 16 - likelihood and BIC scores: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")
Demo 17: greedy equivalence search
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 37
header("Demo 17 - greedy equivalence search: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")
Demo 18: super-exponential DAG search
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 39
header("Demo 18 - super-exponential DAG search: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")
Demo 19: NOTEARS continuous optimization
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 41
header("Demo 19 - NOTEARS continuous optimization: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")
Demo 20: regularization and sparsity
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 43
header("Demo 20 - regularization and sparsity: do is not condition")
z, x, y = simulate_confounding(n=3000)
observed = float(np.mean(y[x == 1]))
interventional = float(sum(np.mean(y[(x == 1) & (z == val)]) * np.mean(z == val) for val in [0, 1]))
print("Observed E[Y | X=1]:", round(observed, 3))
print("Adjusted E[Y | do(X=1)]:", round(interventional, 3))
check_true(abs(observed - interventional) > 0.1, "conditioning differs from intervention under confounding")
print("Causal lesson: intervention changes a mechanism, not merely a conditioning event.")
Demo 21: LiNGAM
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 45
header("Demo 21 - LiNGAM: potential outcomes")
n = 500
baseline = np.random.normal(0.0, 1.0, size=n)
y0 = baseline + np.random.normal(0, 0.1, size=n)
y1 = y0 + 1.5
effect = ate(y1, y0)
print("ATE:", round(effect, 3))
check_close(effect, 1.5, tol=0.05, name="average treatment effect")
print("Causal lesson: the causal effect compares two potential outcomes for the same units.")
Demo 22: additive noise models
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 47
header("Demo 22 - additive noise models: graph adjacency and SHD")
true_adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
pred_adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
distance = shd(true_adj, pred_adj)
print("Structural Hamming distance:", distance)
check_true(distance == 1, "one edge differs")
fig, ax = plt.subplots()
ax.imshow(true_adj - pred_adj, cmap="RdBu_r", vmin=-1, vmax=1)
ax.set_title("Difference between true and estimated adjacency")
ax.set_xlabel("Child index")
ax.set_ylabel("Parent index")
fig.tight_layout()
plt.show()
plt.close(fig)
print("Causal lesson: discovery is evaluated as graph structure, not just prediction loss.")
Demo 23: invariant causal prediction
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 49
header("Demo 23 - invariant causal prediction: NOTEARS acyclicity")
w_dag = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.0, 0.0, 0.0]])
w_cycle = np.array([[0.0, 0.8, 0.0], [0.0, 0.0, 0.5], [0.4, 0.0, 0.0]])
h_dag = acyclicity_value(w_dag)
h_cycle = acyclicity_value(w_cycle)
print("h(W) for DAG:", round(h_dag, 8))
print("h(W) for cyclic graph:", round(h_cycle, 8))
check_true(h_cycle > h_dag, "cycle has larger acyclicity penalty")
print("Causal lesson: some discovery methods encode DAG structure as a smooth constraint.")
Demo 24: causal discovery across environments
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 51
header("Demo 24 - causal discovery across environments: collider warning")
n = 3000
x = np.random.binomial(1, 0.5, size=n)
y = np.random.binomial(1, 0.5, size=n)
c = ((x + y + np.random.binomial(1, 0.15, size=n)) >= 1).astype(int)
unconditional = float(np.corrcoef(x, y)[0, 1])
conditioned = float(np.corrcoef(x[c == 1], y[c == 1])[0, 1])
print("Unconditional correlation:", round(unconditional, 3))
print("Correlation after conditioning on collider:", round(conditioned, 3))
check_true(abs(conditioned) > abs(unconditional), "conditioning on collider creates association")
print("Causal lesson: controlling for more variables can create bias.")
Demo 25: time-series causal discovery preview
This demo turns the approved TOC concept into a small causal object or calculation.
Code cell 53
header("Demo 25 - time-series causal discovery preview: confounding versus adjustment")
z, x, y = simulate_confounding(n=2000)
naive = float(np.mean(y[x == 1]) - np.mean(y[x == 0]))
adjusted = backdoor_adjustment(z, x, y)
print("Naive difference:", round(naive, 3))
print("Backdoor-adjusted effect:", round(adjusted, 3))
check_true(abs(adjusted - 2.0) < abs(naive - 2.0), "adjustment moves closer to true effect")
print("Causal lesson: conditioning on the right confounder can recover an interventional contrast.")