Theory Notebook
Converted from theory.ipynb for web reading.
PAC Learning
PAC learning formalizes learnability by asking how many samples are enough to make small true error probable.
This notebook is the executable companion to notes.md. It uses synthetic samples so every cell is reproducible.
Code cell 2
# Plotting setup: prefer seaborn's colorblind-friendly theme and fall back to
# matplotlib's port of the same style when seaborn is not installed.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

# Shared figure defaults so every demo in the notebook renders consistently.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})

# Fixed seed keeps every randomized demo below reproducible.
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import math
# Shared hex palette for all demo figures; the hues are chosen to stay
# distinguishable under common color-vision deficiencies (colorblind-friendly).
COLORS = {
    "primary": "#0077BB",
    "secondary": "#EE7733",
    "tertiary": "#009988",
    "error": "#CC3311",
    "neutral": "#555555",
    "highlight": "#EE3377",
}
def header(title):
    """Print *title* between two 72-character rules to separate demo output."""
    rule = "=" * 72
    print(f"\n{rule}\n{title}\n{rule}")
def check_true(condition, name):
    """Report PASS/FAIL for *condition*, then assert it.

    Raises AssertionError with *name* as the message when the condition
    is falsy, so a failed check halts the notebook cell.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    assert passed, name
def check_close(value, target, tol=1e-8, name="value"):
    """Report and assert that *value* is within *tol* of *target* (absolute difference)."""
    got = float(value)
    want = float(target)
    passed = abs(got - want) <= tol
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}: got {got:.6f}, expected {want:.6f}")
    assert passed, name
def finite_class_bound(h_size, epsilon, delta):
    """Realizable-case PAC sample-complexity bound for a finite hypothesis class.

    Returns m = (ln|H| + ln(1/delta)) / epsilon: with at least this many
    i.i.d. samples, a consistent ERM learner over |H| = h_size hypotheses
    achieves true error <= epsilon with probability >= 1 - delta.
    """
    log_terms = np.log(h_size) + np.log(1.0 / delta)
    return log_terms / epsilon
def hoeffding_gap(h_size, m, delta):
    """Uniform two-sided generalization-gap bound for a finite class.

    Hoeffding's inequality plus a union bound over |H| = h_size hypotheses:
    with probability >= 1 - delta, every h satisfies
    |L_D(h) - L_S(h)| <= sqrt((ln(2|H|) + ln(1/delta)) / (2m)).
    """
    numerator = np.log(2.0 * h_size) + np.log(1.0 / delta)
    return np.sqrt(numerator / (2.0 * m))
def bias_variance(y_hats, y_true, noise_var):
    """Decompose ensemble predictions into (bias^2, variance, noise) floats.

    y_hats: (n_models, n_points) predictions, one row per fitted model.
    y_true: noiseless targets at the same points.
    noise_var: irreducible noise variance, passed through unchanged.
    Bias^2 and variance are averaged over the evaluation points.
    """
    avg_pred = np.mean(y_hats, axis=0)
    squared_bias = np.mean((avg_pred - y_true) ** 2)
    spread = np.mean(np.var(y_hats, axis=0))
    return float(squared_bias), float(spread), float(noise_var)
def empirical_rademacher_linear(x, radius=1.0, trials=200):
    """Monte Carlo estimate of the empirical Rademacher complexity of
    the 1-D bounded linear class {h(x) = w*x : |w| <= radius} on sample *x*.

    Averages radius * |sum_i sigma_i x_i| / n over *trials* random sign
    vectors. Signs come from the global np.random state, so results depend
    on the seed set earlier in the notebook.
    """
    x = np.asarray(x, dtype=float)
    n = x.shape[0]
    draws = []
    for _ in range(trials):
        signs = np.random.choice([-1.0, 1.0], size=n)
        draws.append(radius * abs(np.sum(signs * x)) / n)
    return float(np.mean(draws))


print("Helper functions ready.")
Demo 1: learning as selecting a hypothesis from data
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 5
# Finite-class, realizable-case sample complexity from the union bound.
header("Demo 1 - learning as selecting a hypothesis from data: finite-class sample complexity")
class_size = 128
eps = 0.05
conf_delta = 0.01
needed = finite_class_bound(class_size, eps, conf_delta)
print("Hypothesis count:", class_size)
print("Required samples:", int(np.ceil(needed)))
check_true(needed > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")
Demo 2: probably approximately correct guarantee
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 7
# Zero-one empirical risk on a tiny labeled sample (2 disagreements / 8).
header("Demo 2 - probably approximately correct guarantee: empirical and true risk")
labels = np.array([1, 0, 1, 1, 0, 0, 1, 0])
guesses = np.array([1, 0, 0, 1, 0, 1, 1, 0])
risk_hat = (labels != guesses).mean()
print("Empirical risk:", risk_hat)
check_close(risk_hat, 0.25, name="classification empirical risk")
print("Learning-theory lesson: risk is a loss expectation, estimated from finite samples.")
Demo 3: error confidence and sample size
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 9
# Compare all 2^m labelings with the Sauer-style polynomial cap for VC dim 3.
header("Demo 3 - error confidence and sample size: VC growth comparison")
m_values = np.arange(1, 11)
vc_dim = 3
counts = []
for m_val in m_values:
    mm = int(m_val)
    # At most sum_{i <= d} C(m, i) dichotomies when the VC dimension is d.
    counts.append(sum(math.comb(mm, i) for i in range(min(vc_dim, mm) + 1)))
vc_growth = np.array(counts)
all_labelings = 2 ** m_values
print("m values:", m_values.tolist())
print("VC upper growth:", vc_growth.tolist())
check_true(np.all(vc_growth <= all_labelings), "growth function is bounded by all labelings")
fig, ax = plt.subplots()
ax.plot(m_values, all_labelings, color=COLORS["secondary"], label="All dichotomies")
ax.plot(m_values, vc_growth, color=COLORS["primary"], label="VC polynomial bound")
ax.set_title("Exponential labelings versus VC growth")
ax.set_xlabel("Sample size $m$")
ax.set_ylabel("Dichotomy count")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Learning-theory lesson: finite VC dimension turns exponential growth into polynomial growth.")
Demo 4: why PAC is distribution-free
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 11
# Bias-variance decomposition across four horizontally shifted quadratic fits.
header("Demo 4 - why PAC is distribution-free: bias-variance components")
grid = np.linspace(-1, 1, 40)
target = grid ** 2
shifts = (-0.15, -0.05, 0.05, 0.15)
fits = np.vstack([(grid + s) ** 2 + 0.02 * s for s in shifts])
bias2, variance, noise = bias_variance(fits, target, noise_var=0.01)
print("Bias^2:", round(bias2, 6))
print("Variance:", round(variance, 6))
print("Noise:", round(noise, 6))
check_true(bias2 >= 0 and variance >= 0 and noise >= 0, "components are nonnegative")
print("Learning-theory lesson: expected error can be decomposed into interpretable terms.")
Demo 5: what PAC does not promise
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 13
# Hoeffding + union bound uniform deviation for a finite class.
header("Demo 5 - what PAC does not promise: Hoeffding generalization gap")
class_size = 64
sample_count = 500
conf_delta = 0.05
gap_bound = hoeffding_gap(class_size, sample_count, conf_delta)
print("Generalization gap bound:", round(gap_bound, 4))
check_true(0 < gap_bound < 1, "gap bound is a probability-scale quantity")
print("Learning-theory lesson: more samples reduce the confidence penalty.")
Demo 6: instance space
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 15
# Monte Carlo empirical Rademacher complexity of bounded linear predictors.
header("Demo 6 - instance space $\\mathcal{X}$: empirical Rademacher complexity")
sample = np.linspace(-1, 1, 80)
rad_est = empirical_rademacher_linear(sample, radius=2.0, trials=300)
print("Estimated empirical Rademacher complexity:", round(rad_est, 6))
check_true(rad_est >= 0, "Rademacher complexity is nonnegative")
print("Learning-theory lesson: fitting random signs is a data-dependent capacity test.")
Demo 7: label space
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 17
# Finite-class, realizable-case sample complexity from the union bound.
header("Demo 7 - label space $\\mathcal{Y}$: finite-class sample complexity")
class_size = 128
eps = 0.05
conf_delta = 0.01
needed = finite_class_bound(class_size, eps, conf_delta)
print("Hypothesis count:", class_size)
print("Required samples:", int(np.ceil(needed)))
check_true(needed > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")
Demo 8: hypothesis class
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 19
# Zero-one empirical risk on a tiny labeled sample (2 disagreements / 8).
header("Demo 8 - hypothesis class $\\mathcal{H}$: empirical and true risk")
labels = np.array([1, 0, 1, 1, 0, 0, 1, 0])
guesses = np.array([1, 0, 0, 1, 0, 1, 1, 0])
risk_hat = (labels != guesses).mean()
print("Empirical risk:", risk_hat)
check_close(risk_hat, 0.25, name="classification empirical risk")
print("Learning-theory lesson: risk is a loss expectation, estimated from finite samples.")
Demo 9: true risk and empirical risk
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 21
# Compare all 2^m labelings with the Sauer-style polynomial cap for VC dim 3.
header("Demo 9 - true risk $L_{\\mathcal{D}}(h)$ and empirical risk $L_S(h)$: VC growth comparison")
m_values = np.arange(1, 11)
vc_dim = 3
counts = []
for m_val in m_values:
    mm = int(m_val)
    # At most sum_{i <= d} C(m, i) dichotomies when the VC dimension is d.
    counts.append(sum(math.comb(mm, i) for i in range(min(vc_dim, mm) + 1)))
vc_growth = np.array(counts)
all_labelings = 2 ** m_values
print("m values:", m_values.tolist())
print("VC upper growth:", vc_growth.tolist())
check_true(np.all(vc_growth <= all_labelings), "growth function is bounded by all labelings")
fig, ax = plt.subplots()
ax.plot(m_values, all_labelings, color=COLORS["secondary"], label="All dichotomies")
ax.plot(m_values, vc_growth, color=COLORS["primary"], label="VC polynomial bound")
ax.set_title("Exponential labelings versus VC growth")
ax.set_xlabel("Sample size $m$")
ax.set_ylabel("Dichotomy count")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Learning-theory lesson: finite VC dimension turns exponential growth into polynomial growth.")
Demo 10: PAC learner with (ε, δ) parameters
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 23
# Bias-variance decomposition across four horizontally shifted quadratic fits.
header("Demo 10 - PAC learner with $(\\epsilon,\\delta)$: bias-variance components")
grid = np.linspace(-1, 1, 40)
target = grid ** 2
shifts = (-0.15, -0.05, 0.05, 0.15)
fits = np.vstack([(grid + s) ** 2 + 0.02 * s for s in shifts])
bias2, variance, noise = bias_variance(fits, target, noise_var=0.01)
print("Bias^2:", round(bias2, 6))
print("Variance:", round(variance, 6))
print("Noise:", round(noise, 6))
check_true(bias2 >= 0 and variance >= 0 and noise >= 0, "components are nonnegative")
print("Learning-theory lesson: expected error can be decomposed into interpretable terms.")
Demo 11: consistency assumption
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 25
# Hoeffding + union bound uniform deviation for a finite class.
header("Demo 11 - consistency assumption: Hoeffding generalization gap")
class_size = 64
sample_count = 500
conf_delta = 0.05
gap_bound = hoeffding_gap(class_size, sample_count, conf_delta)
print("Generalization gap bound:", round(gap_bound, 4))
check_true(0 < gap_bound < 1, "gap bound is a probability-scale quantity")
print("Learning-theory lesson: more samples reduce the confidence penalty.")
Demo 12: finite hypothesis class bound
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 27
# Monte Carlo empirical Rademacher complexity of bounded linear predictors.
header("Demo 12 - finite hypothesis class bound: empirical Rademacher complexity")
sample = np.linspace(-1, 1, 80)
rad_est = empirical_rademacher_linear(sample, radius=2.0, trials=300)
print("Estimated empirical Rademacher complexity:", round(rad_est, 6))
check_true(rad_est >= 0, "Rademacher complexity is nonnegative")
print("Learning-theory lesson: fitting random signs is a data-dependent capacity test.")
Demo 13: union bound proof sketch
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 29
# Finite-class, realizable-case sample complexity from the union bound.
header("Demo 13 - union bound proof sketch: finite-class sample complexity")
class_size = 128
eps = 0.05
conf_delta = 0.01
needed = finite_class_bound(class_size, eps, conf_delta)
print("Hypothesis count:", class_size)
print("Required samples:", int(np.ceil(needed)))
check_true(needed > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")
Demo 14: sample complexity
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 31
# Zero-one empirical risk on a tiny labeled sample (2 disagreements / 8).
header("Demo 14 - sample complexity $m_{\\mathcal{H}}(\\epsilon,\\delta)$: empirical and true risk")
labels = np.array([1, 0, 1, 1, 0, 0, 1, 0])
guesses = np.array([1, 0, 0, 1, 0, 1, 1, 0])
risk_hat = (labels != guesses).mean()
print("Empirical risk:", risk_hat)
check_close(risk_hat, 0.25, name="classification empirical risk")
print("Learning-theory lesson: risk is a loss expectation, estimated from finite samples.")
Demo 15: consistent ERM
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 33
# Compare all 2^m labelings with the Sauer-style polynomial cap for VC dim 3.
header("Demo 15 - consistent ERM: VC growth comparison")
m_values = np.arange(1, 11)
vc_dim = 3
counts = []
for m_val in m_values:
    mm = int(m_val)
    # At most sum_{i <= d} C(m, i) dichotomies when the VC dimension is d.
    counts.append(sum(math.comb(mm, i) for i in range(min(vc_dim, mm) + 1)))
vc_growth = np.array(counts)
all_labelings = 2 ** m_values
print("m values:", m_values.tolist())
print("VC upper growth:", vc_growth.tolist())
check_true(np.all(vc_growth <= all_labelings), "growth function is bounded by all labelings")
fig, ax = plt.subplots()
ax.plot(m_values, all_labelings, color=COLORS["secondary"], label="All dichotomies")
ax.plot(m_values, vc_growth, color=COLORS["primary"], label="VC polynomial bound")
ax.set_title("Exponential labelings versus VC growth")
ax.set_xlabel("Sample size $m$")
ax.set_ylabel("Dichotomy count")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Learning-theory lesson: finite VC dimension turns exponential growth into polynomial growth.")
Demo 16: Bayes error and approximation error
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 35
# Bias-variance decomposition across four horizontally shifted quadratic fits.
header("Demo 16 - Bayes error and approximation error: bias-variance components")
grid = np.linspace(-1, 1, 40)
target = grid ** 2
shifts = (-0.15, -0.05, 0.05, 0.15)
fits = np.vstack([(grid + s) ** 2 + 0.02 * s for s in shifts])
bias2, variance, noise = bias_variance(fits, target, noise_var=0.01)
print("Bias^2:", round(bias2, 6))
print("Variance:", round(variance, 6))
print("Noise:", round(noise, 6))
check_true(bias2 >= 0 and variance >= 0 and noise >= 0, "components are nonnegative")
print("Learning-theory lesson: expected error can be decomposed into interpretable terms.")
Demo 17: excess risk
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 37
# Hoeffding + union bound uniform deviation for a finite class.
header("Demo 17 - excess risk: Hoeffding generalization gap")
class_size = 64
sample_count = 500
conf_delta = 0.05
gap_bound = hoeffding_gap(class_size, sample_count, conf_delta)
print("Generalization gap bound:", round(gap_bound, 4))
check_true(0 < gap_bound < 1, "gap bound is a probability-scale quantity")
print("Learning-theory lesson: more samples reduce the confidence penalty.")
Demo 18: agnostic ERM
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 39
# Monte Carlo empirical Rademacher complexity of bounded linear predictors.
header("Demo 18 - agnostic ERM: empirical Rademacher complexity")
sample = np.linspace(-1, 1, 80)
rad_est = empirical_rademacher_linear(sample, radius=2.0, trials=300)
print("Estimated empirical Rademacher complexity:", round(rad_est, 6))
check_true(rad_est >= 0, "Rademacher complexity is nonnegative")
print("Learning-theory lesson: fitting random signs is a data-dependent capacity test.")
Demo 19: finite-class agnostic bound
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 41
# Finite-class, realizable-case sample complexity from the union bound.
header("Demo 19 - finite-class agnostic bound: finite-class sample complexity")
class_size = 128
eps = 0.05
conf_delta = 0.01
needed = finite_class_bound(class_size, eps, conf_delta)
print("Hypothesis count:", class_size)
print("Required samples:", int(np.ceil(needed)))
check_true(needed > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")
Demo 20: noisy labels
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 43
# Zero-one empirical risk on a tiny labeled sample (2 disagreements / 8).
header("Demo 20 - noisy labels: empirical and true risk")
labels = np.array([1, 0, 1, 1, 0, 0, 1, 0])
guesses = np.array([1, 0, 0, 1, 0, 1, 1, 0])
risk_hat = (labels != guesses).mean()
print("Empirical risk:", risk_hat)
check_close(risk_hat, 0.25, name="classification empirical risk")
print("Learning-theory lesson: risk is a loss expectation, estimated from finite samples.")
Demo 21: dependence on ε
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 45
# Compare all 2^m labelings with the Sauer-style polynomial cap for VC dim 3.
header("Demo 21 - dependence on $\\epsilon$: VC growth comparison")
m_values = np.arange(1, 11)
vc_dim = 3
counts = []
for m_val in m_values:
    mm = int(m_val)
    # At most sum_{i <= d} C(m, i) dichotomies when the VC dimension is d.
    counts.append(sum(math.comb(mm, i) for i in range(min(vc_dim, mm) + 1)))
vc_growth = np.array(counts)
all_labelings = 2 ** m_values
print("m values:", m_values.tolist())
print("VC upper growth:", vc_growth.tolist())
check_true(np.all(vc_growth <= all_labelings), "growth function is bounded by all labelings")
fig, ax = plt.subplots()
ax.plot(m_values, all_labelings, color=COLORS["secondary"], label="All dichotomies")
ax.plot(m_values, vc_growth, color=COLORS["primary"], label="VC polynomial bound")
ax.set_title("Exponential labelings versus VC growth")
ax.set_xlabel("Sample size $m$")
ax.set_ylabel("Dichotomy count")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Learning-theory lesson: finite VC dimension turns exponential growth into polynomial growth.")
Demo 22: dependence on δ
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 47
# Bias-variance decomposition across four horizontally shifted quadratic fits.
header("Demo 22 - dependence on $\\delta$: bias-variance components")
grid = np.linspace(-1, 1, 40)
target = grid ** 2
shifts = (-0.15, -0.05, 0.05, 0.15)
fits = np.vstack([(grid + s) ** 2 + 0.02 * s for s in shifts])
bias2, variance, noise = bias_variance(fits, target, noise_var=0.01)
print("Bias^2:", round(bias2, 6))
print("Variance:", round(variance, 6))
print("Noise:", round(noise, 6))
check_true(bias2 >= 0 and variance >= 0 and noise >= 0, "components are nonnegative")
print("Learning-theory lesson: expected error can be decomposed into interpretable terms.")
Demo 23: logarithmic class-size dependence
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 49
# Hoeffding + union bound uniform deviation for a finite class.
header("Demo 23 - logarithmic class-size dependence: Hoeffding generalization gap")
class_size = 64
sample_count = 500
conf_delta = 0.05
gap_bound = hoeffding_gap(class_size, sample_count, conf_delta)
print("Generalization gap bound:", round(gap_bound, 4))
check_true(0 < gap_bound < 1, "gap bound is a probability-scale quantity")
print("Learning-theory lesson: more samples reduce the confidence penalty.")
Demo 24: infinite-class motivation
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 51
# Monte Carlo empirical Rademacher complexity of bounded linear predictors.
header("Demo 24 - infinite-class motivation: empirical Rademacher complexity")
sample = np.linspace(-1, 1, 80)
rad_est = empirical_rademacher_linear(sample, radius=2.0, trials=300)
print("Estimated empirical Rademacher complexity:", round(rad_est, 6))
check_true(rad_est >= 0, "Rademacher complexity is nonnegative")
print("Learning-theory lesson: fitting random signs is a data-dependent capacity test.")
Demo 25: AI-scale interpretation
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 53
# Finite-class, realizable-case sample complexity from the union bound.
header("Demo 25 - AI-scale interpretation: finite-class sample complexity")
class_size = 128
eps = 0.05
conf_delta = 0.01
needed = finite_class_bound(class_size, eps, conf_delta)
print("Hypothesis count:", class_size)
print("Required samples:", int(np.ceil(needed)))
check_true(needed > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")