Theory Notebook
Converted from
theory.ipynbfor web reading.
Generalization Bounds
Generalization bounds quantify when empirical performance is likely to approximate population performance.
This notebook is the executable companion to notes.md. It uses synthetic samples so every cell is reproducible.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style="whitegrid", palette="colorblind")
HAS_SNS = True
except ImportError:
plt.style.use("seaborn-v0_8-whitegrid")
HAS_SNS = False
mpl.rcParams.update({
"figure.figsize": (10, 6),
"figure.dpi": 120,
"font.size": 13,
"axes.titlesize": 15,
"axes.labelsize": 13,
"xtick.labelsize": 11,
"ytick.labelsize": 11,
"legend.fontsize": 11,
"legend.framealpha": 0.85,
"lines.linewidth": 2.0,
"axes.spines.top": False,
"axes.spines.right": False,
"savefig.bbox": "tight",
"savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import math
COLORS = {
"primary": "#0077BB",
"secondary": "#EE7733",
"tertiary": "#009988",
"error": "#CC3311",
"neutral": "#555555",
"highlight": "#EE3377",
}
def header(title):
print("\n" + "=" * 72)
print(title)
print("=" * 72)
def check_true(condition, name):
ok = bool(condition)
print(f"{'PASS' if ok else 'FAIL'} - {name}")
assert ok, name
def check_close(value, target, tol=1e-8, name="value"):
ok = abs(float(value) - float(target)) <= tol
print(f"{'PASS' if ok else 'FAIL'} - {name}: got {float(value):.6f}, expected {float(target):.6f}")
assert ok, name
def finite_class_bound(h_size, epsilon, delta):
return (np.log(h_size) + np.log(1.0 / delta)) / epsilon
def hoeffding_gap(h_size, m, delta):
return np.sqrt((np.log(2.0 * h_size) + np.log(1.0 / delta)) / (2.0 * m))
def bias_variance(y_hats, y_true, noise_var):
mean_pred = np.mean(y_hats, axis=0)
bias2 = np.mean((mean_pred - y_true) ** 2)
variance = np.mean(np.var(y_hats, axis=0))
return float(bias2), float(variance), float(noise_var)
def empirical_rademacher_linear(x, radius=1.0, trials=200):
x = np.asarray(x, dtype=float)
vals = []
for _ in range(trials):
sigma = np.random.choice([-1.0, 1.0], size=x.shape[0])
vals.append(radius * abs(np.sum(sigma * x)) / x.shape[0])
return float(np.mean(vals))
print("Helper functions ready.")
Demo 1: from empirical risk to true risk
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 5
header("Demo 1 - from empirical risk to true risk: finite-class sample complexity")
h_size = 128
epsilon = 0.05
delta = 0.01
m = finite_class_bound(h_size, epsilon, delta)
print("Hypothesis count:", h_size)
print("Required samples:", int(np.ceil(m)))
check_true(m > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")
Demo 2: concentration as bridge
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 7
header("Demo 2 - concentration as bridge: empirical and true risk")
y_true = np.array([1, 0, 1, 1, 0, 0, 1, 0])
y_pred = np.array([1, 0, 0, 1, 0, 1, 1, 0])
empirical_risk = np.mean(y_true != y_pred)
print("Empirical risk:", empirical_risk)
check_close(empirical_risk, 0.25, name="classification empirical risk")
print("Learning-theory lesson: risk is a loss expectation, estimated from finite samples.")
Demo 3: uniform bounds
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 9
header("Demo 3 - uniform bounds: VC growth comparison")
m_values = np.arange(1, 11)
d = 3
vc_growth = np.array([sum(math.comb(int(m), i) for i in range(min(d, int(m)) + 1)) for m in m_values])
all_labelings = 2 ** m_values
print("m values:", m_values.tolist())
print("VC upper growth:", vc_growth.tolist())
check_true(np.all(vc_growth <= all_labelings), "growth function is bounded by all labelings")
fig, ax = plt.subplots()
ax.plot(m_values, all_labelings, color=COLORS["secondary"], label="All dichotomies")
ax.plot(m_values, vc_growth, color=COLORS["primary"], label="VC polynomial bound")
ax.set_title("Exponential labelings versus VC growth")
ax.set_xlabel("Sample size $m$")
ax.set_ylabel("Dichotomy count")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Learning-theory lesson: finite VC dimension turns exponential growth into polynomial growth.")
Demo 4: margin and norm bounds
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 11
header("Demo 4 - margin and norm bounds: bias-variance components")
x = np.linspace(-1, 1, 40)
y_true = x ** 2
predictions = []
for shift in [-0.15, -0.05, 0.05, 0.15]:
predictions.append((x + shift) ** 2 + 0.02 * shift)
y_hats = np.vstack(predictions)
bias2, variance, noise = bias_variance(y_hats, y_true, noise_var=0.01)
print("Bias^2:", round(bias2, 6))
print("Variance:", round(variance, 6))
print("Noise:", round(noise, 6))
check_true(bias2 >= 0 and variance >= 0 and noise >= 0, "components are nonnegative")
print("Learning-theory lesson: expected error can be decomposed into interpretable terms.")
Demo 5: why bounds can be loose but useful
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 13
header("Demo 5 - why bounds can be loose but useful: Hoeffding generalization gap")
h_size = 64
m = 500
delta = 0.05
gap = hoeffding_gap(h_size, m, delta)
print("Generalization gap bound:", round(gap, 4))
check_true(0 < gap < 1, "gap bound is a probability-scale quantity")
print("Learning-theory lesson: more samples reduce the confidence penalty.")
Demo 6: true risk and empirical risk
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 15
header("Demo 6 - true risk and empirical risk: empirical Rademacher complexity")
x = np.linspace(-1, 1, 80)
rad = empirical_rademacher_linear(x, radius=2.0, trials=300)
print("Estimated empirical Rademacher complexity:", round(rad, 6))
check_true(rad >= 0, "Rademacher complexity is nonnegative")
print("Learning-theory lesson: fitting random signs is a data-dependent capacity test.")
Demo 7: generalization gap
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 17
header("Demo 7 - generalization gap: finite-class sample complexity")
h_size = 128
epsilon = 0.05
delta = 0.01
m = finite_class_bound(h_size, epsilon, delta)
print("Hypothesis count:", h_size)
print("Required samples:", int(np.ceil(m)))
check_true(m > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")
Demo 8: uniform convergence
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 19
header("Demo 8 - uniform convergence: empirical and true risk")
y_true = np.array([1, 0, 1, 1, 0, 0, 1, 0])
y_pred = np.array([1, 0, 0, 1, 0, 1, 1, 0])
empirical_risk = np.mean(y_true != y_pred)
print("Empirical risk:", empirical_risk)
check_close(empirical_risk, 0.25, name="classification empirical risk")
print("Learning-theory lesson: risk is a loss expectation, estimated from finite samples.")
Demo 9: confidence parameter
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 21
header("Demo 9 - confidence parameter $\\delta$: VC growth comparison")
m_values = np.arange(1, 11)
d = 3
vc_growth = np.array([sum(math.comb(int(m), i) for i in range(min(d, int(m)) + 1)) for m in m_values])
all_labelings = 2 ** m_values
print("m values:", m_values.tolist())
print("VC upper growth:", vc_growth.tolist())
check_true(np.all(vc_growth <= all_labelings), "growth function is bounded by all labelings")
fig, ax = plt.subplots()
ax.plot(m_values, all_labelings, color=COLORS["secondary"], label="All dichotomies")
ax.plot(m_values, vc_growth, color=COLORS["primary"], label="VC polynomial bound")
ax.set_title("Exponential labelings versus VC growth")
ax.set_xlabel("Sample size $m$")
ax.set_ylabel("Dichotomy count")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Learning-theory lesson: finite VC dimension turns exponential growth into polynomial growth.")
Demo 10: bound template
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 23
header("Demo 10 - bound template: bias-variance components")
x = np.linspace(-1, 1, 40)
y_true = x ** 2
predictions = []
for shift in [-0.15, -0.05, 0.05, 0.15]:
predictions.append((x + shift) ** 2 + 0.02 * shift)
y_hats = np.vstack(predictions)
bias2, variance, noise = bias_variance(y_hats, y_true, noise_var=0.01)
print("Bias^2:", round(bias2, 6))
print("Variance:", round(variance, 6))
print("Noise:", round(noise, 6))
check_true(bias2 >= 0 and variance >= 0 and noise >= 0, "components are nonnegative")
print("Learning-theory lesson: expected error can be decomposed into interpretable terms.")
Demo 11: Hoeffding inequality recall
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 25
header("Demo 11 - Hoeffding inequality recall: Hoeffding generalization gap")
h_size = 64
m = 500
delta = 0.05
gap = hoeffding_gap(h_size, m, delta)
print("Generalization gap bound:", round(gap, 4))
check_true(0 < gap < 1, "gap bound is a probability-scale quantity")
print("Learning-theory lesson: more samples reduce the confidence penalty.")
Demo 12: union bound over
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 27
header("Demo 12 - union bound over $\\mathcal{H}$: empirical Rademacher complexity")
x = np.linspace(-1, 1, 80)
rad = empirical_rademacher_linear(x, radius=2.0, trials=300)
print("Estimated empirical Rademacher complexity:", round(rad, 6))
check_true(rad >= 0, "Rademacher complexity is nonnegative")
print("Learning-theory lesson: fitting random signs is a data-dependent capacity test.")
Demo 13: realizable bound
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 29
header("Demo 13 - realizable bound: finite-class sample complexity")
h_size = 128
epsilon = 0.05
delta = 0.01
m = finite_class_bound(h_size, epsilon, delta)
print("Hypothesis count:", h_size)
print("Required samples:", int(np.ceil(m)))
check_true(m > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")
Demo 14: agnostic bound
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 31
header("Demo 14 - agnostic bound: empirical and true risk")
y_true = np.array([1, 0, 1, 1, 0, 0, 1, 0])
y_pred = np.array([1, 0, 0, 1, 0, 1, 1, 0])
empirical_risk = np.mean(y_true != y_pred)
print("Empirical risk:", empirical_risk)
check_close(empirical_risk, 0.25, name="classification empirical risk")
print("Learning-theory lesson: risk is a loss expectation, estimated from finite samples.")
Demo 15: sample-size calculator
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 33
header("Demo 15 - sample-size calculator: VC growth comparison")
m_values = np.arange(1, 11)
d = 3
vc_growth = np.array([sum(math.comb(int(m), i) for i in range(min(d, int(m)) + 1)) for m in m_values])
all_labelings = 2 ** m_values
print("m values:", m_values.tolist())
print("VC upper growth:", vc_growth.tolist())
check_true(np.all(vc_growth <= all_labelings), "growth function is bounded by all labelings")
fig, ax = plt.subplots()
ax.plot(m_values, all_labelings, color=COLORS["secondary"], label="All dichotomies")
ax.plot(m_values, vc_growth, color=COLORS["primary"], label="VC polynomial bound")
ax.set_title("Exponential labelings versus VC growth")
ax.set_xlabel("Sample size $m$")
ax.set_ylabel("Dichotomy count")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Learning-theory lesson: finite VC dimension turns exponential growth into polynomial growth.")
Demo 16: VC bound
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 35
header("Demo 16 - VC bound: bias-variance components")
x = np.linspace(-1, 1, 40)
y_true = x ** 2
predictions = []
for shift in [-0.15, -0.05, 0.05, 0.15]:
predictions.append((x + shift) ** 2 + 0.02 * shift)
y_hats = np.vstack(predictions)
bias2, variance, noise = bias_variance(y_hats, y_true, noise_var=0.01)
print("Bias^2:", round(bias2, 6))
print("Variance:", round(variance, 6))
print("Noise:", round(noise, 6))
check_true(bias2 >= 0 and variance >= 0 and noise >= 0, "components are nonnegative")
print("Learning-theory lesson: expected error can be decomposed into interpretable terms.")
Demo 17: Rademacher preview
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 37
header("Demo 17 - Rademacher preview: Hoeffding generalization gap")
h_size = 64
m = 500
delta = 0.05
gap = hoeffding_gap(h_size, m, delta)
print("Generalization gap bound:", round(gap, 4))
check_true(0 < gap < 1, "gap bound is a probability-scale quantity")
print("Learning-theory lesson: more samples reduce the confidence penalty.")
Demo 18: covering number preview
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 39
header("Demo 18 - covering number preview: empirical Rademacher complexity")
x = np.linspace(-1, 1, 80)
rad = empirical_rademacher_linear(x, radius=2.0, trials=300)
print("Estimated empirical Rademacher complexity:", round(rad, 6))
check_true(rad >= 0, "Rademacher complexity is nonnegative")
print("Learning-theory lesson: fitting random signs is a data-dependent capacity test.")
Demo 19: compression preview
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 41
header("Demo 19 - compression preview: finite-class sample complexity")
h_size = 128
epsilon = 0.05
delta = 0.01
m = finite_class_bound(h_size, epsilon, delta)
print("Hypothesis count:", h_size)
print("Required samples:", int(np.ceil(m)))
check_true(m > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")
Demo 20: stability preview
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 43
header("Demo 20 - stability preview: empirical and true risk")
y_true = np.array([1, 0, 1, 1, 0, 0, 1, 0])
y_pred = np.array([1, 0, 0, 1, 0, 1, 1, 0])
empirical_risk = np.mean(y_true != y_pred)
print("Empirical risk:", empirical_risk)
check_close(empirical_risk, 0.25, name="classification empirical risk")
print("Learning-theory lesson: risk is a loss expectation, estimated from finite samples.")
Demo 21: margins for classifiers
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 45
header("Demo 21 - margins for classifiers: VC growth comparison")
m_values = np.arange(1, 11)
d = 3
vc_growth = np.array([sum(math.comb(int(m), i) for i in range(min(d, int(m)) + 1)) for m in m_values])
all_labelings = 2 ** m_values
print("m values:", m_values.tolist())
print("VC upper growth:", vc_growth.tolist())
check_true(np.all(vc_growth <= all_labelings), "growth function is bounded by all labelings")
fig, ax = plt.subplots()
ax.plot(m_values, all_labelings, color=COLORS["secondary"], label="All dichotomies")
ax.plot(m_values, vc_growth, color=COLORS["primary"], label="VC polynomial bound")
ax.set_title("Exponential labelings versus VC growth")
ax.set_xlabel("Sample size $m$")
ax.set_ylabel("Dichotomy count")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Learning-theory lesson: finite VC dimension turns exponential growth into polynomial growth.")
Demo 22: perceptron and SVM intuition
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 47
header("Demo 22 - perceptron and SVM intuition: bias-variance components")
x = np.linspace(-1, 1, 40)
y_true = x ** 2
predictions = []
for shift in [-0.15, -0.05, 0.05, 0.15]:
predictions.append((x + shift) ** 2 + 0.02 * shift)
y_hats = np.vstack(predictions)
bias2, variance, noise = bias_variance(y_hats, y_true, noise_var=0.01)
print("Bias^2:", round(bias2, 6))
print("Variance:", round(variance, 6))
print("Noise:", round(noise, 6))
check_true(bias2 >= 0 and variance >= 0 and noise >= 0, "components are nonnegative")
print("Learning-theory lesson: expected error can be decomposed into interpretable terms.")
Demo 23: norm-based neural network bounds
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 49
header("Demo 23 - norm-based neural network bounds: Hoeffding generalization gap")
h_size = 64
m = 500
delta = 0.05
gap = hoeffding_gap(h_size, m, delta)
print("Generalization gap bound:", round(gap, 4))
check_true(0 < gap < 1, "gap bound is a probability-scale quantity")
print("Learning-theory lesson: more samples reduce the confidence penalty.")
Demo 24: PAC-Bayes preview
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 51
header("Demo 24 - PAC-Bayes preview: empirical Rademacher complexity")
x = np.linspace(-1, 1, 80)
rad = empirical_rademacher_linear(x, radius=2.0, trials=300)
print("Estimated empirical Rademacher complexity:", round(rad, 6))
check_true(rad >= 0, "Rademacher complexity is nonnegative")
print("Learning-theory lesson: fitting random signs is a data-dependent capacity test.")
Demo 25: calibration to practice
This demo turns the approved TOC concept into a small executable learning-theory object.
Code cell 53
header("Demo 25 - calibration to practice: finite-class sample complexity")
h_size = 128
epsilon = 0.05
delta = 0.01
m = finite_class_bound(h_size, epsilon, delta)
print("Hypothesis count:", h_size)
print("Required samples:", int(np.ceil(m)))
check_true(m > 0, "sample complexity is positive")
print("Learning-theory lesson: logarithmic class-size dependence is powerful but not magic.")