Exercises Notebook
Converted from exercises.ipynb for web reading.
Kernel Methods - Exercises
Ten exercises covering the core arc of notes.md and theory.ipynb.
Each exercise has a problem cell, a runnable scaffold, and a complete solution.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style="whitegrid", palette="colorblind")
HAS_SNS = True
except ImportError:
plt.style.use("seaborn-v0_8-whitegrid")
HAS_SNS = False
mpl.rcParams.update({
"figure.figsize": (10, 6),
"figure.dpi": 120,
"font.size": 13,
"axes.titlesize": 15,
"axes.labelsize": 13,
"xtick.labelsize": 11,
"ytick.labelsize": 11,
"legend.fontsize": 11,
"legend.framealpha": 0.85,
"lines.linewidth": 2.0,
"axes.spines.top": False,
"axes.spines.right": False,
"savefig.bbox": "tight",
"savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
COLORS = {
"primary": "#0077BB",
"secondary": "#EE7733",
"tertiary": "#009988",
"error": "#CC3311",
"neutral": "#555555",
"highlight": "#EE3377",
}
def header(title):
print("\n" + "=" * 78)
print(title)
print("=" * 78)
def check_true(name, condition):
ok = bool(condition)
print(f"{'PASS' if ok else 'FAIL'} - {name}")
if not ok:
raise AssertionError(name)
def check_close(name, actual, expected, tol=1e-8):
ok = np.allclose(actual, expected, atol=tol, rtol=tol)
print(f"{'PASS' if ok else 'FAIL'} - {name}")
if not ok:
raise AssertionError(name)
def linear_kernel(X, Z=None):
Z = X if Z is None else Z
return X @ Z.T
def polynomial_kernel(X, Z=None, degree=2, c=1.0):
Z = X if Z is None else Z
return (X @ Z.T + c) ** degree
def rbf_kernel(X, Z=None, gamma=1.0):
Z = X if Z is None else Z
X2 = np.sum(X*X, axis=1, keepdims=True)
Z2 = np.sum(Z*Z, axis=1, keepdims=True).T
D2 = np.maximum(X2 + Z2 - 2*X @ Z.T, 0.0)
return np.exp(-gamma * D2)
def center_kernel(K):
n = K.shape[0]
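    # H = I - (1/n) 1 1^T projects out the mean, so H K H centers the implicit features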
H = np.eye(n) - np.ones((n, n)) / n
return H @ K @ H
def psd_eigenvalues(K):
return np.linalg.eigvalsh((K + K.T) / 2)
print("Exercise helpers ready.")
Exercise 1 (Easy) - Build Gram matrices
Compute linear, polynomial, and RBF Gram matrices and check symmetry plus PSD eigenvalues.
Code cell 5
# Your Solution - Exercise 1: Build Gram matrices
answer = None
print("answer:", answer)
Code cell 6
# Solution
header("Exercise 1: Build Gram matrices")
X = np.array([[-1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, -1.0]])
K_lin = linear_kernel(X)
K_poly = polynomial_kernel(X, degree=2, c=1.0)
K_rbf = rbf_kernel(X, gamma=0.7)
for name, K in [("linear", K_lin), ("polynomial", K_poly), ("RBF", K_rbf)]:
check_true(f"{name} symmetric", np.allclose(K, K.T))
check_true(f"{name} PSD", psd_eigenvalues(K).min() > -1e-10)
print(name, "eigenvalues:", np.round(psd_eigenvalues(K), 6))
print("\nTakeaway: valid kernels produce symmetric PSD Gram matrices on every finite dataset.")
Exercise 2 (Easy) - Verify a polynomial feature map
Build an explicit feature map for the degree-2 polynomial kernel k(x, z) = (x . z + 1)^2 in two dimensions.
Code cell 8
# Your Solution - Exercise 2: Verify a polynomial feature map
answer = None
print("answer:", answer)
Code cell 9
# Solution
header("Exercise 2: Verify a polynomial feature map")
def phi_poly2(X):
x1, x2 = X[:, 0], X[:, 1]
return np.column_stack([x1**2, np.sqrt(2)*x1*x2, x2**2, np.sqrt(2)*x1, np.sqrt(2)*x2, np.ones(len(X))])
A = np.random.randn(6, 2)
B = np.random.randn(5, 2)
explicit = phi_poly2(A) @ phi_poly2(B).T
implicit = polynomial_kernel(A, B, degree=2, c=1.0)
check_close("explicit feature map matches kernel", explicit, implicit, tol=1e-10)
print("Feature dimension:", phi_poly2(A).shape[1])
print("\nTakeaway: polynomial kernels hide a concrete finite feature expansion.")
Exercise 3 (Easy) - Normalize and center a kernel
Normalize an RBF Gram matrix and center it with the centering matrix H = I - (1/n) 1 1^T, forming H K H.
Code cell 11
# Your Solution - Exercise 3: Normalize and center a kernel
answer = None
print("answer:", answer)
Code cell 12
# Solution
header("Exercise 3: Normalize and center a kernel")
X = np.random.randn(20, 3)
K = rbf_kernel(X, gamma=0.5)
d = np.sqrt(np.diag(K))
K_norm = K / (d[:, None] * d[None, :])
K_centered = center_kernel(K_norm)
check_close("normalized diagonal equals one", np.diag(K_norm), np.ones(len(X)), tol=1e-10)
check_true("centered row sums near zero", np.linalg.norm(K_centered.sum(axis=1)) < 1e-9)
check_true("centered column sums near zero", np.linalg.norm(K_centered.sum(axis=0)) < 1e-9)
print("\nTakeaway: normalization controls feature norms; centering removes the feature-space mean.")
Exercise 4 (Medium) - Kernel ridge regression
Fit noisy nonlinear data using exact RBF kernel ridge regression.
Code cell 14
# Your Solution - Exercise 4: Kernel ridge regression
answer = None
print("answer:", answer)
Code cell 15
# Solution
header("Exercise 4: Kernel ridge regression")
def krr_fit(X_train, y_train, gamma=2.0, lam=0.05):
K = rbf_kernel(X_train, gamma=gamma)
return np.linalg.solve(K + lam*np.eye(len(X_train)), y_train)
def krr_predict(X_train, alpha, X_test, gamma=2.0):
return rbf_kernel(X_test, X_train, gamma=gamma) @ alpha
X_train = np.linspace(-3, 3, 40)[:, None]
y_true = np.cos(1.7 * X_train[:, 0])
y_train = y_true + 0.12*np.random.randn(len(X_train))
alpha = krr_fit(X_train, y_train, gamma=1.8, lam=0.05)
pred = krr_predict(X_train, alpha, X_train, gamma=1.8)
rmse = np.sqrt(np.mean((pred - y_true)**2))
print("Training-grid RMSE against noiseless function:", rmse)
check_true("KRR fits the nonlinear signal", rmse < 0.25)
print("\nTakeaway: KRR solves nonlinear regression through one regularized linear system.")
Exercise 5 (Medium) - Kernel PCA
Center an RBF kernel and compute the first two kernel principal coordinates.
Code cell 17
# Your Solution - Exercise 5: Kernel PCA
answer = None
print("answer:", answer)
Code cell 18
# Solution
header("Exercise 5: Kernel PCA")
theta = np.linspace(0, 2*np.pi, 80, endpoint=False)
inner = np.column_stack([np.cos(theta), np.sin(theta)]) * 0.7
outer = np.column_stack([np.cos(theta), np.sin(theta)]) * 1.5
X = np.vstack([inner, outer]) + 0.03*np.random.randn(160, 2)
Kc = center_kernel(rbf_kernel(X, gamma=2.0))
eigvals, eigvecs = np.linalg.eigh((Kc + Kc.T)/2)
order = np.argsort(eigvals)[::-1]
coords = eigvecs[:, order[:2]] * np.sqrt(np.maximum(eigvals[order[:2]], 0.0))
check_true("kernel PCA coordinates are finite", np.all(np.isfinite(coords)))
check_true("top kernel eigenvalue positive", eigvals[order[0]] > 0)
print("Coordinate shape:", coords.shape)
print("\nTakeaway: kernel PCA is ordinary spectral analysis applied to a centered feature-space Gram matrix.")
Exercise 6 (Medium) - Gaussian process posterior
Compute posterior mean and variance for one-dimensional GP regression.
Code cell 20
# Your Solution - Exercise 6: Gaussian process posterior
answer = None
print("answer:", answer)
Code cell 21
# Solution
header("Exercise 6: Gaussian process posterior")
def gp_posterior(X_train, y_train, X_test, gamma=2.0, noise=0.1):
K = rbf_kernel(X_train, gamma=gamma) + noise**2*np.eye(len(X_train))
Ks = rbf_kernel(X_train, X_test, gamma=gamma)
L = np.linalg.cholesky(K + 1e-8*np.eye(len(X_train)))
alpha = np.linalg.solve(L.T, np.linalg.solve(L, y_train))
mean = Ks.T @ alpha
v = np.linalg.solve(L, Ks)
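    # the RBF kernel has k(x, x) = 1, so 1.0 below is the exact prior variance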
var = np.maximum(1.0 - np.sum(v*v, axis=0), 0.0)
return mean, var
X_train = np.linspace(-2.5, 2.5, 12)[:, None]
y_train = np.sin(2*X_train[:, 0]) + 0.1*np.random.randn(len(X_train))
X_test = np.linspace(-3, 3, 100)[:, None]
mean, var = gp_posterior(X_train, y_train, X_test)
check_true("posterior mean finite", np.all(np.isfinite(mean)))
check_true("posterior variance nonnegative", np.all(var >= -1e-10))
print("Variance range:", float(var.min()), float(var.max()))
print("\nTakeaway: a GP kernel is a covariance function that yields both prediction and uncertainty.")
Exercise 7 (Hard) - Random Fourier features
Approximate the RBF kernel and measure error as feature count increases.
Code cell 23
# Your Solution - Exercise 7: Random Fourier features
answer = None
print("answer:", answer)
Code cell 24
# Solution
header("Exercise 7: Random Fourier features")
def rff_features(X, D, gamma=1.0):
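    # omega ~ N(0, 2*gamma I): the spectral density of exp(-gamma ||x - z||^2) by Bochner's theorem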
omega = np.random.randn(D, X.shape[1]) * np.sqrt(2*gamma)
b = np.random.uniform(0, 2*np.pi, D)
return np.sqrt(2.0/D) * np.cos(X @ omega.T + b)
X = np.random.randn(60, 2)
K = rbf_kernel(X, gamma=0.8)
errors = []
for D in [25, 50, 100, 250, 500]:
Z = rff_features(X, D, gamma=0.8)
err = np.linalg.norm(K - Z@Z.T, "fro") / np.linalg.norm(K, "fro")
errors.append(err)
print(D, err)
check_true("RFF approximation is finite", np.all(np.isfinite(errors)))
check_true("largest feature budget is reasonably accurate", errors[-1] < 0.55)
print("\nTakeaway: random features trade exact kernel algebra for scalable approximate linear features.")
Exercise 8 (Hard) - Empirical neural tangent kernel
Compute a finite-difference NTK and verify PSD structure.
Code cell 26
# Your Solution - Exercise 8: Empirical neural tangent kernel
answer = None
print("answer:", answer)
Code cell 27
# Solution
header("Exercise 8: Empirical neural tangent kernel")
def unpack(theta, width=6):
idx = 0
W1 = theta[idx:idx+width].reshape(width, 1); idx += width
b1 = theta[idx:idx+width]; idx += width
W2 = theta[idx:idx+width].reshape(1, width); idx += width
b2 = theta[idx]
return W1, b1, W2, b2
def net(theta, X, width=6):
W1, b1, W2, b2 = unpack(theta, width)
H = np.tanh(X @ W1.T + b1)
return (H @ W2.T).ravel() + b2
def fd_jac(theta, X, eps=1e-5):
J = np.zeros((len(X), len(theta)))
for p in range(len(theta)):
step = np.zeros_like(theta)
step[p] = eps
J[:, p] = (net(theta + step, X) - net(theta - step, X)) / (2*eps)
return J
X = np.linspace(-2, 2, 16)[:, None]
width = 6
theta = 0.4*np.random.randn(width + width + width + 1)
J = fd_jac(theta, X)
Theta = J @ J.T
check_true("NTK matrix symmetric", np.allclose(Theta, Theta.T, atol=1e-9))
check_true("NTK matrix PSD", psd_eigenvalues(Theta).min() > -1e-8)
print("NTK shape:", Theta.shape)
print("\nTakeaway: the NTK is a kernel because it is a Gram matrix of parameter-gradient features.")
Exercise 9 - Nyström kernel approximation
Approximate an RBF Gram matrix from a small landmark set and measure the relative approximation error against the full matrix.
Code cell 29
# Your Solution
X = np.linspace(-2, 2, 12)[:, None]
landmarks = X[[0, 5, 11]]
print("Compute a Nystrom approximation for the RBF kernel.")
Code cell 30
# Solution
header("Exercise 9: Nystrom Approximation")
X = np.linspace(-2, 2, 12)[:, None]
landmarks = X[[0, 5, 11]]
K = rbf_kernel(X, gamma=0.7)
C = rbf_kernel(X, landmarks, gamma=0.7)
W = rbf_kernel(landmarks, gamma=0.7)
K_hat = C @ np.linalg.pinv(W) @ C.T
rel_error = np.linalg.norm(K - K_hat, "fro") / np.linalg.norm(K, "fro")
print("relative Frobenius error:", round(float(rel_error), 6))
check_true("approximation error is finite", np.isfinite(rel_error))
print("Takeaway: Nystrom trades exact kernel geometry for lower-rank computation.")
Exercise 10 - Centered kernel alignment
Compare the centered alignment between the label kernel and two input kernels, and interpret which kernel better matches the target geometry.
Code cell 32
# Your Solution
X = np.array([[-1.5], [-0.5], [0.5], [1.5]])
y = np.array([0.0, 1.0, 1.0, 0.0])
print("Compute centered kernel alignment.")
Code cell 33
# Solution
header("Exercise 10: Centered Kernel Alignment")
X = np.array([[-1.5], [-0.5], [0.5], [1.5]])
y = np.array([0.0, 1.0, 1.0, 0.0])
Y = np.outer(y - y.mean(), y - y.mean())
def alignment(K, Y):
Kc = center_kernel(K)
Yc = center_kernel(Y)
return float(np.sum(Kc * Yc) / (np.linalg.norm(Kc, "fro") * np.linalg.norm(Yc, "fro")))
lin = alignment(linear_kernel(X), Y)
rbf = alignment(rbf_kernel(X, gamma=1.0), Y)
print("linear alignment:", round(lin, 6))
print("rbf alignment:", round(rbf, 6))
check_true("RBF captures the nonmonotone labels better", rbf > lin)
print("Takeaway: alignment is a diagnostic for whether kernel geometry matches the prediction task.")