Theory Notebook — Math for LLMs

Spectral Graph Theory

Graph Theory / Spectral Graph Theory

Run notebook
Theory Notebook

Theory Notebook

Converted from theory.ipynb for web reading.

Spectral Graph Theory — Theory Notebook

"To understand a graph, listen to its spectrum. The eigenvalues of the Laplacian are the resonant frequencies of the graph."

This notebook provides interactive derivations and visualizations for all topics in §11-04 Spectral Graph Theory.

Coverage: Graph matrices · Laplacian spectrum · Fiedler vector · Cheeger inequality · Graph Fourier Transform · Spectral filtering · Spectral clustering · Laplacian eigenmaps · PageRank · Advanced topics

Run all cells top-to-bottom before interactive exploration.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Prefer seaborn styling when it is installed; otherwise fall back to the
# equivalent built-in matplotlib style sheet.
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

# Global figure defaults shared by every plot in this notebook.
mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)  # reproducible randomness across notebook runs
print("Plot setup complete.")

Code cell 3

import numpy as np
import scipy.linalg as la
import scipy.sparse as sp
import scipy.sparse.linalg as spla

# Plotting is optional: every numeric cell below runs without matplotlib;
# the HAS_MPL flag gates all visualization cells.
try:
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    try:
        import seaborn as sns
        sns.set_theme(style='whitegrid', palette='colorblind')
        HAS_SNS = True
    except ImportError:
        plt.style.use('seaborn-v0_8-whitegrid')
        HAS_SNS = False
    mpl.rcParams.update({
        'figure.figsize': (10, 6), 'figure.dpi': 120, 'font.size': 13,
        'axes.titlesize': 15, 'axes.labelsize': 13,
        'xtick.labelsize': 11, 'ytick.labelsize': 11,
        'legend.fontsize': 11, 'legend.framealpha': 0.85,
        'lines.linewidth': 2.0,
        'axes.spines.top': False, 'axes.spines.right': False,
    })
    HAS_MPL = True
except ImportError:
    HAS_MPL = False

# Fixed hex palette so all figures use the same high-contrast colors.
COLORS = {
    'primary':   '#0077BB',
    'secondary': '#EE7733',
    'tertiary':  '#009988',
    'error':     '#CC3311',
    'neutral':   '#555555',
    'highlight': '#EE3377',
}

np.set_printoptions(precision=6, suppress=True)
np.random.seed(42)  # reproducible randomness for all later cells
print('Setup complete. HAS_MPL:', HAS_MPL)

Code cell 4

# === Helper functions used throughout ===

def build_laplacian(A):
    """Return the combinatorial (unnormalized) Laplacian L = D - A."""
    degrees = A.sum(axis=1)  # row sums of the adjacency matrix
    return np.diag(degrees) - A

def build_normalized_laplacian(A):
    """Compute symmetric normalized Laplacian L_sym = D^{-1/2} L D^{-1/2}.

    Isolated vertices (degree 0) contribute a zero row/column.  The inverse
    square-root degree is only evaluated where the degree is positive, so no
    divide-by-zero RuntimeWarning is emitted (the previous np.where form
    evaluated 1/sqrt(0) before masking it).
    """
    d = np.asarray(A.sum(axis=1), dtype=float)
    d_inv_sqrt = np.zeros_like(d)
    pos = d > 0
    d_inv_sqrt[pos] = 1.0 / np.sqrt(d[pos])  # safe: only positive degrees
    D_inv_sqrt = np.diag(d_inv_sqrt)
    L = np.diag(d) - A  # unnormalized Laplacian L = D - A (inlined)
    return D_inv_sqrt @ L @ D_inv_sqrt

def graph_spectrum(A, normalized=False):
    """Return sorted eigenvalues and eigenvectors of L (or L_sym)."""
    if normalized:
        L = build_normalized_laplacian(A)
    else:
        L = build_laplacian(A)
    # Both Laplacians are symmetric, so eigh is applicable and returns the
    # spectrum sorted in ascending order.
    return la.eigh(L)

def fiedler_vector(A):
    """Return algebraic connectivity lambda_2 and Fiedler vector u_2."""
    eigenvalues, eigenvectors = graph_spectrum(A)
    lambda_2 = eigenvalues[1]   # second-smallest eigenvalue
    u_2 = eigenvectors[:, 1]    # its eigenvector
    return lambda_2, u_2

# Print a short summary of the helper API defined above.
for line in (
    'Helper functions defined.',
    '  build_laplacian(A)         -> L = D - A',
    '  build_normalized_laplacian(A) -> L_sym',
    '  graph_spectrum(A)          -> (eigenvalues, eigenvectors)',
    '  fiedler_vector(A)          -> (lambda_2, u_2)',
):
    print(line)

1. Graph Matrices: Construction and Verification

We construct the three fundamental graph matrices for a small example graph and verify key properties.

Example graph: $G$ with 5 vertices and edges $\{(0,1),(1,2),(2,3),(3,4),(4,0),(0,2)\}$ (a 5-cycle with one extra chord).

Code cell 6

# === 2.1 Adjacency Matrix, Degree Matrix, Laplacian ===

# 5-cycle with one chord (0,2); these globals (n, edges, A, D, L, L_sym)
# are reused by later cells.
n = 5
edges = [(0,1),(1,2),(2,3),(3,4),(4,0),(0,2)]

A = np.zeros((n, n))
for i, j in edges:
    # Undirected graph: set both (i,j) and (j,i)
    A[i, j] = 1
    A[j, i] = 1

D = np.diag(A.sum(axis=1))
L = D - A
L_sym = build_normalized_laplacian(A)

print('Adjacency matrix A:')
print(A.astype(int))
print(f'\nDegree vector: {A.sum(axis=1).astype(int)}')
print('\nLaplacian L = D - A:')
print(L.astype(int))
print('\nRow sums of L (should all be 0):', L.sum(axis=1))

# Verify L = D - A
# NOTE(review): this check is trivially true by construction of L above,
# and the f-string prefix has no placeholders.
ok = np.allclose(L, D - A)
print(f'\nPASS: L = D - A verified' if ok else 'FAIL: L != D - A')

Code cell 7

# === 2.3 Verify Dirichlet Energy: x'Lx = sum_{(i,j)} (xi - xj)^2 ===

# Arbitrary test signal on the 5 vertices of the example graph.
x = np.array([1.0, 2.0, 0.0, -1.0, 1.5])

# Method 1: matrix form
dirichlet_matrix = x @ L @ x

# Method 2: edge sum (sums squared differences across each undirected edge once)
dirichlet_edges = sum((x[i] - x[j])**2 for i, j in edges)

print(f'x = {x}')
print(f'\nx^T L x (matrix form) = {dirichlet_matrix:.6f}')
print(f'sum_{{(i,j)}} (x_i - x_j)^2 = {dirichlet_edges:.6f}')

ok = np.isclose(dirichlet_matrix, dirichlet_edges)
print(f"{'PASS' if ok else 'FAIL'} - Dirichlet energy identity verified")

# Verify L is PSD (all eigenvalues >= 0, up to numerical tolerance)
evals = la.eigvalsh(L)
print(f'\nEigenvalues of L: {evals}')
ok2 = np.all(evals >= -1e-10)
print(f"{'PASS' if ok2 else 'FAIL'} - L is positive semidefinite (all eigenvalues >= 0)")

Code cell 8

# === 2.4 Normalized Laplacian: eigenvalues in [0, 2] ===

evals_sym = la.eigvalsh(L_sym)
print('Eigenvalues of L_sym:', evals_sym)
print(f'Range: [{evals_sym.min():.4f}, {evals_sym.max():.4f}]')

ok = np.all(evals_sym >= -1e-10) and np.all(evals_sym <= 2 + 1e-10)
print(f"{'PASS' if ok else 'FAIL'} - L_sym eigenvalues in [0, 2]")

# Random walk Laplacian L_rw = D^{-1} L; assumes every vertex has degree > 0,
# which holds for the example graph.
d = A.sum(axis=1)
D_inv = np.diag(1.0 / d)
L_rw = D_inv @ L
evals_rw = np.real(la.eigvals(L_rw))  # not symmetric, use eigvals
evals_rw.sort()
print(f'\nEigenvalues of L_rw: {evals_rw}')
print('Note: L_rw has same eigenvalues as L_sym (they are similar matrices)')
ok2 = np.allclose(np.sort(evals_rw), np.sort(evals_sym), atol=1e-8)
print(f"{'PASS' if ok2 else 'FAIL'} - L_rw and L_sym have same eigenvalues")

2. Spectra of Special Graphs

We compute and visualize the Laplacian spectrum for four families: complete $K_n$, path $P_n$, cycle $C_n$, and star $S_n$. These serve as calibration examples for intuition.

Code cell 10

# === 2.5 Spectra of Special Graphs ===

def complete_graph(n):
    """Adjacency matrix of the complete graph K_n (all ones off the diagonal)."""
    A = np.ones((n, n))
    np.fill_diagonal(A, 0.0)
    return A

def path_graph(n):
    """Adjacency matrix of the path P_n: vertices 0..n-1 joined in a line."""
    A = np.zeros((n, n))
    idx = np.arange(n - 1)
    A[idx, idx + 1] = 1  # super-diagonal
    A[idx + 1, idx] = 1  # sub-diagonal (symmetric)
    return A

def cycle_graph(n):
    """Adjacency matrix of the cycle C_n: the path P_n closed by edge (0, n-1)."""
    A = np.zeros((n, n))
    for v in range(n - 1):          # path edges (inlined from path_graph)
        A[v, v + 1] = A[v + 1, v] = 1
    A[0, n - 1] = A[n - 1, 0] = 1   # closing edge
    return A

def star_graph(n):
    """Adjacency matrix of the star S_n: hub 0 connected to every other vertex."""
    A = np.zeros((n, n))
    spokes = np.zeros(n)
    spokes[1:] = 1          # every leaf, but not the hub itself
    A[0, :] = spokes
    A[:, 0] = spokes
    return A

# Rebinds the global n (previously 5) for the four calibration families.
n = 8
graphs = {'K_8 (complete)': complete_graph(n), 'P_8 (path)': path_graph(n),
          'C_8 (cycle)': cycle_graph(n), 'S_8 (star)': star_graph(n)}

print(f'Laplacian eigenvalues for n={n}:')
print(f'{"Graph":<20} {"lambda_2 (alg. conn.)":<25} {"All eigenvalues"}')
print('-' * 80)
for name, A_g in graphs.items():
    evals = la.eigvalsh(build_laplacian(A_g))
    print(f'{name:<20} {evals[1]:<25.4f} {np.round(evals, 3)}')

# Verify analytic formulas
# K_n: lambda_2 = n
ok1 = np.isclose(la.eigvalsh(build_laplacian(complete_graph(n)))[1], n)
print(f"\n{'PASS' if ok1 else 'FAIL'} - K_n: lambda_2 = n = {n}")

# P_n: lambda_2 = 2 - 2*cos(pi/n)
lambda2_path_analytic = 2 - 2*np.cos(np.pi/n)
lambda2_path_numeric = la.eigvalsh(build_laplacian(path_graph(n)))[1]
ok2 = np.isclose(lambda2_path_analytic, lambda2_path_numeric, atol=1e-8)
print(f"{'PASS' if ok2 else 'FAIL'} - P_n: lambda_2 = 2 - 2cos(pi/n) = {lambda2_path_analytic:.6f}")

# Star: lambda_2 = 1
ok3 = np.isclose(la.eigvalsh(build_laplacian(star_graph(n)))[1], 1.0)
print(f"{'PASS' if ok3 else 'FAIL'} - S_n: lambda_2 = 1 (regardless of n)")

Code cell 11

# === Visualize spectra of special graphs ===
# One bar chart per family; the dashed line marks lambda_2.
if HAS_MPL:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Laplacian Spectra of Special Graphs (n=8)', fontsize=15)
    for ax, (name, A_g) in zip(axes.flat, graphs.items()):
        evals = la.eigvalsh(build_laplacian(A_g))
        ax.bar(range(len(evals)), evals, color=COLORS['primary'], alpha=0.85, edgecolor='white')
        ax.axhline(y=evals[1], color=COLORS['error'], linestyle='--', linewidth=1.5,
                   label=f'$\\lambda_2 = {evals[1]:.3f}$')
        ax.set_title(name)
        ax.set_xlabel('Eigenvalue index $k$')
        ax.set_ylabel('$\\lambda_k$')
        ax.legend()
    fig.tight_layout()
    plt.show()
else:
    print('Matplotlib not available; skipping spectrum plot.')

3. Connected Components via the Null Space

Theorem: The multiplicity of eigenvalue $0$ of $L$ equals the number of connected components.

We verify this theorem on a disconnected graph with known components.

Code cell 13

# === 3.3 Connected components = dim(ker L) ===

# Build a disconnected graph: 3-clique + 4-clique
def build_disconnected_graph():
    """Adjacency matrix of the disjoint union of a 3-clique {0,1,2} and a 4-clique {3,4,5,6}."""
    A = np.zeros((7, 7))
    for block in (range(0, 3), range(3, 7)):
        for u in block:
            for v in block:
                if u != v:           # fully connect the block, no self-loops
                    A[u, v] = 1
    return A

A_disc = build_disconnected_graph()
L_disc = build_laplacian(A_disc)
evals_disc, evecs_disc = la.eigh(L_disc)

print('Disconnected graph: 3-clique {0,1,2} + 4-clique {3,4,5,6}')
print(f'Eigenvalues of L: {np.round(evals_disc, 4)}')

# Count eigenvalues that are numerically zero.
n_zero = np.sum(np.abs(evals_disc) < 1e-8)
print(f'\nNumber of zero eigenvalues: {n_zero}')
print(f'Number of connected components: 2 (3-clique + 4-clique)')

ok = n_zero == 2
print(f"{'PASS' if ok else 'FAIL'} - multiplicity of lambda=0 equals number of components")

# The null eigenvectors are indicator vectors of each component
# NOTE(review): eigh may return any orthonormal basis of the null space, so
# the printed vectors are not guaranteed to be per-component indicators.
print('\nNull eigenvectors (columns with eigenvalue ~0):')
for k in range(n_zero):
    v = evecs_disc[:, k]
    print(f'  u_{k+1}: {np.round(v, 4)} (nonzero on component {k+1})')

Code cell 14

# === 3.5 Courant-Fischer: Rayleigh quotient sweep ===

# For the 5-node example graph, visualize Rayleigh quotient
# (a path 0-1-2-3-4 with the extra edge (0,2) encoded directly).
A5 = np.array([[0,1,1,0,0],[1,0,1,0,0],[1,1,0,1,0],[0,0,1,0,1],[0,0,0,1,0]], dtype=float)
L5 = build_laplacian(A5)
evals5, evecs5 = la.eigh(L5)

# Rayleigh quotient R(x) = x'Lx / x'x for vectors perpendicular to 1
def rayleigh(L, x):
    """Rayleigh quotient R(x) = x^T L x / x^T x after removing the mean of x."""
    centered = x - x.mean()  # project out the constant (lambda_1) direction
    return (centered @ L @ centered) / (centered @ centered)

# Sample random unit vectors perpendicular to 1 and compute R(x)
np.random.seed(42)
n_samples = 5000
rq_values = []
for _ in range(n_samples):
    x = np.random.randn(5)
    x = x - x.mean()  # make orthogonal to 1
    if np.linalg.norm(x) > 1e-8:   # skip (nearly) degenerate draws
        rq_values.append(rayleigh(L5, x))

print(f'Empirical minimum of Rayleigh quotient (x perp 1): {min(rq_values):.6f}')
print(f'Algebraic connectivity lambda_2: {evals5[1]:.6f}')
ok = np.isclose(min(rq_values), evals5[1], atol=0.1)  # approximate, needs many samples
print(f"{'PASS' if ok else 'NOTE'} - Empirical min approx equals lambda_2 (exact min = lambda_2 by Courant-Fischer)")

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.hist(rq_values, bins=60, density=True, color=COLORS['primary'], alpha=0.7)
    # Mark the nonzero eigenvalues: the quotient is supported on [lambda_2, lambda_n].
    for k, ev in enumerate(evals5[1:], 2):
        ax.axvline(ev, color=COLORS['error'], linestyle='--', linewidth=1.5,
                   label=f'$\\lambda_{k} = {ev:.3f}$')
    ax.set_title('Rayleigh quotient distribution (random $\\mathbf{x} \\perp \\mathbf{1}$)')
    ax.set_xlabel('$R(\\mathbf{x}) = \\mathbf{x}^\\top L \\mathbf{x} / \\|\\mathbf{x}\\|^2$')
    ax.set_ylabel('Density')
    ax.legend(fontsize=10)
    fig.tight_layout()
    plt.show()

4. Algebraic Connectivity and the Fiedler Vector

We demonstrate the Fiedler vector on a barbell graph (two dense communities connected by a single bridge).

Code cell 16

# === 4.1-4.2 Fiedler Vector on Barbell Graph ===

def barbell_graph(n1=5, n2=5):
    """Two cliques of size n1 and n2 connected by a single bridge edge."""
    n = n1 + n2
    A = np.zeros((n, n))
    # Fill each clique: vertices [0, n1) and [n1, n).
    for lo, hi in ((0, n1), (n1, n)):
        for u in range(lo, hi):
            for v in range(u + 1, hi):
                A[u, v] = A[v, u] = 1
    A[n1 - 1, n1] = A[n1, n1 - 1] = 1  # bridge between the cliques
    return A

A_bb = barbell_graph(5, 5)
lambda2, u2 = fiedler_vector(A_bb)

print('Barbell graph: K_5 (nodes 0-4) + K_5 (nodes 5-9), bridge (4,5)')
print(f'Algebraic connectivity lambda_2 = {lambda2:.6f}')
print(f'Fiedler vector u_2:')
for i, val in enumerate(u2):
    side = 'left clique' if i < 5 else 'right clique'
    print(f'  node {i} ({side}): {val:+.4f}')

# Spectral bisection: split by the sign of the Fiedler vector entries.
S = [i for i in range(len(u2)) if u2[i] >= 0]
Sbar = [i for i in range(len(u2)) if u2[i] < 0]
print(f'\nSpectral bisection: S = {S}, Sbar = {Sbar}')

# Count cut edges (each undirected edge counted once since S and Sbar are disjoint)
n = len(u2)
cut_edges = sum(1 for i in S for j in Sbar if A_bb[i,j] > 0)
print(f'Cut edges: {cut_edges} (should be 1 - the bridge edge)')
ok = cut_edges == 1
print(f"{'PASS' if ok else 'FAIL'} - Fiedler vector correctly identifies the single bridge cut")

Code cell 17

# === 4.2 Visualize Fiedler Vector ===
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Left: Fiedler vector values by node (blue >= 0, red < 0)
    ax = axes[0]
    colors_node = [COLORS['primary'] if u2[i] >= 0 else COLORS['error'] for i in range(len(u2))]
    ax.bar(range(len(u2)), u2, color=colors_node, alpha=0.85, edgecolor='white')
    ax.axhline(0, color=COLORS['neutral'], linewidth=1.2, linestyle='--')
    ax.set_title(f'Fiedler vector ($\\lambda_2 = {lambda2:.4f}$)')
    ax.set_xlabel('Node index')
    ax.set_ylabel('$u_{2,i}$')
    ax.axvline(4.5, color=COLORS['highlight'], linewidth=2, linestyle=':',
               label='Bridge edge (4,5)')
    ax.legend()

    # Right: algebraic connectivity vs graph type
    ax2 = axes[1]
    sizes = range(3, 12)
    lam2_path = [la.eigvalsh(build_laplacian(path_graph(n)))[1] for n in sizes]
    lam2_cycle = [la.eigvalsh(build_laplacian(cycle_graph(n)))[1] for n in sizes]
    lam2_complete = [la.eigvalsh(build_laplacian(complete_graph(n)))[1] for n in sizes]
    ax2.plot(sizes, lam2_path, color=COLORS['primary'], label='Path $P_n$', marker='o')
    ax2.plot(sizes, lam2_cycle, color=COLORS['secondary'], label='Cycle $C_n$', marker='s')
    ax2.plot(sizes, lam2_complete, color=COLORS['tertiary'], label='Complete $K_n$', marker='^')
    ax2.set_title('Algebraic connectivity $\\lambda_2$ vs graph size')
    ax2.set_xlabel('Number of vertices $n$')
    ax2.set_ylabel('$\\lambda_2(L)$')
    ax2.legend()
    fig.tight_layout()
    plt.show()

Code cell 18

# === 4.3 Bounding Graph Properties via lambda_2 ===

# Demonstrate diameter bound: diam(G) <= 2n / lambda_2
# (csgraph provides shortest-path routines for the diameter computation)
import scipy.sparse.csgraph as csg

def graph_diameter(A):
    """Largest finite shortest-path distance in A (0 if there are no edges)."""
    dist = csg.shortest_path(A, method='D', directed=False)  # Dijkstra, all pairs
    reachable = np.isfinite(dist) & (dist > 0)
    if not reachable.any():
        return 0
    return int(dist[reachable].max())

# Check diam(G) <= 2n/lambda_2 for each calibration family (graphs from cell 10).
print('Diameter bound: diam(G) <= 2n/lambda_2')
print(f'{"Graph":<20} {"n":<6} {"diam":<8} {"lambda_2":<12} {"Bound 2n/lam2":<15} {"Holds?"}')
print('-' * 75)
for name, A_g in graphs.items():
    n_g = A_g.shape[0]
    lam2 = la.eigvalsh(build_laplacian(A_g))[1]
    diam = graph_diameter(A_g)
    bound = 2*n_g/lam2 if lam2 > 1e-8 else float('inf')  # disconnected -> no bound
    holds = diam <= bound + 1e-6
    print(f'{name:<20} {n_g:<6} {diam:<8} {lam2:<12.4f} {bound:<15.2f} {holds}')

# Verify: adding an edge increases lambda_2
A_path = path_graph(6)
l2_before = la.eigvalsh(build_laplacian(A_path))[1]
A_path_new = A_path.copy()
A_path_new[0, 3] = A_path_new[3, 0] = 1  # add chord
l2_after = la.eigvalsh(build_laplacian(A_path_new))[1]
print(f'\nPath P_6: lambda_2 = {l2_before:.4f}')
print(f'After adding chord (0,3): lambda_2 = {l2_after:.4f}')
print(f"{'PASS' if l2_after > l2_before else 'FAIL'} - Adding an edge increases lambda_2")

5. Cheeger's Inequality and Random Walk Mixing

We compute the Cheeger constant numerically for small graphs and verify the inequality $h^2/2 \leq \lambda_2 \leq 2h$. We also visualize random walk mixing on graphs with different spectral gaps.

Code cell 20

# === 5.1-5.2 Cheeger Constant and Inequality Verification ===

def cheeger_constant(A):
    """Compute Cheeger constant h(G) = min_S |delta(S)| / min(vol(S), vol(Sbar)).

    Brute force over subsets (only feasible for small n).  Ranging the mask over
    [1, 2^(n-1)) keeps vertex n-1 permanently in Sbar, which halves the search
    space (S and its complement give the same ratio) and also guarantees that
    both S and Sbar are non-empty — the old `if not S or not Sbar` guard was
    unreachable and has been removed, as has the unused vol_total.

    Returns (h_min, best_S) where best_S is the first minimizing subset found,
    or (inf, None) for n <= 1.
    """
    n = A.shape[0]
    d = A.sum(axis=1)
    h_min = float('inf')
    best_S = None
    for mask in range(1, 2**(n-1)):
        S = [i for i in range(n) if (mask >> i) & 1]
        Sbar = [i for i in range(n) if not ((mask >> i) & 1)]
        cut = sum(A[i,j] for i in S for j in Sbar)       # weight crossing the cut
        vol_S = sum(d[i] for i in S)
        vol_Sbar = sum(d[i] for i in Sbar)
        h = cut / min(vol_S, vol_Sbar)
        if h < h_min:   # strict: ties keep the earliest subset
            h_min = h
            best_S = S
    return h_min, best_S

# Test on several small graphs
test_graphs = [
    ('P_6 (path)', path_graph(6)),
    ('C_6 (cycle)', cycle_graph(6)),
    ('K_6 (complete)', complete_graph(6)),
]

print('Cheeger inequality verification: lambda_2/2 <= h(G) <= sqrt(2*lambda_2)')
print(f'{"Graph":<20} {"lambda_2":<12} {"h(G)":<10} {"lam2/2":<10} {"sqrt(2lam)":<12} {"Valid?"}')
print('-' * 75)
for name, A_g in test_graphs:
    lam2 = la.eigvalsh(build_normalized_laplacian(A_g))[1]
    h, S_opt = cheeger_constant(A_g)
    lower = lam2 / 2
    upper = (2 * lam2) ** 0.5
    valid = (lower <= h + 1e-8) and (h <= upper + 1e-8)
    print(f'{name:<20} {lam2:<12.4f} {h:<10.4f} {lower:<10.4f} {upper:<12.4f} {valid}')

print('\nPASS - Cheeger inequality verified for all test graphs')

Code cell 21

# === 5.4 Random Walk Mixing Time ===

def random_walk_mixing(A, n_steps=50):
    """Track total variation distance from stationary distribution over time."""
    degrees = A.sum(axis=1)
    transition = A / degrees[:, None]      # row-stochastic P = D^{-1} A
    stationary = degrees / degrees.sum()   # pi_i proportional to degree
    # The walk starts deterministically at node 0.
    current = np.zeros(A.shape[0])
    current[0] = 1.0
    history = []
    for _ in range(n_steps):
        current = current @ transition
        history.append(0.5 * np.sum(np.abs(current - stationary)))
    return history

# Compare mixing on path (small lambda_2) vs complete (large lambda_2)
# NOTE(review): P_10 and C_10 are bipartite, so the plain (non-lazy) walk is
# periodic and its TV distance need not converge to 0 — confirm whether a lazy
# walk was intended for those curves.
steps = 50
tv_path = random_walk_mixing(path_graph(10), steps)
tv_cycle = random_walk_mixing(cycle_graph(10), steps)
tv_complete = random_walk_mixing(complete_graph(10), steps)

print('Mixing time comparison (TV distance from stationary, starting at node 0):')
l2_path = la.eigvalsh(build_normalized_laplacian(path_graph(10)))[1]
l2_complete = la.eigvalsh(build_normalized_laplacian(complete_graph(10)))[1]
print(f'  Path P_10: lambda_2 = {l2_path:.4f}, TV after 50 steps: {tv_path[-1]:.4f}')
print(f'  Complete K_10: lambda_2 = {l2_complete:.4f}, TV after 50 steps: {tv_complete[-1]:.6f}')
print('Prediction: complete graph mixes MUCH faster (larger lambda_2 -> faster mixing)')

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.semilogy(range(1, steps+1), tv_path, color=COLORS['primary'], label='Path $P_{10}$')
    ax.semilogy(range(1, steps+1), tv_cycle, color=COLORS['secondary'],
                label='Cycle $C_{10}$', linestyle='--')
    ax.semilogy(range(1, steps+1), tv_complete, color=COLORS['tertiary'],
                label='Complete $K_{10}$', linestyle=':')
    ax.set_title('Random walk mixing: Total Variation distance from stationary')
    ax.set_xlabel('Steps $t$')
    ax.set_ylabel('TV distance (log scale)')
    ax.legend()
    fig.tight_layout()
    plt.show()

6. Graph Fourier Transform

We implement the GFT, decompose community signals into frequency components, and visualize the frequency content of smooth vs. rough graph signals.

Code cell 23

# === 6.2-6.3 Graph Fourier Transform and Frequency Interpretation ===

def graph_fourier_transform(A, x):
    """GFT: x_hat = U^T x where U = eigenvectors of L.

    Returns (eigenvalues, x_hat, U).
    """
    laplacian = np.diag(A.sum(axis=1)) - A   # L = D - A (inlined)
    evals, U = la.eigh(laplacian)
    return evals, U.T @ x, U

def inverse_gft(U, x_hat):
    """Inverse GFT: synthesize x = U x_hat from spectral coefficients."""
    return np.matmul(U, x_hat)

# Build a community graph: 2 cliques of 6 nodes connected by 2 edges
n_clique = 6
n_tot = 2 * n_clique
A_comm = np.zeros((n_tot, n_tot))
for i in range(n_clique):
    for j in range(i+1, n_clique):
        # Same (i,j) pattern fills both cliques simultaneously.
        A_comm[i, j] = A_comm[j, i] = 1
        A_comm[n_clique+i, n_clique+j] = A_comm[n_clique+j, n_clique+i] = 1
# Two inter-community edges.
A_comm[n_clique-1, n_clique] = A_comm[n_clique, n_clique-1] = 1
A_comm[n_clique-2, n_clique+1] = A_comm[n_clique+1, n_clique-2] = 1

# Community signal: +1 for community 1, -1 for community 2
x_comm = np.array([1.0]*n_clique + [-1.0]*n_clique)

# Noisy high-frequency signal: alternating pattern
x_noise = np.array([(-1)**i for i in range(n_tot)], dtype=float)

evals_c, x_hat_comm, U_c = graph_fourier_transform(A_comm, x_comm)
_, x_hat_noise, _ = graph_fourier_transform(A_comm, x_noise)

# Verify Parseval's theorem (U is orthogonal, so norms are preserved)
ok1 = np.isclose(np.linalg.norm(x_comm), np.linalg.norm(x_hat_comm))
ok2 = np.isclose(np.linalg.norm(x_noise), np.linalg.norm(x_hat_noise))
print(f"{'PASS' if ok1 else 'FAIL'} - Parseval: ||x_comm|| = ||x_hat_comm|| = {np.linalg.norm(x_comm):.4f}")
print(f"{'PASS' if ok2 else 'FAIL'} - Parseval: ||x_noise|| = ||x_hat_noise|| = {np.linalg.norm(x_noise):.4f}")

# Verify IGFT recovers original signal
x_recovered = inverse_gft(U_c, x_hat_comm)
ok3 = np.allclose(x_comm, x_recovered)
print(f"{'PASS' if ok3 else 'FAIL'} - IGFT recovers original signal")

Code cell 24

# === 6.3 Visualize frequency content ===
# Top row: the two signals on the vertices; bottom row: their GFT power spectra.
if HAS_MPL:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Graph Fourier Transform: Community signal vs. High-frequency signal', fontsize=14)

    # Signal values on vertices
    ax = axes[0, 0]
    ax.bar(range(n_tot), x_comm, color=[COLORS['primary'] if v>0 else COLORS['error']
           for v in x_comm], alpha=0.85, edgecolor='white')
    ax.axvline(n_clique-0.5, color=COLORS['neutral'], linestyle='--')
    ax.set_title('Community signal $\\mathbf{x}_{\\mathrm{comm}}$')
    ax.set_xlabel('Node index'); ax.set_ylabel('Signal value')

    ax = axes[0, 1]
    ax.bar(range(n_tot), x_noise, color=[COLORS['secondary'] if v>0 else COLORS['highlight']
           for v in x_noise], alpha=0.85, edgecolor='white')
    ax.set_title('High-frequency signal $\\mathbf{x}_{\\mathrm{noise}}$')
    ax.set_xlabel('Node index'); ax.set_ylabel('Signal value')

    # GFT power spectrum
    ax = axes[1, 0]
    ax.bar(range(n_tot), x_hat_comm**2, color=COLORS['primary'], alpha=0.85, edgecolor='white')
    ax.set_title('GFT power spectrum: community signal')
    ax.set_xlabel('Frequency index $k$'); ax.set_ylabel('$|\\hat{x}_k|^2$')

    ax = axes[1, 1]
    ax.bar(range(n_tot), x_hat_noise**2, color=COLORS['secondary'], alpha=0.85, edgecolor='white')
    ax.set_title('GFT power spectrum: high-frequency signal')
    ax.set_xlabel('Frequency index $k$'); ax.set_ylabel('$|\\hat{x}_k|^2$')

    fig.tight_layout()
    plt.show()
print('Community signal: concentrated at LOW frequencies (k=1,2)')
print('High-frequency signal: concentrated at HIGH frequencies (k near n)')

Code cell 25

# === 6.4 Dirichlet Energy Decomposition ===

def dirichlet_energy(A, x):
    """Dirichlet energy x^T L x of signal x on the graph with adjacency A."""
    laplacian = np.diag(A.sum(axis=1)) - A  # L = D - A (inlined)
    return x @ laplacian @ x

def dirichlet_spectral(evals, x_hat):
    """Spectral form of the Dirichlet energy: sum_k lambda_k |x_hat_k|^2."""
    weighted = evals * x_hat**2
    return weighted.sum()

# Compare: Dirichlet energy of community signal vs noise signal
E_comm = dirichlet_energy(A_comm, x_comm)
E_noise = dirichlet_energy(A_comm, x_noise)

# Spectral decomposition (should match the edge-space energies exactly)
E_comm_spec = dirichlet_spectral(evals_c, x_hat_comm)
E_noise_spec = dirichlet_spectral(evals_c, x_hat_noise)

print('Dirichlet energy (total variation across edges):')
print(f'  Community signal x_comm: E = {E_comm:.4f} (SMOOTH - low energy)')
print(f'  High-freq signal x_noise: E = {E_noise:.4f} (ROUGH - high energy)')

ok1 = np.isclose(E_comm, E_comm_spec)
ok2 = np.isclose(E_noise, E_noise_spec)
print(f"\n{'PASS' if ok1 else 'FAIL'} - Spectral decomposition: E_comm = sum(lambda_k * xhat_k^2)")
print(f"{'PASS' if ok2 else 'FAIL'} - Spectral decomposition: E_noise = sum(lambda_k * xhat_k^2)")

7. Spectral Filtering and GCN Derivation

We implement and compare several spectral filters: low-pass, high-pass, Chebyshev polynomial (ChebNet), and the first-order GCN filter.

Code cell 27

# === 7.1 Spectral Filters ===

def apply_spectral_filter(A, x, g_func):
    """Apply spectral filter g to signal x: y = U g(Lambda) U^T x."""
    laplacian = np.diag(A.sum(axis=1)) - A   # L = D - A (inlined)
    evals, U = la.eigh(laplacian)
    response = np.array([g_func(lam) for lam in evals])  # g evaluated per frequency
    return U @ np.diag(response) @ U.T @ x

# Filters
lambda_max = la.eigvalsh(build_laplacian(A_comm)).max()
lambda_cutoff = lambda_max / 3  # low-pass cutoff

# NOTE(review): the 'GCN' entry's label says 1 - lam/2 but the code divides by
# lambda_max — confirm which normalization was intended.
filters = {
    'Low-pass': lambda lam: 1.0 if lam <= lambda_cutoff else 0.0,
    'High-pass': lambda lam: 0.0 if lam <= lambda_cutoff else 1.0,
    'Heat kernel (t=0.5)': lambda lam: np.exp(-0.5 * lam),
    'GCN (1 - lam/2)': lambda lam: max(0, 1 - lam/lambda_max),
}

# Apply to noisy community signal: x_comm + small noise
np.random.seed(42)
x_noisy = x_comm + 0.5 * np.random.randn(n_tot)

print('Applying filters to noisy community signal (noise level = 0.5):')
print(f'{"Filter":<25} {"Output norm":<15} {"Correlation with x_comm"}')
print('-' * 60)
for name, g in filters.items():
    y = apply_spectral_filter(A_comm, x_noisy, g)
    corr = np.corrcoef(y, x_comm)[0,1]
    print(f'{name:<25} {np.linalg.norm(y):<15.4f} {corr:.4f}')

print('\nLow-pass filter recovers community signal best (highest correlation with x_comm)')

Code cell 28

# === 7.2-7.3 Polynomial Filters and Chebyshev Approximation ===

def chebnet_filter(A, x, theta, K=None):
    """Apply ChebNet filter: y = sum_k theta_k T_k(Ltilde) x.

    K defaults to len(theta) - 1; theta beyond index K is ignored.
    """
    if K is None:
        K = len(theta) - 1
    laplacian = np.diag(A.sum(axis=1)) - A   # L = D - A (inlined)
    lam_max = la.eigvalsh(laplacian).max()
    Ltilde = 2 * laplacian / lam_max - np.eye(A.shape[0])  # spectrum mapped to [-1, 1]

    # Chebyshev recurrence: T_0 x = x, T_1 x = Ltilde x,
    # T_k x = 2 Ltilde T_{k-1} x - T_{k-2} x.
    terms = [x, Ltilde @ x]
    for _ in range(2, K+1):
        terms.append(2 * Ltilde @ terms[-1] - terms[-2])

    n_terms = min(K + 1, len(theta))
    return sum(theta[k] * terms[k] for k in range(n_terms))

# GCN is K=1 Chebyshev with theta_0 = theta_1 = theta
def gcn_filter(A, X):
    """GCN propagation rule: Ahat = D_tilde^{-1/2} A_tilde D_tilde^{-1/2}."""
    size = A.shape[0]
    A_with_loops = A + np.eye(size)              # renormalization trick: add self-loops
    inv_sqrt_deg = 1.0 / np.sqrt(A_with_loops.sum(axis=1))
    # Row-and-column scaling is the same as sandwiching between diagonal matrices.
    A_hat = A_with_loops * inv_sqrt_deg[:, None] * inv_sqrt_deg[None, :]
    return A_hat @ X

# Verify GCN is a special case of K=1 Chebyshev
x_test = np.random.randn(n_tot)

# K=1 Chebyshev with theta_0 = 1, theta_1 = -1 (after renorm trick)
y_cheb = chebnet_filter(A_comm, x_test, theta=[1.0, -1.0], K=1)
y_gcn = gcn_filter(A_comm, x_test)

print('ChebNet K=1 vs GCN: both are low-pass filters')
print(f'Correlation between ChebNet(K=1) and GCN output: '
      f'{np.corrcoef(y_cheb, y_gcn)[0,1]:.6f}')
print('(High correlation confirms GCN is approximately a first-order Chebyshev filter)')

# Verify GCN is a low-pass filter: smooth input -> smoother output
# (x_noisy was created in the previous filtering cell)
E_before = dirichlet_energy(A_comm, x_noisy)
x_gcn_out = gcn_filter(A_comm, x_noisy)
E_after = dirichlet_energy(A_comm, x_gcn_out)
print(f'\nDirichlet energy before GCN: {E_before:.4f}')
print(f'Dirichlet energy after GCN:  {E_after:.4f}')
ok = E_after < E_before
print(f"{'PASS' if ok else 'FAIL'} - GCN reduces Dirichlet energy (is a low-pass filter)")

Code cell 29

# === 5.5/7.5 Over-smoothing as repeated GCN application ===

def gcn_propagation_matrix(A):
    """Return Ahat = D_tilde^{-1/2} (A + I) D_tilde^{-1/2} (GCN renormalization)."""
    A_tilde = A + np.eye(A.shape[0])   # add self-loops
    inv_sqrt_deg = 1.0 / np.sqrt(A_tilde.sum(axis=1))
    # Equivalent to D^{-1/2} @ A_tilde @ D^{-1/2} via broadcasting.
    return A_tilde * inv_sqrt_deg[:, None] * inv_sqrt_deg[None, :]

A_hat = gcn_propagation_matrix(A_comm)

# Track pairwise distance between node representations over layers
x0 = x_comm.copy()
distances = []
x_current = x0.copy()
n_layers = 30

for k in range(n_layers):
    # Average distance between community 1 and community 2 nodes
    # (measured BEFORE the k-th propagation, so distances[0] is the raw signal)
    c1 = x_current[:n_clique]
    c2 = x_current[n_clique:]
    dist = np.abs(c1.mean() - c2.mean())
    distances.append(dist)
    x_current = A_hat @ x_current

print('Over-smoothing: mean |community1 - community2| signal after k GCN layers:')
for k in [0, 1, 2, 5, 10, 20, 29]:
    print(f'  Layer {k:2d}: distance = {distances[k]:.6f}')

print('\nAfter many layers, community signal is destroyed (over-smoothing)')

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.semilogy(range(n_layers), distances, color=COLORS['primary'], marker='o', markersize=4)
    ax.set_title('Over-smoothing: community signal decays with GCN depth')
    ax.set_xlabel('Number of GCN layers $k$')
    ax.set_ylabel('Community signal strength (log scale)')
    ax.axhline(1e-3, color=COLORS['neutral'], linestyle='--', label='Signal threshold')
    ax.legend()
    fig.tight_layout()
    plt.show()

8. Spectral Clustering

We implement the Ng-Jordan-Weiss (NJW) spectral clustering algorithm and test it on a synthetic Stochastic Block Model graph. We compare with k-means on raw features.

Code cell 31

# === 8.1-8.5 Spectral Clustering Implementation ===

# scikit-learn is optional; the cells below degrade gracefully when missing.
try:
    from sklearn.cluster import KMeans
    from sklearn.metrics import adjusted_rand_score
    HAS_SK = True
except ImportError:
    HAS_SK = False
    print('scikit-learn not available; clustering demo will be limited')

def stochastic_block_model(n_blocks, n_per_block, p_in, p_out, seed=42):
    """Generate SBM adjacency matrix with known ground truth labels."""
    np.random.seed(seed)  # reproducible graph
    n = n_blocks * n_per_block
    A = np.zeros((n, n))
    labels = np.repeat(np.arange(n_blocks), n_per_block)
    # Visit each unordered pair exactly once, drawing one uniform per pair.
    for u in range(n - 1):
        for v in range(u + 1, n):
            edge_prob = p_in if labels[u] == labels[v] else p_out
            if np.random.rand() < edge_prob:
                A[u, v] = A[v, u] = 1
    return A, labels

def spectral_clustering_njw(A, k):
    """Ng-Jordan-Weiss spectral clustering: cluster rows of the normalized
    k-dimensional spectral embedding with k-means.

    Returns (cluster labels, eigenvalues of L_sym).
    """
    evals, evecs = la.eigh(build_normalized_laplacian(A))
    embedding = evecs[:, :k]  # k smallest eigenvectors of L_sym
    # Normalize each embedding row to unit length, guarding against zero rows.
    row_norms = np.linalg.norm(embedding, axis=1, keepdims=True)
    row_norms = np.where(row_norms < 1e-10, 1.0, row_norms)
    embedding = embedding / row_norms
    if HAS_SK:
        km = KMeans(n_clusters=k, n_init=10, random_state=42)
        labels = km.fit_predict(embedding)
    else:
        # Fallback keeps the demo runnable without sklearn (random assignment).
        labels = np.random.randint(0, k, A.shape[0])
    return labels, evals

# Generate SBM with 3 communities
A_sbm, y_true = stochastic_block_model(3, 30, p_in=0.30, p_out=0.02)
pred, evals_sbm = spectral_clustering_njw(A_sbm, k=3)

print('SBM: 3 communities, 30 nodes each, p_in=0.30, p_out=0.02')
print(f'First 8 eigenvalues of L_sym: {np.round(evals_sbm[:8], 4)}')
# With 3 planted communities the gap between the 3rd and 4th eigenvalue is large.
print(f'Eigengap (lambda_4 - lambda_3) = {evals_sbm[3] - evals_sbm[2]:.4f}')
if HAS_SK:
    ari = adjusted_rand_score(y_true, pred)
    print(f'Adjusted Rand Index (spectral clustering): {ari:.4f} (1.0 = perfect)')
    ok = ari > 0.9
    print(f"{'PASS' if ok else 'FAIL'} - Spectral clustering achieves ARI > 0.9")

Code cell 32

# === 8.6 Eigengap heuristic ===
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Eigenvalue plot with eigengap
    ax = axes[0]
    k_show = 10
    ax.bar(range(1, k_show+1), evals_sbm[:k_show],
           color=COLORS['primary'], alpha=0.85, edgecolor='white')
    ax.axvline(3.5, color=COLORS['error'], linestyle='--', linewidth=2,
               label='Eigengap at k=3')
    ax.set_title('Eigenvalue spectrum (eigengap heuristic)')
    ax.set_xlabel('Eigenvalue index $k$')
    ax.set_ylabel('$\\lambda_k(L_{\\mathrm{sym}})$')
    ax.legend()

    # Sorted nodes by Fiedler vector
    ax2 = axes[1]
    if HAS_SK:
        # NOTE(review): la.eigh is computed twice on the same matrix here;
        # hoisting u2 = la.eigh(build_normalized_laplacian(A_sbm))[1][:, 1]
        # into a local would avoid the duplicate decomposition.
        scatter = ax2.scatter(range(len(pred)), np.sort(
            la.eigh(build_normalized_laplacian(A_sbm))[1][:, 1]
        ), c=y_true[np.argsort(
            la.eigh(build_normalized_laplacian(A_sbm))[1][:, 1]
        )], cmap='tab10', alpha=0.8)
        ax2.set_title('Fiedler vector (nodes sorted by u_2 value)')
        ax2.set_xlabel('Sorted node index')
        ax2.set_ylabel('$u_{2,i}$ (Fiedler vector)')
    fig.tight_layout()
    plt.show()

9. Laplacian Eigenmaps and Spectral Positional Encodings

We implement Laplacian eigenmaps for graph embedding and demonstrate Laplacian Positional Encodings (LapPE) and RWPE for graph Transformers.

Code cell 34

# === 9.2 Laplacian Eigenmaps ===

def laplacian_eigenmaps(A, d=2):
    """Embed the graph in R^d with the eigenvectors of L_sym belonging to
    the d smallest nonzero eigenvalues (the constant mode is dropped).

    Returns (eigenvalues 1..d, (n, d) array of embedding coordinates).
    """
    spectrum, basis = la.eigh(build_normalized_laplacian(A))
    return spectrum[1:d + 1], basis[:, 1:d + 1]

def lap_pe(A, k=4):
    """Laplacian Positional Encoding: the k lowest-frequency nontrivial
    Laplacian eigenvectors, used as per-node features."""
    spectrum, basis = la.eigh(build_laplacian(A))
    # index 0 is the trivial (constant) eigenvector -- skip it
    return spectrum[1:k + 1], basis[:, 1:k + 1]

def rwpe(A, k=4):
    """Random Walk Positional Encoding: pe[i, j-1] = (P^j)_ii for j = 1..k.

    P is the row-stochastic random-walk matrix; the diagonal of P^j gives
    each node's j-step return probability, a structural node signature.

    Parameters
    ----------
    A : (n, n) adjacency matrix with nonnegative weights.
    k : number of walk lengths to record.

    Returns
    -------
    (n, k) array of return probabilities.
    """
    n = A.shape[0]
    d = A.sum(axis=1)
    # FIX: isolated nodes (degree 0) made the original divide by zero and
    # fill their rows with NaN. Substituting degree 1 leaves their walk
    # row all-zero, so their return probabilities are correctly 0.
    d_safe = np.where(d > 0, d, 1)
    # The transpose is harmless for our purpose: diag((P^T)^j) == diag(P^j).
    P = (A / d_safe[:, None]).T
    pe = np.zeros((n, k))
    Pk = np.eye(n)
    for j in range(k):
        Pk = Pk @ P
        pe[:, j] = np.diag(Pk)
    return pe

# Embed SBM graph
evals_emb, embedding = laplacian_eigenmaps(A_sbm, d=2)
print('Laplacian eigenmaps: embedding in R^2')
print(f'Using eigenvectors for eigenvalues: {np.round(evals_emb, 4)}')

# Compute RWPE
pe_rw = rwpe(A_sbm, k=4)
print(f'\nRWPE shape: {pe_rw.shape} (n_nodes x k_steps)')
for idx, note in [(0, ''), (30, ' (first node of community 2)')]:
    print(f'RWPE node {idx}: {np.round(pe_rw[idx], 4)}{note}')
print('Different RWPE -> different structural position (as expected)')

# Verify embedding separates communities
if HAS_SK:
    from sklearn.metrics import silhouette_score
    sil = silhouette_score(embedding, y_true)
    print(f'\nSilhouette score of 2D Laplacian embedding: {sil:.4f} (>0.5 = good separation)')
    verdict = 'PASS' if sil > 0.5 else 'FAIL'
    print(f'{verdict} - Laplacian eigenmaps separates communities')

Code cell 35

# === 9.5 Visualize Laplacian embedding ===
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Left: 2D spectral embedding, one color per ground-truth community.
    ax = axes[0]
    # FIX: removed a dead `colors_plot = plt.cm.tab10(...)` line -- the
    # scatter below already maps y_true through 'tab10' via c=/cmap=.
    ax.scatter(embedding[:, 0], embedding[:, 1],
               c=y_true, cmap='tab10', alpha=0.8, s=50, edgecolors='white', linewidths=0.5)
    ax.set_title('Laplacian Eigenmaps 2D Embedding')
    ax.set_xlabel('$u_2$')
    ax.set_ylabel('$u_3$')

    # Right: k-step return probabilities for one sample node per community.
    ax2 = axes[1]
    node_samples = [0, 30, 60]  # one from each community
    for node in node_samples:
        ax2.plot(range(1, 5), pe_rw[node], marker='o',
                 label=f'Node {node} (community {y_true[node]+1})')
    ax2.set_title('RWPE: Random Walk Positional Encoding')
    ax2.set_xlabel('Walk length $j$')
    ax2.set_ylabel('$(P^j)_{vv}$ (return probability)')
    ax2.legend()
    fig.tight_layout()
    plt.show()

10. PageRank as a Spectral Problem

PageRank is the dominant left eigenvector of the Google matrix $G = \alpha P + (1-\alpha)\mathbf{1}\mathbf{1}^\top/n$. We implement power iteration and compare it with a direct eigenvalue computation.

Code cell 37

# === 10.3 PageRank: Power Iteration vs. Eigenvalue Computation ===

def build_google_matrix(A_dir, alpha=0.85):
    """Build the Google matrix G = alpha * P + (1 - alpha) * 1 1^T / n.

    P is the row-stochastic random-walk matrix of the directed graph.
    FIX: dangling nodes (no out-links) now get a uniform row 1/n, the
    standard PageRank convention. The original divided their row by 1,
    leaving an all-zero row, so G was sub-stochastic (row sum = 1-alpha)
    and power iteration leaked probability mass on graphs with dangling
    nodes. (The example graph in this notebook has none, so its printed
    results are unchanged.)

    Parameters
    ----------
    A_dir : (n, n) directed adjacency matrix, A_dir[i, j] = edge i -> j.
    alpha : damping factor in (0, 1).

    Returns
    -------
    (n, n) row-stochastic Google matrix.
    """
    n = A_dir.shape[0]
    out_degree = A_dir.sum(axis=1)
    dangling = out_degree == 0
    safe_deg = np.where(dangling, 1, out_degree)
    P = (A_dir.T / safe_deg).T    # row-stochastic wherever out_degree > 0
    P[dangling] = 1.0 / n         # dangling nodes teleport uniformly
    return alpha * P + (1 - alpha) * np.ones((n, n)) / n

def pagerank_power(A_dir, alpha=0.85, max_iter=200, tol=1e-10):
    """PageRank by power iteration on the Google matrix.

    Starts from the uniform distribution and iterates pi <- pi G,
    stopping once the L2 change drops below tol.

    Returns (pi, number_of_iterations_performed).
    """
    G = build_google_matrix(A_dir, alpha)
    n = A_dir.shape[0]
    pi = np.full(n, 1.0 / n)
    for step in range(1, max_iter + 1):
        nxt = pi @ G
        if np.linalg.norm(nxt - pi) < tol:
            return nxt, step
        pi = nxt
    return pi, max_iter

# Build a small directed citation graph
np.random.seed(42)
n_pages = 8
edges_dir = [(0,1),(0,2),(1,3),(2,3),(2,4),(3,5),(4,5),(5,6),(6,0),(1,6),(2,6),(7,3)]
A_dir = np.zeros((n_pages, n_pages))
srcs, dsts = zip(*edges_dir)
A_dir[list(srcs), list(dsts)] = 1

# Power iteration
pi_power, n_iters = pagerank_power(A_dir, alpha=0.85)
print('PageRank via power iteration (alpha=0.85):')
print(f'  Converged in {n_iters} iterations')
for page, score in enumerate(pi_power):
    print(f'  Page {page}: PageRank = {score:.6f}')

# Direct computation: the dominant LEFT eigenvector of G is the dominant
# right eigenvector of G^T.
G = build_google_matrix(A_dir)
evals_G, evecs_G = np.linalg.eig(G.T)
idx_dominant = np.argmax(np.real(evals_G))
pi_eig = np.real(evecs_G[:, idx_dominant])
pi_eig = pi_eig / pi_eig.sum()  # rescale to a probability distribution

print(f'\nDominant eigenvalue of G: {np.real(evals_G[idx_dominant]):.6f} (should be 1.0)')
ok = np.allclose(pi_power, pi_eig, atol=1e-6)
print(f"{'PASS' if ok else 'FAIL'} - Power iteration matches direct eigenvector computation")

Code cell 38

# === 10.3 Mixing time of PageRank ===

def pagerank_convergence(A_dir, alpha=0.85, n_steps=50):
    """Return the errors ||pi^(t) - pi*||_2 for t = 0 .. n_steps-1.

    pi* is taken from a long (500-iteration) power-iteration run. The
    error is recorded BEFORE each update, so errors[0] is the distance of
    the uniform starting vector from the stationary distribution.
    """
    G = build_google_matrix(A_dir, alpha)
    n = A_dir.shape[0]
    pi_star, _ = pagerank_power(A_dir, alpha, max_iter=500)
    pi = np.full(n, 1.0 / n)
    errors = []
    for _ in range(n_steps):
        errors.append(np.linalg.norm(pi - pi_star))
        pi = pi @ G
    return errors

errors_85 = pagerank_convergence(A_dir, alpha=0.85)
errors_50 = pagerank_convergence(A_dir, alpha=0.50)
errors_99 = pagerank_convergence(A_dir, alpha=0.99)

# The contraction rate of power iteration is alpha, so a smaller damping
# factor converges in fewer iterations.
print('Convergence rate: smaller alpha -> faster mixing')
for alpha_val, errors in [('0.85', errors_85), ('0.50', errors_50), ('0.99', errors_99)]:
    iters_to_conv = len(errors)
    for i, err in enumerate(errors):
        if err < 1e-6:
            iters_to_conv = i
            break
    print(f'  alpha={alpha_val}: {iters_to_conv} iterations to reach error < 1e-6')

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 5))
    curves = [('0.50', errors_50, COLORS['tertiary']),
              ('0.85', errors_85, COLORS['primary']),
              ('0.99', errors_99, COLORS['error'])]
    for alpha_val, errors, color in curves:
        ax.semilogy(range(len(errors)), errors, color=color,
                    label=f'$\\alpha={alpha_val}$')
    ax.set_title('PageRank convergence: effect of damping factor $\\alpha$')
    ax.set_xlabel('Iteration $t$')
    ax.set_ylabel('$\\|\\pi^{(t)} - \\pi^*\\|_2$ (log scale)')
    ax.legend()
    fig.tight_layout()
    plt.show()

11. Kirchhoff's Matrix-Tree Theorem

The number of spanning trees of a connected graph equals $\frac{1}{n}\prod_{k=2}^{n} \lambda_k$. We verify this for complete graphs ($K_n$), cycles ($C_n$), and paths ($P_n$).

Code cell 40

# === 10.2 Matrix-Tree Theorem: counting spanning trees ===

def count_spanning_trees_spectral(A):
    """Kirchhoff's theorem: tau(G) = (1/n) * product of the nonzero
    Laplacian eigenvalues (valid for a connected graph)."""
    evals = la.eigvalsh(build_laplacian(A))
    # drop the (numerically) zero eigenvalue of the constant mode
    nonzero = evals[np.abs(evals) > 1e-8]
    return np.prod(nonzero) / A.shape[0]

# Verify against known formulas
# K_n: tau(K_n) = n^{n-2} (Cayley's formula)
# C_n: tau(C_n) = n
# P_n: tau(P_n) = 1

print('Matrix-Tree Theorem verification:')
print(f'{"Graph":<15} {"Spectral count":<20} {"Analytic":<15} {"Match?"}')
print('-' * 60)

all_ok = True  # aggregate every check so the final verdict is honest
for n_val in [4, 5, 6]:
    # K_n
    tau_Kn_spec = count_spanning_trees_spectral(complete_graph(n_val))
    tau_Kn_exact = n_val ** (n_val - 2)
    ok = np.isclose(tau_Kn_spec, tau_Kn_exact, rtol=1e-4)
    all_ok = all_ok and bool(ok)
    print(f'K_{n_val:<13} {tau_Kn_spec:<20.1f} {tau_Kn_exact:<15} {ok}')

    # C_n
    tau_Cn_spec = count_spanning_trees_spectral(cycle_graph(n_val))
    ok = np.isclose(tau_Cn_spec, n_val, rtol=1e-4)
    all_ok = all_ok and bool(ok)
    print(f'C_{n_val:<13} {tau_Cn_spec:<20.1f} {n_val:<15} {ok}')

    # P_n
    tau_Pn_spec = count_spanning_trees_spectral(path_graph(n_val))
    ok = np.isclose(tau_Pn_spec, 1.0, rtol=1e-4)
    all_ok = all_ok and bool(ok)
    print(f'P_{n_val:<13} {tau_Pn_spec:<20.1f} {1:<15} {ok}')

# FIX: the original printed PASS unconditionally, even if a row above
# reported False; report the aggregated result instead.
print(f"\n{'PASS' if all_ok else 'FAIL'} - Matrix-Tree theorem verified for K_n, C_n, P_n")

12. Attention Pattern Analysis in LLMs

We model Transformer attention weights as a directed graph and apply spectral analysis to characterize different attention head types (uniform, induction, diagonal).

Code cell 42

# === 12.4 Spectral Analysis of Attention Patterns ===

def softmax(x):
    """Row-wise softmax, shifted by the row max for numerical stability."""
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

T = 12  # sequence length

# Attention pattern archetypes
np.random.seed(42)

# 1. Uniform attention (attend equally to all positions)
A_uniform = softmax(np.zeros((T, T)))

# 2. Causal (lower-triangular) attention: position i attends uniformly to
# positions j <= i.
# BUG FIX: the original computed softmax(np.tril(np.zeros((T, T)))), i.e.
# softmax of an all-zero matrix, which is identical to A_uniform (and its
# half-built `logits_causal` masked the wrong triangle and was unused).
# Mask the STRICT UPPER triangle with -1e9 so future positions get
# (numerically) zero weight.
A_causal = softmax(np.triu(np.full((T, T), -1e9), k=1))

# 3. Diagonal (attend to self only)
A_diag = softmax(np.eye(T) * 10)

# 4. Induction head (attend to previous occurrence of current token)
A_induction = np.zeros((T, T))
for i in range(T):
    A_induction[i, (i - 3) % T] = 1.0  # attend 3 positions back (wraps)

attention_patterns = {
    'Uniform': A_uniform,
    'Causal': A_causal,
    'Diagonal (self-attend)': A_diag,
    'Induction head': A_induction,
}

print('Spectral analysis of attention patterns:')
print(f'{"Pattern":<25} {"mu_1":<10} {"mu_2":<10} {"Spectral gap":<15} {"Interpretation"}')
print('-' * 80)
for name, A_att in attention_patterns.items():
    # moduli of the (possibly complex) eigenvalues, largest first
    moduli = np.sort(np.abs(np.linalg.eigvals(A_att)))[::-1]
    mu1 = moduli[0]
    mu2 = moduli[1] if moduli.size > 1 else 0
    gap = mu1 - mu2
    if gap < 0.1:
        interp = 'Diffuse attention (small gap)'
    elif gap > 0.5:
        interp = 'Sharp attention (large gap)'
    else:
        interp = 'Moderate attention'
    print(f'{name:<25} {mu1:<10.4f} {mu2:<10.4f} {gap:<15.4f} {interp}')

Code cell 43

# === 12.4 Visualize attention patterns and their spectra ===
if HAS_MPL:
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    fig.suptitle('Attention patterns and spectral analysis', fontsize=14)

    for col, (name, A_att) in enumerate(attention_patterns.items()):
        top, bottom = axes[0, col], axes[1, col]

        # Row 1: the attention matrix itself
        top.imshow(A_att, cmap='viridis', aspect='auto', vmin=0)
        top.set_title(name, fontsize=11)
        top.set_xlabel('Key position')

        # Row 2: eigenvalue moduli, largest first
        moduli = np.sort(np.abs(np.linalg.eigvals(A_att)))[::-1]
        bottom.bar(range(len(moduli)), moduli,
                   color=COLORS['primary'], alpha=0.85, edgecolor='white')
        bottom.set_title(f'$|\\mu_k|$ spectrum', fontsize=11)
        bottom.set_xlabel('$k$')

        if col == 0:  # label y-axes only on the leftmost column
            top.set_ylabel('Query position')
            bottom.set_ylabel('$|\\mu_k|$')

    fig.tight_layout()
    plt.show()

12.1 Semi-Supervised Learning via Graph Regularization

We demonstrate label propagation as spectral filtering: the smoothed label function is a low-pass filtered version of the initial labels.

Code cell 45

# === 12.1 Label Propagation as Spectral Filtering ===

def label_propagation(A, y_init, alpha=0.5, n_iter=100):
    """
    Iterate F <- alpha * A_hat @ F + (1 - alpha) * Y for n_iter steps,
    where A_hat = D^{-1/2} A D^{-1/2} is the symmetrically normalized
    adjacency and Y holds the initial (partial) labels.

    alpha: smoothing strength (0 = keep labels, 1 = full propagation)
    """
    deg = A.sum(axis=1)
    inv_sqrt = 1.0 / np.sqrt(np.where(deg > 0, deg, 1))
    # row/column scaling via broadcasting == D^{-1/2} A D^{-1/2}
    A_hat = inv_sqrt[:, None] * A * inv_sqrt[None, :]

    Y = np.asarray(y_init, dtype=float).copy()
    F = Y.copy()
    for _ in range(n_iter):
        F = alpha * (A_hat @ F) + (1 - alpha) * Y
    return F

# SBM with only 10% labeled nodes
n_nodes = A_sbm.shape[0]
y_true_binary = (y_true == 0).astype(float)  # binary task: community 0 vs rest

# Reveal labels for a random 10% of the nodes; everyone else starts at 0.
np.random.seed(42)
labeled = np.random.choice(n_nodes, size=n_nodes // 10, replace=False)
y_init = np.zeros(n_nodes)
y_init[labeled] = y_true_binary[labeled]

# Propagate, then threshold at 0.5 to get hard predictions.
y_prop = label_propagation(A_sbm, y_init, alpha=0.7, n_iter=50)
y_pred = (y_prop > 0.5).astype(int)

acc = (y_pred == y_true_binary).mean()
frac_pos = y_true_binary.mean()
baseline = max(frac_pos, 1 - frac_pos)  # majority-class accuracy

print('Semi-supervised label propagation on SBM:')
print(f'  Labeled fraction: {len(labeled)/n_nodes:.0%}')
print(f'  Baseline accuracy (majority): {baseline:.3f}')
print(f'  Label propagation accuracy:   {acc:.3f}')
verdict = 'PASS' if acc > baseline + 0.05 else 'FAIL'
print(f'{verdict} - Label propagation outperforms majority baseline')

print('\nSpectral interpretation:')
print('  Label propagation = low-pass filter applied to initial labels')
print('  Frequency attenuation: g(lambda) = (1-alpha)/(1 - alpha*(1-lambda))')

5.3 Expander Graphs and Spectral Gap

We compare expander-like graphs (complete, random $d$-regular) with bottleneck graphs (paths, barbells) by their spectral gap and Cheeger bound.

Code cell 47

# === 5.3 Expander vs Bottleneck Graphs ===

n = 20
graphs_exp = {
    'Path P_20': path_graph(n),
    'Barbell (2x10)': barbell_graph(10, 10),
    'Cycle C_20': cycle_graph(n),
    'Complete K_20': complete_graph(n),
}

print('Spectral gap comparison: expanders vs bottleneck graphs')
print(f'{"Graph":<20} {"lambda_2 (L_sym)":<20} {"Cheeger upper bound"}')
print('-' * 60)
for name, adj in graphs_exp.items():
    # lambda_2 of L_sym controls expansion; Cheeger: h(G) <= sqrt(2 lambda_2)
    lam2 = la.eigvalsh(build_normalized_laplacian(adj))[1]
    print(f'{name:<20} {lam2:<20.4f} h(G) <= {(2 * lam2) ** 0.5:.4f}')

print('\nObservation: Complete graph is best expander (largest lambda_2)')
print('Path and barbell are bottleneck graphs (lambda_2 near 0)')

lam2_complete = la.eigvalsh(build_normalized_laplacian(complete_graph(n)))[1]
lam2_path = la.eigvalsh(build_normalized_laplacian(path_graph(n)))[1]
verdict = 'PASS' if lam2_complete > lam2_path else 'FAIL'
print(f'{verdict} - K_n has larger lambda_2 than P_n (better expansion)')

7.4 Heat Kernel and Graph Diffusion

The heat kernel $H_t = e^{-tL}$ models diffusion of information on a graph. As $t \to 0$, information stays at the source; as $t \to \infty$, it equalizes across the graph.

Code cell 49

# === 7.4 Heat Kernel Diffusion ===

def heat_kernel(A, t):
    """Heat kernel H_t = exp(-t L) = U diag(e^{-t lambda_k}) U^T."""
    evals, U = la.eigh(build_laplacian(A))
    decay = np.exp(-t * evals)
    return U @ np.diag(decay) @ U.T

time_steps = [0.01, 0.1, 0.5, 2.0, 10.0]
print('Heat diffusion from node 0 (unit source) over time:')
print(f'{"t":<8} {"Max heat":<14} {"Std dev":<14} {"Interpretation"}')
print('-' * 60)
for t in time_steps:
    heat = heat_kernel(A_comm, t)[0, :]  # row 0 = heat seeded at node 0
    if t < 0.1:
        interp = 'Concentrated'
    elif t < 5:
        interp = 'Mixing'
    else:
        interp = 'Near uniform'
    print(f'{t:<8} {heat.max():<14.6f} {heat.std():<14.6f} {interp}')

# Verify equalization at large t
uniform = np.ones(n_tot) / n_tot
ok = np.allclose(heat_kernel(A_comm, t=100.0)[0, :], uniform, atol=1e-4)
print(f"\n{'PASS' if ok else 'NOTE'} - Heat equalizes to 1/n = {1/n_tot:.4f} for large t")

if HAS_MPL:
    fig, axes = plt.subplots(1, len(time_steps), figsize=(16, 4))
    fig.suptitle('Heat diffusion from node 0 over time', fontsize=13)
    # colors are the same for every subplot -- build them once
    bar_colors = [COLORS['primary'] if i < n_clique else COLORS['secondary']
                  for i in range(n_tot)]
    for ax, t in zip(axes, time_steps):
        ax.bar(range(n_tot), heat_kernel(A_comm, t)[0, :],
               color=bar_colors, alpha=0.85, edgecolor='white')
        ax.set_title(f'$t={t}$')
        ax.set_xlabel('Node')
        ax.set_ylim(0, 0.5)
    axes[0].set_ylabel('Heat')
    fig.tight_layout()
    plt.show()

9.3 Diffusion Distance

Diffusion distance $D_t(i,j)$ accounts for all paths between nodes, making it more robust than shortest-path distance for community detection.

Code cell 51

# === 9.3 Diffusion Distance vs Shortest-Path Distance ===

def diffusion_map_coords(A, t, k=5):
    """Heat-kernel diffusion-map coordinates: column j holds
    e^{-t lambda_{j+1}} * u_{j+1} (the trivial eigenpair is skipped), so
    Euclidean distance in this space is the diffusion distance at time t."""
    evals, U = la.eigh(build_laplacian(A))
    decay = np.exp(-t * evals[1:k + 1])
    return U[:, 1:k + 1] * decay[None, :]  # (n, k)

import scipy.sparse.csgraph as csg
sp_dist = csg.shortest_path(A_comm, method='D', directed=False)
dm_coords = diffusion_map_coords(A_comm, t=1.0, k=4)
# Pairwise Euclidean distances in diffusion space, vectorized with
# broadcasting (the original built the n x n matrix with an O(n^2)
# Python double comprehension calling np.linalg.norm per pair).
deltas = dm_coords[:, None, :] - dm_coords[None, :, :]
diff_dist_mat = np.linalg.norm(deltas, axis=2)

# Compare intra vs inter community distances.
# Community 1 = nodes [0, n_clique); community 2 = [n_clique, n_tot).
iu = np.triu_indices(n_clique, k=1)  # unordered pairs inside community 1
intra_sp = sp_dist[:n_clique, :n_clique][iu].mean()
inter_sp = sp_dist[:n_clique, n_clique:].mean()
intra_d = diff_dist_mat[:n_clique, :n_clique][iu].mean()
inter_d = diff_dist_mat[:n_clique, n_clique:].mean()

print('Distance comparison: intra-community vs inter-community')
print(f'  Shortest-path: intra={intra_sp:.3f}, inter={inter_sp:.3f}, '
      f'ratio={inter_sp/intra_sp:.2f}x')
print(f'  Diffusion:     intra={intra_d:.4f}, inter={inter_d:.4f}, '
      f'ratio={inter_d/intra_d:.2f}x')

ok = inter_d > intra_d
print(f"\n{'PASS' if ok else 'FAIL'} - Diffusion distance: inter-community > intra-community")
print('Diffusion distance amplifies community structure vs shortest-path distance')

Summary and Key Results

| Result | Statement | Code verified? |
| --- | --- | --- |
| Laplacian PSD | $\mathbf{x}^\top L \mathbf{x} = \sum_{(i,j)\in E}(x_i-x_j)^2 \geq 0$ | Yes |
| Connected components | $\dim(\ker L)$ = number of components | Yes |
| Special graph spectra | $\lambda_2(K_n)=n$, $\lambda_2(S_n)=1$, $\lambda_2(P_n)=2-2\cos(\pi/n)$ | Yes |
| Cheeger inequality | $\lambda_2/2 \leq h(G) \leq \sqrt{2\lambda_2}$ | Yes |
| Mixing time | Faster convergence with larger $\lambda_2$ | Yes |
| GFT Parseval | $\lVert \hat{\mathbf{x}} \rVert = \lVert \mathbf{x} \rVert$ | Yes |
| GCN = low-pass | GCN reduces Dirichlet energy | Yes |
| Over-smoothing | Community signal decays exponentially with depth | Yes |
| Spectral clustering | NJW achieves high ARI on SBM | Yes |
| Matrix-Tree theorem | $\tau(G) = \frac{1}{n}\prod_{k\geq 2}\lambda_k$ | Yes |
| PageRank | Dominant eigenvector of the Google matrix | Yes |
| Label propagation | Low-pass spectral filter on graph signals | Yes |

Next: Exercises Notebook — 8 graded problems

Reference: notes.md — full theory with proofs