Theory Notebook — Math for LLMs

Random Graphs

Graph Theory / Random Graphs

Run notebook
Theory Notebook

Theory Notebook

Converted from theory.ipynb for web reading.

Random Graphs — Theory Notebook

"In the random graph, order emerges from chaos — the giant component appears suddenly, like a phase transition in physics, when the average degree crosses one."

Interactive simulations covering: Erdős-Rényi phase transitions, degree distributions, Watts-Strogatz small-world, Barabási-Albert scale-free, Stochastic Block Model community detection, Wigner semicircle law, and graphon estimation.

Companion: notes.md | exercises.ipynb

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)
print("Plot setup complete.")

Code cell 3

# Standalone setup (duplicates the plotting config above so the notebook can
# also be started from this cell); adds SciPy imports and a shared palette.
import numpy as np
import scipy.linalg as la
import scipy.sparse as sp
from scipy.optimize import brentq

try:
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    try:
        import seaborn as sns
        sns.set_theme(style='whitegrid', palette='colorblind')
        HAS_SNS = True
    except ImportError:
        plt.style.use('seaborn-v0_8-whitegrid')
        HAS_SNS = False
    mpl.rcParams.update({
        'figure.figsize': (10, 6), 'figure.dpi': 120,
        'font.size': 13, 'axes.titlesize': 15, 'axes.labelsize': 13,
        'xtick.labelsize': 11, 'ytick.labelsize': 11,
        'legend.fontsize': 11, 'lines.linewidth': 2.0,
        'axes.spines.top': False, 'axes.spines.right': False,
    })
    HAS_MPL = True
except ImportError:
    # All plotting cells below are guarded by HAS_MPL.
    HAS_MPL = False

# Fixed hex palette used by every plot in the notebook.
COLORS = {
    'primary':   '#0077BB',
    'secondary': '#EE7733',
    'tertiary':  '#009988',
    'error':     '#CC3311',
    'neutral':   '#555555',
    'highlight': '#EE3377',
}

np.set_printoptions(precision=6, suppress=True)
np.random.seed(42)
print('Setup complete.')

1. Erdős-Rényi Model: Sampling and Degree Distribution

We build $G(n,p)$ from scratch, verify the Poisson degree distribution, and study how the graph transitions from subcritical to supercritical.

Code cell 5

# === 1.1 G(n,p) Sampler ===

def gnp(n, p, seed=None):
    """Sample an Erdős-Rényi G(n,p) adjacency matrix (symmetric, zero diagonal)."""
    rng = np.random.default_rng(seed)
    # One uniform draw per ordered pair, thresholded at p; keep only the strict
    # upper triangle, then mirror it so the graph is undirected and simple.
    coin_flips = rng.random((n, n))
    edges = np.triu((coin_flips < p).astype(float), k=1)
    return edges + edges.T

# Test at different regimes
# Smoke-test the sampler across subcritical/critical/supercritical mean degrees c = np.
n = 500
for c in [0.5, 1.0, 2.0, 3.0]:
    A = gnp(n, c/n, seed=42)
    m = int(A.sum() // 2)  # each undirected edge contributes 2 to the adjacency sum
    deg = A.sum(axis=1)
    print(f'c={c:.1f}: edges={m}, avg_deg={deg.mean():.3f} (theory={c:.1f}), '
          f'max_deg={int(deg.max())}')

print('\nG(n,p) sampler verified.')

Code cell 6

# === 1.2 Poisson Degree Distribution Verification ===
# For fixed c = np, the Binomial(n-1, p) degree law converges to Poisson(c) as n grows.

from scipy.stats import poisson

n, c = 5000, 5.0
p = c / n
A = gnp(n, p, seed=42)
degrees = A.sum(axis=1).astype(int)

# Empirical degree distribution
k_max = int(degrees.max()) + 1
empirical = np.bincount(degrees, minlength=k_max) / n

# Theoretical Poisson(c)
k_range = np.arange(k_max)
theoretical = poisson.pmf(k_range, mu=c)

# Compare
print(f'n={n}, c={c}')
print(f'Mean degree: empirical={degrees.mean():.3f}, theoretical={c:.3f}')
print(f'Variance:    empirical={degrees.var():.3f}, theoretical={c:.3f} (Poisson: mean=var)')
# Rough max-degree growth rate log(n)/log(log(n)); crude order-of-magnitude check only.
print(f'Max degree:  empirical={degrees.max()}, theoretical approx {int(np.log(n)/np.log(np.log(n)+1))}')

# KL divergence (empirical || theoretical)
# Restrict to bins where both distributions are positive so the log is finite.
mask = (empirical > 0) & (theoretical > 0)
kl = np.sum(empirical[mask] * np.log(empirical[mask] / theoretical[mask]))
print(f'KL divergence (empirical || Poisson): {kl:.4f}')

ok = abs(degrees.mean() - c) < 0.1 and degrees.var() < c * 1.2
print(f"{'PASS' if ok else 'FAIL'} - degree distribution close to Poisson({c})")

Code cell 7

# === 1.3 Degree Distribution Visualization ===
# Side-by-side bars: empirical degree histogram vs the Poisson(c) pmf (degrees 0-19).

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 5))
    k_plot = np.arange(0, min(k_max, 20))
    width = 0.4
    ax.bar(k_plot - width/2, empirical[:len(k_plot)],
           width=width, color=COLORS['primary'], alpha=0.8, label='Empirical')
    ax.bar(k_plot + width/2, poisson.pmf(k_plot, c),
           width=width, color=COLORS['secondary'], alpha=0.8, label=f'Poisson({c})')
    ax.set_title(f'Degree Distribution: G(n,p) with n={n}, c={c}')
    ax.set_xlabel('Degree k')
    ax.set_ylabel('Probability P(deg = k)')
    ax.legend()
    plt.tight_layout()
    plt.show()
    print('Plot displayed.')

2. Giant Component Phase Transition

Simulate the phase transition at $c = 1$ and verify the theoretical prediction $\beta = 1 - e^{-c\beta}$.

Code cell 9

# === 2.1 Giant Component via BFS ===

def largest_component_sizes(A):
    """Return (L1, L2): sizes of the largest and second-largest connected components."""
    n = A.shape[0]
    seen = np.zeros(n, dtype=bool)
    sizes = []
    for root in range(n):
        if seen[root]:
            continue
        # Iterative stack-based traversal (DFS order) over root's component;
        # only the component size matters, so visit order is irrelevant.
        stack = [root]
        seen[root] = True
        comp = 0
        while stack:
            node = stack.pop()
            comp += 1
            for nbr in np.where(A[node] > 0)[0]:
                if not seen[nbr]:
                    seen[nbr] = True
                    stack.append(nbr)
        sizes.append(comp)
    sizes.sort(reverse=True)
    first = sizes[0] if sizes else 0
    second = sizes[1] if len(sizes) > 1 else 0
    return first, second

def beta_theory(c):
    """Giant-component fraction beta(c): root of beta = 1 - exp(-c*beta)."""
    if c <= 1:
        return 0.0  # subcritical/critical regime: no giant component

    def excess(b):
        # f(b) = b - 1 + exp(-c*b) has a unique root strictly inside (0, 1) for c > 1
        return b - 1 + np.exp(-c * b)

    return brentq(excess, 1e-10, 1.0 - 1e-10)

# Sweep c values
# For each mean degree c, sample one G(n, c/n) and record the largest and
# second-largest component fractions; compare L1/n against beta(c).
n = 1000
c_vals = np.linspace(0.3, 3.5, 25)
L1_fracs = []
L2_fracs = []

np.random.seed(42)
for c in c_vals:
    A = gnp(n, c/n)
    L1, L2 = largest_component_sizes(A)
    L1_fracs.append(L1/n)
    L2_fracs.append(L2/n)

beta_theory_vals = [beta_theory(c) for c in c_vals]

# Verify at c=2
beta_2 = beta_theory(2.0)
print(f'Theoretical beta(c=2.0) = {beta_2:.4f}')
c2_idx = np.argmin(abs(c_vals - 2.0))  # grid point closest to c=2
print(f'Simulated  L1/n(c=2.0) = {L1_fracs[c2_idx]:.4f}')

ok = abs(L1_fracs[c2_idx] - beta_2) < 0.05
print(f"{'PASS' if ok else 'FAIL'} - simulated giant component matches theory")

Code cell 10

# === 2.2 Phase Transition Visualization ===
# Left panel: L1/n vs c with the beta(c) curve; right panel: L2 on a log scale,
# which peaks near the critical point c = 1.

if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Left: L1/n vs c with theory overlay
    ax = axes[0]
    ax.plot(c_vals, L1_fracs, 'o', color=COLORS['primary'], ms=6, label='Simulated $L_1/n$')
    ax.plot(c_vals, beta_theory_vals, '-', color=COLORS['error'], lw=2,
            label='Theory $\\beta(c)$')
    ax.plot(c_vals, L2_fracs, 's', color=COLORS['tertiary'], ms=5, alpha=0.7,
            label='Simulated $L_2/n$')
    ax.axvline(1.0, color=COLORS['neutral'], ls='--', alpha=0.7, label='$c=1$ (threshold)')
    ax.set_title('Giant Component Phase Transition')
    ax.set_xlabel('Average degree $c = np$')
    ax.set_ylabel('Fraction of nodes')
    ax.legend(fontsize=10)

    # Right: Log-scale of L2 (peaks at criticality)
    ax = axes[1]
    L2_sizes = [l2 * n for l2 in L2_fracs]
    # Clamp at 0.5 so zero-size entries remain plottable on the log axis.
    ax.semilogy(c_vals, [max(s, 0.5) for s in L2_sizes],
                'o-', color=COLORS['secondary'], ms=5)
    ax.axvline(1.0, color=COLORS['neutral'], ls='--', alpha=0.7, label='$c=1$')
    ax.set_title('Second-Largest Component Size (log scale)')
    ax.set_xlabel('Average degree $c$')
    ax.set_ylabel('$L_2$ (log scale)')
    ax.legend()

    fig.tight_layout()
    plt.show()
    print('Plot displayed.')

3. Connectivity Threshold

Verify $p^* = \ln(n)/n$ for graph connectivity and the Gumbel limit for the isolated-vertex count.

Code cell 12

# === 3.1 Connectivity Threshold Experiment ===

def is_connected(A):
    """True iff every node is reachable from node 0 (stack-based traversal)."""
    n = A.shape[0]
    reached = np.zeros(n, dtype=bool)
    reached[0] = True
    stack = [0]
    while stack:
        node = stack.pop()
        for nbr in np.where(A[node] > 0)[0]:
            if not reached[nbr]:
                reached[nbr] = True
                stack.append(nbr)
    # Connectivity only needs reachability, so DFS vs BFS order is irrelevant.
    return reached.all()

# Sweep p around log(n)/n
# Estimate P[G(n,p) connected] by Monte Carlo at p = c_scale * log(n)/n;
# the sharp threshold is expected at c_scale = 1.
n = 300
n_trials = 40
c_vals_conn = np.linspace(0.3, 3.0, 20)  # p = c * log(n)/n
p_vals = c_vals_conn * np.log(n) / n

conn_probs = []
np.random.seed(42)
for p in p_vals:
    connected_count = sum(is_connected(gnp(n, p)) for _ in range(n_trials))
    conn_probs.append(connected_count / n_trials)

# Theoretical prediction: P[connected] → exp(-exp(-c)) for p = (ln n + c)/n
# Here c_vals_conn = (ln n) * c_scale, c in paper = (p*n - ln n)
# At c_vals_conn = 1: p = ln(n)/n → threshold
threshold_idx = np.argmin(abs(np.array(conn_probs) - 0.5))  # closest to 50% connectivity
print(f'50% connectivity at c_scale = {c_vals_conn[threshold_idx]:.2f} (theory: ~1.0)')
print(f'Connectivity probabilities near threshold:')
for i, (c, prob) in enumerate(zip(c_vals_conn, conn_probs)):
    if 0.3 <= c <= 2.5:
        print(f'  c_scale={c:.2f}: P[connected]={prob:.2f}')

# Sanity check: far below threshold almost never connected, far above almost always.
ok = conn_probs[-1] > 0.9 and conn_probs[0] < 0.1
print(f"\n{'PASS' if ok else 'FAIL'} - connectivity threshold confirmed near log(n)/n")

4. Watts-Strogatz Small-World Model

Build the WS model and trace how clustering $C(\beta)$ and path length $L(\beta)$ evolve as rewiring probability $\beta$ increases.

Code cell 14

# === 4.1 Watts-Strogatz Construction ===

def watts_strogatz(n, k, beta, seed=None):
    """Build a Watts-Strogatz small-world adjacency matrix.
    n: number of nodes
    k: each node connected to k nearest neighbors (k must be even)
    beta: rewiring probability
    """
    rng = np.random.default_rng(seed)
    adj = np.zeros((n, n))
    # Ring lattice: node i is linked to its k/2 clockwise neighbors (and, by
    # symmetry, its k/2 counter-clockwise ones).
    for node in range(n):
        for offset in range(1, k // 2 + 1):
            right = (node + offset) % n
            adj[node, right] = 1
            adj[right, node] = 1
    # Visit each clockwise edge once; with probability beta, move its far endpoint.
    for node in range(n):
        for offset in range(1, k // 2 + 1):
            if rng.random() < beta:
                old = (node + offset) % n
                adj[node, old] = 0
                adj[old, node] = 0
                # New endpoint may be anything except the node itself or a current neighbor.
                options = [cand for cand in range(n) if cand != node and adj[node, cand] == 0]
                if options:
                    pick = rng.choice(options)
                    adj[node, pick] = 1
                    adj[pick, node] = 1
    return adj

def clustering_coefficient(A):
    """Average local clustering coefficient (nodes of degree < 2 count as 0)."""
    n = A.shape[0]
    local = []
    for node in range(n):
        nbrs = np.where(A[node] > 0)[0]
        deg = len(nbrs)
        if deg < 2:
            # No neighbor pair exists, so the local coefficient is defined as 0.
            local.append(0.0)
            continue
        # Edges inside the neighborhood divided by the number of possible pairs.
        links = A[np.ix_(nbrs, nbrs)].sum() / 2
        local.append(links / (deg * (deg - 1) / 2))
    return np.mean(local)

def avg_path_length_bfs(A, sample=100):
    """Estimate the average shortest-path length of graph A.

    Runs BFS from min(sample, n) randomly chosen source nodes (all nodes when
    sample >= n) and averages distances over all reached (source, target) pairs.
    Unreachable pairs are excluded; returns inf when nothing is reached.
    """
    from collections import deque  # O(1) popleft; list.pop(0) was O(n) per dequeue

    n = A.shape[0]
    sources = np.random.choice(n, min(sample, n), replace=False)
    total, count = 0, 0
    for s in sources:
        # Single-source BFS distances; -1 marks unvisited nodes.
        dist = [-1] * n
        dist[s] = 0
        queue = deque([s])
        while queue:
            v = queue.popleft()
            for u in np.where(A[v] > 0)[0]:
                if dist[u] == -1:
                    dist[u] = dist[v] + 1
                    queue.append(u)
        reached = [d for d in dist if d > 0]
        total += sum(reached)
        count += len(reached)
    return total / count if count > 0 else float('inf')

# Quick test
# Unrewired ring lattice: closed-form clustering C = 3(k-2)/(4(k-1)) and
# approximate mean path length L ~ n/(2k).
n_ws, k_ws = 200, 10
A_ring = watts_strogatz(n_ws, k_ws, beta=0.0, seed=42)
C_ring = clustering_coefficient(A_ring)
L_ring = avg_path_length_bfs(A_ring, sample=50)

theory_C = 3*(k_ws-2) / (4*(k_ws-1))
theory_L = n_ws / (2*k_ws)
print(f'Ring lattice (beta=0):')
print(f'  C = {C_ring:.4f} (theory {theory_C:.4f})')
print(f'  L = {L_ring:.2f}  (theory {theory_L:.1f})')

ok_C = abs(C_ring - theory_C) < 0.05
ok_L = abs(L_ring - theory_L) < theory_L * 0.15
print(f"{'PASS' if ok_C and ok_L else 'FAIL'} - ring lattice C and L match theory")

Code cell 15

# === 4.2 Small-World Parameter Sweep ===
# Sweep the rewiring probability and record clustering and mean path length;
# the small-world regime keeps C high while L collapses.

n_ws, k_ws = 200, 10
beta_vals = [0, 0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 1.0]
C_vals_ws = []
L_vals_ws = []

np.random.seed(42)
for beta in beta_vals:
    # Per-beta deterministic seed so individual rows are reproducible.
    A_ws = watts_strogatz(n_ws, k_ws, beta, seed=int(beta*1000))
    C_vals_ws.append(clustering_coefficient(A_ws))
    L_vals_ws.append(avg_path_length_bfs(A_ws, sample=50))

C0, L0 = C_vals_ws[0], L_vals_ws[0]  # beta=0 baselines for normalization
print('Small-world parameter sweep:')
print(f'{"beta":>8}  {"C(beta)":>8}  {"C/C0":>6}  {"L(beta)":>8}  {"L/L0":>6}')
for b, C, L in zip(beta_vals, C_vals_ws, L_vals_ws):
    print(f'{b:8.3f}  {C:8.4f}  {C/C0:6.3f}  {L:8.2f}  {L/L0:6.3f}')

# Identify small-world regime
small_world_betas = [
    b for b, C, L in zip(beta_vals, C_vals_ws, L_vals_ws)
    if C > 0.5 * C0 and L < 0.5 * L0
]
print(f'\nSmall-world regime (C > 0.5*C0 AND L < 0.5*L0): beta in {small_world_betas}')

Code cell 16

# === 4.3 Small-World Visualization ===
# Normalized C and L against beta on a log axis, with the (1-beta)^3 clustering
# approximation overlaid.

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 6))
    beta_plot = beta_vals[1:]  # Skip beta=0 for log-scale x-axis
    C_norm = [C/C0 for C in C_vals_ws[1:]]
    L_norm = [L/L0 for L in L_vals_ws[1:]]

    ax.semilogx(beta_plot, C_norm, 'o-', color=COLORS['primary'],
                ms=8, lw=2, label='$C(\\beta)/C(0)$ — Clustering')
    ax.semilogx(beta_plot, L_norm, 's--', color=COLORS['secondary'],
                ms=8, lw=2, label='$L(\\beta)/L(0)$ — Path length')

    # Theory line for clustering
    beta_fine = np.logspace(-3, 0, 100)
    C_theory_norm = (1 - beta_fine)**3
    ax.semilogx(beta_fine, C_theory_norm, ':', color=COLORS['primary'],
                alpha=0.5, lw=1.5, label='$(1-\\beta)^3$ (theory)')

    ax.axvspan(0.01, 0.1, alpha=0.1, color=COLORS['tertiary'],
               label='Small-world regime')
    ax.set_title('Watts-Strogatz: Clustering vs Path Length')
    ax.set_xlabel('Rewiring probability $\\beta$ (log scale)')
    ax.set_ylabel('Normalized value')
    ax.legend(loc='lower left', fontsize=10)
    ax.set_ylim(-0.05, 1.1)
    plt.tight_layout()
    plt.show()
    print('Small-world plot displayed.')

5. Barabási-Albert Scale-Free Model

Implement preferential attachment, verify the power-law degree distribution $P(k) \propto k^{-3}$, and compare to ER.

Code cell 18

# === 5.1 Barabási-Albert Preferential Attachment ===

def barabasi_albert(n, m, seed=None):
    """BA preferential attachment model.
    n: final number of nodes
    m: edges per new node
    Returns adjacency matrix.
    """
    rng = np.random.default_rng(seed)
    m0 = m + 1  # seed graph: complete graph on m+1 nodes
    A = np.zeros((n, n))
    A[:m0, :m0] = 1 - np.eye(m0)

    # Running degree list so attachment weights are cheap to read each step.
    degs = A[:m0].sum(axis=1).tolist() + [0.0] * (n - m0)

    for t in range(m0, n):
        existing = np.array(degs[:t])
        mass = existing.sum()
        # Attach proportionally to degree; uniform fallback if all degrees are zero.
        if mass == 0:
            weights = np.ones(t) / t
        else:
            weights = existing / mass
        chosen = rng.choice(t, size=m, replace=False, p=weights)
        for tgt in chosen:
            A[t, tgt] = 1
            A[tgt, t] = 1
            degs[t] += 1
            degs[tgt] += 1

    return A

# Generate BA graph
n_ba, m_ba = 1000, 3
A_ba = barabasi_albert(n_ba, m_ba, seed=42)
deg_ba = A_ba.sum(axis=1).astype(int)

print(f'BA graph: n={n_ba}, m={m_ba}')
print(f'Average degree: {deg_ba.mean():.2f} (theory: {2*m_ba:.0f})')
print(f'Max degree: {deg_ba.max()} (theory ~sqrt(n)*m = {int(np.sqrt(n_ba)*m_ba)})')
print(f'Min degree: {deg_ba.min()}')

# Degree distribution tail
# Continuous-approximation MLE for the power-law exponent on degrees >= k_min:
# gamma = 1 + N / sum(log(k / (k_min - 0.5))).
k_min = 5
tail_k = deg_ba[deg_ba >= k_min]
gamma_mle = 1 + len(tail_k) * (np.log(tail_k / (k_min - 0.5))).sum()**(-1)
print(f'\nMLE exponent gamma = {gamma_mle:.3f} (theory: 3.0)')

ok = abs(gamma_mle - 3.0) < 0.5
print(f"{'PASS' if ok else 'FAIL'} - power law exponent close to 3")

Code cell 19

# === 5.2 Scale-Free vs Poisson Degree Distribution ===
# Contrast the BA degree histogram against an ER graph with matched mean degree,
# on linear and log-log axes (the latter with the k^-3 reference line).

if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Generate ER with same average degree
    avg_deg_ba = deg_ba.mean()
    A_er = gnp(n_ba, avg_deg_ba/n_ba, seed=42)
    deg_er = A_er.sum(axis=1).astype(int)

    # Left: linear scale
    ax = axes[0]
    k_max_plot = 30
    bins_ba = np.bincount(deg_ba, minlength=k_max_plot+1)[:k_max_plot+1] / n_ba
    bins_er = np.bincount(deg_er, minlength=k_max_plot+1)[:k_max_plot+1] / n_ba
    k_arr = np.arange(k_max_plot+1)
    ax.bar(k_arr - 0.2, bins_ba, width=0.4, color=COLORS['primary'],
           alpha=0.8, label=f'BA (m={m_ba})')
    ax.bar(k_arr + 0.2, bins_er, width=0.4, color=COLORS['secondary'],
           alpha=0.8, label=f'ER (c={avg_deg_ba:.1f})')
    ax.set_title('Degree Distribution (linear scale)')
    ax.set_xlabel('Degree k')
    ax.set_ylabel('P(k)')
    ax.legend()

    # Right: log-log with power law fit
    ax = axes[1]
    k_range = np.arange(1, deg_ba.max()+1)
    ba_counts = np.bincount(deg_ba, minlength=len(k_range)+1)[1:len(k_range)+1]
    mask = ba_counts > 0  # log axes cannot show empty bins
    ax.loglog(k_range[mask], ba_counts[mask]/n_ba, 'o',
              color=COLORS['primary'], ms=5, alpha=0.7, label='BA empirical')

    # Power law fit line
    k_fit = k_range[k_range >= k_min]
    C_fit = 2*m_ba**2  # continuum-theory prefactor P(k) = 2 m^2 k^-3
    ax.loglog(k_fit, C_fit/k_fit**3, '-', color=COLORS['error'], lw=2,
              label=f'$k^{{-3}}$ theory')

    ax.set_title('Degree Distribution (log-log scale)')
    ax.set_xlabel('Degree k')
    ax.set_ylabel('P(k)')
    ax.legend()

    fig.tight_layout()
    plt.show()
    print('Scale-free plot displayed.')

6. Stochastic Block Model and Community Detection

Generate SBM graphs, apply spectral clustering, and verify the Kesten-Stigum threshold for 2-block community detection.

Code cell 21

# === 6.1 SBM Generator ===

def stochastic_block_model(n, k, p_in, p_out, seed=None):
    """Symmetric k-block SBM with equal community sizes.
    p_in: within-block edge probability
    p_out: between-block edge probability
    Returns (A, labels)
    """
    rng = np.random.default_rng(seed)
    # Balanced communities; n is silently truncated to a multiple of k.
    labels = np.repeat(np.arange(k), n // k)
    n = len(labels)
    A = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            threshold = p_in if labels[i] == labels[j] else p_out
            if rng.random() < threshold:
                A[i, j] = A[j, i] = 1
    return A, labels

def kesten_stigum_snr(a, b):
    """SNR = (a-b)^2 / (2(a+b)) for sparse SBM p=a/n, q=b/n."""
    gap, total = a - b, a + b
    return gap * gap / (2 * total)

# Test three regimes
# (a, b) pairs above, near, and below the Kesten-Stigum detectability threshold.
n_sbm = 400
k_sbm = 2

test_cases = [
    (20, 5, 'Well above KS'),
    (10, 5, 'Near KS (boundary: SNR=25/30~0.83)'),
    (6, 4, 'Below KS'),
]

print('Kesten-Stigum analysis:')
print(f'{"(a,b)":>10}  {"SNR":>6}  {"KS threshold":>14}  {"Detectable?":>12}')
for a, b, desc in test_cases:
    snr = kesten_stigum_snr(a, b)
    detectable = snr > 1.0  # SNR > 1 is exactly (a-b)^2 > 2(a+b)
    print(f'({a},{b}){"":>6}  {snr:6.3f}  {"(a-b)^2=2(a+b)":>14}  {str(detectable):>12}  {desc}')

print(f'\nKS threshold condition: (a-b)^2 > 2(a+b)')

Code cell 22

# === 6.2 Spectral Community Detection ===

def spectral_clustering_2block(A):
    """2-block spectral clustering via second eigenvector of normalized Laplacian."""
    n = A.shape[0]
    deg = A.sum(axis=1)
    # D^{-1/2}, with isolated nodes mapped to 0 to avoid division by zero.
    inv_sqrt = np.where(deg > 0, 1.0 / np.sqrt(deg), 0)
    D_half = np.diag(inv_sqrt)
    lap = np.eye(n) - D_half @ A @ D_half
    vals, vecs = np.linalg.eigh(lap)
    fiedler = vecs[:, 1]  # eigenvector of the second-smallest eigenvalue
    return (fiedler > 0).astype(int), vals

def accuracy(est, true):
    """Fraction correct up to global flip."""
    direct = (est == true).mean()
    flipped = (est == (1 - true)).mean()
    # Labels 0/1 are only identifiable up to a swap, so score both orientations.
    return max(direct, flipped)

# Run spectral clustering on each (a, b) regime and tabulate recovery accuracy
# against the KS detectability prediction.
print('Spectral clustering accuracy on SBM:')
print(f'{"(a,b)":>10}  {"SNR":>6}  {"Accuracy":>10}  {"KS detectable?":>15}')

np.random.seed(42)
for a, b, desc in test_cases:
    p_in = a / n_sbm
    p_out = b / n_sbm
    A_sbm, labels_true = stochastic_block_model(n_sbm, 2, p_in, p_out, seed=42)
    labels_est, eigvals = spectral_clustering_2block(A_sbm)
    acc = accuracy(labels_est, labels_true)
    snr = kesten_stigum_snr(a, b)
    print(f'({a},{b}){"":>6}  {snr:6.3f}  {acc:10.3f}  {str(snr > 1.0):>15}  {desc}')

ok = True  # Visual check expected
print('\n(Above KS: high accuracy; below KS: near random guess ~0.5)')

Code cell 23

# === 6.3 SBM Eigenvalue Spectrum ===
# Adjacency spectra per regime: the informative second eigenvalue separates from
# the bulk only above the KS threshold.

if HAS_MPL:
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    np.random.seed(42)
    for idx, (a, b, desc) in enumerate(test_cases):
        ax = axes[idx]
        A_sbm, labels_true = stochastic_block_model(n_sbm, 2, a/n_sbm, b/n_sbm, seed=42)
        eigvals_full = np.linalg.eigvalsh(A_sbm)
        snr = kesten_stigum_snr(a, b)

        ax.hist(eigvals_full, bins=50, density=True,
                color=COLORS['primary'], alpha=0.7, edgecolor='none')

        # Mark the top 2 eigenvalues
        top2 = sorted(eigvals_full)[-2:]
        for ev in top2:
            ax.axvline(ev, color=COLORS['error'], lw=1.5, ls='--')

        ax.set_title(f'(a={a},b={b}), SNR={snr:.2f}\n{desc}')
        ax.set_xlabel('Eigenvalue')
        if idx == 0:
            ax.set_ylabel('Density')

    fig.suptitle('SBM Adjacency Eigenvalue Spectra (red dashed = top 2 eigenvalues)',
                 fontsize=13)
    fig.tight_layout()
    plt.show()
    print('Eigenvalue spectrum plot displayed.')

7. Wigner's Semicircle Law

Verify that bulk eigenvalues of random matrices follow the semicircle distribution and observe the outlier eigenvalue in $G(n,p)$.

Code cell 25

# === 7.1 Wigner Matrix Eigenvalue Distribution ===

from scipy.stats import norm

def wigner_matrix(n, seed=None):
    """Wigner GOE matrix: W = (M + M.T) / (2*sqrt(n))."""
    rng = np.random.default_rng(seed)
    # Symmetrize an iid Gaussian matrix; the 1/sqrt(n) scaling puts the
    # limiting spectrum on the fixed interval [-2, 2].
    gauss = rng.standard_normal((n, n))
    return (gauss + gauss.T) / (2 * np.sqrt(n))

def semicircle_density(x, sigma=1.0):
    """Semicircle density: (2/pi*sigma^2) * sqrt(sigma^2 - x^2) for |x| <= sigma."""
    # sigma here is the radius, not std
    mask = np.abs(x) <= sigma
    d = np.zeros_like(x)
    d[mask] = (2 / (np.pi * sigma**2)) * np.sqrt(sigma**2 - x[mask]**2)
    return d

# Compute eigenvalues for increasing n
# The fraction of eigenvalues inside the limiting support [-2, 2] should tend to 1.
sizes = [100, 500, 1000]
print('Wigner semicircle convergence:')
for n_w in sizes:
    W = wigner_matrix(n_w, seed=42)
    eigvals_w = np.linalg.eigvalsh(W)
    # For GOE with sigma=1: radius = 2
    frac_in = np.mean(np.abs(eigvals_w) <= 2.05)  # Small buffer
    print(f'  n={n_w:5d}: max|lambda|={eigvals_w.max():.4f} '
          f'(theory: 2.0), frac in [-2,2]: {frac_in:.4f}')

ok = np.linalg.eigvalsh(wigner_matrix(500, seed=42)).max() < 2.5
print(f"\n{'PASS' if ok else 'FAIL'} - Wigner eigenvalues bounded by ~2")

Code cell 26

# === 7.2 Semicircle Law Visualization ===
# Histograms of Wigner spectra at increasing n with the semicircle overlay.

if HAS_MPL:
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    for ax, n_w in zip(axes, [100, 500, 1000]):
        W = wigner_matrix(n_w, seed=42)
        eigvals_w = np.linalg.eigvalsh(W)

        ax.hist(eigvals_w, bins=40, density=True,
                color=COLORS['primary'], alpha=0.7, label=f'n={n_w}')

        x_fine = np.linspace(-2.2, 2.2, 300)
        # sigma here is the support radius 2 of the scaled GOE spectrum.
        ax.plot(x_fine, semicircle_density(x_fine, sigma=2.0),
                '-', color=COLORS['error'], lw=2, label='Semicircle')

        ax.set_title(f'Wigner n={n_w}')
        ax.set_xlabel('Eigenvalue')
        if n_w == 100:
            ax.set_ylabel('Density')
        ax.legend(fontsize=9)

    fig.suptitle("Wigner's Semicircle Law Convergence", fontsize=13)
    fig.tight_layout()
    plt.show()
    print('Semicircle plot displayed.')

Code cell 27

# === 7.3 Outlier Eigenvalue in G(n,p) ===
# The top eigenvalue of a dense G(n,p) adjacency matrix escapes the semicircle
# bulk and concentrates near np; the bulk edge sits at 2*sqrt(np(1-p)).

n_gnp = 600
p_gnp = 0.15  # Dense enough for clear semicircle
A_gnp = gnp(n_gnp, p_gnp, seed=42)

# Center and scale
sigma_gnp = np.sqrt(n_gnp * p_gnp * (1 - p_gnp))  # = sqrt(n*p*(1-p))
eigvals_gnp = np.linalg.eigvalsh(A_gnp)

outlier = eigvals_gnp[-1]  # eigvalsh returns eigenvalues in ascending order
theory_outlier = n_gnp * p_gnp  # ~ np
bulk_edge = 2 * sigma_gnp

print(f'G(n,p): n={n_gnp}, p={p_gnp}')
print(f'Largest eigenvalue (outlier): {outlier:.3f} (theory np = {theory_outlier:.1f})')
print(f'2nd eigenvalue: {eigvals_gnp[-2]:.3f}')
print(f'Bulk edge 2*sqrt(np(1-p)): {bulk_edge:.3f}')
print(f'Gap (outlier - bulk): {outlier - bulk_edge:.3f}')

ok = abs(outlier - theory_outlier) < theory_outlier * 0.1
print(f"\n{'PASS' if ok else 'FAIL'} - outlier eigenvalue close to np")

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 5))
    # Bulk eigenvalues (exclude top 1)
    bulk = eigvals_gnp[:-1]
    ax.hist(bulk, bins=50, density=True,
            color=COLORS['primary'], alpha=0.7, label='Bulk eigenvalues')
    ax.axvline(outlier, color=COLORS['error'], lw=2, ls='--',
               label=f'Outlier $\\lambda_1={outlier:.1f}$ (theory $np={theory_outlier:.0f}$)')
    ax.axvline(bulk_edge, color=COLORS['neutral'], lw=1.5, ls=':',
               label=f'Bulk edge $2\\sqrt{{np(1-p)}}={bulk_edge:.1f}$')

    x_sc = np.linspace(-bulk_edge*1.1, bulk_edge*1.1, 300)
    ax.plot(x_sc, semicircle_density(x_sc, sigma=bulk_edge),
            '-', color=COLORS['secondary'], lw=2, label='Semicircle')

    ax.set_title(f'Eigenvalue Spectrum of G(n,p): n={n_gnp}, p={p_gnp}')
    ax.set_xlabel('Eigenvalue')
    ax.set_ylabel('Density')
    ax.legend()
    plt.tight_layout()
    plt.show()
    print('Outlier eigenvalue plot displayed.')

8. Graphons: Infinite-Size Limit

Visualize graphons as 2D functions, sample graphs from them, and estimate a step-function graphon from an SBM.

Code cell 29

# === 8.1 Graphon Visualization ===

def graphon_er(x, y, p=0.3):
    """ER graphon: W(x,y) = p (constant)."""
    # The Erdős-Rényi graphon is flat: both coordinates are ignored.
    out = np.full_like(x, p)
    return out

def graphon_sbm(x, y, k=3, B=None):
    """k-block SBM graphon (step function)."""
    if B is None:
        B = np.array([[0.8, 0.1, 0.1],
                      [0.1, 0.7, 0.2],
                      [0.1, 0.2, 0.6]])
    # Map latent positions in [0, 1] onto block indices 0..k-1.
    bx = np.clip((x * k).astype(int), 0, k - 1)
    by = np.clip((y * k).astype(int), 0, k - 1)
    return B[bx, by]

def graphon_product(x, y):
    """Product graphon: W(x,y) = x*y."""
    # Separable (rank-one) graphon.
    prod = x * y
    return prod

# Heatmaps of the three canonical graphons on a 200x200 grid over [0,1]^2.
if HAS_MPL:
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    grid = np.linspace(0, 1, 200)
    X, Y = np.meshgrid(grid, grid)

    graphons = [
        (graphon_er(X, Y), 'ER Graphon ($p=0.3$)'),
        (graphon_sbm(X, Y), '3-Block SBM Graphon'),
        (graphon_product(X, Y), 'Product Graphon ($W=xy$)'),
    ]

    for ax, (W_grid, title) in zip(axes, graphons):
        im = ax.imshow(W_grid, origin='lower', extent=[0,1,0,1],
                       cmap='Blues', vmin=0, vmax=1)
        plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
        ax.set_title(title)
        ax.set_xlabel('$x$')
        if ax == axes[0]:
            ax.set_ylabel('$y$')

    fig.suptitle('Three Canonical Graphons $W:[0,1]^2 \\to [0,1]$', fontsize=13)
    fig.tight_layout()
    plt.show()
    print('Graphon visualization displayed.')

Code cell 30

# === 8.2 Graphon Sampling and Convergence ===

def sample_from_graphon(W_func, n, seed=None):
    """Sample an n-node graph from graphon W_func."""
    rng = np.random.default_rng(seed)
    latent = rng.uniform(0, 1, n)  # each node draws a latent type in [0, 1)
    A = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            # Edge i~j appears independently with probability W(xi_i, xi_j).
            if rng.random() < W_func(latent[i], latent[j]):
                A[i, j] = A[j, i] = 1
    return A, latent

# Sample SBM graphon graphs of increasing size
B_true = np.array([[0.8, 0.1, 0.1],
                   [0.1, 0.7, 0.2],
                   [0.1, 0.2, 0.6]])

# Scalar wrapper around graphon_sbm for point evaluations.
# NOTE(review): the loop below rebuilds an equivalent lambda instead of
# reusing W_sbm_3 — both evaluate the same step function.
def W_sbm_3(x, y):
    return graphon_sbm(np.array([[x]]), np.array([[y]]), k=3, B=B_true)[0, 0]

print('Graphon sampling: edge density convergence')
for n_g in [50, 200, 500]:
    A_g, xi_g = sample_from_graphon(
        lambda x, y: graphon_sbm(np.array([[x]]), np.array([[y]]), k=3, B=B_true)[0, 0],
        n_g, seed=42
    )
    edge_density = A_g.sum() / (n_g * (n_g - 1))
    # Theoretical edge density
    theory_density = B_true.mean()  # Approximation for equal communities
    print(f'  n={n_g:4d}: empirical density={edge_density:.4f}, theory approx {theory_density:.4f}')

print('Graphon sampling verified.')

Code cell 31

# === 8.3 Graphon Estimation from SBM Data ===

# Generate SBM instance
n_est = 300  # nodes in the estimation experiment
k_est = 3    # number of blocks

def sbm_3block(n, B, seed=None):
    """Sample a balanced SBM whose block count is inferred from B.

    n: requested node count (truncated down to a multiple of the block count)
    B: (k, k) symmetric matrix of block edge probabilities
    Returns (A, labels).
    """
    rng = np.random.default_rng(seed)
    # Derive k from B itself; the original read the module-level global k_est,
    # a hidden dependency that breaks if B and k_est ever disagree.
    k = B.shape[0]
    labels = np.repeat(np.arange(k), n // k)
    n_actual = len(labels)
    A = np.zeros((n_actual, n_actual))
    for i in range(n_actual):
        for j in range(i+1, n_actual):
            p = B[labels[i], labels[j]]
            if rng.random() < p:
                A[i, j] = 1
                A[j, i] = 1
    return A, labels

A_est, labels_est_true = sbm_3block(n_est, B_true, seed=42)

# Oracle: sort by true labels
# (Uses the ground-truth communities, so this is an oracle estimator, not one
# recoverable from the adjacency matrix alone.)
sort_idx = np.argsort(labels_est_true)
A_sorted = A_est[np.ix_(sort_idx, sort_idx)]

# Estimate block probabilities
# Each B[r, s] is estimated by the empirical edge density of its block.
block_size = n_est // k_est
B_est = np.zeros((k_est, k_est))
for r in range(k_est):
    for s in range(r, k_est):
        r_idx = slice(r*block_size, (r+1)*block_size)
        s_idx = slice(s*block_size, (s+1)*block_size)
        sub = A_sorted[r_idx, s_idx]
        B_est[r, s] = sub.mean()
        B_est[s, r] = B_est[r, s]  # symmetrize

print('Estimated block matrix:')
print(B_est.round(3))
print('\nTrue block matrix:')
print(B_true.round(3))

error = np.abs(B_est - B_true).max()
print(f'\nMax absolute error: {error:.4f}')
print(f"{'PASS' if error < 0.1 else 'FAIL'} - graphon estimation error < 0.1")

Code cell 32

# === 8.4 Sorted Adjacency Matrix Convergence to Graphon ===
# Pixel plots of label-sorted adjacency matrices; the block structure sharpens
# toward the step-function graphon as n grows.

if HAS_MPL:
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    for ax, n_g in zip(axes, [100, 300, 1000]):
        if n_g <= 300:
            A_g, lab_g = sbm_3block(n_g, B_true, seed=42)
        else:
            # For n=1000, use gnp approximation per block
            # (inline sampler equivalent to sbm_3block, avoiding the helper call)
            lab_g = np.repeat(np.arange(3), n_g // 3)
            n_act = len(lab_g)
            rng = np.random.default_rng(42)
            A_g = np.zeros((n_act, n_act))
            for i in range(n_act):
                for j in range(i+1, n_act):
                    p = B_true[lab_g[i], lab_g[j]]
                    if rng.random() < p:
                        A_g[i, j] = A_g[j, i] = 1

        sort_idx = np.argsort(lab_g)
        A_g_sorted = A_g[np.ix_(sort_idx, sort_idx)]
        ax.imshow(A_g_sorted, cmap='Blues', aspect='auto',
                  interpolation='nearest', vmin=0, vmax=1)
        ax.set_title(f'n={n_g}')
        ax.set_xlabel('Node index (sorted by community)')
        if ax == axes[0]:
            ax.set_ylabel('Node index')

    fig.suptitle('Sorted Adjacency Matrix Converging to 3-Block SBM Graphon',
                 fontsize=12)
    fig.tight_layout()
    plt.show()
    print('Sorted adjacency convergence plot displayed.')

9. Mean-Field Analysis of Preferential Attachment

Verify $k_i(t) \approx m\sqrt{t/t_i}$ for individual node degree trajectories.

Code cell 34

# === 9.1 Node Degree Trajectories in BA Model ===

def ba_with_history(n, m, seed=None, snapshots=None, tracked=None):
    """Barabasi-Albert model that records degree trajectories over time.

    Grows a BA graph to n nodes, attaching each new node to m existing
    nodes with probability proportional to degree, and samples the
    degrees of selected early nodes at fixed snapshot times.

    Parameters
    ----------
    n : int
        Final number of nodes.
    m : int
        Edges added per new node. The seed graph is a complete graph on
        m + 1 nodes, so every seed node starts with degree m.
    seed : int or None, optional
        Seed for the random generator (None -> nondeterministic).
    snapshots : iterable of int or None, optional
        Network sizes at which degrees are recorded; defaults to
        [50, 200, 500, 1000, 2000]. Values >= n are never reached.
    tracked : list of int or None, optional
        Node indices whose degrees are recorded; defaults to [1, 5, 10, 50].

    Returns
    -------
    history : dict
        history[i] = list of degrees of node i, one entry per reached snapshot.
    history_t : list of int
        Snapshot times actually reached, in increasing order.
    tracked : list of int
        The node indices that were tracked.
    """
    rng = np.random.default_rng(seed)
    if snapshots is None:
        snapshots = [50, 200, 500, 1000, 2000]
    if tracked is None:
        tracked = [1, 5, 10, 50]
    snapshot_set = set(snapshots)  # O(1) membership test inside the growth loop

    m0 = m + 1
    # Seed graph: complete graph on m0 nodes (each seed node has degree m0-1 = m).
    degrees = [float(m0 - 1)] * m0 + [0.0] * (n - m0)
    history = {i: [] for i in tracked}
    history_t = []

    for new_node in range(m0, n):
        d_arr = np.array(degrees[:new_node])
        total = d_arr.sum()
        # Preferential attachment: choose m distinct targets ~ degree.
        probs = d_arr / total if total > 0 else np.ones(new_node) / new_node
        targets = rng.choice(new_node, size=m, replace=False, p=probs)
        degrees[new_node] += m
        for t in targets:
            degrees[t] += 1

        if new_node in snapshot_set:
            history_t.append(new_node)
            for i in tracked:
                history[i].append(degrees[i])

    return history, history_t, tracked

# Grow a BA network while sampling degree trajectories of early nodes.
n_traj, m_traj = 2100, 2
history, hist_t, tracked = ba_with_history(n_traj, m_traj, seed=42)

print('Node degree trajectories vs mean-field prediction k(t) = m*sqrt(t/t_i):')
print(f'{"Node i":>8} {"t_i":>6} {"Mean-field":>12}')
t_final = hist_t[-1]  # last snapshot time that was reached
for node_i in tracked:
    t_i = node_i  # Approximately: node i added at time i
    theory_k = m_traj * np.sqrt(t_final / max(t_i, 1))
    if history[node_i]:
        actual_k = history[node_i][-1]
    else:
        actual_k = 'N/A'
    print(f'{node_i:8d} {t_i:6d} {theory_k:12.1f}  actual={actual_k}')

print('\n(Older nodes grow faster -- they have been attracting links longer)')

Code cell 35

# === 9.2 Mean-Field Trajectory Visualization ===

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 5))

    t_arr = np.array(hist_t, dtype=float)
    colors_traj = [COLORS['primary'], COLORS['secondary'],
                   COLORS['tertiary'], COLORS['highlight']]

    for node_i, col in zip(tracked, colors_traj):
        if history[node_i]:
            # Simulated trajectory: markers only.
            ax.plot(t_arr, history[node_i], 'o', color=col, ms=6, alpha=0.8,
                    label=f'Node {node_i} (added at t≈{node_i})')
            # Mean-field prediction k_i(t) = m * sqrt(t / t_i), drawn dashed.
            # (Previously drawn solid while the title claimed "dashed=mean-field".)
            t_fine = np.linspace(node_i, t_arr.max(), 100)
            theory = m_traj * np.sqrt(t_fine / node_i)
            ax.plot(t_fine, theory, '--', color=col, lw=1.5, alpha=0.5)

    ax.set_title('BA Model: Node Degree Trajectories (markers=sim, dashed=mean-field)')
    ax.set_xlabel('Network size $t$')
    ax.set_ylabel('Degree $k_i(t)$')
    ax.legend(fontsize=9)
    plt.tight_layout()
    plt.show()
    print('Trajectory plot displayed.')

10. Robustness vs Targeted Attack

Compare random failure vs targeted degree-based attack on BA vs ER graphs.

Code cell 37

# === 10.1 Percolation Simulation: Random Failure vs Targeted Attack ===

def percolate_random(A, fracs):
    """Random-failure percolation curve.

    For each removal fraction in `fracs`, deletes that fraction of nodes
    uniformly at random (using the global NumPy RNG) and records the size
    of the largest connected component as a fraction of the ORIGINAL n.

    Parameters
    ----------
    A : (n, n) ndarray
        Symmetric 0/1 adjacency matrix.
    fracs : iterable of float
        Fractions of nodes to remove (each in [0, 1]).

    Returns
    -------
    list of float
        Giant-component fraction (relative to the original n) after each
        removal level.
    """
    n = A.shape[0]
    results = []
    for frac in fracs:
        n_remove = int(frac * n)
        remove = np.random.choice(n, n_remove, replace=False)
        mask = np.ones(n, dtype=bool)
        mask[remove] = False
        idx = np.where(mask)[0]
        if len(idx) == 0:
            # Everything removed: no component survives. Guard BEFORE
            # slicing so we never build an empty submatrix.
            results.append(0)
            continue
        A_sub = A[np.ix_(idx, idx)]
        L1, _ = largest_component_sizes(A_sub)
        results.append(L1 / n)  # As fraction of ORIGINAL n
    return results

def percolate_targeted(A, fracs):
    """Targeted-attack percolation curve: remove highest-degree nodes first.

    Parameters and return value mirror `percolate_random`; the removal
    order is fixed by descending degree instead of random sampling.
    """
    n = A.shape[0]
    degrees = A.sum(axis=1)
    order = np.argsort(-degrees)  # Highest degree first
    results = []
    for frac in fracs:
        n_remove = int(frac * n)
        # Boolean mask built in O(n) instead of an O(n^2) per-element
        # `i not in set` comprehension.
        mask = np.ones(n, dtype=bool)
        mask[order[:n_remove]] = False
        idx = np.where(mask)[0]
        if len(idx) == 0:
            results.append(0)
            continue
        A_sub = A[np.ix_(idx, idx)]
        L1, _ = largest_component_sizes(A_sub)
        results.append(L1 / n)  # As fraction of ORIGINAL n
    return results

# Build one scale-free and one homogeneous graph with the same mean degree,
# then measure both removal strategies on each.
n_perc, m_perc = 300, 3
fracs = np.linspace(0, 0.5, 12)

np.random.seed(42)
A_ba_perc = barabasi_albert(n_perc, m_perc, seed=42)
A_er_perc = gnp(n_perc, 2*m_perc/n_perc, seed=42)  # Same avg degree

# NOTE: percolate_random draws from the global RNG, so keep this call order.
ba_random = percolate_random(A_ba_perc, fracs)
ba_targeted = percolate_targeted(A_ba_perc, fracs)
er_random = percolate_random(A_er_perc, fracs)
er_targeted = percolate_targeted(A_er_perc, fracs)

print('Giant component fraction after removing f fraction of nodes:')
print(f'{"f":>6}  {"BA-rand":>9}  {"BA-target":>11}  {"ER-rand":>9}  {"ER-target":>11}')
rows = zip(fracs[::3], ba_random[::3], ba_targeted[::3],
           er_random[::3], er_targeted[::3])
for f, v_ba_r, v_ba_t, v_er_r, v_er_t in rows:
    print(f'{f:6.2f}  {v_ba_r:9.3f}  {v_ba_t:11.3f}  {v_er_r:9.3f}  {v_er_t:11.3f}')

Code cell 38

# === 10.2 Robustness Visualization ===

if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10, 5))

    # (data, format, color, alpha, label) for the four curves,
    # drawn in the same order they appear in the legend.
    curves = [
        (ba_random,   'o-',  COLORS['primary'],   None, 'BA: random removal'),
        (ba_targeted, 's--', COLORS['error'],     None, 'BA: targeted attack (hub removal)'),
        (er_random,   'o:',  COLORS['tertiary'],  0.8,  'ER: random removal'),
        (er_targeted, 's-.', COLORS['secondary'], 0.8,  'ER: targeted attack'),
    ]
    for ys, fmt, col, alpha_val, lbl in curves:
        extra = {} if alpha_val is None else {'alpha': alpha_val}
        ax.plot(fracs, ys, fmt, color=col, ms=6, label=lbl, **extra)

    ax.set_title('Network Robustness: BA vs ER (Random Failure vs Targeted Attack)')
    ax.set_xlabel('Fraction of nodes removed')
    ax.set_ylabel('Giant component fraction')
    ax.legend(fontsize=10)
    ax.set_xlim(0, 0.5)
    ax.set_ylim(0, 1)
    plt.tight_layout()
    plt.show()
    print('Robustness plot displayed.\n')
    print('Observation: BA is robust to random failure but fragile to targeted attack.')
    print('ER is symmetric: random and targeted removal have similar effect.')

11. Davis-Kahan Bound in Community Detection

Measure eigenvector alignment error as SNR varies, and compare to the Davis-Kahan prediction.

Code cell 40

# === 11.1 Davis-Kahan Eigenvector Alignment ===

def eigenvector_alignment(A, labels):
    """Alignment between the 2nd-largest eigenvector of A and the communities.

    Parameters
    ----------
    A : (n, n) ndarray
        Symmetric adjacency matrix.
    labels : (n,) ndarray of {0, 1}
        Ground-truth two-community assignment.

    Returns
    -------
    float
        |u2 . v_true| in [0, 1], where u2 is the unit eigenvector of the
        2nd-largest eigenvalue and v_true is the normalized +/-1 community
        indicator. 1 means perfect alignment, ~0 means no signal.
    """
    # eigh returns eigenvalues in ascending order, so column -2 is the
    # eigenvector of the 2nd-largest eigenvalue. (The unused `n` and
    # `eigvals` locals from the earlier version are dropped.)
    _, eigvecs = np.linalg.eigh(A)
    u2 = eigvecs[:, -2]

    # Ground truth: normalized community indicator, {0,1} -> {-1,+1}.
    v_true = 2 * (labels - 0.5)
    v_true = v_true / np.linalg.norm(v_true)

    # The sign of an eigenvector is arbitrary, so take the absolute overlap.
    return abs(np.dot(u2, v_true))

# Sweep the community separation a-b while holding a+b fixed.
a_plus_b = 20  # Fixed
diffs = np.linspace(0.1, 15, 20)  # a - b
n_dktest = 400

alignments = []
snr_vals = []
np.random.seed(42)

for diff in diffs:
    a, b = (a_plus_b + diff) / 2, (a_plus_b - diff) / 2
    snr_vals.append(diff**2 / (2 * a_plus_b))
    if b < 0:
        # Degenerate parameters: no valid SBM, record zero alignment.
        alignments.append(0)
        continue
    A_dktest, lab_dk = stochastic_block_model(n_dktest, 2, a/n_dktest, b/n_dktest, seed=42)
    alignments.append(eigenvector_alignment(A_dktest, lab_dk))

# KS threshold at SNR = 1
ks_diff = np.sqrt(2 * a_plus_b)
print(f'Kesten-Stigum threshold: a-b = sqrt(2(a+b)) = {ks_diff:.2f}')
print(f'(SNR = 1 corresponds to this diff)')
print(f'\nAt KS threshold: alignment = '
      f'{alignments[np.argmin(np.abs(diffs - ks_diff))]:.3f}')

Code cell 41

# === 11.2 Davis-Kahan Alignment Visualization ===

if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Left: alignment vs SNR
    # (uses snr_vals / alignments computed in the sweep cell above)
    ax = axes[0]
    ax.plot(snr_vals, alignments, 'o-', color=COLORS['primary'], ms=6)
    ax.axvline(1.0, color=COLORS['error'], ls='--', lw=2,
               label='Kesten-Stigum threshold (SNR=1)')
    ax.axhline(1/np.sqrt(2), color=COLORS['neutral'], ls=':',
               alpha=0.7, label='45° alignment (1/√2)')
    ax.set_title('Eigenvector Alignment vs SNR')
    ax.set_xlabel('Signal-to-noise ratio $(a-b)^2 / (2(a+b))$')
    ax.set_ylabel('$|\\hat{u}_2 \\cdot v_{true}|$')
    ax.legend(fontsize=10)

    # Right: scatter of 2nd eigenvector for above/below KS
    ax = axes[1]
    # Two SBM instances with the same a+b = 20 but different separation:
    # one well above the Kesten-Stigum threshold, one below it.
    cases_dk = [(18, 2, 'Well above KS'), (11, 9, 'Below KS')]
    markers = ['o', 's']
    for (a_c, b_c, lbl), mkr, col in zip(cases_dk, markers,
                                           [COLORS['primary'], COLORS['secondary']]):
        A_c, lab_c = stochastic_block_model(n_dktest, 2,
                                             a_c/n_dktest, b_c/n_dktest, seed=42)
        eigvals_c, eigvecs_c = np.linalg.eigh(A_c)
        u2_c = eigvecs_c[:, -2]
        # Color by community
        # (community 1 is drawn fainter, alpha 0.3 vs 0.6, and only the
        # community-0 scatter carries the legend label)
        ax.scatter(np.where(lab_c == 0)[0] / n_dktest,
                   u2_c[lab_c == 0],
                   marker=mkr, color=col, s=10, alpha=0.6,
                   label=f'{lbl} (a={a_c},b={b_c})')
        ax.scatter(np.where(lab_c == 1)[0] / n_dktest,
                   u2_c[lab_c == 1],
                   marker=mkr, color=col, s=10, alpha=0.3)

    ax.axhline(0, color=COLORS['neutral'], ls='-', lw=0.8)
    ax.set_title('2nd Eigenvector Entries by Community')
    ax.set_xlabel('Node index / n')
    ax.set_ylabel('Eigenvector entry')
    ax.legend(fontsize=9)

    fig.tight_layout()
    plt.show()
    print('Davis-Kahan visualization displayed.')

12. Attention Sparsification as Random Graph Construction

Simulate sparse attention patterns and analyze connectivity, diameter, and spectral gap.

Code cell 43

# === 12.1 Sparse Attention Patterns ===

def local_window_attention(n, window):
    """Local window attention: connect token i to tokens in [i-w, i+w].

    Parameters
    ----------
    n : int
        Number of tokens.
    window : int
        Half-width w of the attention window.

    Returns
    -------
    (n, n) float ndarray
        Symmetric 0/1 adjacency matrix with a zero diagonal (no self-loops).
    """
    # Vectorized: pair (i, j) is connected iff 0 < |i - j| <= window.
    # Replaces the original O(n * window) Python double loop with a single
    # broadcasting expression; output is identical.
    offset = np.arange(n)[:, None] - np.arange(n)[None, :]
    return ((np.abs(offset) <= window) & (offset != 0)).astype(float)

def bigbird_attention(n, window, n_global, seed=None):
    """BigBird: local window + random long-range + global tokens."""
    rng = np.random.default_rng(seed)
    A = local_window_attention(n, window)

    # Global tokens: the first n_global tokens attend to (and are attended
    # by) every token; the diagonal stays zero throughout.
    A[:n_global, :] = 1
    A[:, :n_global] = 1
    np.fill_diagonal(A, 0)

    # Random long-range links: each non-global token gains 2*window extra
    # symmetric edges chosen uniformly among currently unconnected tokens.
    n_random = 2 * window
    for i in range(n_global, n):
        candidates = [j for j in range(n) if A[i, j] == 0 and j != i]
        if len(candidates) < n_random:
            continue  # too few unconnected tokens remain; add none
        for target in rng.choice(candidates, n_random, replace=False):
            A[i, target] = 1
            A[target, i] = 1
    return A

# Sequence length and sparsity parameters for the attention patterns.
n_attn, window, n_global = 64, 4, 4

A_full = 1.0 - np.eye(n_attn)  # Full attention O(n^2): all ones off-diagonal
A_local = local_window_attention(n_attn, window)
A_bigbird = bigbird_attention(n_attn, window, n_global, seed=42)

def spectral_gap_attention(A):
    """2nd-smallest eigenvalue of the normalized Laplacian of A.

    This is the Fiedler value of L_sym = I - D^{-1/2} A D^{-1/2}; a larger
    gap indicates better connectivity / faster mixing of the graph.

    Parameters
    ----------
    A : (n, n) ndarray
        Symmetric 0/1 adjacency matrix.

    Returns
    -------
    float
        eigvals[1] of L_sym, with isolated nodes (degree 0) assigned
        D^{-1/2} = 0.
    """
    d = A.sum(axis=1)
    # Compute 1/sqrt(d) only where d > 0. The earlier np.where form
    # evaluated 1/np.sqrt(d) for every entry, emitting a spurious
    # divide-by-zero RuntimeWarning whenever a node was isolated.
    d_inv_sqrt = np.zeros(len(d), dtype=float)
    nz = d > 0
    d_inv_sqrt[nz] = 1.0 / np.sqrt(d[nz])
    D_inv_sqrt = np.diag(d_inv_sqrt)
    L_sym = np.eye(len(A)) - D_inv_sqrt @ A @ D_inv_sqrt
    eigvals = np.sort(np.linalg.eigvalsh(L_sym))
    return eigvals[1]  # Algebraic connectivity (Fiedler value of normalized L)

# Summarize edge count, density, spectral gap, and connectivity per pattern.
attn_cases = (('Full', A_full), ('Local window', A_local), ('BigBird', A_bigbird))
attn_max_edges = n_attn * (n_attn - 1) / 2
for name, A in attn_cases:
    n_edges = int(A.sum() // 2)  # each undirected edge counted twice in A
    density = n_edges / attn_max_edges
    sg = spectral_gap_attention(A)
    L1, _ = largest_component_sizes(A)
    print(f'{name:15s}: edges={n_edges:5d}, density={density:.3f}, '
          f'spectral_gap={sg:.4f}, connected={L1==n_attn}')

Code cell 44

# === 12.2 Sparse Attention Pattern Visualization ===

if HAS_MPL:
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    # One panel per attention pattern, matrix paired with its title.
    panels = [
        (A_full, 'Full Attention\n$O(n^2)$ edges'),
        (A_local, 'Local Window\n(sparse, disconnected)'),
        (A_bigbird, 'BigBird\n(local+random+global)'),
    ]
    for k, (ax, (mat, title)) in enumerate(zip(axes, panels)):
        ax.imshow(mat, cmap='Blues', interpolation='nearest', vmin=0, vmax=1)
        ax.set_title(title)
        ax.set_xlabel('Token index')
        if k == 0:
            ax.set_ylabel('Token index')

    fig.suptitle(f'Sparse Attention Patterns (n={n_attn} tokens)', fontsize=13)
    fig.tight_layout()
    plt.show()
    print('Attention pattern visualization displayed.')
    print('\nBigBird mimics Watts-Strogatz: local window (ring lattice) + ')
    print('random long-range links + global tokens (hubs) = small-world graph.')

13. Homomorphism Densities and Graph Parameters

Compute triangle density $t(K_3, G)$ and verify the relationship between subgraph densities and graph statistics.

Code cell 46

# === 13.1 Triangle and Subgraph Counting ===

def count_triangles(A):
    """Number of triangles in A: trace(A^3) counts each triangle 6 times."""
    cubed = A @ A @ A
    return int(np.trace(cubed) / 6)

def triangle_density(A):
    """t(K3, G) = triangles / C(n,3)."""
    n = A.shape[0]
    # Triangle count inlined: trace(A^3) counts each triangle 6 times.
    triangles = int(np.trace(np.linalg.matrix_power(A, 3)) / 6)
    return 6 * triangles / (n * (n - 1) * (n - 2))

def edge_density(A):
    """t(K2, G) = edges / C(n,2)."""
    n = A.shape[0]
    edge_count = 0.5 * A.sum()  # each undirected edge appears twice in A
    return edge_count / (n * (n - 1) / 2)

# Compare three graph types
n_hom = 100
np.random.seed(42)  # gnp below draws from the global RNG

# Build in this exact order so the global-RNG draw for gnp is unchanged.
graphs = []
graphs.append(('ER (c=3)', gnp(n_hom, 3/n_hom)))
graphs.append(('WS (k=6, b=0.1)', watts_strogatz(n_hom, 6, 0.1, seed=42)))
graphs.append(('SBM (a=10,b=2)', stochastic_block_model(n_hom, 2, 10/n_hom, 2/n_hom, seed=42)[0]))

print('Subgraph density comparison:')
print(f'{"Graph":20s}  {"Edge density":14s}  {"Triangle density":18s}  '
      f'{"Clustering":12s}')

for name, A in graphs:
    ed = edge_density(A)
    td = triangle_density(A)
    C = clustering_coefficient(A)
    print(f'{name:20s}  {ed:14.4f}  {td:18.6f}  {C:12.4f}')

# ER theoretical prediction: t(K3, G) = p^3
p_er = 3/n_hom
print(f'\nER theory: t(K3) = p^3 = {p_er**3:.6f}')
print(f'(matches empirical ER value above for large n)')

14. Summary: Key Results

Consolidated verification of all major theoretical results.

Code cell 48

# === 14. Consolidated Summary ===

# Re-runs a small instance of each experiment and compares the empirical
# value against theory. Each check prints PASS/FAIL; the tolerances are
# loose enough to absorb finite-size fluctuations at these graph sizes.

print('=== RANDOM GRAPHS: THEORY NOTEBOOK SUMMARY ===')
print()

n_check = 500
np.random.seed(42)

# 1. ER degree distribution
# Mean degree of G(n, c/n) should concentrate near c.
c_check = 4.0
A_check = gnp(n_check, c_check/n_check, seed=42)
deg_check = A_check.sum(axis=1)
ok1 = abs(deg_check.mean() - c_check) < 0.3
print(f'1. ER Poisson degree: mean={deg_check.mean():.3f} (theory={c_check})  {"PASS" if ok1 else "FAIL"}')

# 2. Giant component
# Largest-component fraction vs the theoretical prediction beta(c).
c_gc = 2.5
A_gc = gnp(n_check, c_gc/n_check, seed=42)
L1_gc, _ = largest_component_sizes(A_gc)
beta_gc = beta_theory(c_gc)
ok2 = abs(L1_gc/n_check - beta_gc) < 0.05
print(f'2. Giant component: L1/n={L1_gc/n_check:.3f} (theory={beta_gc:.3f})  {"PASS" if ok2 else "FAIL"}')

# 3. WS clustering
# Ring-lattice clustering C0 = 3(k-2)/(4(k-1)) decayed by (1-beta)^3
# after rewiring with probability beta.
n_ws_c, k_ws_c, beta_ws_c = 200, 10, 0.05
A_ws_c = watts_strogatz(n_ws_c, k_ws_c, beta_ws_c, seed=42)
C_ws_c = clustering_coefficient(A_ws_c)
C0_ws_c = 3*(k_ws_c-2)/(4*(k_ws_c-1))
theory_C_ws = C0_ws_c * (1 - beta_ws_c)**3
ok3 = abs(C_ws_c - theory_C_ws) < 0.08
print(f'3. WS clustering: C={C_ws_c:.4f} (theory={theory_C_ws:.4f})  {"PASS" if ok3 else "FAIL"}')

# 4. BA power law
# Hill-style tail estimate gamma = 1 + n_tail / sum(log(k / 4.5)) over
# degrees k >= 5 (4.5 is presumably a k_min - 1/2 continuity correction
# for the integer cutoff -- confirm against the notes). BA theory: gamma = 3.
n_ba_c = 800
A_ba_c = barabasi_albert(n_ba_c, 2, seed=42)
deg_ba_c = A_ba_c.sum(axis=1).astype(int)
tail_k_c = deg_ba_c[deg_ba_c >= 5]
gamma_c = 1 + len(tail_k_c) * (np.log(tail_k_c / 4.5)).sum()**(-1)
ok4 = abs(gamma_c - 3.0) < 0.6
print(f'4. BA power law: gamma={gamma_c:.3f} (theory=3.0)  {"PASS" if ok4 else "FAIL"}')

# 5. SBM community detection above KS
# a=20, b=5 is well above the Kesten-Stigum threshold, so spectral
# clustering should recover the two communities with high accuracy.
A_sbm_c, lab_sbm_c = stochastic_block_model(n_check, 2, 20/n_check, 5/n_check, seed=42)
lab_est_c, _ = spectral_clustering_2block(A_sbm_c)
acc_c = accuracy(lab_est_c, lab_sbm_c)
ok5 = acc_c > 0.85
print(f'5. SBM spectral clustering: accuracy={acc_c:.3f}  {"PASS" if ok5 else "FAIL"}')

# 6. Wigner semicircle
# Largest eigenvalue should sit near the semicircle edge at 2;
# the 2.3 bound leaves room for finite-n fluctuations.
W_check = wigner_matrix(400, seed=42)
eig_max_check = np.linalg.eigvalsh(W_check).max()
ok6 = eig_max_check < 2.3  # Should be ~2.0
print(f'6. Wigner max eigenvalue: {eig_max_check:.4f} (theory: ~2.0)  {"PASS" if ok6 else "FAIL"}')

all_ok = all([ok1, ok2, ok3, ok4, ok5, ok6])
print(f'\n{"ALL PASS" if all_ok else "SOME FAILURES"} - Theory notebook verification complete.')