Theory Notebook
Converted from `theory.ipynb` for web reading.
§20-01 Fourier Series — Theory Notebook
'Any periodic motion is a superposition of simple oscillations.' — Fourier
Interactive derivations for §20-01. Run top-to-bottom.
| Section | Content |
|---|---|
| §1 | Orthogonality of the trigonometric system |
| §2 | Computing Fourier coefficients |
| §3 | Square, sawtooth, triangle waveforms |
| §4 | Convergence and Gibbs phenomenon |
| §5 | Parseval's theorem |
| §6 | Shift and differentiation properties |
| §7 | RoPE positional encoding |
| §8 | Spectral bias of neural networks |
| §9 | Random Fourier Features |
Code cell 2
# Notebook-wide plotting setup: use seaborn theming when installed, otherwise
# fall back to matplotlib's bundled "seaborn-v0_8-whitegrid" style sheet.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True  # seaborn styling is active
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
# Shared figure defaults: size, resolution, font sizes, open spines, savefig.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)  # reproducible randomness throughout the notebook
print("Plot setup complete.")
Code cell 3
# Defensive setup: the numerical cells only need numpy; plotting cells are
# skipped gracefully (HAS_MPL=False) when matplotlib is unavailable.
import numpy as np
import warnings; warnings.filterwarnings('ignore')
try:
    import matplotlib.pyplot as plt, matplotlib as mpl
    try:
        import seaborn as sns
        sns.set_theme(style='whitegrid', palette='colorblind'); HAS_SNS=True
    except ImportError:
        plt.style.use('seaborn-v0_8-whitegrid'); HAS_SNS=False
    mpl.rcParams.update({
        'figure.figsize':(10,6),'figure.dpi':120,'font.size':13,
        'axes.titlesize':15,'axes.labelsize':13,'xtick.labelsize':11,
        'ytick.labelsize':11,'legend.fontsize':11,'lines.linewidth':2.0,
        'axes.spines.top':False,'axes.spines.right':False,
    })
    # Colorblind-safe palette used by every plotting cell below.
    COLORS={'primary':'#0077BB','secondary':'#EE7733','tertiary':'#009988',
            'error':'#CC3311','neutral':'#555555','highlight':'#EE3377'}
    HAS_MPL=True
except ImportError:
    HAS_MPL=False
np.set_printoptions(precision=6,suppress=True)
np.random.seed(42)
print('Setup complete. HAS_MPL =', HAS_MPL)
1. Orthogonality of the Trigonometric System
The Fourier basis $\{e^{inx}\}_{n\in\mathbb{Z}}$ is orthonormal under the inner product $\langle f, g\rangle = \frac{1}{2\pi}\int_{-\pi}^{\pi} f(x)\,\overline{g(x)}\,dx$: $\langle e^{inx}, e^{imx}\rangle = \delta_{nm}$.
This makes Fourier coefficients coordinates: $c_n = \langle f, e^{inx}\rangle$.
Code cell 5
# === 1.1 Verify orthonormality numerically ===
# Rectangle-rule check that {e^{inx}} is orthonormal under the inner product
# <u, v> = (1/2pi) * integral of u * conj(v) over [-pi, pi).
N_pts = 10000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi / N_pts

def inner_prod(u, v):
    """Discretised L2 inner product with the 1/(2*pi) normalisation."""
    return np.sum(u * np.conj(v)) * dx / (2*np.pi)

print('Orthonormality checks:')
for n, m, expected in [(1, 1, 1.0), (1, 2, 0.0), (3, -3, 0.0), (0, 0, 1.0)]:
    val = inner_prod(np.exp(1j*n*x), np.exp(1j*m*x))
    matched = np.isclose(abs(val), abs(expected), atol=1e-3)
    status = 'PASS' if matched else 'FAIL'
    print(f' {status} <e^{{i{n}x}}, e^{{i{m}x}}> = {val.real:.5f}+{val.imag:.5f}j (expected {expected})')

# Full Gram matrix for the modes n = -2..2: should be the 5x5 identity.
ns = range(-2,3)
G = np.array([[inner_prod(np.exp(1j*n*x), np.exp(1j*m*x)) for m in ns] for n in ns])
print('\nGram matrix (abs values, should be identity):')
print(np.abs(G).round(4))
print('PASS - Gram matrix is identity' if np.allclose(G, np.eye(5), atol=1e-3) else 'FAIL')
2. Computing Fourier Coefficients
We verify numerical computations against analytic formulas for three standard waveforms.
Code cell 7
# === 2.1 Fourier coefficient computation ===
def c_n(f_vals, n):
    """Numerical Fourier coefficient c_n via rectangle rule.

    Relies on the grid x and spacing dx set up earlier in the notebook.
    """
    return np.sum(f_vals * np.exp(-1j*n*x)) * dx / (2*np.pi)

# Square wave: c_n = 2/(i*pi*n) for odd n, 0 for even n.
square = np.sign(np.sin(x))
print('Square wave c_n (complex form):')
for n in (1, 2, 3, 4, 5):
    num = c_n(square, n)
    analytic = 0.0 if n % 2 == 0 else 2/(1j*np.pi*n)
    verdict = "PASS" if np.isclose(num, analytic, atol=1e-3) else "FAIL"
    print(f' c_{n}: num={num:.4f} analytic={analytic:.4f} {verdict}')

# Triangle wave |x|: cosine series with c_0 = pi/2 and c_n = -2/(pi*n^2), odd n.
triangle = np.abs(x)
print('\nTriangle wave c_n (real, cosine series):')
for n in (0, 1, 2, 3):
    num = c_n(triangle, n)
    if n == 0:
        analytic = np.pi/2
    elif n % 2:
        analytic = -2/(np.pi*n**2)
    else:
        analytic = 0.0
    verdict = "PASS" if np.isclose(num.real, analytic, atol=1e-3) else "FAIL"
    print(f' c_{n}: num={num.real:.5f} analytic={analytic:.5f} {verdict}')
3. Partial Sums and Convergence
We visualize convergence for all three waveforms and measure the error rate.
Code cell 9
# === 3.1 Partial sum functions ===
def sq_sum(x, N):
    """Partial Fourier sum of the square wave: odd sine harmonics up to 2N+1."""
    acc = np.zeros_like(x, dtype=float)
    for k in range(N+1):
        acc += np.sin((2*k+1)*x)/(2*k+1)
    return 4/np.pi * acc

def tri_sum(x, N):
    """Partial Fourier sum of |x|: pi/2 minus odd cosines with 1/n^2 weights."""
    acc = np.zeros_like(x, dtype=float)
    for k in range(N+1):
        acc += np.cos((2*k+1)*x)/(2*k+1)**2
    return np.full_like(x, np.pi/2) - 4/np.pi * acc

def saw_sum(x, N):
    """Partial Fourier sum of the sawtooth f(x)=x: alternating sine series."""
    acc = np.zeros_like(x, dtype=float)
    for n in range(1, N+1):
        acc += (-1)**(n+1)*np.sin(n*x)/n
    return 2*acc

x_p = np.linspace(-np.pi, np.pi, 2000)

# Root-mean-square (L2) error of the N-term partial sum for each waveform.
print('L2 error vs N:')
print(f' {"N":>4} {"Square":>10} {"Triangle":>10} {"Sawtooth":>10}')
for N in [1, 3, 5, 10, 20, 50, 100]:
    e_sq = np.sqrt(np.mean((np.sign(np.sin(x_p)) - sq_sum(x_p, N))**2))
    e_tr = np.sqrt(np.mean((np.abs(x_p) - tri_sum(x_p, N))**2))
    e_sw = np.sqrt(np.mean((x_p - saw_sum(x_p, N))**2))
    print(f' {N:>4} {e_sq:>10.5f} {e_tr:>10.5f} {e_sw:>10.5f}')
Code cell 10
# === 3.2 Visualise partial sums ===
# Uses x_p, sq_sum/tri_sum/saw_sum and HAS_MPL/COLORS from earlier cells.
if HAS_MPL:
    fig, axes = plt.subplots(1,3, figsize=(15,5))
    fig.suptitle('Fourier series convergence', fontsize=15)
    # One panel per waveform: (title, exact samples, partial-sum function).
    specs = [
        ('Square wave', np.sign(np.sin(x_p)), sq_sum),
        ('Triangle $|x|$', np.abs(x_p), tri_sum),
        ('Sawtooth $x$', x_p, saw_sum),
    ]
    for ax, (name, f_true, fn) in zip(axes, specs):
        ax.plot(x_p, f_true, color=COLORS['neutral'], lw=1.5, label='True')
        # Overlay two truncation levels so convergence is visible.
        for N, col in [(5,COLORS['secondary']),(20,COLORS['primary'])]:
            ax.plot(x_p, fn(x_p,N), color=col, lw=1.5, label=f'N={N}')
        ax.set_title(name); ax.set_xlabel('$x$'); ax.legend(fontsize=9)
    fig.tight_layout(); plt.show()
print('Plot: convergence for three standard waveforms')
4. The Gibbs Phenomenon
Near a jump discontinuity, $S_N f$ overshoots by about $9\%$ of the jump height, regardless of $N$. This overshoot is the Gibbs phenomenon.
Code cell 12
# === 4.1 Quantify the Gibbs overshoot ===
# Measure max(S_N f) - 1 near the jump at x=0: the overshoot stabilises at
# ~8.95% of the jump height (jump = 2 here) rather than shrinking with N.
x_zoom = np.linspace(-0.5, 0.5, 5000)
print('Gibbs overshoot vs N (jump height = 2, so 9% = 0.179):')
for N in [10,50,100,500,1000]:
    S = sq_sum(x_zoom, N)  # sq_sum defined in the partial-sum cell above
    overshoot = np.max(S) - 1.0  # height above the flat level +1
    print(f' N={N:5d}: max={np.max(S):.5f} overshoot={overshoot:.5f} ({100*overshoot/2:.2f}%)')
if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10,5))
    ax.plot(x_zoom, np.sign(np.sin(x_zoom)), color=COLORS['neutral'], lw=1.5, label='True')
    for N, col in [(5,COLORS['secondary']),(50,COLORS['primary'])]:
        ax.plot(x_zoom, sq_sum(x_zoom,N), color=col, lw=1.8, label=f'$S_{{{N}}}f$')
    # Reference line at 1 + 0.0895 * jump: the asymptotic Gibbs level.
    ax.axhline(1.0+0.0895*2, color=COLORS['error'], ls='--', lw=1.2, label='9% overshoot')
    ax.set_title('Gibbs phenomenon: 9% overshoot persists as $N\\to\\infty$')
    ax.set_xlabel('$x$'); ax.set_ylabel('$S_Nf(x)$'); ax.legend()
    fig.tight_layout(); plt.show()
Code cell 13
# === 4.2 Cesaro means vs partial sums (Fejer's theorem) ===
def cesaro_sum(x, N, fn):
    """Cesaro mean sigma_N = (1/(N+1)) * sum_{k=0}^N S_k f"""
    # fn(x, k) must return the k-th partial sum S_k f at the points x.
    return sum(fn(x,k) for k in range(N+1)) / (N+1)
x_zoom = np.linspace(-0.5, 0.5, 2000)
N = 50
S_N = sq_sum(x_zoom, N)  # sq_sum from the partial-sum cell above
sigma_N = cesaro_sum(x_zoom, N, sq_sum)
# Averaging the partial sums kills the Gibbs overshoot (Fejer kernel >= 0).
print(f'Partial sum max: {np.max(S_N):.5f} (Gibbs present)')
print(f'Cesaro mean max: {np.max(sigma_N):.5f} (should be close to 1.0 — no Gibbs)')
if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10,5))
    ax.plot(x_zoom, np.sign(np.sin(x_zoom)), color=COLORS['neutral'], lw=1.5, label='True')
    ax.plot(x_zoom, S_N, color=COLORS['error'], lw=1.8, label=f'$S_{{{N}}}f$ (Gibbs)')
    ax.plot(x_zoom, sigma_N, color=COLORS['primary'], lw=1.8, label=f'$\\sigma_{{{N}}}f$ (Fejer)')
    ax.set_title("Fejer's theorem: Cesaro means remove Gibbs overshoot")
    ax.set_xlabel('$x$'); ax.legend(); fig.tight_layout(); plt.show()
5. Parseval's Theorem: Energy Conservation
The total energy (power) in the time domain equals the sum of energies in each frequency component. This is a unitary change of basis.
Application: Setting $f(x) = x$ and applying Parseval yields the Basel sum $\sum_{n=1}^{\infty} 1/n^2 = \pi^2/6$.
Code cell 15
# === 5.1 Verify Parseval numerically ===
# ||f||^2 computed in the time domain must equal the sum of |c_n|^2.
N_pts = 10000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi/N_pts

def parseval_check(f_vals, N_terms=200):
    """Return (||f||^2, sum of |c_n|^2 over |n| <= N_terms)."""
    lhs = np.sum(np.abs(f_vals)**2)*dx / (2*np.pi)
    rhs = 0.0
    for n in range(-N_terms, N_terms+1):
        coeff = np.sum(f_vals*np.exp(-1j*n*x))*dx/(2*np.pi)
        rhs += abs(coeff)**2
    return lhs, rhs

cases = [('Square wave', np.sign(np.sin(x))),
         ('Triangle', np.abs(x)),
         ('cos(x)', np.cos(x))]
for name, fv in cases:
    lhs, rhs = parseval_check(fv)
    verdict = "PASS" if np.isclose(lhs, rhs, rtol=1e-2) else "FAIL"
    print(f' {verdict} {name}: ||f||^2={lhs:.5f} sum|c_n|^2={rhs:.5f}')
Code cell 16
# === 5.2 Basel problem via Parseval ===
# Apply Parseval to f(x) = x on (-pi, pi) (grid x, dx from the previous cell):
#   ||f||^2 = (1/2pi) * integral x^2 dx = pi^2/3
#   c_n = (-1)^{n+1}/(i*n) for n != 0, so |c_n|^2 = 1/n^2 and
#   sum_{n != 0} |c_n|^2 = 2 * sum_{n>=1} 1/n^2  =>  sum 1/n^2 = pi^2/6
f_saw = x # sawtooth on (-pi,pi)
lhs_exact = np.pi**2 / 3 # (1/2pi)*integral_{-pi}^{pi} x^2 dx
lhs_num = np.sum(f_saw**2)*dx/(2*np.pi)
print(f'||x||^2 exact = pi^2/3 = {lhs_exact:.6f}')
print(f'||x||^2 numerical = {lhs_num:.6f}')
# Real (sine) form: b_n = 2*(-1)^{n+1}/n, so sum b_n^2 = sum 4/n^2.
N_sum = 10000
series_sum = sum(4/n**2 for n in range(1, N_sum+1)) # = sum b_n^2
# Since |c_n|^2 = b_n^2/4 = 1/n^2, dividing by 4 recovers sum 1/n^2.
sum_inv_n2 = series_sum / 4 # = sum 1/n^2
print(f'\nParseval => sum_{{n=1}}^{{inf}} 1/n^2 = {sum_inv_n2:.6f}')
print(f'pi^2/6 = {np.pi**2/6:.6f}')
print(f'PASS - Basel problem verified' if np.isclose(sum_inv_n2, np.pi**2/6, rtol=1e-3)
      else 'FAIL')
6. Properties: Shift and Differentiation
Shift theorem: $f(x - x_0) \;\leftrightarrow\; e^{-inx_0}\,c_n$
Differentiation: $f'(x) \;\leftrightarrow\; in\,c_n$
These two properties power the entire theory of positional encodings in transformers.
Code cell 18
# === 6.1 Shift theorem ===
# Shifting f by x0 multiplies every c_n by exp(-i*n*x0); verify numerically.
N_pts = 10000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi/N_pts

def c_n(fv, n):
    """Rectangle-rule Fourier coefficient c_n of the samples fv."""
    return np.sum(fv*np.exp(-1j*n*x))*dx/(2*np.pi)

f = np.cos(3*x) + 0.5*np.sin(x) # test function
x0 = 0.7
f_shifted = np.cos(3*(x-x0)) + 0.5*np.sin(x-x0)
print('Shift theorem: c_n[f(x-x0)] = exp(-i*n*x0) * c_n[f]')
for n in (1, 2, 3):
    lhs = c_n(f_shifted, n)
    rhs = np.exp(-1j*n*x0) * c_n(f, n)
    if np.isclose(lhs, rhs, atol=1e-4):
        print(f' n={n}: PASS')
    else:
        print(f' n={n}: FAIL lhs={lhs:.5f} rhs={rhs:.5f}')
Code cell 19
# === 6.2 Differentiation theorem: c_n[f'] = i*n * c_n[f] ===
# Uses the grid x, dx and the helper c_n defined in the previous cell.
# f(x) = x^2 on (-pi,pi) has c_n[x^2] = 2*(-1)^n/n^2 (n != 0); its derivative
# f'(x) = 2x has c_n[2x] = 2*(-1)^{n+1}/(i*n) = 2i*(-1)^n/n, which is exactly
# i*n * c_n[x^2] as the theorem predicts.
f_x2 = x**2
f_2x = 2*x
print('Differentiation theorem: c_n[f\'] = i*n * c_n[f]')
for n in [1,2,3,4,5]:
    cn_f = c_n(f_x2, n)
    cn_fp = c_n(f_2x, n)
    predicted = 1j*n*cn_f  # theorem's prediction for the derivative's c_n
    ok = np.isclose(cn_fp, predicted, atol=1e-3)
    print(f' n={n}: {"PASS" if ok else "FAIL"} c_n[2x]={cn_fp:.5f} i*n*c_n[x^2]={predicted:.5f}')
7. AI Application: Rotary Positional Encoding (RoPE)
RoPE (Su et al., 2021) encodes position $m$ as a rotation of each 2-D feature pair $i$ by angle $m\theta_i$, with $\theta_i = 10000^{-2i/d}$:
The attention score $\langle R_m q, R_n k\rangle$ depends only on $m - n$ (relative position).
Code cell 21
# === 7.1 Implement RoPE and verify relative-position property ===
def rope_rotate(x_vec, pos, base=10000):
    """Apply RoPE rotation to a vector at given position.

    Feature pair i (components 2i, 2i+1) is rotated by angle pos * base**(-2i/d).
    """
    d = len(x_vec)
    freqs = base**(-2*np.arange(d//2)/d)
    angles = pos * freqs
    cos_a, sin_a = np.cos(angles), np.sin(angles)
    evens, odds = x_vec[0::2], x_vec[1::2]
    rotated = np.empty_like(x_vec)
    rotated[0::2] = evens*cos_a - odds*sin_a
    rotated[1::2] = evens*sin_a + odds*cos_a
    return rotated

np.random.seed(42)
d = 64
q = np.random.randn(d)
k = np.random.randn(d)
# The rotated dot product must be invariant under a common position shift.
print('RoPE relative-position property: score(m,n) = score(m+k, n+k)')
delta = 50
for m, n in [(0, 3), (5, 10), (100, 107)]:
    score1 = rope_rotate(q, m) @ rope_rotate(k, n)
    score2 = rope_rotate(q, m+delta) @ rope_rotate(k, n+delta)
    flag = "PASS" if np.isclose(score1, score2, atol=1e-8) else "FAIL"
    print(f' m={m:3d},n={n:3d}: score={score1:.6f} (m+{delta},n+{delta})={score2:.6f} {flag}')
Code cell 22
# === 7.2 Visualise RoPE rotation as Fourier basis ===
if HAS_MPL:
    d = 8; base = 10000  # small head dim so each frequency band is visible
    positions = np.arange(64)
    # Per-pair rotation frequencies: a geometric ladder base**(-2i/d).
    theta = np.array([base**(-2*i/d) for i in range(d//2)])
    fig, axes = plt.subplots(1, 4, figsize=(16, 4))
    fig.suptitle('RoPE: rotation angle per dimension pair vs. position', fontsize=13)
    for k_idx, ax in enumerate(axes):
        angles = positions * theta[k_idx]
        ax.plot(positions, np.cos(angles), color=COLORS['primary'], label='cos')
        ax.plot(positions, np.sin(angles), color=COLORS['secondary'], label='sin')
        ax.set_title(f'Dim pair {k_idx} (freq={theta[k_idx]:.4f})')
        ax.set_xlabel('Position $m$')
        if k_idx==0: ax.legend(fontsize=9)
    fig.tight_layout(); plt.show()
print('Each pair rotates at frequency theta_i = 10000^{-2i/d}.')
print('Lower-indexed pairs complete full cycles faster => distinguish nearby tokens.')
8. Spectral Bias of Neural Networks
Neural networks learn low-frequency Fourier components of the target function first (Rahaman et al., 2019). We demonstrate this by training a small MLP on a high-frequency target and tracking Fourier coefficient convergence.
Code cell 24
# === 8.1 Spectral bias demonstration ===
# Target: f*(x) = sin(x) + 0.3*sin(5x) + 0.1*sin(11x)
# Fit with ridge regression on a Fourier feature basis (a proxy for an MLP);
# sweeping the ridge penalty from large to small mimics early vs late training
# and shows which frequencies are captured first.
np.random.seed(42)
N_data = 200
x_data = np.linspace(-np.pi, np.pi, N_data)

def target(x):
    """Three-frequency regression target."""
    return np.sin(x) + 0.3*np.sin(5*x) + 0.1*np.sin(11*x)

y_data = target(x_data)

def fourier_features(x, K):
    """Design matrix [1, sin(x), cos(x), ..., sin(Kx), cos(Kx)], shape (N, 2K+1)."""
    cols = [np.ones_like(x)]
    for k in range(1, K+1):
        cols.append(np.sin(k*x))
        cols.append(np.cos(k*x))
    return np.stack(cols, axis=1)

K = 15
Phi = fourier_features(x_data, K) # (N, 2K+1)
print('Spectral content recovered at different regularization levels:')
print(f' (lambda=large -> only low freq; lambda=small -> all freq)')
for lam in [100, 10, 1, 0.01]:
    w = np.linalg.solve(Phi.T@Phi + lam*np.eye(Phi.shape[1]), Phi.T@y_data)
    y_pred = Phi @ w
    # Column 2k-1 holds sin(kx): read off the three target frequencies.
    b_1, b_5, b_11 = w[1], w[9], w[21]
    print(f' lambda={lam:6.2f}: b_1={b_1:.3f}(true 1.0) b_5={b_5:.3f}(true 0.3) b_11={b_11:.3f}(true 0.1)')
9. Random Fourier Features (Rahimi & Recht, 2007)
Any shift-invariant kernel $k(x, y) = k(x - y)$ is the Fourier transform of a non-negative density $p(\omega)$ (Bochner's theorem). We can approximate $k$ by sampling random frequencies $\omega_i \sim p$.
Code cell 26
# === 9.1 Random Fourier Features for the RBF kernel ===
def rff_kernel_approx(X, D, sigma=1.0, seed=42):
    """Approximate RBF kernel k(x,y)=exp(-||x-y||^2/(2*sigma^2)) via D random features."""
    rng = np.random.default_rng(seed)
    n_dims = X.shape[1]
    # Bochner: draw frequencies from the kernel's spectral density N(0, sigma^{-2} I).
    W = rng.normal(0, 1.0/sigma, size=(n_dims, D))
    b = rng.uniform(0, 2*np.pi, size=D)
    # Feature map phi(x) = sqrt(2/D) * cos(W^T x + b); phi(x).phi(y) ~ k(x,y).
    return np.sqrt(2.0/D) * np.cos(X @ W + b[None,:])

np.random.seed(42)
n, d = 100, 2
X = np.random.randn(n, d)
sigma = 1.0
# Exact RBF Gram matrix to compare against.
from scipy.spatial.distance import cdist
K_exact = np.exp(-cdist(X, X)**2 / (2*sigma**2))
print('RFF approximation quality vs D:')
print(f' {"D":>6} {"Max error":>12} {"Mean error":>12}')
for D in [10, 50, 100, 500, 1000]:
    Z = rff_kernel_approx(X, D, sigma)
    err = np.abs(K_exact - Z @ Z.T)
    print(f' {D:>6} {np.max(err):>12.5f} {np.mean(err):>12.5f}')
Code cell 27
# === 9.2 Visualise kernel approximation quality ===
if HAS_MPL:
Ds = [10, 50, 100, 500, 1000, 5000]
max_errs = []
for D in Ds:
Z = rff_kernel_approx(X, D, sigma)
K_approx = Z @ Z.T
max_errs.append(np.max(np.abs(K_exact - K_approx)))
fig, ax = plt.subplots(figsize=(9,5))
ax.loglog(Ds, max_errs, 'o-', color=COLORS['primary'], lw=2, label='Max error')
ax.loglog(Ds, [0.5/np.sqrt(D) for D in Ds], '--',
color=COLORS['secondary'], lw=1.5, label='$O(1/\\sqrt{D})$')
ax.set_title('Random Fourier Features: approximation error vs. D')
ax.set_xlabel('Number of random features $D$')
ax.set_ylabel('Max absolute error')
ax.legend(); fig.tight_layout(); plt.show()
print('Error decays as O(1/sqrt(D)) — consistent with concentration bounds.')
10. Smoothness and Spectral Decay
The key theorem: smoother functions have faster-decaying Fourier coefficients — if $f \in C^k$ (k-times continuously differentiable and periodic) then $c_n = o(n^{-k})$. In practice a jump gives $1/n$ decay, a corner $1/n^2$, and an analytic function exponential decay. We verify this empirically.
Code cell 29
# === 10.1 Spectral decay rate vs smoothness ===
N_pts = 10000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi/N_pts

def c_n(fv, n):
    """Rectangle-rule Fourier coefficient c_n of the samples fv."""
    return np.sum(fv*np.exp(-1j*n*x))*dx/(2*np.pi)

# Test functions ordered from least to most smooth (periodic extension).
functions = {
    'Square (discont.)': (np.sign(np.sin(x)), 0),  # jump: 1/n decay
    'Triangle (C^0)': (np.abs(x), 1),              # corner: 1/n^2 decay
    'f=x^2 (C^1)': (x**2, 2),
    'Gaussian bump': (np.exp(-x**2/(0.5)), -1),    # effectively analytic: exp decay
}
ns_test = np.arange(1, 30)
print('|c_n| for n=1..29:')
coeff_data = {}
for name, (fv, _) in functions.items():
    mags = [abs(c_n(fv, n)) for n in ns_test]
    coeff_data[name] = mags
    print(f' {name}: |c_1|={mags[0]:.4f}, |c_5|={mags[4]:.4f}, |c_10|={mags[9]:.4f}')
Code cell 30
# === 10.2 Log-log plot of spectral decay ===
# Plots coeff_data/ns_test from the previous cell on log-log axes, where a
# power-law decay |c_n| ~ n^{-p} appears as a straight line of slope -p.
if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10,6))
    colors_list = [COLORS['error'], COLORS['secondary'],
                   COLORS['primary'], COLORS['tertiary']]
    for (name, coeffs), col in zip(coeff_data.items(), colors_list):
        ax.loglog(ns_test, coeffs, 'o-', color=col, lw=1.5,
                  markersize=4, label=name)
    # Reference lines for the two slowest decay rates.
    ax.loglog(ns_test, 1.0/ns_test, '--', color=COLORS['neutral'],
              lw=1, label='$1/n$ (C^{-1})')
    ax.loglog(ns_test, 1.0/ns_test**2, ':', color=COLORS['neutral'],
              lw=1, label='$1/n^2$ (C^0)')
    ax.set_title('Spectral decay rate vs. function smoothness')
    ax.set_xlabel('Frequency $n$')
    ax.set_ylabel('$|c_n|$')
    ax.legend(fontsize=9); fig.tight_layout(); plt.show()
print('Smooth functions: fast decay. Discontinuous: slow 1/n decay.')
11. The Dirichlet Kernel
$S_N f = D_N * f$ (convolution). Unlike the Fejér kernel, $D_N$ is not non-negative — this causes the Gibbs phenomenon.
Code cell 32
# === 11.1 Dirichlet kernel: closed form vs direct sum ===
N_pts = 2000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
x[x==0] = 1e-12 # avoid division by zero
def dirichlet_kernel(x, N):
return np.sin((N+0.5)*x) / np.sin(x/2)
def fejer_kernel(x, N):
s = np.sin((N+1)*x/2) / np.sin(x/2)
return s**2 / (N+1)
for N in [5, 20]:
D = dirichlet_kernel(x, N)
integral = np.trapezoid(D, x) / (2*np.pi)
min_val = np.min(D)
print(f'D_{N}: integral/(2pi)={integral:.5f} (should=1), min={min_val:.3f} (negative!)')
F = fejer_kernel(x, N)
fi = np.trapezoid(F, x) / (2*np.pi)
fm = np.min(F)
print(f'F_{N}: integral/(2pi)={fi:.5f} (should=1), min={fm:.6f} (non-negative!)')
Code cell 33
# === 11.2 Visualise Dirichlet vs Fejer kernels ===
# Uses dirichlet_kernel/fejer_kernel from the previous cell.
if HAS_MPL:
    x_k = np.linspace(-np.pi, np.pi, 3000)
    x_k[x_k==0] = 1e-12  # guard against 0/0 at the kernel centre
    fig, axes = plt.subplots(1,2,figsize=(14,5))
    for ax, N_k in zip(axes, [5,20]):
        D = dirichlet_kernel(x_k, N_k)
        F = fejer_kernel(x_k, N_k)
        ax.plot(x_k, D, color=COLORS['error'], lw=1.8, label=f'$D_{{{N_k}}}$ (Dirichlet)')
        ax.plot(x_k, F, color=COLORS['primary'], lw=1.8, label=f'$F_{{{N_k}}}$ (Fejer)')
        ax.axhline(0, color=COLORS['neutral'], lw=0.8, ls='--')
        ax.set_title(f'N = {N_k}')
        ax.set_xlabel('$x$'); ax.legend()
    fig.suptitle('Dirichlet kernel (negative) vs Fejer kernel (non-negative)', fontsize=14)
    fig.tight_layout(); plt.show()
print("audit output: 11.2 Visualise Dirichlet vs Fejer kernels === complete or optional branch skipped.")
12. Even/Odd Symmetry and Cosine/Sine Series
- Even functions $f(-x) = f(x)$: all $b_n = 0$, series is purely cosines (DCT connection)
- Odd functions $f(-x) = -f(x)$: all $a_n = 0$, series is purely sines
This halves the computation and reveals the symmetry structure of the signal.
Code cell 35
# === 12.1 Even/odd decomposition ===
# Any f splits as f = f_even + f_odd with f_even(x) = (f(x)+f(-x))/2.
# Even parts have purely real c_n (cosine series); odd parts purely imaginary.
N_pts = 10000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi/N_pts

def c_n(fv, n):
    """Rectangle-rule Fourier coefficient c_n of the samples fv."""
    return np.sum(fv*np.exp(-1j*n*x))*dx/(2*np.pi)

f = np.exp(np.sin(x))  # generic function, neither even nor odd
# Reflection on this grid: x_j = -pi + j*dx, so -x_j is sample (N - j) mod N.
# A plain f[::-1] is off by one sample (it evaluates f(-x - dx)), spoiling
# parity at O(dx); rolling by 1 makes the reflection exact on the grid.
f_reflect = np.roll(f[::-1], 1)
f_even = (f + f_reflect) / 2
f_odd = (f - f_reflect) / 2
print('For even part: imaginary Fourier coefficients should be ~0')
for n in [1,2,3]:
    c = c_n(f_even, n)
    print(f' c_{n}[even]: real={c.real:.5f}, imag={c.imag:.6f} (imag~0 = PASS)')
print('\nFor odd part: real Fourier coefficients should be ~0')
for n in [1,2,3]:
    c = c_n(f_odd, n)
    print(f' c_{n}[odd]: real={c.real:.6f} (real~0 = PASS), imag={c.imag:.5f}')
13. Fourier Series on $[-L, L]$
For a $2L$-periodic function, replace $e^{inx}$ with $e^{in\pi x/L}$: $f(x) = \sum_n c_n e^{in\pi x/L}$, $\quad c_n = \frac{1}{2L}\int_{-L}^{L} f(x)\,e^{-in\pi x/L}\,dx$.
The fundamental frequency is $\pi/L$. LLM connection: the transformer context length corresponds to the period $2L$; different position-encoding frequencies correspond to harmonics $n\pi/L$.
Code cell 37
# === 13.1 Sinusoidal positional encoding — transformer style ===
def sinusoidal_PE(seq_len, d_model, base=10000):
    """Compute sinusoidal positional encoding matrix (seq_len, d_model)."""
    positions = np.arange(seq_len)[:, None]   # (T, 1)
    dims = np.arange(0, d_model, 2)[None, :]  # (1, d/2)
    angles = positions / base**(dims / d_model)  # (T, d/2)
    PE = np.zeros((seq_len, d_model))
    PE[:, 0::2] = np.sin(angles)  # even dims get sine
    PE[:, 1::2] = np.cos(angles)  # odd dims get cosine
    return PE

T, d = 64, 32
PE = sinusoidal_PE(T, d)
print(f'PE shape: {PE.shape}')
print(f'PE[0]: {PE[0,:6].round(3)} ...')
print(f'PE[1]: {PE[1,:6].round(3)} ...')
print(f'PE[T-1]:{PE[-1,:6].round(3)} ...')
# Inner products at a fixed offset k are only approximately shift-invariant.
k = 5
print(f'\nPE[i] . PE[j] for j=i+{k} (should be constant for all i):')
dots = [PE[i] @ PE[i+k] for i in range(T - k)]
print(f' min={min(dots):.4f}, max={max(dots):.4f}, std={np.std(dots):.4f}')
print(f' (Not perfectly constant — RoPE fixes this)')
Code cell 38
# === 13.2 Visualise PE heatmap ===
if HAS_MPL:
fig, ax = plt.subplots(figsize=(12,5))
im = ax.imshow(PE.T, aspect='auto', cmap='RdBu_r', vmin=-1, vmax=1)
fig.colorbar(im, ax=ax, label='Value')
ax.set_title('Sinusoidal positional encodings (rows=dimensions, cols=positions)')
ax.set_xlabel('Position $m$')
ax.set_ylabel('Dimension $2i$ or $2i+1$')
fig.tight_layout(); plt.show()
print('Low-indexed dims: fast oscillation (high freq, local context)')
print('High-indexed dims: slow oscillation (low freq, global context)')
14. Numerical Experiment: Parseval vs Truncation
How much energy does the truncation $S_N f$ capture? Parseval tells us exactly: $\|S_N f\|^2 / \|f\|^2 = \sum_{|n| \le N} |c_n|^2 \big/ \sum_n |c_n|^2$.
For smooth functions, near-100% energy is captured with few terms.
Code cell 40
# === 14.1 Energy captured by N-term truncation ===
N_pts = 10000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi/N_pts

def c_n(fv, n):
    """Rectangle-rule Fourier coefficient c_n of the samples fv."""
    return np.sum(fv*np.exp(-1j*n*x))*dx/(2*np.pi)

def energy_captured(fv, N_max=200, N_truncs=(1, 3, 5, 10, 20, 50)):
    """Return {N: % of total spectral energy captured by |n| <= N}.

    Coefficients are computed once for |n| <= N_max and reused for every
    truncation level. (Default is a tuple, not a list, to avoid the
    mutable-default-argument trap.)
    """
    all_c = {n: c_n(fv, n) for n in range(-N_max, N_max+1)}
    total_energy = sum(abs(c)**2 for c in all_c.values())
    results = {}
    for N in N_truncs:
        captured = sum(abs(all_c[n])**2 for n in range(-N, N+1))
        results[N] = 100 * captured / total_energy
    return results

f_sq = np.sign(np.sin(x))
f_tri = np.abs(x)
f_gau = np.exp(-0.5*x**2)
# One spectrum per waveform (previously recomputed per truncation level).
res_sq = energy_captured(f_sq)
res_tri = energy_captured(f_tri)
res_gau = energy_captured(f_gau)
print('% energy captured by N-term truncation:')
# Single-quoted f-strings: nested same-quote f-strings need Python >= 3.12.
print(f'{"N":>4} {"Square":>10} {"Triangle":>10} {"Gaussian":>10}')
for N in [1, 3, 5, 10, 20, 50]:
    print(f'{N:>4} {res_sq[N]:>9.2f}% {res_tri[N]:>9.2f}% {res_gau[N]:>9.2f}%')
Code cell 41
# === 14.2 Amplitude spectrum plot ===
if HAS_MPL:
n_range = np.arange(-30, 31)
fig, axes = plt.subplots(1,3, figsize=(15,5))
fig.suptitle('Amplitude spectrum $|c_n|$ for three waveforms', fontsize=14)
specs = [
('Square wave', np.sign(np.sin(x))),
('Triangle $|x|$', np.abs(x)),
('Gaussian $e^{-x^2/2}$', np.exp(-0.5*x**2)),
]
for ax, (name, fv) in zip(axes, specs):
amps = [abs(c_n(fv, n)) for n in n_range]
ax.stem(n_range, amps, linefmt='C0-',
markerfmt='o', basefmt='k-')
ax.set_title(name); ax.set_xlabel('$n$'); ax.set_ylabel('$|c_n|$')
fig.tight_layout(); plt.show()
print("audit output: 14.2 Amplitude spectrum plot === complete or optional branch skipped.")
15. Complete Example: Heat Equation Solution
The solution of $u_t = \alpha^2 u_{xx}$ on $(0, \pi)$ with $u(0,t) = u(\pi,t) = 0$ and $u(x,0) = x(\pi - x)$ uses the Fourier sine series derived in notes.md §F.1: $u(x,t) = \frac{8}{\pi} \sum_{n \text{ odd}} \frac{1}{n^3} \sin(nx)\, e^{-(\alpha n)^2 t}$
Code cell 43
# === 15.1 Heat equation solution via Fourier series ===
def heat_solution(x_arr, t, alpha=1.0, N_terms=50):
    """Sine-series heat solution for the initial condition u(x,0)=x(pi-x).

    Sums the odd harmonics (8/(pi*n^3)) * sin(n*x) * exp(-(alpha*n)^2 * t).
    NOTE(review): alpha enters the decay squared, i.e. exp(-(alpha*n)^2 t),
    which matches u_t = alpha^2 u_xx — confirm against the notes' convention.
    """
    u = np.zeros_like(x_arr, dtype=float)
    for k in range(N_terms):
        n = 2*k + 1  # only odd modes appear for this initial condition
        u += (1/n**3) * np.sin(n*x_arr) * np.exp(-(alpha*n)**2 * t)
    return u * 8/np.pi
x_arr = np.linspace(0, np.pi, 500)
f0 = x_arr*(np.pi - x_arr) # initial condition
print('Heat equation solution at various times:')
print(f' t=0: max(u) = {np.max(heat_solution(x_arr, 0)):.5f} (true max = pi^2/4 = {np.pi**2/4:.5f})')
for t in [0.01, 0.1, 0.5, 2.0]:
    u = heat_solution(x_arr, t)
    print(f' t={t}: max(u) = {np.max(u):.6f}')
if HAS_MPL:
    fig, ax = plt.subplots(figsize=(10,6))
    ax.plot(x_arr, f0, color=COLORS['neutral'], lw=2, label='$t=0$ (initial)')
    for t, col in [(0.05, COLORS['secondary']), (0.2, COLORS['primary']),
                   (0.5, COLORS['tertiary']), (1.0, COLORS['error'])]:
        ax.plot(x_arr, heat_solution(x_arr, t), color=col, lw=1.8, label=f'$t={t}$')
    ax.set_title('Heat equation: high frequencies decay fastest')
    ax.set_xlabel('$x$'); ax.set_ylabel('$u(x,t)$'); ax.legend()
    fig.tight_layout(); plt.show()
print('High-frequency modes (large n) decay as exp(-(alpha*n)^2 * t) — very fast.')
16. Fourier Series and the DFT Connection
The Discrete Fourier Transform (full treatment in §20-03) computes Fourier series coefficients for finite sequences. Here we preview the connection:
$F[k] = \sum_{j=0}^{N-1} f_j\, e^{-2\pi i jk/N} = N c_k$, where $c_k$ is the $k$-th Fourier coefficient of the sampled function. The FFT computes all $N$ coefficients in $O(N \log N)$.
Code cell 45
# === 16.1 DFT vs direct Fourier coefficient computation ===
N_pts = 64
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi/N_pts
def c_n_rect(fv, n): return np.sum(fv*np.exp(-1j*n*x))*dx/(2*np.pi)
f_test = np.cos(3*x) + 0.5*np.sin(5*x)
# Direct coefficients
ns = np.arange(-8,9)
direct = np.array([c_n_rect(f_test, n) for n in ns])
# Via FFT
F = np.fft.fft(f_test)
# FFT gives: F[k] = N * c_k for k=0..N-1
# Negative freqs: F[N-k] = N * c_{-k}
fft_coeffs = np.array([
F[n]/N_pts if n >= 0 else F[N_pts+n]/N_pts for n in ns
])
print('Comparison: direct Fourier coefficients vs FFT/N:')
print(f"{"n":>4} {"Direct":>15} {"FFT/N":>15} Match")
for n, d, f_c in zip(ns, direct, fft_coeffs):
ok = np.isclose(d, f_c, atol=1e-8)
if abs(d) > 1e-6:
print(f"{n:>4} {d.real:>7.4f}+{d.imag:>6.4f}j {f_c.real:>7.4f}+{f_c.imag:>6.4f}j {"PASS" if ok else "FAIL"}")
17. Amplitude and Phase Spectrum
The Fourier series can be written in amplitude–phase form: $f(x) = \frac{a_0}{2} + \sum_{n \ge 1} A_n \cos(nx - \varphi_n)$.
- Amplitude spectrum: $A_n = \sqrt{a_n^2 + b_n^2}$ — how much of each frequency is present
- Phase spectrum: $\varphi_n = \operatorname{atan2}(b_n, a_n)$ — timing offset of each frequency component
Changing phases while preserving amplitudes destroys signal structure — this is why phase alignment matters in audio and image processing.
Code cell 47
# === 17.1 Phase scrambling destroys signal structure ===
np.random.seed(42)
N_pts = 10000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi/N_pts
def c_n(fv, n): return np.sum(fv*np.exp(-1j*n*x))*dx/(2*np.pi)
f = np.sign(np.sin(x)) # square wave
N_coeff = 50
# Reconstruct from coefficients
def reconstruct(coeffs_dict):
return sum(c * np.exp(1j*n*x) for n, c in coeffs_dict.items()).real
# Original coefficients
orig = {n: c_n(f, n) for n in range(-N_coeff, N_coeff+1)}
# Phase-scrambled: preserve |c_n| but randomize phase
scrambled = {n: abs(c)*np.exp(1j*np.random.uniform(0, 2*np.pi))
for n, c in orig.items()}
f_orig = reconstruct(orig)
f_scram = reconstruct(scrambled)
# Compare: same power spectrum, different waveform
print(f'Original signal std: {np.std(f_orig):.4f}')
print(f'Scrambled signal std: {np.std(f_scram):.4f}')
print(f'Same power? {np.isclose(np.mean(f_orig**2), np.mean(f_scram**2), rtol=0.05)}')
print(f'Same signal? {np.isclose(f_orig, f_scram, atol=0.1).mean():.1%} of points match')
print('PASS - same amplitude spectrum, completely different waveform')
Code cell 48
# === 17.2 Visualise amplitude vs phase scrambling ===
if HAS_MPL:
x_plot = x[:2000]; f_true = np.sign(np.sin(x_plot))
f_o = f_orig[:2000]; f_s = f_scram[:2000]
fig, axes = plt.subplots(1,3, figsize=(15,4))
for ax, fv, name in zip(axes,
[f_true, f_o, f_s],
['True square wave','Reconstructed (N=50)','Phase-scrambled']):
ax.plot(x_plot, fv, color=COLORS['primary'], lw=1.2)
ax.set_title(name); ax.set_xlabel('$x$'); ax.set_ylim(-2,2)
fig.suptitle('Phase matters: same amplitudes, scrambled phases = random signal', fontsize=13)
fig.tight_layout(); plt.show()
print("audit output: 17.2 Visualise amplitude vs phase scrambling === complete or optional branch skipped.")
18. Basel Problem Generalisation via Parseval
Parseval's theorem turns Fourier series computations into evaluations of classical number-theory series. We compute several in one go.
Code cell 50
# === 18.1 Series evaluations via Parseval ===
# sum 1/n^2 = pi^2/6 (from f=x)
# sum 1/n^4 = pi^4/90 (from f=x^2)
# sum 1/(2k+1)^2 = pi^2/8 (from square wave)
N_sum = 100000
ns = np.arange(1, N_sum+1)
s2 = np.sum(1/ns**2)
s4 = np.sum(1/ns**4)
s_odd = np.sum(1/(2*ns-1)**2)
print('Series evaluations via Parseval:')
print(f' sum 1/n^2 = {s2:.8f} (pi^2/6 = {np.pi**2/6:.8f})')
print(f' sum 1/n^4 = {s4:.8f} (pi^4/90 = {np.pi**4/90:.8f})')
print(f' sum 1/(2k+1)^2 = {s_odd:.8f} (pi^2/8 = {np.pi**2/8:.8f})')
for val, exact in [(s2, np.pi**2/6), (s4, np.pi**4/90), (s_odd, np.pi**2/8)]:
print(f' PASS' if np.isclose(val, exact, rtol=1e-3) else f' FAIL: {val} vs {exact}')
19. Fourier Series and Convolution
The convolution of two periodic functions $f, g$: $(f * g)(x) = \frac{1}{2\pi}\int_{-\pi}^{\pi} f(y)\,g(x-y)\,dy$
has Fourier coefficients $c_n[f * g] = c_n[f]\, c_n[g]$. Convolution in time ↔ multiplication in frequency. (Full treatment: §20-04 Convolution Theorem.)
Code cell 52
# === 19.1 Convolution theorem for Fourier series ===
N_pts = 10000
x = np.linspace(-np.pi, np.pi, N_pts, endpoint=False)
dx = 2*np.pi/N_pts
def c_n(fv, n): return np.sum(fv*np.exp(-1j*n*x))*dx/(2*np.pi)
f = np.cos(2*x) + 0.5*np.sin(3*x)
g = np.sin(x) + 0.3*np.cos(2*x)
# Direct convolution (expensive O(N^2) via outer product)
# Use np.convolve on the periodic signal
h_direct = np.fft.ifft(np.fft.fft(f) * np.fft.fft(g)).real / N_pts * (2*np.pi)
# Verify: c_n[h] = c_n[f] * c_n[g]
print('Convolution theorem: c_n[f*g] = c_n[f] * c_n[g]')
for n in [1, 2, 3, 4, 5]:
lhs = c_n(h_direct, n)
rhs = c_n(f, n) * c_n(g, n)
ok = np.isclose(lhs, rhs, atol=1e-4)
print(f' n={n}: {"PASS" if ok else "FAIL"} c_n[h]={lhs:.5f} c_n[f]*c_n[g]={rhs:.5f}')
Summary: Complete Reference
| Concept | Formula | AI Application |
|---|---|---|
| Fourier coefficient | $c_n = \frac{1}{2\pi}\int_{-\pi}^{\pi} f(x)\,e^{-inx}\,dx$ | PE frequencies |
| Orthonormality | $\langle e^{inx}, e^{imx}\rangle = \delta_{nm}$ | Basis of $L^2$ |
| Parseval | $\frac{1}{2\pi}\int_{-\pi}^{\pi} \lvert f\rvert^2\,dx = \sum_n \lvert c_n\rvert^2$ | Energy conservation |
| Shift | $f(x-x_0) \leftrightarrow e^{-inx_0} c_n$ | RoPE relative position |
| Differentiation | $f' \leftrightarrow in\,c_n$ | Spectral ODEs |
| Gibbs | 9% overshoot, persists | Anti-aliasing, windowing |
| Spectral decay | smoother $f$ ⇒ faster $\lvert c_n\rvert$ decay (jump: $1/n$; corner: $1/n^2$) | Spectral bias |
| Convolution | $f * g \leftrightarrow c_n[f]\,c_n[g]$ | CNNs, SSMs |
| RoPE | $\theta_i = 10000^{-2i/d}$, score depends only on $m-n$ | LLaMA-3, Mistral |
| RFF | $k(x,y) \approx \phi(x)^\top \phi(y)$ | Scalable kernels |
Next: §20-02 Fourier Transform