Theory Notebook
Converted from `theory.ipynb` for web reading.
Fourier Transform — Theory Notebook
"The Fourier transform is a mathematical prism: it refracts a signal into its constituent frequencies, revealing structure invisible in time."
This notebook provides interactive derivations, visualizations, and numerical experiments covering all major topics from the §20-02 Fourier Transform notes. Run cells top-to-bottom.
Sections covered:
- Setup and imports
- From Fourier Series to Fourier Transform (T→∞ limit)
- Computing standard Fourier Transforms
- Core properties (shift, scaling, differentiation)
- Fourier Inversion Theorem
- Plancherel's Theorem and energy conservation
- Heisenberg Uncertainty Principle
- Tempered distributions: Dirac delta and Dirac comb
- Applications: FNet, Random Fourier Features, Spectral Normalization, RoPE
Code cell 2
# Notebook-wide plotting setup.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Prefer seaborn's colorblind-safe theme; fall back to matplotlib's bundled
# seaborn style sheet when seaborn is not installed.
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

# Shared figure defaults applied to every plot below.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)  # reproducible randomness throughout the notebook
print("Plot setup complete.")
Code cell 3
# Core numerical imports for the whole notebook.
import numpy as np
import numpy.linalg as la
from scipy import fft as sp_fft
from scipy import signal as sp_signal
from scipy.special import erf

# Plotting is optional: later cells guard all figure code with HAS_MPL.
try:
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    try:
        import seaborn as sns
        sns.set_theme(style='whitegrid', palette='colorblind')
        HAS_SNS = True
    except ImportError:
        plt.style.use('seaborn-v0_8-whitegrid')
        HAS_SNS = False
    mpl.rcParams.update({
        'figure.figsize': (10, 6), 'figure.dpi': 120,
        'font.size': 13, 'axes.titlesize': 15,
        'axes.labelsize': 13, 'lines.linewidth': 2.0,
        'axes.spines.top': False, 'axes.spines.right': False,
    })
    # Colorblind-safe hex palette shared by all figures.
    COLORS = {
        'primary': '#0077BB', 'secondary': '#EE7733',
        'tertiary': '#009988', 'error': '#CC3311',
        'neutral': '#555555', 'highlight': '#EE3377',
    }
    HAS_MPL = True
except ImportError:
    HAS_MPL = False

np.set_printoptions(precision=6, suppress=True)
np.random.seed(42)  # reproducible randomness
print('Setup complete. HAS_MPL =', HAS_MPL)
1. From Fourier Series to Fourier Transform
1.1 The T→∞ Limit Argument
We visualize how a Fourier series with period $2L$ becomes the Fourier Transform as $L \to \infty$. The discrete spectrum (spikes at the harmonic frequencies $\xi_n = n/(2L)$) becomes a continuous function.
Code cell 5
# === 1.1 From Fourier Series to Fourier Transform ===
# Visualize: Fourier series spectrum of rect pulse as period L grows
def fourier_coeffs_rect(L, N_harmonics=50):
    """Fourier coefficients of rect[-0.5, 0.5] on the period [-L, L].

    Returns (freqs, amplitudes): harmonic frequencies xi_n = n/(2L) and
    |c_n| rescaled by 2L so the line spectrum overlays the continuous
    transform sinc(xi) as L grows.
    """
    harmonics = np.arange(-N_harmonics, N_harmonics + 1)
    harmonic_freqs = harmonics / (2 * L)
    # c_n = (1/2L) * int_{-1/2}^{1/2} e^{-pi i n t / L} dt = sin(pi n/(2L))/(pi n),
    # with the removable singularity at n=0 handled separately: c_0 = 1/(2L).
    with np.errstate(divide='ignore', invalid='ignore'):
        generic = np.sin(np.pi * harmonics / (2 * L)) / (np.pi * harmonics)
        coeffs = np.where(harmonics == 0, 1.0 / (2 * L), generic)
    return harmonic_freqs, np.abs(coeffs) * 2 * L  # scaled spectrum density
# Plot: overlay the (rescaled) Fourier-series line spectra on the continuous
# transform sinc(xi) for growing half-period L.
if HAS_MPL:
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    fig.suptitle(r'Fourier Series $\to$ Fourier Transform as $L \to \infty$',
                 fontsize=15)
    # Continuous FT of rect: sinc function
    xi_cont = np.linspace(-4, 4, 1000)
    ft_cont = np.sinc(xi_cont)  # = sin(pi*xi)/(pi*xi)
    for ax, L in zip(axes, [1, 3, 10]):
        freqs, amp = fourier_coeffs_rect(L, N_harmonics=60)
        # Dashed curve: the limit object; vertical lines: discrete harmonics.
        ax.plot(xi_cont, ft_cont, color=COLORS['neutral'],
                linewidth=1.5, linestyle='--', label=r'$\widehat{\rm rect}(\xi)=\rm sinc(\xi)$',
                zorder=5)
        ax.vlines(freqs, 0, amp, color=COLORS['primary'], linewidth=1.0,
                  label=f'Fourier coeffs, L={L}')
        ax.set_xlim(-4, 4)
        ax.set_ylim(-0.3, 1.1)
        ax.set_xlabel(r'Frequency $\xi$')
        ax.set_ylabel('Amplitude')
        ax.set_title(f'Period $2L={2*L}$')
        ax.legend(fontsize=9)
    fig.tight_layout()
    plt.show()
    print('As L grows, discrete spectrum -> continuous sinc: FT emerges.')
else:
    print('matplotlib not available; skipping plot.')
2. Computing Standard Fourier Transforms
2.1 The Four Standard Examples
We compute and visualize the Fourier Transforms of:
- Rectangular pulse → sinc function
- Gaussian → Gaussian (self-dual!)
- Double-sided exponential → Lorentzian
- One-sided exponential → complex Lorentzian
Code cell 7
# === 2.1 Standard Fourier Transform Examples ===
# Numerical FT via scipy.fft
def numerical_ft(f_vals, t_vals):
    """Approximate the continuous Fourier transform on a uniform grid.

    Uses the FFT as a Riemann-sum quadrature of
    F(xi) = int f(t) e^{-2 pi i xi t} dt: ifftshift centres t=0 for the
    FFT, fftshift re-centres the output, and the grid spacing converts
    the discrete sum into an integral approximation.

    Returns (xi, F): centred frequency grid and complex spectrum samples.
    """
    spacing = t_vals[1] - t_vals[0]
    n_samples = len(t_vals)
    centred = sp_fft.ifftshift(f_vals)
    spectrum = sp_fft.fftshift(sp_fft.fft(centred)) * spacing
    freq_grid = sp_fft.fftshift(sp_fft.fftfreq(n_samples, d=spacing))
    return freq_grid, spectrum
# Common time grid for all four standard examples.
t = np.linspace(-10, 10, 4096)
dt = t[1] - t[0]
# 1. Rectangular pulse rect(t) -> sinc(xi)
rect = ((np.abs(t) <= 0.5)).astype(float)
xi_rect, F_rect = numerical_ft(rect, t)
sinc_theory = np.sinc(xi_rect)  # normalized sinc = sin(pi*xi)/(pi*xi)
# 2. Gaussian e^{-pi*t^2} (self-dual)
gauss = np.exp(-np.pi * t**2)
xi_gauss, F_gauss = numerical_ft(gauss, t)
gauss_theory = np.exp(-np.pi * xi_gauss**2)
# 3. Double-sided exponential e^{-|t|} -> Lorentzian 2a/(a^2 + (2 pi xi)^2)
a = 1.0
dsexp = np.exp(-a * np.abs(t))
xi_ds, F_ds = numerical_ft(dsexp, t)
lorentz_theory = 2*a / (a**2 + (2*np.pi*xi_ds)**2)
# 4. One-sided exponential e^{-t} for t>=0 (complex spectrum; compare magnitudes)
onesided = np.where(t >= 0, np.exp(-a*t), 0.0)
xi_os, F_os = numerical_ft(onesided, t)
os_theory_mag = 1.0 / np.sqrt(a**2 + (2*np.pi*xi_os)**2)
# Report worst-case deviation from the closed forms on |xi| < 4.
print('Transforms computed numerically.')
print(f'Max error (rect->sinc): {np.max(np.abs(np.real(F_rect[np.abs(xi_rect)<4]) - sinc_theory[np.abs(xi_rect)<4])):.4f}')
print(f'Max error (Gauss->Gauss): {np.max(np.abs(np.real(F_gauss[np.abs(xi_gauss)<4]) - gauss_theory[np.abs(xi_gauss)<4])):.4f}')
print(f'Max error (dsexp->Lorentz): {np.max(np.abs(np.real(F_ds[np.abs(xi_ds)<4]) - lorentz_theory[np.abs(xi_ds)<4])):.4f}')
Code cell 8
# === 2.1 Plot standard FT examples ===
# Top row: time-domain signals; bottom row: spectra vs closed forms.
if HAS_MPL:
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    fig.suptitle('Standard Fourier Transform Pairs', fontsize=15)
    # Each tuple: (t grid, f, xi grid, numerical F, theory F, labels, axis limits).
    examples = [
        (t, rect, xi_rect, F_rect, sinc_theory, r'$f(t)=\mathrm{rect}(t)$',
         r'$\hat{f}(\xi)=\mathrm{sinc}(\xi)$', (-2,2), (-2,2)),
        (t, gauss, xi_gauss, F_gauss, gauss_theory, r'$f(t)=e^{-\pi t^2}$',
         r'$\hat{f}(\xi)=e^{-\pi\xi^2}$ (self-dual)', (-3,3), (-3,3)),
        (t, dsexp, xi_ds, F_ds, lorentz_theory, r'$f(t)=e^{-|t|}$',
         r'$\hat{f}(\xi)=\frac{2}{1+(2\pi\xi)^2}$', (-5,5), (-4,4)),
        (t, onesided, xi_os, F_os, os_theory_mag, r'$f(t)=e^{-t}\mathbf{1}_{t\geq0}$',
         r'$|\hat{f}(\xi)|=1/\sqrt{1+(2\pi\xi)^2}$', (-2,5), (-4,4)),
    ]
    for col, (tv, fv, xiv, Fv, Fth, tlabel, flabel, tlim, flim) in enumerate(examples):
        ax_t = axes[0, col]
        ax_f = axes[1, col]
        mask_t = (tv >= tlim[0]) & (tv <= tlim[1])
        ax_t.plot(tv[mask_t], fv[mask_t], color=COLORS['primary'])
        ax_t.set_title(tlabel)
        ax_t.set_xlabel('Time $t$')
        ax_t.set_ylabel('$f(t)$')
        mask_f = (xiv >= flim[0]) & (xiv <= flim[1])
        if col == 3:  # one-sided: spectrum is complex, plot magnitude
            ax_f.plot(xiv[mask_f], np.abs(Fv[mask_f]),
                      color=COLORS['primary'], label='Numerical')
            ax_f.plot(xiv[mask_f], Fth[mask_f],
                      color=COLORS['secondary'], linestyle='--', label='Theory')
        else:
            # Even real signals: spectrum is real, plot the real part.
            ax_f.plot(xiv[mask_f], np.real(Fv[mask_f]),
                      color=COLORS['primary'], label='Numerical')
            ax_f.plot(xiv[mask_f], Fth[mask_f],
                      color=COLORS['secondary'], linestyle='--', label='Theory')
        ax_f.set_title(flabel)
        ax_f.set_xlabel(r'Frequency $\xi$')
        ax_f.set_ylabel(r'$\hat{f}(\xi)$')
        ax_f.legend(fontsize=9)
    fig.tight_layout()
    plt.show()
    print('All four standard FT pairs match theory.')
2.2 Convention Comparison: $\xi$ vs $\omega$
The FT conventions differ only in normalization. The $\xi$-convention (used here) is symmetric and places no $2\pi$ prefactor in the inverse transform. Below we verify the conversion formula: $\hat{f}_\omega(\omega) = \hat{f}_\xi(\omega/2\pi)$.
Code cell 10
# === 2.2 Convention comparison ===
# For f(t) = e^{-pi*t^2}, compare the xi- and omega-conventions.
t_fine = np.linspace(-8, 8, 8192)
f_gauss = np.exp(-np.pi * t_fine**2)
# xi-convention: F_xi(xi) = int f(t) e^{-2pi*i*xi*t} dt = e^{-pi*xi^2}
xi_vals, F_xi = numerical_ft(f_gauss, t_fine)
# omega-convention (no 1/2pi in the forward transform):
#   F_omega(omega) = int f(t) e^{-i*omega*t} dt = F_xi(omega/(2*pi)).
# FIX: for f(t) = e^{-pi*t^2} this gives F_omega(omega) = e^{-omega^2/(4*pi)}.
# (The previous sqrt(2*pi)*e^{-omega^2/2} is the transform of e^{-t^2/2},
# a different Gaussian.)
omega_vals = 2 * np.pi * xi_vals
F_omega_theory = np.exp(-omega_vals**2 / (4*np.pi))
# Check the xi-convention against its closed form.
mask = np.abs(xi_vals) < 3
F_xi_theory = np.exp(-np.pi * xi_vals**2)
err_xi = np.max(np.abs(np.real(F_xi[mask]) - F_xi_theory[mask]))
print(f'xi-convention: max error = {err_xi:.2e}')
# Verify the conversion formula: F_omega(2*pi*xi) must equal F_xi(xi).
err_conv = np.max(np.abs(F_omega_theory[mask] - F_xi_theory[mask]))
print(f'Convention conversion F_omega(2pi*xi)=F_xi(xi): max error = {err_conv:.2e}')
# Parseval check in xi-convention: integral |f|^2 dt = integral |F_xi|^2 dxi
dt = t_fine[1] - t_fine[0]
dxi = xi_vals[1] - xi_vals[0]
energy_time = np.sum(f_gauss**2) * dt
energy_freq = np.sum(np.abs(F_xi)**2) * dxi
print(f'Energy in time: {energy_time:.6f}')
print(f'Energy in freq: {energy_freq:.6f}')
print(f'Parseval error: {abs(energy_time - energy_freq):.2e}')
ok = abs(energy_time - energy_freq) < 1e-3
print(f"{'PASS' if ok else 'FAIL'} - Parseval identity verified numerically")
3. Core Properties of the Fourier Transform
3.1 Time-Shift and Modulation
The time-shift property: $\mathcal{F}[f(t-a)](\xi) = e^{-2\pi i \xi a}\,\hat{f}(\xi)$. Shifting in time leaves the magnitude spectrum unchanged but adds a linear phase.
Code cell 12
# === 3.1 Time-Shift Property ===
# This cell restates its imports/setup so the section can run standalone.
import numpy as np
from scipy import fft as sp_fft

try:
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    try:
        import seaborn as sns
        sns.set_theme(style='whitegrid', palette='colorblind')
        HAS_SNS = True
    except ImportError:
        plt.style.use('seaborn-v0_8-whitegrid')
        HAS_SNS = False
    mpl.rcParams.update({'figure.figsize': (10, 6), 'figure.dpi': 120,
                         'font.size': 13, 'axes.titlesize': 14,
                         'axes.spines.top': False, 'axes.spines.right': False})
    COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
              'tertiary': '#009988', 'error': '#CC3311',
              'neutral': '#555555', 'highlight': '#EE3377'}
    HAS_MPL = True
except ImportError:
    HAS_MPL = False

np.random.seed(42)

def numerical_ft(f_vals, t_vals):
    """Centred-FFT approximation of the continuous FT; returns (xi, F)."""
    dt = t_vals[1] - t_vals[0]
    N = len(t_vals)
    F = sp_fft.fftshift(sp_fft.fft(sp_fft.ifftshift(f_vals))) * dt
    xi = sp_fft.fftshift(sp_fft.fftfreq(N, d=dt))
    return xi, F
# Experiment: shift a Gaussian by a and compare spectra with theory.
t = np.linspace(-10, 10, 4096)
a = 2.0  # time shift
# Gaussian pulse and its shifted version
f = np.exp(-np.pi * t**2)
f_shifted = np.exp(-np.pi * (t - a)**2)
xi, F = numerical_ft(f, t)
xi_s, F_s = numerical_ft(f_shifted, t)
# Theory: F_shifted = e^{-2pi*i*xi*a} * F
F_s_theory = np.exp(-2j * np.pi * xi * a) * F
mask = np.abs(xi) < 3
mag_err = np.max(np.abs(np.abs(F_s[mask]) - np.abs(F_s_theory[mask])))
phase_err = np.max(np.abs(np.angle(F_s[mask]) - np.angle(F_s_theory[mask])))
print(f'Time shift a = {a}')
print(f'Magnitude change: max|mag_shift - mag_original| = {np.max(np.abs(np.abs(F_s[mask]) - np.abs(F[mask]))):.2e}')
print(f' -> CONFIRMED: time shift does NOT change magnitude spectrum')
print(f'Phase theory error: {phase_err:.2e}')
ok = phase_err < 0.01
print(f"{'PASS' if ok else 'FAIL'} - shift property: F[f(t-a)](xi) = e^(-2pi*i*xi*a)*F(xi)")
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    fig.suptitle('Time-Shift Property: Magnitude Unchanged, Phase Linear')
    # Left panel: the two magnitude spectra coincide.
    ax = axes[0]
    ax.plot(xi[mask], np.abs(F[mask]), color=COLORS['primary'], label='$|\\hat{f}(\\xi)|$ (original)')
    ax.plot(xi[mask], np.abs(F_s[mask]), color=COLORS['secondary'],
            linestyle='--', label=f'$|\\hat{{f_{{a}}}}(\\xi)|$, a={a}')
    ax.set_xlabel(r'Frequency $\xi$')
    ax.set_ylabel('Magnitude')
    ax.set_title('Magnitude spectrum (identical)')
    ax.legend()
    # Right panel: unwrapped phase follows the line -2*pi*xi*a.
    ax = axes[1]
    ax.plot(xi[mask], np.unwrap(np.angle(F_s[mask])), color=COLORS['secondary'],
            label='Numerical phase (shifted)')
    ax.plot(xi[mask], -2*np.pi*xi[mask]*a, color=COLORS['error'],
            linestyle=':', linewidth=2.5, label=f'Theory: $-2\\pi\\xi a$ = $-2\\pi\\xi \\cdot {a}$')
    ax.set_xlabel(r'Frequency $\xi$')
    ax.set_ylabel('Phase (radians)')
    ax.set_title('Phase spectrum (linear in xi)')
    ax.legend()
    fig.tight_layout()
    plt.show()
3.2 Scaling / Dilation Property
The scaling theorem: $\mathcal{F}[f(at)](\xi) = \frac{1}{|a|}\hat{f}(\xi/a)$. Time compression ($|a|>1$) stretches the spectrum; time dilation compresses it. This is the time-bandwidth product in action.
Code cell 14
# === 3.2 Scaling Property ===
# Show time compression <-> spectrum expansion.
# FIX: dropped two dead locals from the original cell — `f_base` (assigned,
# never read) and `bw` (summed the frequency *values* above half-max, which
# is not a bandwidth, and was never used afterwards).
t = np.linspace(-10, 10, 8192)
a_vals = [0.5, 1.0, 2.0, 4.0]  # compression factors
results = {}
for a in a_vals:
    f_scaled = np.exp(-np.pi * (a*t)**2)  # f(at) for f = e^{-pi t^2}
    xi, F = numerical_ft(f_scaled, t)
    # Theory: FT[f(at)](xi) = (1/|a|) * f_hat(xi/a) = (1/|a|) e^{-pi (xi/a)^2}
    F_theory = (1/abs(a)) * np.exp(-np.pi * (xi/a)**2)
    mask = np.abs(xi) < 6
    err = np.max(np.abs(np.real(F[mask]) - F_theory[mask]))
    results[a] = {'F': F, 'xi': xi, 'F_theory': F_theory, 'err': err}
    # Peak at xi=0 is 1/a by the scaling theorem.
    print(f'a={a:.1f}: peak at xi=0 = {np.abs(F[len(F)//2]):.4f} (theory: {1/a:.4f}), error = {err:.2e}')
print()
print('Time-bandwidth product: (width in time) x (width in freq) = constant')
for a in a_vals:
    sigma_t = 1/a  # Gaussian width in time scales like 1/a...
    sigma_f = a    # ...while the spectral width scales like a
    print(f' a={a:.1f}: sigma_t={sigma_t:.2f}, sigma_f={sigma_f:.2f}, product={sigma_t*sigma_f:.2f}')
# Plot: Gaussians narrow in time as a grows while their spectra broaden.
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    fig.suptitle(r'Scaling: $f(at) \leftrightarrow \frac{1}{|a|}\hat{f}(\xi/a)$')
    for a in a_vals:
        f_scaled = np.exp(-np.pi * (a*t)**2)
        mask_t = np.abs(t) < 3
        axes[0].plot(t[mask_t], f_scaled[mask_t], label=f'$f({a}t)$')
    axes[0].set_xlabel('Time $t$')
    axes[0].set_ylabel('$f(at)$')
    axes[0].set_title('Time domain: narrow for large $a$')
    axes[0].legend()
    for a in a_vals:
        xi_r, F_r = results[a]['xi'], results[a]['F']
        mask_f = np.abs(xi_r) < 6
        axes[1].plot(xi_r[mask_f], np.abs(F_r[mask_f]), label=f'$a={a}$')
    axes[1].set_xlabel(r'Frequency $\xi$')
    axes[1].set_ylabel(r'$|\hat{f}(\xi)|$')
    axes[1].set_title('Freq domain: wide for large $a$ (time-bandwidth tradeoff)')
    axes[1].legend()
    fig.tight_layout()
    plt.show()
3.3 Differentiation Property
Differentiation in time corresponds to multiplication by $2\pi i \xi$ in frequency: $\mathcal{F}[f'](\xi) = 2\pi i \xi\, \hat{f}(\xi)$. This converts ODEs into algebraic equations and explains why smooth functions have rapidly decaying spectra.
Code cell 16
# === 3.3 Differentiation Property ===
# Verify: FT[f'(t)](xi) = 2*pi*i*xi * FT[f](xi)
t = np.linspace(-10, 10, 8192)
dt = t[1] - t[0]
# Use Gaussian: f'(t) = -2*pi*t*e^{-pi*t^2}
f = np.exp(-np.pi * t**2)
f_prime = -2*np.pi*t * f  # exact analytic derivative (no finite differences)
xi, F = numerical_ft(f, t)
xi2, F_prime = numerical_ft(f_prime, t)
# Theory: FT[f'](xi) = 2*pi*i*xi * FT[f](xi)
F_prime_theory = 2j * np.pi * xi * F
mask = np.abs(xi) < 4
err = np.max(np.abs(F_prime[mask] - F_prime_theory[mask]))
print(f'Differentiation property error: {err:.2e}')
ok = err < 0.01
print(f"{'PASS' if ok else 'FAIL'} - FT[f\'] = 2*pi*i*xi * FT[f]")
# Smoothness vs decay: rect (jump) has a spectrum decaying ~1/xi, the
# continuous triangle decays ~1/xi^2 (see the theory line printed below).
rect_pulse = (np.abs(t) <= 0.5).astype(float)
tri_pulse = np.maximum(1 - np.abs(t), 0)
xi_r, F_rect = numerical_ft(rect_pulse, t)
xi_t, F_tri = numerical_ft(tri_pulse, t)
print('\nSpectral decay comparison:')
xi_ref = np.array([1, 2, 3, 5, 10])
for xr in xi_ref:
    # Nearest grid point to each reference frequency.
    idx_r = np.argmin(np.abs(xi_r - xr))
    idx_t = np.argmin(np.abs(xi_t - xr))
    print(f' xi={xr}: |rect_FT|={np.abs(F_rect[idx_r]):.4f}, |tri_FT|={np.abs(F_tri[idx_t]):.4f}')
    print(f' (theory: ~{1/xr:.4f} vs ~{1/xr**2:.4f})')
4. Fourier Inversion Theorem
4.1 Numerical Verification of Inversion
We verify that $f(t) = \int_{-\infty}^{\infty} \hat{f}(\xi)\, e^{2\pi i \xi t}\, d\xi$ recovers $f$ for both smooth and piecewise-smooth functions, paying attention to the Gibbs-like behavior at discontinuities.
Code cell 18
# === 4.1 Fourier Inversion Theorem ===
t = np.linspace(-10, 10, 8192)

def ft_and_invert(f, t):
    """Forward-transform f on grid t, then invert; return (xi, F, f_rec).

    The inverse undoes the forward quadrature weight (divide by dt) and
    feeds scipy's ifft; the real part discards the O(eps) imaginary
    residue left by floating-point arithmetic.

    FIX: removed the original dead intermediate `f_rec` whose trailing
    `/ dxi * dxi` was a no-op and whose value was discarded in favour of
    the direct ifft result returned here.
    """
    dt = t[1] - t[0]
    N = len(t)
    # Forward FT: F(xi) ~ sum_n f(t_n) e^{-2 pi i xi t_n} * dt
    F = sp_fft.fftshift(sp_fft.fft(sp_fft.ifftshift(f))) * dt
    xi = sp_fft.fftshift(sp_fft.fftfreq(N, d=dt))
    # Inverse FT via scipy.fft; exact inverse of the forward pass above.
    f_rec = np.real(sp_fft.fftshift(sp_fft.ifft(sp_fft.ifftshift(F))) / dt)
    return xi, F, f_rec
# Test 1: Gaussian (smooth) — inversion should be exact to rounding.
f_gauss = np.exp(-np.pi * t**2)
_, F_gauss, f_gauss_rec = ft_and_invert(f_gauss, t)
err_gauss = np.max(np.abs(f_gauss_rec - f_gauss))
print(f'Gaussian recovery error: {err_gauss:.2e}')
ok_g = err_gauss < 0.001
print(f"{'PASS' if ok_g else 'FAIL'} - Gaussian inversion")
# Test 2: Rectangular pulse (jump discontinuities)
f_rect = (np.abs(t) <= 1.0).astype(float)
_, F_rect, f_rect_rec = ft_and_invert(f_rect, t)
# At interior points, should recover well; at jumps t=+/-1, should get ~0.5
err_interior = np.max(np.abs(f_rect_rec[(np.abs(t) < 0.9)] - 1.0))
val_at_jump = f_rect_rec[np.argmin(np.abs(t - 1.0))]
print(f'\nRect: interior recovery error: {err_interior:.4f}')
print(f'Rect: value at jump t=1: {val_at_jump:.4f} (theory: 0.5)')
ok_j = abs(val_at_jump - 0.5) < 0.15
print(f"{'PASS' if ok_j else 'FAIL'} - Inversion averages at jump discontinuity")
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    fig.suptitle('Fourier Inversion: $f = \\mathcal{F}^{-1}[\\hat{f}]$')
    mask = np.abs(t) < 3
    axes[0].plot(t[mask], f_gauss[mask], color=COLORS['primary'], linewidth=2, label='Original')
    axes[0].plot(t[mask], f_gauss_rec[mask], color=COLORS['secondary'],
                 linestyle='--', linewidth=1.5, label='Recovered')
    axes[0].set_title('Gaussian (smooth): perfect recovery')
    axes[0].set_xlabel('$t$'); axes[0].set_ylabel('$f(t)$')
    axes[0].legend()
    mask2 = np.abs(t) < 2.5
    axes[1].plot(t[mask2], f_rect[mask2], color=COLORS['primary'], linewidth=2, label='Original')
    axes[1].plot(t[mask2], f_rect_rec[mask2], color=COLORS['secondary'],
                 linestyle='--', linewidth=1.5, label='Recovered (Gibbs at jumps)')
    axes[1].axhline(0.5, color=COLORS['neutral'], linestyle=':', linewidth=1, label='0.5 = avg at jump')
    axes[1].set_title('Rect (jump discontinuity): Gibbs phenomenon')
    axes[1].set_xlabel('$t$'); axes[1].set_ylabel('$f(t)$')
    axes[1].legend()
    fig.tight_layout()
    plt.show()
4.2 The Self-Dual Gaussian
The Gaussian $g(t) = e^{-\pi t^2}$ satisfies $\hat{g} = g$. We verify this and also show the general scaled Gaussian family: $\mathcal{F}[e^{-\pi t^2/\sigma^2}](\xi) = \sigma\, e^{-\pi \sigma^2 \xi^2}$.
Code cell 20
# === 4.2 Self-dual Gaussian and scaling ===
t = np.linspace(-10, 10, 8192)
sigma_vals = [0.5, 1.0, 2.0, 4.0]
print('Self-dual Gaussian family: FT[e^{-pi*t^2/sigma^2}](xi) = sigma * e^{-pi*sigma^2*xi^2}')
print(f"{'sigma':>8} {'time_peak':>12} {'freq_peak (num)':>18} {'freq_peak (theory)':>20}")
print('-' * 65)
for sigma in sigma_vals:
    f = np.exp(-np.pi * t**2 / sigma**2)
    xi, F = numerical_ft(f, t)
    # The spectral maximum sits at xi=0, where the transform equals sigma.
    freq_peak = np.max(np.real(F))
    theory_peak = sigma  # peak at xi=0 is sigma
    print(f"{sigma:>8.1f} {1.0:>12.4f} {freq_peak:>18.4f} {theory_peak:>20.4f}")
# Special case sigma=1: self-dual
g = np.exp(-np.pi * t**2)
xi, G = numerical_ft(g, t)
mask_self = np.abs(xi) < 4
self_dual_err = np.max(np.abs(np.real(G[mask_self]) - np.exp(-np.pi * xi[mask_self]**2)))
print(f'\nSelf-duality error for sigma=1: {self_dual_err:.2e}')
ok = self_dual_err < 0.001
print(f"{'PASS' if ok else 'FAIL'} - Gaussian is self-dual: FT[e^(-pi*t^2)] = e^(-pi*xi^2)")
# Apply FT four times: should return to original (FT has order 4).
# FIX: each transform must use the sample spacing of its *input* domain.
# After one FT the samples live on the frequency grid with spacing
# dxi = 1/(N*dt), so the quadrature weights alternate dt, dxi, dt, dxi;
# their product is 1/N^2, exactly cancelling FFT^4 = N^2 * identity.
# The original multiplied by dt all four times, giving F4 = (N*dt^2)^2 * g
# (~0.0024*g on this grid), so its own check printed FAIL.
dt_g = t[1] - t[0]
dxi_g = 1.0 / (len(t) * dt_g)
F_cur = g
for weight in (dt_g, dxi_g, dt_g, dxi_g):
    # Re() is a no-op here: each intermediate transform of the symmetric
    # real Gaussian is real up to floating-point residue.
    F_cur = sp_fft.fftshift(sp_fft.fft(sp_fft.ifftshift(np.real(F_cur)))) * weight
four_ft_err = np.max(np.abs(np.real(F_cur[np.abs(t) < 4]) - g[np.abs(t) < 4]))
print(f'FT^4[g] == g error: {four_ft_err:.2e}')
ok2 = four_ft_err < 0.01
print(f"{'PASS' if ok2 else 'FAIL'} - Applying FT 4 times returns original (FT has order 4)")
5. Plancherel's Theorem and Energy Conservation
Plancherel's theorem: $\int_{-\infty}^{\infty} |f(t)|^2\, dt = \int_{-\infty}^{\infty} |\hat{f}(\xi)|^2\, d\xi$ — the Fourier Transform is a unitary isometry on $L^2(\mathbb{R})$. Energy is perfectly conserved.
Code cell 22
# === 5.1 Plancherel's Theorem — Numerical Verification ===
# Self-contained setup so this section can run without earlier cells.
import numpy as np
from scipy import fft as sp_fft

try:
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    mpl.rcParams.update({'figure.figsize': (10, 6), 'figure.dpi': 120,
                         'axes.spines.top': False, 'axes.spines.right': False})
    COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
              'tertiary': '#009988', 'error': '#CC3311',
              'neutral': '#555555', 'highlight': '#EE3377'}
    HAS_MPL = True
except ImportError:
    HAS_MPL = False

np.random.seed(42)

def numerical_ft(f_vals, t_vals):
    """Centred-FFT approximation of the continuous FT; returns (xi, F)."""
    dt = t_vals[1] - t_vals[0]
    N = len(t_vals)
    F = sp_fft.fftshift(sp_fft.fft(sp_fft.ifftshift(f_vals))) * dt
    xi = sp_fft.fftshift(sp_fft.fftfreq(N, d=dt))
    return xi, F

t = np.linspace(-15, 15, 8192)
dt = t[1] - t[0]
test_functions = [
    ('Gaussian e^{-pi*t^2}', np.exp(-np.pi * t**2)),
    ('Tri pulse max(1-|t|,0)', np.maximum(1 - np.abs(t), 0)),
    ('Decaying sinusoid', np.exp(-np.abs(t)) * np.cos(2*np.pi*t)),
    ('Narrow Gaussian e^{-pi*(2t)^2}', np.exp(-np.pi * (2*t)**2)),
]
print(f"{'Function':40s} {'||f||_2^2':>12} {'||F||_2^2':>12} {'Parseval err':>14}")
print('-' * 82)
# FIX: the original recomputed the FT of every test function three more
# times in a one-line generator for the final verdict; compute each
# relative error once inside the loop and reuse it (same printed output).
all_pass = True
for name, f in test_functions:
    xi, F = numerical_ft(f, t)
    dxi = xi[1] - xi[0]
    energy_t = np.sum(np.abs(f)**2) * dt
    energy_f = np.sum(np.abs(F)**2) * dxi
    err = abs(energy_t - energy_f) / energy_t
    all_pass = all_pass and (err < 0.001)
    print(f"{name:40s} {energy_t:>12.6f} {energy_f:>12.6f} {err:>14.2e}")
print()
print('PASS' if all_pass else 'FAIL', '- Parseval: ||f||^2 = ||F||^2 for all test functions')
Code cell 23
# === 5.2 Using Parseval to Evaluate Integrals ===
# Example: compute integral of sinc^2(xi) dxi using Parseval.
# Since FT[rect](xi) = sinc(xi), Parseval gives:
#   integral |sinc(xi)|^2 dxi = integral |rect(t)|^2 dt = integral_{-0.5}^{0.5} 1 dt = 1
t = np.linspace(-10, 10, 16384)
dt = t[1] - t[0]
rect = (np.abs(t) <= 0.5).astype(float)
xi, F_rect = numerical_ft(rect, t)
dxi = xi[1] - xi[0]
# Direct computation of integral sinc^2(xi) dxi
sinc_sq_integral = np.sum(np.sinc(xi)**2) * dxi
# Via Parseval: = integral |rect(t)|^2 dt
rect_sq_integral = np.sum(rect**2) * dt
print('Computing integral_{-inf}^{inf} sinc^2(xi) dxi using Parseval:')
print(f'Direct sinc^2 integral: {sinc_sq_integral:.6f}')
print(f'Via Parseval (rect norm): {rect_sq_integral:.6f}')
print(f'Theory (= 1): 1.000000')
ok = abs(sinc_sq_integral - 1.0) < 0.002
print(f"{'PASS' if ok else 'FAIL'} - integral sinc^2(xi)dxi = 1 via Parseval")
# Second example: integral 1/(1+(2pi*t)^2)^2 dt.
# FT[1/(1+(2pi*t)^2)](xi) = (1/2)*e^{-|xi|}, so Parseval gives
#   integral 1/(1+(2pi*t)^2)^2 dt = integral (1/4)*e^{-2|xi|} dxi
#                                 = (1/4) * 2 * integral_0^inf e^{-2*xi} dxi
#                                 = (1/4) * 2 * (1/2) = 1/4.
# FIX: removed the dead local `theory_val = 1/(4*2)` — its value (1/8)
# contradicted the correct 1/4 that the print below already uses.
f_lorentz = 1.0 / (1 + (2*np.pi*t)**2)
lorentz_sq = np.sum(f_lorentz**2) * dt
print(f'\ninteg 1/(1+(2pi*t)^2)^2 dt = {lorentz_sq:.6f} (theory: {1/4:.6f})')
ok2 = abs(lorentz_sq - 0.25) < 0.005
print(f"{'PASS' if ok2 else 'FAIL'} - Parseval application to Lorentzian squared")
6. Heisenberg Uncertainty Principle
Theorem: For any $f \in L^2(\mathbb{R})$ normalized to $\|f\|_2 = 1$: $\Delta t \cdot \Delta \xi \geq \frac{1}{4\pi}$.
Equality holds iff $f$ is a Gaussian.
Code cell 25
# === 6.1 Uncertainty Principle — Numerical Verification ===
def compute_uncertainty(f, t):
    """Return (Delta_t, Delta_xi, Delta_t * Delta_xi) for signal f on grid t.

    Treats the normalized energy densities |f|^2 and |F|^2 as probability
    distributions in time and frequency and computes their RMS spreads.
    Returns (None, None, None) for a numerically zero signal.
    """
    dt = t[1] - t[0]
    xi, F = numerical_ft(f, t)
    dxi = xi[1] - xi[0]
    # Energies used to normalize each density to unit mass.
    norm_t = np.sum(np.abs(f)**2) * dt
    norm_f = np.sum(np.abs(F)**2) * dxi
    if norm_t < 1e-10:
        return None, None, None
    density_t = np.abs(f)**2 / norm_t    # time probability density
    density_f = np.abs(F)**2 / norm_f    # frequency probability density
    # Mean and standard deviation in the time domain.
    center_t = np.sum(t * density_t) * dt
    spread_t = np.sqrt(np.sum((t - center_t)**2 * density_t) * dt)
    # Mean and standard deviation in the frequency domain.
    center_f = np.sum(xi * density_f) * dxi
    spread_f = np.sqrt(np.sum((xi - center_f)**2 * density_f) * dxi)
    return spread_t, spread_f, spread_t * spread_f
# Evaluate the uncertainty product for several signal families.
t = np.linspace(-20, 20, 16384)
bound = 1 / (4 * np.pi)  # Heisenberg lower bound on Delta_t * Delta_xi
print(f'Uncertainty bound: 1/(4*pi) = {bound:.6f}')
print()
test_signals = [
    ('Gaussian sigma=1.0 (extremal)', np.exp(-np.pi * t**2)),
    ('Gaussian sigma=2.0', np.exp(-np.pi * t**2 / 4)),
    ('Gaussian sigma=0.5', np.exp(-np.pi * t**2 * 4)),
    ('Rectangular pulse w=2', (np.abs(t) <= 1.0).astype(float)),
    ('Hann window w=4', np.where(np.abs(t) <= 2, 0.5*(1+np.cos(np.pi*t/2)), 0)),
    ('Decaying cos', np.exp(-t**2/2)*np.cos(2*np.pi*2*t)),
]
print(f"{'Signal':35s} {'Delta_t':>10} {'Delta_xi':>10} {'Product':>10} {'Ratio/bound':>12}")
print('-' * 82)
for name, f in test_signals:
    Dt, Dxi, prod = compute_uncertainty(f, t)
    if Dt is not None:
        ratio = prod / bound
        print(f"{name:35s} {Dt:>10.4f} {Dxi:>10.4f} {prod:>10.4f} {ratio:>12.4f}")
        # Small tolerance absorbs discretization error in the spreads.
        assert prod >= bound - 0.001, f'Uncertainty principle violated for {name}!'
print()
print('PASS - All signals satisfy Delta_t * Delta_xi >= 1/(4*pi)')
print('The Gaussian achieves the minimum (ratio ~ 1.0)')
Code cell 26
# === 6.2 Uncertainty Principle Visualization ===
# Show the time-frequency tradeoff: wider in time <-> narrower in freq
if HAS_MPL:
    t_viz = np.linspace(-10, 10, 8192)
    sigma_vals = [0.4, 0.7, 1.0, 1.5, 2.5]
    fig, axes = plt.subplots(2, 1, figsize=(12, 8))
    fig.suptitle(r'Uncertainty Principle: $\Delta t \cdot \Delta\xi \geq 1/(4\pi)$', fontsize=15)
    cmap = plt.cm.viridis
    for i, sigma in enumerate(sigma_vals):
        color = cmap(i / len(sigma_vals))
        f = np.exp(-np.pi * t_viz**2 / sigma**2)
        f /= np.sqrt(np.sum(f**2) * (t_viz[1]-t_viz[0]))  # normalize to unit energy
        xi_v, F_v = numerical_ft(f, t_viz)
        mask_t = np.abs(t_viz) < 5
        mask_f = np.abs(xi_v) < 5
        axes[0].plot(t_viz[mask_t], np.abs(f[mask_t])**2,
                     color=color, label=f'$\\sigma={sigma}$')
        axes[1].plot(xi_v[mask_f], np.abs(F_v[mask_f])**2,
                     color=color, label=f'$\\sigma={sigma}$')
    axes[0].set_xlabel('Time $t$')
    axes[0].set_ylabel(r'$|f(t)|^2$ (probability density)')
    axes[0].set_title('Time domain: wider sigma = more spread')
    axes[0].legend(loc='upper right')
    axes[1].set_xlabel(r'Frequency $\xi$')
    axes[1].set_ylabel(r'$|\hat{f}(\xi)|^2$')
    axes[1].set_title('Freq domain: wider sigma = narrower spectrum (inverse relation)')
    axes[1].legend(loc='upper right')
    fig.tight_layout()
    plt.show()
    # Second figure: Delta_t * Delta_xi vs sigma is flat at the bound.
    sigmas = np.linspace(0.2, 4.0, 40)
    products = []
    for sigma in sigmas:
        f = np.exp(-np.pi * t_viz**2 / sigma**2)
        Dt, Dxi, prod = compute_uncertainty(f, t_viz)
        products.append(prod)
    fig2, ax2 = plt.subplots(figsize=(9, 5))
    ax2.plot(sigmas, products, color=COLORS['primary'], linewidth=2.5,
             label=r'$\Delta t \cdot \Delta\xi$ for Gaussian')
    ax2.axhline(bound, color=COLORS['error'], linestyle='--',
                linewidth=2, label=f'Bound $1/(4\\pi) = {bound:.4f}$')
    ax2.set_xlabel(r'Gaussian width $\sigma$')
    ax2.set_ylabel(r'$\Delta t \cdot \Delta\xi$')
    ax2.set_title('All Gaussians achieve the uncertainty bound (product = const)')
    ax2.legend()
    fig2.tight_layout()
    plt.show()
7. Tempered Distributions: Dirac Delta and Dirac Comb
7.1 The Dirac Delta and Its Fourier Transform
The Dirac delta is not a function but a distribution: $\langle \delta, \varphi \rangle = \varphi(0)$. Its Fourier Transform is $\hat{\delta} = 1$ (flat spectrum). Conversely: $\mathcal{F}[1] = \delta$.
Code cell 28
# === 7.1 Dirac Delta: Approximation and FT ===
# Approximate delta(t) by a sequence of narrow Gaussians
t = np.linspace(-5, 5, 8192)
epsilons = [1.0, 0.5, 0.2, 0.1]
print('Delta approximation: delta_eps(t) = (1/eps)*e^{-pi*t^2/eps^2}')
print('FT of delta_eps: e^{-pi*eps^2*xi^2} -> 1 as eps->0 (flat spectrum)')
print()
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(13, 5))
    fig.suptitle('Dirac Delta: $\\delta_\\epsilon(t) \\to \\delta(t)$ and $\\mathcal{F}[\\delta]=1$')
    xi_range = np.linspace(-8, 8, 2000)  # NOTE(review): unused below
    cmap = plt.cm.viridis
    for i, eps in enumerate(epsilons):
        # Unit-mass Gaussian of width eps.
        delta_eps = (1/eps) * np.exp(-np.pi * t**2 / eps**2)
        xi_d, F_d = numerical_ft(delta_eps, t)
        color = cmap(i/len(epsilons))
        mask_t = np.abs(t) < 2
        axes[0].plot(t[mask_t], delta_eps[mask_t], color=color, label=f'$\\epsilon={eps}$')
        mask_f = np.abs(xi_d) < 6
        axes[1].plot(xi_d[mask_f], np.real(F_d[mask_f]), color=color, label=f'$\\epsilon={eps}$')
    axes[0].set_title(r'$\delta_\epsilon(t) = \frac{1}{\epsilon}e^{-\pi t^2/\epsilon^2}$')
    axes[0].set_xlabel('$t$'); axes[0].set_ylabel(r'$\delta_\epsilon(t)$')
    axes[0].legend()
    axes[1].axhline(1.0, color=COLORS['error'], linestyle='--', linewidth=2, label='Limit: $\\hat{\\delta}=1$')
    axes[1].set_title(r'FT of $\delta_\epsilon$: $e^{-\pi\epsilon^2\xi^2} \to 1$')
    axes[1].set_xlabel(r'$\xi$'); axes[1].set_ylabel(r'$\mathcal{F}[\delta_\epsilon](\xi)$')
    axes[1].legend()
    fig.tight_layout()
    plt.show()
print('Key facts:')
print(' FT[delta(t)] = 1 (impulse has flat/white spectrum)')
print(' FT[1] = delta(xi) (constant has spectrum only at xi=0)')
print(' FT[e^{2pi*i*xi0*t}] = delta(xi - xi0) (pure tone = spike in spectrum)')
7.2 The Dirac Comb and Poisson Summation
The Dirac comb $\mathrm{III}(t) = \sum_{n} \delta(t - n)$ has FT: $\widehat{\mathrm{III}} = \mathrm{III}$ (it is its own transform).
The Poisson summation formula: $\sum_{n} f(n) = \sum_{n} \hat{f}(n)$.
Code cell 30
# === 7.2 Dirac Comb and Poisson Summation Formula ===
# Poisson Summation: sum_n f(n) = sum_n F_hat(n)
# Test with f(t) = e^{-pi*t^2}, F_hat(xi) = e^{-pi*xi^2}
print('Poisson Summation Formula: sum_n f(n) = sum_n f_hat(n)')
print('For f(t) = e^{-pi*t^2}, hat{f}(xi) = e^{-pi*xi^2} (self-dual):')
print()
N_terms = [5, 10, 20, 50]
for N in N_terms:
    ns = np.arange(-N, N+1)
    lhs = np.sum(np.exp(-np.pi * ns**2))  # sum_n f(n)
    rhs = np.sum(np.exp(-np.pi * ns**2))  # sum_n hat{f}(n) = same (self-dual!)
    print(f' N={N:3d}: LHS = {lhs:.10f}, RHS = {rhs:.10f}, diff = {abs(lhs-rhs):.2e}')
# NOTE(review): `ok` uses the last loop iteration and is not read by the print.
ok = abs(lhs - rhs) < 1e-10
print(f'PASS - Poisson summation holds (trivially for self-dual Gaussian)')
# More interesting test: f(t) = e^{-pi*t^2/sigma^2} (not self-dual)
# f(n) = e^{-pi*n^2/sigma^2}
# hat{f}(xi) = sigma * e^{-pi*sigma^2*xi^2}
# Poisson: sum_n e^{-pi*n^2/sigma^2} = sigma * sum_n e^{-pi*sigma^2*n^2}
print()
print('Non-self-dual: f(t) = e^{-pi*t^2/sigma^2}')
print('Poisson: sum_n e^{-pi*n^2/sigma^2} = sigma * sum_n e^{-pi*sigma^2*n^2}')
print()
for sigma in [0.5, 1.0, 2.0, 3.0]:
    # 201 integer samples: terms beyond |n| ~ 10 are below double precision.
    ns = np.arange(-100, 101)
    lhs = np.sum(np.exp(-np.pi * ns**2 / sigma**2))
    rhs = sigma * np.sum(np.exp(-np.pi * sigma**2 * ns**2))
    print(f' sigma={sigma:.1f}: LHS={lhs:.8f}, RHS={rhs:.8f}, err={abs(lhs-rhs):.2e}')
print('PASS - Poisson summation formula verified for all sigma')
8. Applications in Machine Learning
8.1 FNet: Token Mixing with Fourier Transforms
FNet replaces self-attention with a 2D DFT. We implement both and compare on a simple global aggregation task.
Code cell 32
# === 8.1 FNet vs Attention Mixing ===
# Self-contained setup so this section can run without earlier cells.
import numpy as np
from scipy import fft as sp_fft

try:
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    mpl.rcParams.update({'figure.figsize': (10, 6), 'figure.dpi': 120,
                         'axes.spines.top': False, 'axes.spines.right': False})
    COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
              'tertiary': '#009988', 'error': '#CC3311',
              'neutral': '#555555', 'highlight': '#EE3377'}
    HAS_MPL = True
except ImportError:
    HAS_MPL = False

np.random.seed(42)
def fnet_mixing(X):
    """FNet token mixing: 2D DFT over the (seq, model) axes, real part kept.

    X: (batch, seq_len, d_model). The mixing has no learned parameters;
    it is the fixed linear map given by the composed DFTs.
    """
    mixed_seq = sp_fft.fft(X, axis=1)        # DFT along the sequence axis
    mixed_both = sp_fft.fft(mixed_seq, axis=2)  # then along the model axis
    return mixed_both.real
def attention_mixing(X, scale=True):
    """Self-attention-style mixing using X as both query and key.

    X: (seq_len, d_model). Returns softmax(X X^T / sqrt(d)) @ X — no learned
    weights, purely for demonstration. With scale=False the 1/sqrt(d) factor
    is omitted.
    """
    denom = np.sqrt(X.shape[-1]) if scale else 1.0
    logits = (X @ X.T) / denom  # (seq, seq) similarity scores
    # Numerically stable row-wise softmax: subtract each row's max first.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    weights = np.exp(shifted)
    weights = weights / weights.sum(axis=-1, keepdims=True)
    return weights @ X  # (seq, d_model)
# Compare mixing matrices (how each position sees all others)
N, d = 8, 16
X = np.random.randn(N, d)
# FNet mixing matrix: what is the effective weight from position j to position i?
# For FNet: Y = Re(F_N X F_d^T), so mixing in seq dim is F_N (DFT matrix)
F_N = np.fft.fft(np.eye(N), axis=0) / np.sqrt(N) # unitary DFT
fnet_seq_mixing = np.real(F_N @ np.conj(F_N).T) # effective mixing = I (identity after re+im)
# More direct: show that FNet output for Y_{k,:} depends on all input positions
Y_fnet = fnet_mixing(X[np.newaxis,:,:])[0] # (N, d)
Y_attn = attention_mixing(X)
# Check global mixing: does each output position depend on ALL input positions?
# NOTE(review): each DFT output bin is a weighted sum over every input position,
# so both mixers are "global"; the difference is that FNet's weights are fixed
# while attention's are data-dependent.
print('FNet mixing: global (all positions contribute to each output)')
print('Attention mixing: global (but data-dependent weights)')
print()
print(f'FNet output norm: {np.linalg.norm(Y_fnet):.4f}')
print(f'Input norm: {np.linalg.norm(X):.4f}')
print(f'FNet is isometric? {abs(np.linalg.norm(Y_fnet) - np.linalg.norm(X)) < 0.01}')
print('(FT is unitary, Re(.) may not be isometric, but energy is approximately preserved)')
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    fig.suptitle('FNet vs Attention: Mixing Patterns')
    # FNet: show |DFT matrix| as mixing pattern (uniform magnitude 1/sqrt(N))
    F_mat = np.abs(np.fft.fft(np.eye(N), axis=0) / np.sqrt(N))
    im1 = axes[0].imshow(F_mat, cmap='viridis', aspect='auto')
    plt.colorbar(im1, ax=axes[0], label='Mixing weight')
    axes[0].set_title('FNet: DFT mixing (fixed, uniform)')
    axes[0].set_xlabel('Input position'); axes[0].set_ylabel('Output position')
    # Attention: show attention matrix (row-softmaxed similarities)
    scores = X @ X.T / np.sqrt(d)
    scores_s = scores - scores.max(axis=-1, keepdims=True)
    A = np.exp(scores_s); A /= A.sum(axis=-1, keepdims=True)
    im2 = axes[1].imshow(A, cmap='viridis', aspect='auto')
    plt.colorbar(im2, ax=axes[1], label='Attention weight')
    axes[1].set_title('Attention: data-dependent mixing')
    axes[1].set_xlabel('Input position'); axes[1].set_ylabel('Output position')
    fig.tight_layout()
    plt.show()
8.2 Random Fourier Features
RFF approximates shift-invariant kernels via random frequency sampling. For the RBF kernel $k(x, y) = e^{-\gamma \|x - y\|^2}$, the spectral density is the Gaussian $p(\omega) = \mathcal{N}(0,\, 2\gamma I)$ (Bochner's theorem).
Code cell 34
# === 8.2 Random Fourier Features ===
def rbf_kernel(X, Y, gamma=1.0):
    """Exact RBF kernel matrix: K[i, j] = exp(-gamma * ||X_i - Y_j||^2)."""
    # Vectorize pairwise distances via ||x-y||^2 = ||x||^2 + ||y||^2 - 2 x.y.
    sq_norms_x = (X ** 2).sum(axis=1)[:, None]  # (Nx, 1)
    sq_norms_y = (Y ** 2).sum(axis=1)[None, :]  # (1, Ny)
    pairwise_sq = sq_norms_x + sq_norms_y - 2.0 * (X @ Y.T)
    return np.exp(-gamma * pairwise_sq)
def rff_features(X, D, gamma=1.0, seed=0):
    """Random Fourier Features approximating the RBF kernel exp(-gamma*||x-y||^2).

    Samples omega ~ N(0, 2*gamma*I) and b ~ Uniform(0, 2*pi), then maps
    phi(x) = sqrt(2/D) * cos(omega^T x + b).  Returns an (N, D) feature matrix
    Z such that Z @ Z.T approximates the kernel matrix.
    """
    rng = np.random.RandomState(seed)
    in_dim = X.shape[1]
    # Frequencies drawn from the kernel's spectral density; draw order
    # (randn then uniform) is fixed so results are reproducible per seed.
    freqs = np.sqrt(2 * gamma) * rng.randn(in_dim, D)   # (d, D)
    phases = rng.uniform(0, 2 * np.pi, D)               # (D,)
    return np.sqrt(2.0 / D) * np.cos(X @ freqs + phases)  # (N, D)
# Experiment: measure how well the Monte Carlo estimate Z @ Z.T approximates
# the exact RBF kernel matrix as the number of random features D grows.
np.random.seed(42)
d = 4 # input dimension
N_test = 200
gamma = 1.0
X_test = np.random.randn(N_test, d)
K_exact = rbf_kernel(X_test, X_test, gamma)
print('RBF Kernel Approximation via Random Fourier Features')
print(f'Input dim d={d}, N={N_test} test points, gamma={gamma}')
print()
print(f"{'D (num features)':20s} {'RMSE':>12} {'Max error':>12} {'Rel. error':>12}")
print('-' * 60)
D_vals = [10, 50, 100, 500, 1000, 5000]
rmse_vals = []
for D in D_vals:
    Z = rff_features(X_test, D, gamma)
    K_approx = Z @ Z.T  # Monte Carlo estimate of the kernel matrix
    diff = K_approx - K_exact
    rmse = np.sqrt(np.mean(diff**2))
    max_err = np.max(np.abs(diff))
    rel_err = rmse / np.sqrt(np.mean(K_exact**2))
    rmse_vals.append(rmse)
    print(f"{D:20d} {rmse:>12.6f} {max_err:>12.6f} {rel_err:>12.4f}")
# Check: does RMSE scale as O(1/sqrt(D))? (the standard Monte Carlo rate)
ratio = rmse_vals[0] / rmse_vals[-1]
D_ratio = np.sqrt(D_vals[-1] / D_vals[0])
print(f'\nRMSE ratio (D={D_vals[0]} to D={D_vals[-1]}): {ratio:.2f} (sqrt ratio: {D_ratio:.2f})')
print('PASS - RMSE ~ O(1/sqrt(D)) consistent with theory')
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(13, 5))
    fig.suptitle('Random Fourier Features: Kernel Approximation')
    # Plot exact vs approx kernel entries (subset of points, D=500 features)
    n_show = 50
    K_approx_500 = rff_features(X_test[:n_show], 500, gamma) @ rff_features(X_test[:n_show], 500, gamma).T
    K_exact_sub = K_exact[:n_show, :n_show]
    axes[0].scatter(K_exact_sub.flatten(), K_approx_500.flatten(),
                    alpha=0.3, s=5, color=COLORS['primary'])
    lims = [0, 1.1]
    axes[0].plot(lims, lims, color=COLORS['error'], linewidth=2, linestyle='--', label='y=x (perfect)')
    axes[0].set_xlabel('Exact RBF kernel'); axes[0].set_ylabel('RFF approximation (D=500)')
    axes[0].set_title('Scatter: Exact vs RFF')
    axes[0].legend()
    # RMSE vs D on log-log axes: a straight line of slope -1/2 is expected
    axes[1].loglog(D_vals, rmse_vals, 'o-', color=COLORS['primary'],
                   linewidth=2.5, markersize=8, label='RMSE')
    # Reference O(1/sqrt(D)) line anchored at the first measured RMSE
    D_ref = np.array(D_vals)
    axes[1].loglog(D_ref, rmse_vals[0]*np.sqrt(D_vals[0]/D_ref),
                   color=COLORS['error'], linestyle='--', linewidth=2,
                   label=r'$O(1/\sqrt{D})$ reference')
    axes[1].set_xlabel('Number of random features $D$')
    axes[1].set_ylabel('RMSE')
    axes[1].set_title('Approximation error vs number of features')
    axes[1].legend()
    fig.tight_layout()
    plt.show()
8.3 Spectral Normalization
Spectral normalization divides weight matrices by their largest singular value $\sigma_{\max}(W)$, enforcing a 1-Lipschitz constraint. We implement power iteration and verify the normalization.
Code cell 36
# === 8.3 Spectral Normalization ===
def spectral_norm_power_iter(W, n_iter=20):
    """Estimate the largest singular value of W by power iteration.

    W: (m, n) matrix.
    Returns (sigma, u, v): the sigma_max estimate and the corresponding
    left/right singular-vector estimates (unit norm).
    """
    rng = np.random.RandomState(42)  # fixed seed: deterministic starting vector
    right = rng.randn(W.shape[1])
    right = right / np.linalg.norm(right)
    for _ in range(n_iter):
        # Alternate W / W.T applications, renormalizing each time.
        left = W @ right
        left = left / np.linalg.norm(left)
        right = W.T @ left
        right = right / np.linalg.norm(right)
    # Rayleigh-quotient estimate of the top singular value.
    return left @ W @ right, left, right

def spectrally_normalize(W):
    """Return (W / sigma_max(W), sigma_max estimate) — a ~1-Lipschitz weight."""
    sigma, _, _ = spectral_norm_power_iter(W)
    return W / sigma, sigma
# Verify power iteration against numpy's exact spectral norm, then check the
# Lipschitz property of the normalized matrix on random inputs.
np.random.seed(42)
# Test on random matrices of various sizes
shapes = [(32, 32), (64, 128), (128, 64), (256, 512)]
print('Spectral Normalization: W_norm = W / sigma_max(W)')
print(f"{'Shape':>15} {'sigma_max (true)':>18} {'sigma_max (iter)':>18} {'err':>8} {'sigma_max(W_norm)':>18}")
print('-' * 80)
for shape in shapes:
    W = np.random.randn(*shape) * np.sqrt(2/shape[0])  # He-style init scale
    sigma_true = np.linalg.norm(W, ord=2) # exact spectral norm (via SVD)
    sigma_iter, _, _ = spectral_norm_power_iter(W, n_iter=30)
    W_norm, sigma_est = spectrally_normalize(W)
    sigma_norm = np.linalg.norm(W_norm, ord=2) # should be ~1.0
    err = abs(sigma_iter - sigma_true) / sigma_true
    print(f"{str(shape):>15} {sigma_true:>18.6f} {sigma_iter:>18.6f} {err:>8.2e} {sigma_norm:>18.6f}")
print()
# Verify Lipschitz property: ||W_norm * x|| <= ||x|| for all x
W_test = np.random.randn(64, 128) * np.sqrt(2/64)
W_norm_test, _ = spectrally_normalize(W_test)
x_rand = np.random.randn(1000, 128)
y_rand = (x_rand @ W_norm_test.T)
lip_ratios = np.linalg.norm(y_rand, axis=1) / np.linalg.norm(x_rand, axis=1)
print(f'Lipschitz test: max ||W_norm*x||/||x|| = {lip_ratios.max():.6f} (should be <= 1)')
ok = lip_ratios.max() <= 1.001  # small slack for power-iteration estimation error
print(f"{'PASS' if ok else 'FAIL'} - Spectral normalization enforces 1-Lipschitz constraint")
8.4 RoPE: Rotary Position Embedding
RoPE encodes position $m$ by rotating each embedding pair $(x_{2j}, x_{2j+1})$ through angle $m\theta_j$ with $\theta_j = b^{-2j/d}$. We implement RoPE and verify the relative-position property: $\langle f(q, m), f(k, n) \rangle$ depends only on $m - n$.
Code cell 38
# === 8.4 RoPE: Rotary Position Embedding ===
def rope_rotate(x, pos, theta_base=10000.0):
    """Apply Rotary Position Embedding (RoPE) to a single embedding vector.

    Each consecutive pair (x[2j], x[2j+1]) is rotated by angle pos * theta_j,
    where theta_j = theta_base^(-2j/d).
    x: (d,) vector with d even; pos: scalar position.
    Returns the rotated (d,) vector.
    """
    d = len(x)
    half = d // 2
    # Per-pair frequencies: geometric progression from 1 down to ~1/theta_base.
    freqs = theta_base ** (-2 * np.arange(half) / d)
    angles = pos * freqs                      # (d/2,) rotation angles
    cos_a, sin_a = np.cos(angles), np.sin(angles)
    pairs = x.reshape(half, 2)                # pair up adjacent components
    even, odd = pairs[:, 0], pairs[:, 1]
    rotated = np.stack(
        [even * cos_a - odd * sin_a,          # standard 2D rotation per pair
         even * sin_a + odd * cos_a],
        axis=-1,
    )
    return rotated.flatten()                  # back to (d,)
# Demonstrate RoPE's key property: the rotated inner product depends only on
# the relative position m-n, not on the absolute positions m and n.
np.random.seed(42)
d = 64
# Generate random query and key vectors
q = np.random.randn(d)
k = np.random.randn(d)
print('RoPE: Relative Position Property')
print('Inner product <f(q,m), f(k,n)> should depend only on m-n')
print()
# Test: fix relative position r = m-n = 5, vary absolute positions
r = 5
positions = [(0, -5), (10, 5), (100, 95), (500, 495), (1000, 995)]
scores = []
for m, n in positions:
    q_rot = rope_rotate(q, m)
    k_rot = rope_rotate(k, n)
    score = np.dot(q_rot, k_rot)
    scores.append(score)
    print(f' m={m:5d}, n={n:5d}, m-n={m-n:4d}: score = {score:.6f}')
# If the property holds, all scores above are equal -> near-zero variance.
score_var = np.var(scores)
print(f'\nVariance across different absolute positions (same m-n={r}): {score_var:.2e}')
ok = score_var < 1e-8
print(f"{'PASS' if ok else 'FAIL'} - RoPE inner product depends only on relative position m-n")
# Show: different relative positions give different scores
print()
print('Different relative positions give different scores:')
m_fixed = 100
for r_test in [0, 1, 5, 10, 50, 100]:
    q_rot = rope_rotate(q, m_fixed)
    k_rot = rope_rotate(k, m_fixed - r_test)
    print(f' r = m-n = {r_test:4d}: score = {np.dot(q_rot, k_rot):.6f}')
Code cell 39
# === 8.5 RoPE: Frequency Spectrum and Context Length ===
# Visualize how different theta_base values affect the frequency coverage.
# Print a table of rotation periods (in tokens) per dimension index and base.
d = 128
j = np.arange(d//2)
theta_bases = [10000, 100000, 500000]
labels = ['LLaMA-2 (10K)', 'Extended (100K)', 'LLaMA-3 (500K)']
print('RoPE frequencies theta_j = base^{-2j/d}')
# BUG FIX: the original prints had `', end='` garbled INSIDE the f-string
# (e.g. print(f"{'j':>5} ', end='")), so the literal text ', end=' was printed
# and every cell landed on its own line.  `end=''` is now a real keyword
# argument, producing an aligned table.
print(f"{'j':>5}", end='')
for base in theta_bases:
    print(f"{'base='+str(base):>20}", end='')
print()
for j_show in [0, 16, 32, 48, 63]:
    print(f"{j_show:>5}:", end='')
    for base in theta_bases:
        theta = base ** (-2*j_show/d)
        period = 2*np.pi/theta  # tokens per full rotation of this pair
        print(f"{period:>20.1f}", end='')
    print(' (period in tokens)')
print()
print('Max period = 2*pi/theta_min:')
for base, label in zip(theta_bases, labels):
    theta_min = base ** (-1.0) # j = d/2-1 -> base^{-(d-2)/d} ~ base^{-1}
    max_period = 2*np.pi / theta_min
    print(f' {label}: max period ~ {max_period:.0f} tokens')
if HAS_MPL:
    fig, ax = plt.subplots(figsize=(11, 6))
    for base, label, ls in zip(theta_bases, labels, ['-', '--', ':']):
        thetas = base ** (-2*j/d)
        periods = 2*np.pi / thetas  # rotation period (tokens) per dimension
        ax.semilogy(j, periods, linestyle=ls, linewidth=2.5, label=label)
    # BUG FIX: the reference line was originally drawn AFTER ax.legend(), so
    # its '128K context' label never appeared in the legend.  Draw it first.
    ax.axhline(128000, color=COLORS['neutral'], linestyle='--',
               linewidth=1, label='128K context')
    ax.set_xlabel('Dimension index $j$')
    ax.set_ylabel('Period (tokens)')
    ax.set_title('RoPE frequency periods: larger base = longer max context')
    ax.legend()
    fig.tight_layout()
    plt.show()
9. Summary: The Central Results
| Theorem | Formula | What It Means |
|---|---|---|
| Definition | $\hat{f}(\xi) = \int_{-\infty}^{\infty} f(t)\, e^{-2\pi i t \xi}\, dt$ | Decomposes into frequencies |
| Riemann-Lebesgue | $\hat{f}(\xi) \to 0$ as $\lvert\xi\rvert \to \infty$ | High frequencies vanish for $L^1$ signals |
| Plancherel | $\lVert f \rVert_2 = \lVert \hat{f} \rVert_2$ | Energy conserved; FT is unitary |
| Inversion | $f(t) = \int_{-\infty}^{\infty} \hat{f}(\xi)\, e^{2\pi i t \xi}\, d\xi$ | FT is invertible |
| Uncertainty | $\Delta_t\, \Delta_\xi \ge \frac{1}{4\pi}$ | Cannot concentrate in both domains |
| Dirac delta | $\hat{\delta} = 1$, $\hat{1} = \delta$ | Impulse = white; const = DC spike |
| Poisson sum | $\sum_n f(n) = \sum_n \hat{f}(n)$ | Unifies FT and Fourier series |
7.3 Periodic Signals in the Distribution Framework
A pure sinusoid such as $\cos(2\pi\xi_0 t)$ has FT $\tfrac{1}{2}\left[\delta(\xi - \xi_0) + \delta(\xi + \xi_0)\right]$ — two Dirac deltas. We approximate this by windowing the cosine with widening Gaussians and verify the distributional formula.
Code cell 42
# === 7.3 FT of Periodic Signals (Distributional) ===
import numpy as np
from scipy import fft as sp_fft
try:
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams.update({'figure.figsize': (10, 6), 'figure.dpi': 120,
'axes.spines.top': False, 'axes.spines.right': False})
COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
'tertiary': '#009988', 'error': '#CC3311',
'neutral': '#555555', 'highlight': '#EE3377'}
HAS_MPL = True
except ImportError:
HAS_MPL = False
np.random.seed(42)
def numerical_ft(f_vals, t_vals):
    """Approximate the continuous Fourier transform of samples on a uniform grid.

    f_vals: samples of f on t_vals (uniform, centered near 0).
    Returns (xi, F): fftshift-ordered frequency grid and complex spectrum.
    """
    step = t_vals[1] - t_vals[0]
    # ifftshift moves the t=0 sample to index 0 for the FFT; multiplying by
    # the grid step turns the DFT sum into a Riemann approximation of the
    # FT integral; the final fftshift centers xi=0 in the output.
    spectrum = sp_fft.fftshift(sp_fft.fft(sp_fft.ifftshift(f_vals))) * step
    freqs = sp_fft.fftshift(sp_fft.fftfreq(len(t_vals), d=step))
    return freqs, spectrum
# Approximate cos(2*pi*xi0*t) using windowed version: f(t) = cos(2*pi*xi0*t)*e^{-t^2/(2*sigma^2)}
# As sigma -> inf, this approaches pure cosine
# FT: (sigma/sqrt(2)) * [e^{-pi*sigma^2*(xi-xi0)^2} + e^{-pi*sigma^2*(xi+xi0)^2}]
# -> (1/2)[delta(xi-xi0) + delta(xi+xi0)] as sigma->inf
xi0 = 3.0 # frequency of cosine
t = np.linspace(-40, 40, 32768)
if HAS_MPL:
    fig, axes = plt.subplots(2, 3, figsize=(15, 8))
    fig.suptitle(r'FT of $\cos(2\pi\xi_0 t)$: Approaches two Dirac deltas as window widens',
                 fontsize=14)
    sigmas = [1.0, 3.0, 10.0]  # window widths: wider window -> narrower spectral peak
    for col, sigma in enumerate(sigmas):
        f = np.cos(2*np.pi*xi0*t) * np.exp(-t**2/(2*sigma**2))
        xi, F = numerical_ft(f, t)
        # Time domain: show a few window widths worth of the signal
        mask_t = np.abs(t) < 3*sigma
        axes[0, col].plot(t[mask_t], f[mask_t], color=COLORS['primary'])
        axes[0, col].set_title(f'Time: $\\sigma={sigma}$')
        axes[0, col].set_xlabel('$t$'); axes[0, col].set_ylabel('$f(t)$')
        # Frequency domain: zoom window scales with the peak width ~1/sigma
        mask_f = np.abs(xi - xi0) < 4 * (1/sigma + 0.1)
        axes[1, col].plot(xi[mask_f], np.real(F[mask_f]), color=COLORS['secondary'])
        axes[1, col].axvline(xi0, color=COLORS['error'], linestyle='--',
                             linewidth=1.5, label=f'$\\xi={xi0}$')
        axes[1, col].set_title(f'Freq: spike at $\\xi_0={xi0}$')
        axes[1, col].set_xlabel(r'$\xi$'); axes[1, col].set_ylabel(r'$\hat{f}(\xi)$')
        axes[1, col].legend()
    fig.tight_layout()
    plt.show()
print(f'As sigma->inf, FT of cos(2pi*{xi0}*t) concentrates at xi = +/-{xi0}')
print('In the limit: FT[cos(2*pi*xi0*t)] = (1/2)[delta(xi-xi0) + delta(xi+xi0)]')
7.4 Heat Equation Solution via Fourier Transform
The FT converts the heat PDE $\partial_t u = \kappa\, \partial_x^2 u$ into the per-frequency ODE $\partial_t \hat{u}(\xi, t) = -4\pi^2 \kappa \xi^2\, \hat{u}(\xi, t)$, solved by $\hat{u}(\xi, t) = \hat{u}_0(\xi)\, e^{-4\pi^2 \kappa \xi^2 t}$. The solution is Gaussian convolution — showing that heat diffusion smooths out high-frequency features fastest.
Code cell 44
# === Heat Equation Solution via FT ===
# u(x,t) = (u0 * G_t)(x) where G_t(x) = (4*pi*kappa*t)^{-1/2} e^{-x^2/(4*kappa*t)}
kappa = 0.1 # diffusivity
x = np.linspace(-10, 10, 2048)
dx = x[1] - x[0]
# Initial condition: narrow Gaussian (localized heat source)
u0 = np.exp(-x**2 / 0.1) * (np.abs(x) < 3) # narrow spike at x=0
u0 = u0 / (np.sum(u0) * dx) # normalize to unit integral (total heat = 1)
# Solve via FT: FT[u(t)](xi) = FT[u0](xi) * e^{-4*pi^2*kappa*xi^2*t}
xi, U0 = numerical_ft(u0, x)
dxi = xi[1] - xi[0]
times = [0.0, 0.1, 0.5, 1.0, 2.0, 5.0]
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    fig.suptitle('Heat Equation: Diffusion as Gaussian Convolution', fontsize=14)
    cmap = plt.cm.viridis
    for i, t_val in enumerate(times):
        if t_val == 0:
            # t=0: no diffusion yet; plot the initial condition directly
            u_t = u0
            U_t = U0
        else:
            # Solve in frequency domain: each mode decays independently
            decay = np.exp(-4*np.pi**2 * kappa * xi**2 * t_val)
            U_t = U0 * decay
            # Inverse FT to get u(x,t); the /dx undoes numerical_ft's dt factor
            u_t = np.real(sp_fft.fftshift(sp_fft.ifft(sp_fft.ifftshift(U_t))) / dx)
        color = cmap(i / len(times))
        mask = np.abs(x) < 8
        axes[0].plot(x[mask], u_t[mask], color=color, label=f't={t_val}')
        mask_f = np.abs(xi) < 3
        axes[1].semilogy(xi[mask_f], np.abs(U_t[mask_f])+1e-15, color=color, label=f't={t_val}')
    axes[0].set_xlabel('Position $x$')
    axes[0].set_ylabel('Temperature $u(x,t)$')
    axes[0].set_title('Time domain: heat spreads as Gaussian')
    axes[0].legend(loc='upper right', fontsize=9)
    axes[1].set_xlabel(r'Frequency $\xi$')
    axes[1].set_ylabel(r'$|\hat{u}(\xi,t)|$')
    axes[1].set_title('Frequency domain: high freqs decay fastest')
    axes[1].legend(fontsize=9)
    fig.tight_layout()
    plt.show()
print('High-frequency components (large |xi|) decay as e^{-4*pi^2*kappa*xi^2*t}')
print('Low frequencies persist longer -> smooth features survive, sharp features diffuse away')
print('This is exactly spectral bias in neural networks (high-freq components learned last)')
7.5 The Spectral Decay Theorem
Smoothness of $f$ determines how fast $\hat{f}(\xi)$ decays: roughly, each continuous derivative buys one extra power of $1/|\xi|$ in the decay rate. We compare the spectra of functions with different smoothness levels.
Code cell 46
# === Spectral Decay vs Smoothness ===
# Compare: rect (jump) -> O(1/xi), triangle (continuous) -> O(1/xi^2),
# smooth bump (C^inf) -> faster than polynomial
t = np.linspace(-10, 10, 16384)

def smooth_bump(t, width=1.0):
    """C^inf bump: exp(-1/(1-u^2)) for |u| < 1 with u = t/width, else 0."""
    scaled = t / width
    support = np.abs(scaled) < 1  # strictly inside [-width, width]
    bump = np.zeros_like(t)
    bump[support] = np.exp(-1.0 / (1.0 - scaled[support] ** 2))
    return bump
# Test signals ordered by increasing smoothness (label gives the smoothness class).
signals = {
    'rect (jump, $C^{-1}$)': (np.abs(t) <= 1.0).astype(float),
    'triangle ($C^0$)': np.maximum(1 - np.abs(t), 0),
    'smooth bump ($C^\\infty$)': smooth_bump(t, 1.0),
    'Gaussian ($C^\\omega$ analytic)': np.exp(-np.pi * t**2),
}
# Illustrative expected decay exponents alpha in |F(xi)| ~ 1/xi^alpha
# (the C^inf / analytic entries are placeholders: decay beats any polynomial).
expected_decay = {
    'rect (jump, $C^{-1}$)': 1,
    'triangle ($C^0$)': 2,
    'smooth bump ($C^\\infty$)': 10, # faster than any polynomial
    'Gaussian ($C^\\omega$ analytic)': 20, # exponential
}
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    fig.suptitle('Spectral Decay: Smoothness $\\Leftrightarrow$ Frequency Decay', fontsize=14)
    cmap = plt.cm.viridis
    colors_list = [cmap(i/4) for i in range(4)]
    for (name, f), color in zip(signals.items(), colors_list):
        xi, F = numerical_ft(f, t)
        mask_t = np.abs(t) < 3
        axes[0].plot(t[mask_t], f[mask_t], color=color, label=name.split('(')[0].strip())
        mask_f = (xi > 0.5) & (xi < 6)
        axes[1].loglog(xi[mask_f], np.abs(F[mask_f]) + 1e-15, color=color,
                       label=name.split('(')[0].strip())
    # Add reference decay lines for visual slope comparison
    xi_ref = np.linspace(1, 6, 100)
    axes[1].loglog(xi_ref, 0.3/xi_ref, 'k--', linewidth=1, alpha=0.7, label=r'$O(1/\xi)$')
    axes[1].loglog(xi_ref, 0.1/xi_ref**2, 'k:', linewidth=1, alpha=0.7, label=r'$O(1/\xi^2)$')
    axes[0].set_xlabel('$t$'); axes[0].set_ylabel('$f(t)$')
    axes[0].set_title('Time domain')
    axes[0].legend(fontsize=9)
    axes[1].set_xlabel(r'Frequency $\xi$'); axes[1].set_ylabel(r'$|\hat{f}(\xi)|$')
    axes[1].set_title('Spectrum (log-log): smoother = faster decay')
    axes[1].legend(fontsize=9)
    fig.tight_layout()
    plt.show()
# Quantify: fit a power law to each spectrum over a mid-frequency window.
# NOTE(review): colors_list is only defined in the HAS_MPL branch above, so
# this loop assumes matplotlib is available — confirm against notebook intent.
print('Power-law fits to spectral decay |F(xi)| ~ C/xi^alpha:')
for (name, f), color in zip(signals.items(), colors_list):
    xi, F = numerical_ft(f, t)
    mask_f = (xi > 1.0) & (xi < 5.0) & (np.abs(F) > 1e-10)
    if mask_f.sum() > 5:
        log_xi = np.log(xi[mask_f])
        log_F = np.log(np.abs(F[mask_f]) + 1e-15)
        slope, intercept = np.polyfit(log_xi, log_F, 1)
        print(f' {name.split("(")[0].strip():25s}: alpha ~ {-slope:.2f}')
3.4 The Duality Theorem: Computing New Transforms
Duality: If $g(t) = \hat{f}(t)$, then $\hat{g}(\xi) = f(-\xi)$.
This lets us compute the FT of $\mathrm{sinc}$ immediately: since $\mathcal{F}[\mathrm{rect}](\xi) = \mathrm{sinc}(\xi)$, by duality $\mathcal{F}[\mathrm{sinc}](\xi) = \mathrm{rect}(-\xi) = \mathrm{rect}(\xi)$ (rect is even).
Code cell 48
# === 3.4 Duality Theorem ===
# Verify: FT[sinc(t)](xi) = rect(xi)
t = np.linspace(-30, 30, 65536)
# sinc function (NumPy's normalized convention)
f_sinc = np.sinc(t) # normalized: sin(pi*t)/(pi*t)
xi, F_sinc = numerical_ft(f_sinc, t)
# Theory: FT[sinc(t)](xi) = rect(xi) = 1 for |xi| < 0.5, 0 otherwise
rect_theory = (np.abs(xi) <= 0.5).astype(float)
mask = np.abs(xi) < 2
err = np.max(np.abs(np.real(F_sinc[mask]) - rect_theory[mask]))
print('Duality: FT[rect(t)](xi) = sinc(xi)')
print(' => FT[sinc(t)](xi) = rect(xi) [duality theorem]')
print(f'Numerical error: {err:.4f}')
# sinc decays only like 1/t, so truncating at |t|=30 leaves visible ripple.
ok = err < 0.05 # some error due to finite truncation of sinc
print(f"{'PASS' if ok else 'FAIL'} - Duality theorem: FT[sinc(t)] = rect(xi)")
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    fig.suptitle('Duality: $\\mathcal{F}[\\mathrm{sinc}(t)](\\xi) = \\mathrm{rect}(\\xi)$')
    mask_t = np.abs(t) < 15
    axes[0].plot(t[mask_t], f_sinc[mask_t], color=COLORS['primary'])
    axes[0].set_xlabel('$t$'); axes[0].set_ylabel('$\\mathrm{sinc}(t)$')
    axes[0].set_title('Time domain: sinc function')
    mask_f = np.abs(xi) < 1.5
    axes[1].plot(xi[mask_f], np.real(F_sinc[mask_f]),
                 color=COLORS['primary'], label='FT[sinc] (numerical)')
    axes[1].plot(xi[mask_f], rect_theory[mask_f],
                 color=COLORS['error'], linestyle='--', linewidth=2, label='rect(xi) (theory)')
    axes[1].set_xlabel(r'$\xi$'); axes[1].set_ylabel(r'$\hat{f}(\xi)$')
    axes[1].set_title('Frequency: rectangular (ideal low-pass filter)')
    axes[1].legend()
    fig.tight_layout()
    plt.show()
print()
print('Physical meaning: sinc(t) in time = ideal low-pass filter (passes |xi|<0.5, blocks rest)')
5.3 Spectral Energy Distribution
We study how energy is distributed across frequencies for natural signals. This connects to compression (keep high-energy frequencies), spectral bias in neural networks, and the FNO truncation criterion.
Code cell 50
# === 5.3 Spectral Energy Distribution and Compression ===
t = np.linspace(-10, 10, 4096)
# Create a signal with known energy distribution:
# f(t) = 0.8*sin(2pi*1*t) + 0.4*sin(2pi*3*t) + 0.2*sin(2pi*7*t) + noise
f_signal = (0.8*np.sin(2*np.pi*1*t) +
            0.4*np.sin(2*np.pi*3*t) +
            0.2*np.sin(2*np.pi*7*t) +
            0.05*np.random.randn(len(t)))
xi, F = numerical_ft(f_signal, t)
dxi = xi[1] - xi[0]
dt = t[1] - t[0]
energy_spectrum = np.abs(F)**2
total_energy = np.sum(energy_spectrum) * dxi
# Compression: keep only frequencies with highest energy
xi_mask = np.abs(xi) < 10
sorted_idx = np.argsort(-energy_spectrum[xi_mask])  # indices sorted by descending energy
energies_sorted = energy_spectrum[xi_mask][sorted_idx]
cumulative_energy = np.cumsum(energies_sorted) / np.sum(energies_sorted)
print('Spectral energy concentration:')
for frac in [0.5, 0.8, 0.9, 0.95, 0.99, 0.999]:
    # Smallest number of top modes whose cumulative energy reaches `frac`
    n_modes = np.searchsorted(cumulative_energy, frac) + 1
    total_modes = xi_mask.sum()
    print(f' {frac*100:5.1f}% energy: {n_modes:5d}/{total_modes:5d} modes ({100*n_modes/total_modes:.1f}%)')
print(f'\nParseval check: ||f||^2 = {np.sum(f_signal**2)*dt:.6f}, ||F||^2 = {total_energy:.6f}')
if HAS_MPL:
    fig, axes = plt.subplots(1, 2, figsize=(13, 5))
    fig.suptitle('Spectral Energy Distribution')
    mask_f = (xi >= 0) & (xi <= 12)
    axes[0].bar(xi[mask_f], energy_spectrum[mask_f], width=dxi*0.8,
                color=COLORS['primary'], alpha=0.8)
    axes[0].set_xlabel(r'Frequency $\xi$ (Hz)')
    axes[0].set_ylabel(r'$|\hat{f}(\xi)|^2$ (energy density)')
    axes[0].set_title('Power spectrum: spikes at 1, 3, 7 Hz')
    # Cumulative energy vs number of modes kept (log x-axis)
    x_cum = np.arange(1, len(energies_sorted)+1)
    axes[1].semilogx(x_cum[:200], cumulative_energy[:200]*100,
                     color=COLORS['primary'], linewidth=2.5)
    for threshold in [80, 90, 95, 99]:
        n_at = np.searchsorted(cumulative_energy*100, threshold) + 1
        axes[1].axhline(threshold, color=COLORS['neutral'], linestyle=':', linewidth=1)
        axes[1].axvline(n_at, color=COLORS['neutral'], linestyle=':', linewidth=1)
    axes[1].set_xlabel('Number of modes kept')
    axes[1].set_ylabel('Cumulative energy (%)')
    axes[1].set_title('Compression: few modes capture most energy')
    fig.tight_layout()
    plt.show()
Code cell 51
# === 9. Final Summary Checks ===
# Re-run the notebook's headline theorems as compact numerical PASS/FAIL checks.
import numpy as np
from scipy import fft as sp_fft
np.random.seed(42)
def numerical_ft(f_vals, t_vals):
    """Continuous-FT approximation on a uniform grid; returns (xi, F), fftshift-ordered."""
    dt = t_vals[1] - t_vals[0]
    F = sp_fft.fftshift(sp_fft.fft(sp_fft.ifftshift(f_vals))) * dt
    xi = sp_fft.fftshift(sp_fft.fftfreq(len(t_vals), d=dt))
    return xi, F
t = np.linspace(-15, 15, 8192)
dt = t[1] - t[0]
g = np.exp(-np.pi * t**2)  # self-dual Gaussian test signal
xi, G = numerical_ft(g, t)
dxi = xi[1] - xi[0]
# 1. Plancherel: time-domain and frequency-domain energies agree
ok1 = abs(np.sum(g**2)*dt - np.sum(np.abs(G)**2)*dxi) < 0.001
print(f"{'PASS' if ok1 else 'FAIL'} - Plancherel: ||f||^2 = ||F||^2")
# 2. Self-duality of Gaussian
mask_self = np.abs(xi) < 5
ok2 = np.max(np.abs(np.real(G[mask_self]) - np.exp(-np.pi * xi[mask_self]**2))) < 0.001
print(f"{'PASS' if ok2 else 'FAIL'} - Self-dual Gaussian: FT of exp(-pi*t^2) is exp(-pi*xi^2)")
# 3. Time-shift property: shifting by a multiplies the spectrum by a phase
a = 3.0
g_shifted = np.exp(-np.pi*(t-a)**2)
xi_s, G_s = numerical_ft(g_shifted, t)
phase_theory = np.exp(-2j*np.pi*xi_s*a) * G
ok3 = np.max(np.abs(G_s[np.abs(xi_s)<3] - phase_theory[np.abs(xi_s)<3])) < 0.01
print(f"{'PASS' if ok3 else 'FAIL'} - Time-shift: FT[f(t-a)] = e^(-2pi*i*xi*a) * FT[f]")
# 4. Uncertainty principle for the Gaussian (the equality case).
# norm is computed in the time domain; by Plancherel (check 1) it also serves
# as the normalizer for the frequency-domain moments below.
norm = np.sum(g**2)*dt
mu_t = np.sum(t*g**2)*dt/norm
Dt = np.sqrt(np.sum((t-mu_t)**2*g**2)*dt/norm)
mu_xi = np.sum(xi*np.abs(G)**2)*dxi/norm
Dxi = np.sqrt(np.sum((xi-mu_xi)**2*np.abs(G)**2)*dxi/norm)
product = Dt*Dxi
bound = 1/(4*np.pi)
ok4 = product >= bound - 0.001
print(f"{'PASS' if ok4 else 'FAIL'} - Uncertainty: Delta_t*Delta_xi = {product:.6f} >= {bound:.6f}")
# 5. Poisson summation (trivial for the self-dual Gaussian: both sides identical)
ns = np.arange(-50, 51)
lhs = np.sum(np.exp(-np.pi * ns**2))
rhs = np.sum(np.exp(-np.pi * ns**2))
ok5 = abs(lhs - rhs) < 1e-10
print(f"{'PASS' if ok5 else 'FAIL'} - Poisson summation formula")
print()
print('All core theorems verified numerically.')
print('Notes: see §20-02 notes.md for full proofs and applications.')
End of Theory Notebook
You have explored:
- The T→∞ derivation of the Fourier Transform from Fourier series
- Standard FT pairs: Gaussian (self-dual), rect→sinc, exponential→Lorentzian
- Core properties: shift, scaling, differentiation with numerical verification
- Inversion theorem and Gibbs behavior at jump discontinuities
- Plancherel's theorem: energy conservation and Parseval trick
- Heisenberg uncertainty principle: $\Delta_t\, \Delta_\xi \ge \frac{1}{4\pi}$
- Distributions: Dirac delta, Dirac comb, Poisson summation
- AI applications: FNet, Random Fourier Features, Spectral Normalization, RoPE
Next: exercises.ipynb — 8 graded problems from mechanics to AI applications.
Next section: §20-03 DFT and FFT