Theory Notebook
Converted from `theory.ipynb` for web reading.
Limits and Continuity - Examples
This notebook demonstrates limits and continuity concepts with practical examples and visualizations.
Topics Covered
- Limit Intuition (Numerical Approach)
- One-Sided Limits
- Fundamental Limits
- L'Hôpital's Rule
- Limits at Infinity
- Continuity Concepts
- Squeeze Theorem
- Softmax Temperature Limit (ML)
- Sigmoid Saturation (ML)
- Learning Rate Decay (ML)
- Numerical Stability
Code cell 2
# Cell 2: global plotting configuration shared by every figure below.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Prefer seaborn styling when it is installed; otherwise fall back to the
# equivalent built-in matplotlib style sheet.
try:
    import seaborn as sns
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
else:
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True

# Shared rcParams: figure geometry, font sizing, despined axes, save options.
_RC_DEFAULTS = {
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
}
mpl.rcParams.update(_RC_DEFAULTS)

np.random.seed(42)  # reproducible randomness throughout the notebook
print("Plot setup complete.")
Code cell 3
import numpy as np
import numpy.linalg as la
from scipy import integrate, special, stats
from math import factorial
import matplotlib.patches as patches
# Colour-blind-safe palette (Paul Tol hex codes) plus numeric print defaults.
_PALETTE = [
    ("primary", "#0077BB"),
    ("secondary", "#EE7733"),
    ("tertiary", "#009988"),
    ("error", "#CC3311"),
    ("neutral", "#555555"),
    ("highlight", "#EE3377"),
]
COLORS = dict(_PALETTE)
HAS_MPL = True  # matplotlib was imported above
np.set_printoptions(precision=8, suppress=True)
np.random.seed(42)
def header(title):
    """Print *title* framed above and below by '=' rules of matching length."""
    rule = "=" * len(title)
    print("\n" + rule)
    print(title)
    print(rule)
def check_true(name, cond):
    """Print PASS/FAIL for a truthy condition and return it coerced to bool."""
    ok = bool(cond)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    return ok
def check_close(name, got, expected, tol=1e-8):
    """Compare *got* to *expected* with np.allclose (atol = rtol = tol);
    print a PASS/FAIL line and return the verdict."""
    ok = bool(np.allclose(got, expected, atol=tol, rtol=tol))
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {got}, expected {expected}")
    return ok
def centered_diff(f, x, h=1e-6):
    """Two-sided difference quotient for f'(x); O(h^2) truncation error."""
    ahead = f(x + h)
    behind = f(x - h)
    return (ahead - behind) / (2 * h)


def forward_diff(f, x, h=1e-6):
    """Forward difference quotient for f'(x); O(h) truncation error."""
    return (f(x + h) - f(x)) / h


def backward_diff(f, x, h=1e-6):
    """Backward difference quotient for f'(x); O(h) truncation error."""
    return (f(x) - f(x - h)) / h
def grad_check(f, x, analytic_grad, h=1e-6):
    """Relative error between *analytic_grad* and a centered-difference
    gradient of the scalar function *f* at *x*.  Values near machine
    epsilon indicate the analytic gradient is correct."""
    x = np.asarray(x, dtype=float)
    analytic_grad = np.asarray(analytic_grad, dtype=float)
    numeric_grad = np.zeros_like(x, dtype=float)
    # Perturb one coordinate at a time (works for any array shape).
    for idx in np.ndindex(x.shape):
        bumped_up = x.copy()
        bumped_dn = x.copy()
        bumped_up[idx] += h
        bumped_dn[idx] -= h
        numeric_grad[idx] = (f(bumped_up) - f(bumped_dn)) / (2 * h)
    # Symmetric normalisation; +1e-12 guards against a zero denominator.
    scale = la.norm(analytic_grad) + la.norm(numeric_grad) + 1e-12
    return la.norm(analytic_grad - numeric_grad) / scale
def check(name, got, expected, tol=1e-8):
    """Backward-compatible alias that delegates to check_close."""
    return check_close(name, got, expected, tol=tol)


print("Chapter helper setup complete.")
1. Limit Intuition (Numerical Approach)
We approach limits numerically by evaluating the function at points closer and closer to the target value.
Code cell 5
print("Evaluate: lim(x→2) (x² - 4)/(x - 2)")
print("=" * 50)


def f(x):
    """Rational function with a removable singularity at x = 2."""
    return (x**2 - 4) / (x - 2)


# March toward 2 from below and watch f approach the limit value.
print("\nApproaching from the LEFT (x < 2):")
for x in (1.9, 1.99, 1.999, 1.9999, 1.99999):
    print(f" f({x}) = {f(x):.8f}")

# Same from above.
print("\nApproaching from the RIGHT (x > 2):")
for x in (2.1, 2.01, 2.001, 2.0001, 2.00001):
    print(f" f({x}) = {f(x):.8f}")

print("\n" + "=" * 50)
print("Both sides approach 4!")
print("\nAlgebraic verification:")
print("(x² - 4)/(x - 2) = (x-2)(x+2)/(x-2) = x + 2")
print("lim(x→2) (x + 2) = 4 ✓")
Code cell 6
# Cell 6: plot f with the removable hole at (2, 4) and arrows showing
# the two-sided approach to the limit.
fig, ax = plt.subplots(figsize=(10, 6))

# Sample each side of x = 2 separately so no line bridges the hole.
xs_before = np.linspace(0, 1.99, 100)
xs_after = np.linspace(2.01, 4, 100)
ax.plot(xs_before, f(xs_before), 'b-', linewidth=2, label=r'$f(x) = \frac{x^2-4}{x-2}$')
ax.plot(xs_after, f(xs_after), 'b-', linewidth=2)

# Open circle marks the hole at (2, 4).
ax.plot(2, 4, 'wo', markersize=10, markeredgecolor='blue', markeredgewidth=2)

# Red arrows indicate the approach from either side.
ax.annotate('', xy=(2, 4), xytext=(1.5, 3.5),
            arrowprops=dict(arrowstyle='->', color='red', lw=2))
ax.annotate('', xy=(2, 4), xytext=(2.5, 4.5),
            arrowprops=dict(arrowstyle='->', color='red', lw=2))

ax.axhline(y=4, color='gray', linestyle='--', alpha=0.5, label='Limit = 4')
ax.axvline(x=2, color='gray', linestyle='--', alpha=0.5)
ax.set_xlabel('x', fontsize=12)
ax.set_ylabel('f(x)', fontsize=12)
ax.set_title(r'Limit: $\lim_{x \to 2} \frac{x^2-4}{x-2} = 4$', fontsize=14)
ax.legend()
ax.set_xlim(0, 4)
ax.set_ylim(0, 6)
plt.tight_layout()
plt.show()
2. One-Sided Limits
Sometimes the left-hand and right-hand limits are different. When they disagree, the two-sided limit does not exist.
Code cell 8
print("Consider f(x) = |x|/x (the sign function)")
print("=" * 50)


def sign_func(x):
    """Sign of x: -1, 0, or +1 (equals |x|/x for x != 0)."""
    return np.sign(x)


# One-sided samples approaching 0 from below: all give -1.
print("\nFrom the LEFT (x → 0⁻):")
for x in (-0.1, -0.01, -0.001, -0.0001):
    print(f" f({x}) = {sign_func(x)}")
print(" Left-hand limit = -1")

# And from above: all give +1.
print("\nFrom the RIGHT (x → 0⁺):")
for x in (0.1, 0.01, 0.001, 0.0001):
    print(f" f({x}) = {sign_func(x)}")
print(" Right-hand limit = +1")

print("\n" + "=" * 50)
print("Since left limit ≠ right limit,")
print("lim(x→0) |x|/x does NOT exist!")
Code cell 9
# Cell 9: plot the sign function with open circles at the jump.
fig, ax = plt.subplots(figsize=(10, 6))

# Plot each branch separately so no line bridges the jump at x = 0.
x_neg = np.linspace(-2, -0.01, 100)
x_pos = np.linspace(0.01, 2, 100)
ax.plot(x_neg, sign_func(x_neg), 'b-', linewidth=2, label=r'$f(x) = |x|/x$')
ax.plot(x_pos, sign_func(x_pos), 'b-', linewidth=2)

# Open circles mark the excluded values at x = 0.
ax.plot(0, -1, 'wo', markersize=10, markeredgecolor='blue', markeredgewidth=2)
ax.plot(0, 1, 'wo', markersize=10, markeredgecolor='blue', markeredgewidth=2)

ax.axhline(y=0, color='gray', linestyle='-', alpha=0.3)
ax.axvline(x=0, color='gray', linestyle='-', alpha=0.3)

# Label both one-sided limits.
ax.annotate('Right limit = +1', xy=(0.1, 1), xytext=(0.5, 1.3),
            fontsize=11, arrowprops=dict(arrowstyle='->', color='green'))
ax.annotate('Left limit = -1', xy=(-0.1, -1), xytext=(-0.5, -1.3),
            fontsize=11, arrowprops=dict(arrowstyle='->', color='red'))

ax.set_xlabel('x', fontsize=12)
ax.set_ylabel('f(x)', fontsize=12)
ax.set_title('One-Sided Limits: Sign Function', fontsize=14)
ax.legend(loc='upper right')
ax.set_xlim(-2, 2)
ax.set_ylim(-1.5, 1.5)
plt.tight_layout()
plt.show()
3. Fundamental Limits
These are essential limits that appear frequently in calculus and analysis.
Code cell 11
print("FUNDAMENTAL LIMITS")
print("=" * 60)

# Limit 1: sin(x)/x → 1 as x → 0.
print("\n1. lim(x→0) sin(x)/x = 1")
print("-" * 40)
for x in (0.1, 0.01, 0.001, 0.0001, 0.00001):
    val = np.sin(x) / x
    print(f" x = {x:.5f}: sin(x)/x = {val:.12f}")

# Limit 2: (e^x - 1)/x → 1 as x → 0.
print("\n2. lim(x→0) (eˣ - 1)/x = 1")
print("-" * 40)
for x in (0.1, 0.01, 0.001, 0.0001, 0.00001):
    val = (np.exp(x) - 1) / x
    print(f" x = {x:.5f}: (eˣ - 1)/x = {val:.12f}")

# Limit 3: compound interest sequence (1 + 1/n)^n → e.
print("\n3. lim(n→∞) (1 + 1/n)ⁿ = e")
print("-" * 40)
for n in (10, 100, 1000, 10000, 100000):
    val = (1 + 1/n)**n
    error = abs(val - np.e)
    print(f" n = {n:6d}: (1 + 1/n)ⁿ = {val:.12f} (error = {error:.2e})")
print(f" True e = {np.e:.12f}")
Code cell 12
# Cell 12: the three fundamental limits, side by side.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# sin(x)/x — drop x = 0 to avoid 0/0.
x = np.linspace(-10, 10, 1000)
x = x[x != 0]
y = np.sin(x) / x
axes[0].plot(x, y, 'b-', linewidth=2)
axes[0].axhline(y=1, color='r', linestyle='--', label='y = 1')
axes[0].set_xlabel('x')
axes[0].set_ylabel('sin(x)/x')
axes[0].set_title(r'$\lim_{x \to 0} \frac{\sin x}{x} = 1$')
axes[0].legend()
axes[0].set_ylim(-0.5, 1.2)

# (e^x - 1)/x — mask a small neighbourhood of 0.
x = np.linspace(-2, 2, 1000)
x = x[np.abs(x) > 0.001]
y = (np.exp(x) - 1) / x
axes[1].plot(x, y, 'b-', linewidth=2)
axes[1].axhline(y=1, color='r', linestyle='--', label='y = 1')
axes[1].set_xlabel('x')
axes[1].set_ylabel(r'$(e^x - 1)/x$')
axes[1].set_title(r'$\lim_{x \to 0} \frac{e^x - 1}{x} = 1$')
axes[1].legend()
axes[1].set_ylim(-0.5, 4)

# (1 + 1/n)^n — monotone approach to e.
n = np.arange(1, 101)
y = (1 + 1/n)**n
axes[2].plot(n, y, 'b-', linewidth=2)
axes[2].axhline(y=np.e, color='r', linestyle='--', label=f'y = e ≈ {np.e:.4f}')
axes[2].set_xlabel('n')
axes[2].set_ylabel(r'$(1 + 1/n)^n$')
axes[2].set_title(r'$\lim_{n \to \infty} (1 + 1/n)^n = e$')
axes[2].legend()
plt.tight_layout()
plt.show()
4. L'Hôpital's Rule
For indeterminate forms $0/0$ or $\infty/\infty$: if $\lim_{x\to a} f'(x)/g'(x)$ exists, then $\lim_{x\to a} f(x)/g(x) = \lim_{x\to a} f'(x)/g'(x)$.
Code cell 14
print("L'HÔPITAL'S RULE EXAMPLE")
print("=" * 60)
print("\nFind: lim(x→0) (eˣ - 1 - x)/x²")
print("\n--- Step 1: Check indeterminate form ---")
print("At x = 0: numerator = e⁰ - 1 - 0 = 0")
print("At x = 0: denominator = 0² = 0")
print("Form: 0/0 → Apply L'Hôpital")
print("\n--- Step 2: First application ---")
print("f(x) = eˣ - 1 - x → f'(x) = eˣ - 1")
print("g(x) = x² → g'(x) = 2x")
print("\nlim(x→0) (eˣ - 1)/(2x) → still 0/0!")
print("\n--- Step 3: Second application ---")
print("f'(x) = eˣ - 1 → f''(x) = eˣ")
print("g'(x) = 2x → g''(x) = 2")
print("\nlim(x→0) eˣ/2 = e⁰/2 = 1/2")
print("\n--- Numerical Verification ---")


def f(x):
    """Indeterminate 0/0 ratio whose limit at 0 is 1/2."""
    return (np.exp(x) - 1 - x) / x**2


# Values shrink toward 0; each evaluation should near 0.5.
for x in (0.1, 0.01, 0.001, 0.0001, 0.00001):
    print(f" f({x}) = {f(x):.12f}")
print("\n Approaches 0.5 ✓")
5. Limits at Infinity
For polynomial ratios, the behavior depends on the degrees of the polynomials.
Code cell 16
print("LIMITS AT INFINITY")
print("=" * 60)

# Case 1: equal degrees — limit is the ratio of leading coefficients.
print("\n1. lim(x→∞) (2x² + 3x)/(x² - 1)")
print(" Equal degrees → ratio of leading coefficients")
print(" = 2/1 = 2")


def f1(x):
    """Degree-2 over degree-2 rational function; limit 2 at infinity."""
    return (2*x**2 + 3*x) / (x**2 - 1)


print("\n Numerical verification:")
for x in (10, 100, 1000, 10000, 100000):
    print(f" f({x:6d}) = {f1(x):.10f}")

# Case 2: numerator degree lower — limit 0.
print("\n2. lim(x→∞) x/(x² + 1)")
print(" Numerator degree < Denominator degree → limit is 0")


def f2(x):
    """Degree-1 over degree-2 rational function; limit 0 at infinity."""
    return x / (x**2 + 1)


print("\n Numerical verification:")
for x in (10, 100, 1000, 10000, 100000):
    print(f" f({x:6d}) = {f2(x):.10f}")

# Case 3: numerator degree higher — unbounded.
print("\n3. lim(x→∞) x³/(x² + 1)")
print(" Numerator degree > Denominator degree → limit is ±∞")


def f3(x):
    """Degree-3 over degree-2 rational function; diverges at infinity."""
    return x**3 / (x**2 + 1)


print("\n Numerical verification:")
for x in (10, 100, 1000, 10000):
    print(f" f({x:5d}) = {f3(x):.4f}")
6. Continuity Concepts
A function $f$ is continuous at $x = a$ if:
- $f(a)$ is defined
- $\lim_{x \to a} f(x)$ exists
- $\lim_{x \to a} f(x) = f(a)$
Code cell 18
# Cell 18: catalogue of the three classical discontinuity types (text only).
print("TYPES OF DISCONTINUITIES")
print("=" * 60)

print("\n1. REMOVABLE DISCONTINUITY (hole)")
print(" f(x) = (x² - 4)/(x + 2) at x = -2")
print(" f(-2) is undefined (0/0)")
print(" But: f(x) = (x-2)(x+2)/(x+2) = x - 2")
print(" lim(x→-2) = -4 exists")
print(" Can be 'fixed' by defining f(-2) = -4")

print("\n2. JUMP DISCONTINUITY")
print(" f(x) = |x|/x at x = 0")
print(" Left limit = -1, Right limit = +1")
print(" Limits exist but are different")

print("\n3. INFINITE DISCONTINUITY (vertical asymptote)")
print(" f(x) = 1/(x² - 1) at x = ±1")
print(" lim(x→1) = ±∞")
print(" Limit does not exist (infinite)")
Code cell 19
# Cell 19: one panel per discontinuity type.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Removable: hole at x = -2 (points near the singularity are masked out).
x1 = np.linspace(-5, 5, 1000)
x1 = x1[np.abs(x1 + 2) > 0.05]
y1 = (x1**2 - 4) / (x1 + 2)
axes[0].plot(x1, y1, 'b-', linewidth=2)
axes[0].plot(-2, -4, 'wo', markersize=10, markeredgecolor='blue', markeredgewidth=2)
axes[0].set_xlabel('x')
axes[0].set_ylabel('f(x)')
axes[0].set_title('Removable (hole at x = -2)')
axes[0].set_xlim(-5, 3)
axes[0].set_ylim(-6, 2)

# Jump: sign function plotted branch by branch.
x2_neg = np.linspace(-2, -0.01, 100)
x2_pos = np.linspace(0.01, 2, 100)
axes[1].plot(x2_neg, np.sign(x2_neg), 'b-', linewidth=2)
axes[1].plot(x2_pos, np.sign(x2_pos), 'b-', linewidth=2)
axes[1].plot(0, -1, 'bo', markersize=8, fillstyle='none', markeredgewidth=2)
axes[1].plot(0, 1, 'bo', markersize=8, fillstyle='none', markeredgewidth=2)
axes[1].set_xlabel('x')
axes[1].set_ylabel('f(x)')
axes[1].set_title('Jump (at x = 0)')

# Infinite: vertical asymptotes at x = ±1, masked near the poles.
x3 = np.linspace(-3, 3, 1000)
mask = (np.abs(x3 - 1) > 0.1) & (np.abs(x3 + 1) > 0.1)
x3_clean = x3[mask]
y3 = 1 / (x3_clean**2 - 1)
axes[2].plot(x3_clean, y3, 'b-', linewidth=2)
axes[2].axvline(x=-1, color='gray', linestyle='--', alpha=0.5)
axes[2].axvline(x=1, color='gray', linestyle='--', alpha=0.5)
axes[2].set_xlabel('x')
axes[2].set_ylabel('f(x)')
axes[2].set_title('Infinite (asymptotes at x = ±1)')
axes[2].set_ylim(-5, 5)
plt.tight_layout()
plt.show()
7. Squeeze Theorem
If $g(x) \le f(x) \le h(x)$ near $a$ and $\lim_{x \to a} g(x) = \lim_{x \to a} h(x) = L$, then $\lim_{x \to a} f(x) = L$.
Code cell 21
print("SQUEEZE THEOREM EXAMPLE")
print("=" * 60)
print("\nFind: lim(x→0) x² sin(1/x)")
print("\n--- Key insight ---")
print("-1 ≤ sin(1/x) ≤ 1 for all x ≠ 0")
print("\nMultiply by x² (positive for x ≠ 0):")
print("-x² ≤ x² sin(1/x) ≤ x²")
print("\n--- Apply squeeze ---")
print("lim(x→0) (-x²) = 0")
print("lim(x→0) (x²) = 0")
print("\nBy Squeeze Theorem: lim(x→0) x² sin(1/x) = 0")
print("\n--- Numerical verification ---")


def f(x):
    """Oscillates wildly near 0 but is pinned between ±x²."""
    return x**2 * np.sin(1/x)


# Each sample stays inside the shrinking envelope ±x².
for x in (0.1, 0.01, 0.001, 0.0001):
    val = f(x)
    bound = x**2
    print(f" x = {x}: f(x) = {val:12.6e}, bounds = ±{bound:.6e}")
Code cell 22
# Cell 22: the oscillating function trapped inside the ±x² envelope.
fig, ax = plt.subplots(figsize=(12, 6))
x = np.linspace(0.01, 1, 1000)
y = x**2 * np.sin(1/x)
upper = x**2
lower = -x**2

# Shade the squeeze region, then overlay the function and both bounds.
ax.fill_between(x, lower, upper, alpha=0.3, color='green', label='Squeeze bounds')
ax.plot(x, y, 'b-', linewidth=2, label=r'$x^2 \sin(1/x)$')
ax.plot(x, upper, 'r--', linewidth=1.5, label=r'$x^2$')
ax.plot(x, lower, 'r--', linewidth=1.5, label=r'$-x^2$')

ax.axhline(y=0, color='gray', linestyle='-', alpha=0.3)
ax.axvline(x=0, color='gray', linestyle='-', alpha=0.3)
ax.set_xlabel('x', fontsize=12)
ax.set_ylabel('f(x)', fontsize=12)
ax.set_title(r'Squeeze Theorem: $\lim_{x \to 0} x^2 \sin(1/x) = 0$', fontsize=14)
ax.legend(loc='upper right')
ax.set_xlim(0, 1)
ax.set_ylim(-0.3, 0.3)
plt.tight_layout()
plt.show()
8. Softmax Temperature Limit (ML Application)
The softmax function with temperature parameter $T$: $\operatorname{softmax}(z)_i = e^{z_i/T} / \sum_j e^{z_j/T}$
- As $T \to 0^+$: becomes "hard" max (all probability on largest logit)
- As $T \to \infty$: becomes uniform distribution
Code cell 24
print("SOFTMAX TEMPERATURE LIMIT")
print("=" * 60)


def softmax(z, T=1.0):
    """Temperature-scaled softmax: exp(z/T) normalised to sum to 1.

    The max logit is subtracted before exponentiating to prevent
    overflow; softmax is invariant under this shift.
    """
    scaled = z / T
    weights = np.exp(scaled - np.max(scaled))
    return weights / np.sum(weights)


z = np.array([1.0, 2.0, 3.0])
print(f"\nLogits z = {z}")
print("\nSoftmax at different temperatures:")
print("-" * 50)

# Sweep T from very soft (uniform) to very sharp (argmax).
temperatures = [100.0, 10.0, 1.0, 0.5, 0.1, 0.01]
for T in temperatures:
    probs = softmax(z, T)
    print(f" T = {T:6.2f}: {np.round(probs, 5)}")

print("\n" + "=" * 60)
print("Observations:")
print(" T → ∞: Softmax → uniform [0.333, 0.333, 0.333]")
print(" T → 0: Softmax → hard max [0, 0, 1]")
Code cell 25
# Cell 25: each class probability as a function of temperature.
fig, ax = plt.subplots(figsize=(12, 6))
z = np.array([1.0, 2.0, 3.0])
temps = np.logspace(-2, 2, 100)
probs = np.array([softmax(z, T) for T in temps])

# One curve per logit, on a log-scaled temperature axis.
for i, label in enumerate(['z₁ = 1', 'z₂ = 2', 'z₃ = 3']):
    ax.semilogx(temps, probs[:, i], linewidth=2, label=label)

ax.axhline(y=1/3, color='gray', linestyle='--', alpha=0.5, label='Uniform')
ax.axhline(y=1, color='gray', linestyle=':', alpha=0.5)
ax.axhline(y=0, color='gray', linestyle=':', alpha=0.5)
ax.set_xlabel('Temperature T', fontsize=12)
ax.set_ylabel('Probability', fontsize=12)
ax.set_title('Softmax Temperature Effect', fontsize=14)
ax.legend()
ax.set_ylim(-0.05, 1.05)

# Annotate the two limiting regimes.
ax.annotate('T→0: Hard max', xy=(0.01, 0.95), fontsize=10)
ax.annotate('T→∞: Uniform', xy=(50, 0.38), fontsize=10)
plt.tight_layout()
plt.show()
9. Sigmoid Saturation (ML Application)
The sigmoid function: $\sigma(x) = \dfrac{1}{1 + e^{-x}}$
Limits: $\lim_{x \to +\infty} \sigma(x) = 1$ and $\lim_{x \to -\infty} \sigma(x) = 0$
Code cell 27
print("SIGMOID SATURATION")
print("=" * 60)


def sigmoid(x):
    """Logistic function 1/(1 + e^(-x)); maps reals to (0, 1)."""
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    """σ'(x) = σ(x)(1 - σ(x)); peaks at 0.25 when x = 0."""
    s = sigmoid(x)
    return s * (1 - s)


print("\nσ(x) = 1/(1 + e^(-x))")

# Positive saturation: σ → 1 and σ' → 0.
print("\nAs x → +∞:")
for x in (1, 5, 10, 20, 50):
    print(f" σ({x:2d}) = {sigmoid(x):.12f} σ'({x:2d}) = {sigmoid_derivative(x):.2e}")
print(" → 1")

# Negative saturation: σ → 0 and σ' → 0.
print("\nAs x → -∞:")
for x in (-1, -5, -10, -20, -50):
    print(f" σ({x:3d}) = {sigmoid(x):.12e} σ'({x:3d}) = {sigmoid_derivative(x):.2e}")
print(" → 0")

print("\n" + "=" * 60)
print("KEY INSIGHT for ML:")
print("At saturation: σ'(x) ≈ 0 → Vanishing gradient problem!")
Code cell 28
# Cell 28: sigmoid (left) and its derivative (right), with the saturation
# zones highlighted to illustrate the vanishing-gradient problem.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
x = np.linspace(-8, 8, 1000)
y = sigmoid(x)
dy = sigmoid_derivative(x)

# Left panel: sigmoid with its two horizontal asymptotes.
axes[0].plot(x, y, 'b-', linewidth=2, label='σ(x)')
axes[0].axhline(y=1, color='r', linestyle='--', alpha=0.5, label='y = 1 (limit)')
axes[0].axhline(y=0, color='r', linestyle='--', alpha=0.5, label='y = 0 (limit)')
axes[0].axhline(y=0.5, color='gray', linestyle=':', alpha=0.5)
axes[0].fill_between(x[x > 5], 0, 1, alpha=0.2, color='red', label='Saturation zone')
axes[0].fill_between(x[x < -5], 0, 1, alpha=0.2, color='red')
axes[0].set_xlabel('x', fontsize=12)
axes[0].set_ylabel('σ(x)', fontsize=12)
axes[0].set_title('Sigmoid Function', fontsize=14)
axes[0].legend()

# Right panel: derivative collapses toward 0 in the saturation zones.
axes[1].plot(x, dy, 'g-', linewidth=2, label="σ'(x) = σ(x)(1-σ(x))")
axes[1].fill_between(x[x > 5], 0, dy[x > 5], alpha=0.3, color='red', label='Vanishing gradient')
axes[1].fill_between(x[x < -5], 0, dy[x < -5], alpha=0.3, color='red')
axes[1].axhline(y=0, color='gray', linestyle='-', alpha=0.3)
axes[1].set_xlabel('x', fontsize=12)
axes[1].set_ylabel("σ'(x)", fontsize=12)
axes[1].set_title('Sigmoid Derivative (Vanishing at Saturation)', fontsize=14)
axes[1].legend()
plt.tight_layout()
plt.show()
10. Learning Rate Decay (ML Application)
For convergence of SGD, we need:
- $\sum_t \alpha_t = \infty$ (can reach any point)
- $\sum_t \alpha_t^2 < \infty$ (variance goes to zero)
Code cell 30
print("LEARNING RATE DECAY CONVERGENCE")
print("=" * 60)
print("\nFor SGD convergence, we need:")
print(" 1. Σ αₜ = ∞ (can explore the whole space)")
print(" 2. Σ αₜ² < ∞ (noise vanishes in the limit)")
print("\n--- Example: αₜ = 1/t ---")

# Condition 1: partial sums of the harmonic series grow without bound.
print("\nCondition 1: Σ(1/t)")
for n in (10, 100, 1000, 10000, 100000):
    harmonic = sum(1/t for t in range(1, n+1))
    print(f" Σ(t=1 to {n:6d}) 1/t = {harmonic:.4f}")
print(" → ∞ (harmonic series diverges) ✓")

# Condition 2: partial sums of 1/t² approach the Basel constant π²/6.
print("\nCondition 2: Σ(1/t²)")
for n in (10, 100, 1000, 10000, 100000):
    sum_sq = sum(1/t**2 for t in range(1, n+1))
    print(f" Σ(t=1 to {n:6d}) 1/t² = {sum_sq:.8f}")
print(f" → π²/6 ≈ {np.pi**2/6:.8f} (converges) ✓")
Code cell 31
# Cell 31: decay schedule, its divergent sum, and its convergent sum of squares.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
t = np.arange(1, 101)

# Panel 1: the 1/t schedule itself.
alpha = 1 / t
axes[0].plot(t, alpha, 'b-', linewidth=2)
axes[0].set_xlabel('Iteration t', fontsize=12)
axes[0].set_ylabel('αₜ = 1/t', fontsize=12)
axes[0].set_title('Learning Rate Decay', fontsize=14)

# Panel 2: running harmonic sum (diverges).
cumsum = np.cumsum(alpha)
axes[1].plot(t, cumsum, 'g-', linewidth=2)
axes[1].set_xlabel('n', fontsize=12)
axes[1].set_ylabel('Σαₜ', fontsize=12)
axes[1].set_title('Σ(1/t) → ∞ (diverges)', fontsize=14)

# Panel 3: running sum of squares (converges to π²/6).
cumsum_sq = np.cumsum(alpha**2)
axes[2].plot(t, cumsum_sq, 'r-', linewidth=2, label='Σ(1/t²)')
axes[2].axhline(y=np.pi**2/6, color='gray', linestyle='--', label=f'π²/6 ≈ {np.pi**2/6:.4f}')
axes[2].set_xlabel('n', fontsize=12)
axes[2].set_ylabel('Σαₜ²', fontsize=12)
axes[2].set_title('Σ(1/t²) → π²/6 (converges)', fontsize=14)
axes[2].legend()
plt.tight_layout()
plt.show()
11. Numerical Stability
When computing limits numerically, we must be careful about catastrophic cancellation.
Code cell 33
print("NUMERICAL STABILITY")
print("=" * 60)
print("\nComputing (eˣ - 1)/x as x → 0")
print("The limit is 1, but naive computation fails!")
print("\n--- Naive vs Stable Computation ---")
print(f"{'x':>12} {'Naive':>16} {'Stable':>16} {'Error (naive)':>16}")
print("-" * 64)

# As x shrinks, exp(x) - 1 loses all significant digits to cancellation,
# while expm1 keeps full precision.
for x in (1e-5, 1e-8, 1e-10, 1e-12, 1e-15, 1e-16):
    naive = (np.exp(x) - 1) / x
    stable = np.expm1(x) / x if x != 0 else 1.0
    error = abs(naive - 1)
    print(f"{x:12.0e} {naive:16.12f} {stable:16.12f} {error:16.2e}")

print("\n" + "=" * 60)
print("Explanation:")
print(" For x = 1e-16: e^x ≈ 1.0000000000000001")
print(" Subtracting 1 causes catastrophic cancellation!")
print("\nSolution: Use np.expm1(x) which computes e^x - 1 accurately for small x")
Code cell 34
# Cell 34: the same cancellation issue for log(1 + x), fixed by np.log1p.
print("\nAnother Example: log(1 + x) for small x")
print("=" * 60)
print(f"{'x':>12} {'Naive log(1+x)':>18} {'Stable log1p(x)':>18}")
print("-" * 52)

# Once x drops below machine epsilon, 1 + x rounds to 1 and the naive
# formula returns exactly 0; log1p keeps the leading-order value x.
for x in (1e-5, 1e-10, 1e-15, 1e-16, 1e-17):
    naive = np.log(1 + x)
    stable = np.log1p(x)
    print(f"{x:12.0e} {naive:18.15f} {stable:18.15f}")

print("\nFor very small x, log(1+x) ≈ x (from Taylor series)")
12. ε-δ Definition: Interactive Visualization
The formal definition: $\lim_{x \to a} f(x) = L$ means that for every $\varepsilon > 0$ there exists $\delta > 0$ such that $0 < |x - a| < \delta$ implies $|f(x) - L| < \varepsilon$.
Code cell 36
# === 12.1 epsilon-delta Visualization ===
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

COLORS = {'primary': '#0077BB', 'secondary': '#EE7733', 'tertiary': '#009988',
          'error': '#CC3311', 'neutral': '#555555', 'highlight': '#EE3377'}


def f(x):
    """(x²-4)/(x-2) with NaN substituted at the removable singularity x = 2."""
    return np.where(np.abs(x - 2) > 1e-10, (x**2 - 4)/(x - 2), np.nan)


a, L = 2.0, 4.0
eps = 0.5
delta = eps / 2  # for f(x) = x+2, |f(x)-L| = |x-2| so delta = eps works

fig, ax = plt.subplots(figsize=(10, 7))
x = np.linspace(0.5, 3.5, 1000)
y = f(x)
ax.plot(x, y, color=COLORS['primary'], lw=2.5, label=r'$f(x)=(x^2-4)/(x-2)$')
ax.plot(a, L, 'o', ms=10, color='white', markeredgecolor=COLORS['primary'],
        markeredgewidth=2.5, zorder=5, label=f'Hole at x={a}')

# Horizontal ε-band around the limit value L.
ax.axhspan(L - eps, L + eps, alpha=0.15, color=COLORS['secondary'],
           label=f'ε-band: ({L-eps:.1f}, {L+eps:.1f})')
ax.axhline(L - eps, color=COLORS['secondary'], ls='--', lw=1.2)
ax.axhline(L + eps, color=COLORS['secondary'], ls='--', lw=1.2)
ax.axhline(L, color=COLORS['neutral'], ls=':', lw=1)

# Vertical δ-window around the approach point a.
ax.axvspan(a - delta, a + delta, alpha=0.12, color=COLORS['tertiary'],
           label=f'δ-window: ({a-delta:.2f}, {a+delta:.2f})')
ax.axvline(a - delta, color=COLORS['tertiary'], ls='--', lw=1.2)
ax.axvline(a + delta, color=COLORS['tertiary'], ls='--', lw=1.2)

# Text labels for ε, δ, and L.
ax.annotate(f'ε = {eps}', xy=(3.2, L+eps), fontsize=11, color=COLORS['secondary'])
ax.annotate(f'δ = {delta}', xy=(a+delta+0.02, 1.0), fontsize=11, color=COLORS['tertiary'])
ax.annotate(f'L = {L}', xy=(0.6, L+0.1), fontsize=11, color=COLORS['neutral'])

ax.set_xlabel('x', fontsize=13)
ax.set_ylabel('f(x)', fontsize=13)
ax.set_title(r'ε-δ Definition: $\lim_{x\to 2}\frac{x^2-4}{x-2} = 4$', fontsize=14)
ax.legend(fontsize=10)
ax.set_xlim(0.5, 3.5)
ax.set_ylim(1.0, 7.0)
fig.tight_layout()
plt.show()

# Verification: every x in the punctured δ-window must land in the ε-band.
x_test = np.linspace(a - delta + 0.001, a + delta - 0.001, 1000)
x_test = x_test[np.abs(x_test - a) > 1e-10]  # exclude a itself
f_vals = f(x_test)
f_vals = f_vals[~np.isnan(f_vals)]
all_in_band = np.all((f_vals > L - eps) & (f_vals < L + eps))
print(f'For δ={delta}: all f(x) in ε-band? {all_in_band}')
print(f'f(x) range in δ-window: [{f_vals.min():.4f}, {f_vals.max():.4f}]')
print(f'ε-band: ({L-eps:.4f}, {L+eps:.4f})')
print(f'PASS: ε-δ verified for ε={eps}, δ={delta}' if all_in_band else 'FAIL')
13. Limit Laws: Numerical Verification
If $\lim_{x \to a} f(x) = L_1$ and $\lim_{x \to a} g(x) = L_2$, then:
- $\lim (f + g) = L_1 + L_2$, $\lim (f \cdot g) = L_1 L_2$, $\lim (f/g) = L_1/L_2$ (if $L_2 \neq 0$)
Code cell 38
# === 13.1 Limit Laws Verification ===
import numpy as np

# f(x) = x^2 + 1 and g(x) = sin(x)/x both tend to 1 as x -> 0; sums and
# products of their samples should approach the sum/product of the limits.
h_vals = [1e-1, 1e-2, 1e-3, 1e-4, 1e-6, 1e-8]
print('Verifying limit laws at x -> 0')
print('f(x) = x^2 + 1 => lim = 1')
print('g(x) = sin(x)/x => lim = 1')
print()
print(f'{"h":>10} | {"f(h)":>12} | {"g(h)":>12} | {"f+g":>12} | {"f*g":>12}')
print('-' * 65)
for h in h_vals:
    fh = h**2 + 1
    gh = np.sin(h) / h
    print(f'{h:>10.2e} | {fh:>12.8f} | {gh:>12.8f} | {fh+gh:>12.8f} | {fh*gh:>12.8f}')

print()
print('Predicted by limit laws:')
print(f' lim(f+g) = 1 + 1 = 2 (observed ~{1 + np.sin(1e-8)/1e-8:.6f} for h=1e-8)')
print(f' lim(f*g) = 1 * 1 = 1 (observed ~{(1e-16+1)*(np.sin(1e-8)/1e-8):.6f} for h=1e-8)')

# Composition law: a continuous outer function commutes with the limit.
print()
print('Composition law: lim_{x->0} exp(sin(x)/x - 1) = exp(1-1) = exp(0) = 1')
for h in (1e-2, 1e-4, 1e-6):
    val = np.exp(np.sin(h)/h - 1)
    print(f' h={h:.0e}: exp(sin(h)/h - 1) = {val:.10f}')
14. Euler's Number: $e = \lim_{n \to \infty} (1 + 1/n)^n$
The number $e \approx 2.71828$ is both the base of the natural exponential and the limit of discrete compounding.
Code cell 40
# === 14.1 Euler's Number as a Limit ===
import numpy as np
import matplotlib.pyplot as plt

COLORS = {'primary': '#0077BB', 'secondary': '#EE7733', 'highlight': '#EE3377'}

# Sample the compounding sequence at widely spaced n values.
ns = np.array([1, 2, 5, 10, 50, 100, 500, 1000, 5000, 10000, 100000, 1000000])
a_n = (1 + 1/ns)**ns
e_true = np.e
errors = np.abs(a_n - e_true)

print('Convergence of (1 + 1/n)^n to e:')
print(f'{"n":>10} | {"(1+1/n)^n":>15} | {"error":>12}')
print('-' * 45)
for n, val, err in zip(ns, a_n, errors):
    print(f'{n:>10} | {val:>15.10f} | {err:>12.2e}')
print(f'True e = {e_true:.10f}')

# The leading error term of the expansion is e/(2n).
print()
print('Convergence rate ~ e/(2n):')
for n, err in zip(ns[-5:], errors[-5:]):
    predicted = e_true / (2 * n)
    print(f' n={n:.0e}: error={err:.2e}, predicted e/(2n)={predicted:.2e}, ratio={err/predicted:.2f}')

# Visualization: sequence (left) and error against the predicted rate (right).
ns_fine = np.logspace(0, 6, 300)
a_fine = (1 + 1/ns_fine)**ns_fine
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
axes[0].semilogx(ns_fine, a_fine, color=COLORS['primary'], lw=2, label=r'$(1+1/n)^n$')
axes[0].axhline(e_true, color=COLORS['highlight'], ls='--', lw=1.5, label=f'e = {e_true:.5f}')
axes[0].set_xlabel('n (log scale)', fontsize=12)
axes[0].set_ylabel(r'$(1+1/n)^n$', fontsize=12)
axes[0].set_title(r'$\lim_{n\to\infty}(1+1/n)^n = e$', fontsize=13)
axes[0].legend()

axes[1].loglog(ns_fine, np.abs((1+1/ns_fine)**ns_fine - e_true),
               color=COLORS['secondary'], lw=2, label='|error|')
axes[1].loglog(ns_fine, e_true/(2*ns_fine), color=COLORS['primary'],
               ls='--', lw=1.5, label='e/(2n) (predicted rate)')
axes[1].set_xlabel('n', fontsize=12)
axes[1].set_ylabel('|error|', fontsize=12)
axes[1].set_title('Convergence rate: O(1/n)', fontsize=13)
axes[1].legend()
fig.tight_layout()
plt.show()
print('PASS: sequence converges to e with rate O(1/n)')
print('PASS: sequence converges to e with rate O(1/n)')
15. Intermediate Value Theorem: Bisection Root Finding
If $f$ is continuous on $[a, b]$ and $f(a) \cdot f(b) < 0$, then $f$ has a root in $(a, b)$. Bisection exploits IVT to find it.
Code cell 42
# === 15.1 IVT and Bisection Method ===
import numpy as np
import matplotlib.pyplot as plt

# Local palette for this section's figures.
COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
          'tertiary': '#009988', 'error': '#CC3311'}
def bisection(f, a, b, tol=1e-10, max_iter=100):
    """Bisection method: IVT guarantees a root in (a, b) if f(a)*f(b) < 0.

    Parameters
    ----------
    f : callable
        Continuous scalar function.
    a, b : float
        Bracket endpoints with f(a) and f(b) of opposite sign.
    tol : float
        Stop when |f(m)| < tol or the half-width drops below tol.
    max_iter : int
        Hard cap on iterations.

    Returns
    -------
    (m, history) : the final midpoint and a list of per-iteration dicts
        with keys 'iter', 'a', 'b', 'm', 'f(m)', 'width'.

    Raises
    ------
    ValueError : if the bracket does not straddle a sign change.

    Note: each f value is computed exactly once per iteration; the naive
    form evaluates f(m) three times and f(a) on every pass.
    """
    fa = f(a)
    if fa * f(b) >= 0:
        raise ValueError('f(a) and f(b) must have opposite signs')
    history = []
    for i in range(max_iter):
        m = (a + b) / 2
        fm = f(m)  # single evaluation reused below
        history.append({'iter': i, 'a': a, 'b': b, 'm': m, 'f(m)': fm, 'width': b - a})
        if abs(fm) < tol or (b - a) / 2 < tol:
            break
        if fa * fm < 0:
            # Sign change in the left half: shrink from the right.
            b = m
        else:
            # Sign change in the right half: shrink from the left.
            a = m
            fa = fm
    return m, history
# Find root of f(x) = x^3 - x - 2 (root near x=1.5214)
f = lambda x: x**3 - x - 2
a0, b0 = 1.0, 2.0
print(f'f({a0}) = {f(a0):.4f}, f({b0}) = {f(b0):.4f}')
print(f'Signs differ: {f(a0)*f(b0) < 0} => IVT guarantees a root in ({a0},{b0})')
print()

root, history = bisection(f, a0, b0)

# Tabulate the first ten iterations.
print(f'{"Iter":>5} | {"a":>10} | {"b":>10} | {"m":>12} | {"f(m)":>12} | {"width":>12}')
print('-' * 70)
for h in history[:10]:
    print(f'{h["iter"]:>5} | {h["a"]:>10.6f} | {h["b"]:>10.6f} | {h["m"]:>12.8f} | {h["f(m)"]:>12.2e} | {h["width"]:>12.2e}')
print('...')
print(f'Root found: {root:.12f}')
print(f'f(root) = {f(root):.2e}')
print(f'Iterations: {len(history)}')
print(f'Theoretical: log2((b-a)/tol) = {np.log2((b0-a0)/1e-10):.1f} iterations')

# Visualization: function with marked root, and interval-width decay.
x_plot = np.linspace(0.5, 2.5, 300)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
axes[0].plot(x_plot, f(x_plot), color=COLORS['primary'], lw=2.5)
axes[0].axhline(0, color=COLORS['neutral'] if 'neutral' in COLORS else 'gray', lw=1)
axes[0].plot(root, 0, 's', ms=10, color=COLORS['error'], zorder=5, label=f'Root ≈ {root:.4f}')
axes[0].set_xlabel('x', fontsize=12)
axes[0].set_ylabel('f(x)', fontsize=12)
axes[0].set_title(r'$f(x) = x^3 - x - 2$', fontsize=13)
axes[0].legend()

# Width halves each step, matching the (b-a)/2^n theory line.
iters = [h['iter'] for h in history]
widths = [h['width'] for h in history]
axes[1].semilogy(iters, widths, color=COLORS['secondary'], lw=2, label='Interval width')
axes[1].semilogy(iters, [(b0-a0)*0.5**i for i in iters], color=COLORS['primary'],
                 ls='--', lw=1.5, label=r'$(b-a)/2^n$ (theory)')
axes[1].set_xlabel('Iteration', fontsize=12)
axes[1].set_ylabel('Interval width (log)', fontsize=12)
axes[1].set_title('Bisection Convergence: O(1/2^n)', fontsize=13)
axes[1].legend()
fig.tight_layout()
plt.show()

assert abs(f(root)) < 1e-9, 'Root not accurate'
print('PASS: bisection converged to root, IVT verified')
16. Asymptotic Growth Hierarchy
As $x \to \infty$: $\ln x \ll x^p \ll a^x$ for any $p > 0$ and $a > 1$. Every polynomial is dominated by every exponential; every logarithm by every power.
Code cell 44
# === 16.1 Asymptotic Growth Comparison ===
import numpy as np
import matplotlib.pyplot as plt

COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
          'tertiary': '#009988', 'error': '#CC3311', 'highlight': '#EE3377'}

# Polynomials are crushed by the exponential: x^n / e^x -> 0.
print('lim_{x->inf} x^n / e^x = 0 (polynomial dominated by exponential)')
x_vals = np.array([1, 5, 10, 20, 50, 100])
for n in (1, 2, 3):
    ratios = x_vals**n / np.exp(x_vals)
    print(f' n={n}: ratios = {[f"{r:.2e}" for r in ratios]}')

# Logarithms are crushed by any positive power: ln(x) / x^p -> 0.
print()
print('lim_{x->inf} ln(x) / x^p = 0 for p>0 (log dominated by any power)')
x_vals2 = np.array([10, 100, 1000, 10000])
for p in (0.1, 0.5, 1.0):
    ratios = np.log(x_vals2) / x_vals2**p
    print(f' p={p}: ratios = {[f"{r:.4f}" for r in ratios]}')

# Visualization
x = np.linspace(1, 10, 500)
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Panel 1: the raw functions, clipped so all four are visible.
axes[0].plot(x, np.log(x), color=COLORS['tertiary'], lw=2, label=r'$\ln x$')
axes[0].plot(x, x, color=COLORS['primary'], lw=2, label=r'$x$')
axes[0].plot(x, x**2, color=COLORS['secondary'], lw=2, label=r'$x^2$')
axes[0].plot(x, np.exp(x), color=COLORS['error'], lw=2, label=r'$e^x$')
axes[0].set_ylim(0, 150)
axes[0].set_xlabel('x', fontsize=12)
axes[0].set_ylabel('f(x)', fontsize=12)
axes[0].set_title('Growth Hierarchy: $\\ln x \\ll x \\ll x^2 \\ll e^x$', fontsize=13)
axes[0].legend(fontsize=11)

# Panel 2: the ratios on a log scale all decay to zero.
x2 = np.linspace(1, 20, 500)
axes[1].semilogy(x2, x2 / np.exp(x2), color=COLORS['primary'], lw=2, label=r'$x/e^x \to 0$')
axes[1].semilogy(x2, x2**2 / np.exp(x2), color=COLORS['secondary'], lw=2, label=r'$x^2/e^x \to 0$')
axes[1].semilogy(x2, np.log(x2) / x2, color=COLORS['tertiary'], lw=2, label=r'$\ln x/x \to 0$')
axes[1].set_xlabel('x', fontsize=12)
axes[1].set_ylabel('Ratio (log scale)', fontsize=12)
axes[1].set_title('All ratios $\\to 0$: dominance hierarchy', fontsize=13)
axes[1].legend(fontsize=11)
fig.tight_layout()
plt.show()
print('PASS: asymptotic hierarchy verified numerically')
17. Gradient as a Limit: Numerical Differentiation and Gradient Checking
The derivative $f'(a) = \lim_{h \to 0} \frac{f(a+h) - f(a)}{h}$ can be approximated numerically. Gradient checking verifies automatic differentiation implementations.
Code cell 46
# === 17.1 Finite Differences and Gradient Checking ===
import numpy as np

# Polynomial with a known derivative so errors can be measured exactly.
f = lambda x: x**3 + 2*x - 1  # f'(x) = 3x^2 + 2
f_prime = lambda x: 3*x**2 + 2
a = 2.0  # true derivative at a=2: 3*4+2 = 14
h_vals = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-10, 1e-12, 1e-14]
true_val = f_prime(a)
print(f'True f\'({a}) = {true_val}')
print()
print(f'{"h":>10} | {"one-sided err":>15} | {"centered err":>15}')
print('-' * 50)

# Sweep h: truncation error falls, then round-off error takes over.
one_sided_errs = []
centered_errs = []
for h in h_vals:
    one_sided = (f(a + h) - f(a)) / h
    centered = (f(a + h) - f(a - h)) / (2*h)
    err1 = abs(one_sided - true_val)
    errc = abs(centered - true_val)
    one_sided_errs.append(err1)
    centered_errs.append(errc)
    print(f'{h:>10.2e} | {err1:>15.2e} | {errc:>15.2e}')

optimal_h1 = h_vals[np.argmin(one_sided_errs)]
optimal_hc = h_vals[np.argmin(centered_errs)]
print(f'\nOptimal h (one-sided): {optimal_h1:.0e}')
print(f'Optimal h (centered): {optimal_hc:.0e}')
print(f'Machine eps sqrt: {np.sqrt(np.finfo(float).eps):.2e} (expected for one-sided)')
print(f'Machine eps cube-root: {np.finfo(float).eps**(1/3):.2e} (expected for centered)')

# Gradient check for a neural network-like loss.
print()
print('=== Gradient Check ===')
np.random.seed(42)
theta = np.array([1.0, -0.5, 2.0])  # parameters


def loss(t):
    """Toy quadratic loss 0.5 * ||W t||^2 with a fixed matrix W."""
    W = np.array([[1, 2, -1], [0, 1, 3]])
    z = W @ t
    return 0.5 * np.dot(z, z)


def grad_analytic(t):
    """Closed-form gradient of the toy loss: W^T W t."""
    W = np.array([[1, 2, -1], [0, 1, 3]], dtype=float)
    return W.T @ (W @ t)


# Centered finite-difference gradient, one coordinate at a time.
h = 1e-5
grad_fd = np.zeros(3)
for i in range(3):
    tp = theta.copy()
    tm = theta.copy()
    tp[i] += h
    tm[i] -= h
    grad_fd[i] = (loss(tp) - loss(tm)) / (2*h)

grad_an = grad_analytic(theta)
rel_err = np.linalg.norm(grad_an - grad_fd) / (np.linalg.norm(grad_an) + np.linalg.norm(grad_fd))
print(f'Analytic gradient: {grad_an}')
print(f'FD gradient: {grad_fd}')
print(f'Relative error: {rel_err:.2e}')
print('PASS: gradient check passed' if rel_err < 1e-5 else 'FAIL')
18. ReLU and GELU: Continuity and Corner Behavior
ReLU is continuous but not differentiable at 0. GELU ($\mathrm{GELU}(x) = x\,\Phi(x)$, where $\Phi$ is the standard normal CDF) is smooth ($C^\infty$) everywhere.
Code cell 48
# === 18.1 Activation Function Continuity Analysis ===
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import erf

# Colorblind-safe hex palette, redefined locally so the cell is self-contained.
COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
          'tertiary': '#009988', 'error': '#CC3311'}
def relu(x):
    """Rectified linear unit: elementwise max(x, 0)."""
    return np.maximum(x, 0)
def gelu(x):
    """Gaussian error linear unit: x * Phi(x), Phi the standard normal CDF (via erf)."""
    normal_cdf = 0.5 * (1 + erf(x / np.sqrt(2)))
    return x * normal_cdf
def sigmoid(x):
    """Logistic function; input clipped to [-500, 500] so exp cannot overflow."""
    safe_x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-safe_x))
# Verify continuity at x=0: one-sided limits
# Shrinking h from both sides shows f(0-h) and f(0+h) both approach 0.
print('=== Continuity at x=0 ===')
for h in [1e-1, 1e-4, 1e-8, 1e-12]:
    relu_left = relu(-h)
    relu_right = relu(h)
    gelu_left = gelu(-h)
    gelu_right = gelu(h)
    print(f'h={h:.0e}: ReLU({-h:.0e})={relu_left:.2e}, ReLU({h:.0e})={relu_right:.2e} | '
          f'GELU({-h:.0e})={gelu_left:.2e}, GELU({h:.0e})={gelu_right:.2e}')
print()
print('Both lim_{x->0^+} and lim_{x->0^-} equal 0 for ReLU and GELU => both continuous')
# Verify non-differentiability of ReLU at 0
# One-sided difference quotients of ReLU disagree (1 vs 0) as h -> 0;
# GELU's centered quotient converges to a single value, 0.5.
print()
print('=== Differentiability at x=0 ===')
print(f'{"h":>10} | {"ReLU right deriv":>18} | {"ReLU left deriv":>18} | {"GELU deriv":>12}')
print('-' * 68)
for h in [1e-1, 1e-3, 1e-6, 1e-10]:
    relu_rd = (relu(h) - relu(0)) / h
    relu_ld = (relu(-h) - relu(0)) / (-h)
    gelu_cd = (gelu(h) - gelu(-h)) / (2*h)
    print(f'{h:>10.0e} | {relu_rd:>18.6f} | {relu_ld:>18.6f} | {gelu_cd:>12.6f}')
print()
print('ReLU: right deriv -> 1, left deriv -> 0 => NOT differentiable at 0')
print('GELU: centered deriv -> 0.5 = Phi(0) + 0*phi(0) => differentiable (C^inf)')
# Visualization
x = np.linspace(-3, 3, 1000)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Functions
# Left panel: the three activations; dotted vertical line marks x=0,
# where ReLU has its corner.
axes[0].plot(x, relu(x), color=COLORS['primary'], lw=2.5, label='ReLU')
axes[0].plot(x, gelu(x), color=COLORS['secondary'], lw=2.5, label='GELU')
axes[0].plot(x, sigmoid(x), color=COLORS['tertiary'], lw=2, ls='--', label='Sigmoid')
axes[0].axvline(0, color='gray', lw=0.8, ls=':')
axes[0].set_xlabel('x', fontsize=12); axes[0].set_ylabel('f(x)', fontsize=12)
axes[0].set_title('Activation Functions', fontsize=13)
axes[0].legend(); axes[0].set_ylim(-1, 3)
# Derivatives (numerical)
# Right panel: centered-difference derivatives for ReLU/GELU; sigmoid's
# derivative uses its closed form s(x)(1 - s(x)).
h = 1e-5
relu_d = (relu(x + h) - relu(x - h)) / (2*h)
gelu_d = (gelu(x + h) - gelu(x - h)) / (2*h)
sigmoid_d = sigmoid(x) * (1 - sigmoid(x))
axes[1].plot(x, relu_d, color=COLORS['primary'], lw=2.5, label="ReLU'")
axes[1].plot(x, gelu_d, color=COLORS['secondary'], lw=2.5, label="GELU'")
axes[1].plot(x, sigmoid_d, color=COLORS['tertiary'], lw=2, ls='--', label="Sigmoid'")
axes[1].axvline(0, color='gray', lw=0.8, ls=':')
axes[1].set_xlabel('x', fontsize=12); axes[1].set_ylabel("f'(x)", fontsize=12)
axes[1].set_title('Derivatives: ReLU jump vs GELU smooth', fontsize=13)
axes[1].legend()
fig.tight_layout(); plt.show()
print('PASS: continuity and derivative behavior verified')
19. Extreme Value Theorem and Uniform Continuity
EVT: a function continuous on a closed interval $[a, b]$ attains its max and min. Uniform continuity: $\delta$ depends only on $\varepsilon$, not on the point.
Code cell 50
# === 19.1 EVT and Uniform Continuity Demonstration ===
import numpy as np
import matplotlib.pyplot as plt

# Colorblind-safe hex palette, redefined locally so the cell is self-contained.
COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
          'tertiary': '#009988', 'highlight': '#EE3377'}

# EVT: f(x) = sin(2x) + 0.5*cos(5x) on [0, 2*pi]
f = lambda x: np.sin(2*x) + 0.5*np.cos(5*x)
a, b = 0, 2*np.pi
# Dense sampling: argmax/argmin over 10k points locates the attained extrema.
x_dense = np.linspace(a, b, 10000)
y_dense = f(x_dense)
x_max = x_dense[np.argmax(y_dense)]
x_min = x_dense[np.argmin(y_dense)]
y_max = f(x_max)
y_min = f(x_min)
print('=== Extreme Value Theorem ===')
print(f'f(x) = sin(2x) + 0.5*cos(5x) on [0, 2π]')
print(f'Maximum: f({x_max:.4f}) = {y_max:.6f}')
print(f'Minimum: f({x_min:.4f}) = {y_min:.6f}')
print('Both are attained (not just approached) — EVT guarantee.')
# Uniform continuity: f(x) = sin(x) (uniformly continuous on R)
# vs f(x) = x^2 (NOT uniformly continuous on R)
print()
print('=== Uniform Continuity ===')
eps = 0.1
print(f'eps = {eps}. For sin(x): one delta works everywhere.')
# sin: |sin(x) - sin(y)| <= |x-y| (Lipschitz with L=1)
# So delta = eps works everywhere
delta_sin = eps
print(f' sin(x): delta = {delta_sin} works uniformly (Lipschitz constant = 1)')
# x^2: need delta = eps/(2|a|+1) -- depends on a
print(f' x^2 on R: delta depends on location:')
for a_pt in [1, 10, 100, 1000]:
    # |x^2 - a^2| = |x - a||x + a|, so the admissible delta shrinks as |a| grows.
    delta_sq = eps / (2*a_pt + 1) # rough bound
    print(f' near a={a_pt}: delta ~ {delta_sq:.4f} (shrinks to 0 as a->inf)')
print(' => x^2 is NOT uniformly continuous on R (delta -> 0 as a -> inf)')
# Visualization
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Left panel: the oscillating function with its attained max/min marked and
# dashed horizontal reference lines at those levels.
axes[0].plot(x_dense, y_dense, color=COLORS['primary'], lw=2)
axes[0].plot(x_max, y_max, 'v', ms=12, color=COLORS['highlight'], zorder=5, label=f'Max = {y_max:.3f}')
axes[0].plot(x_min, y_min, '^', ms=12, color=COLORS['secondary'], zorder=5, label=f'Min = {y_min:.3f}')
axes[0].axhline(y_max, color=COLORS['highlight'], ls='--', lw=1)
axes[0].axhline(y_min, color=COLORS['secondary'], ls='--', lw=1)
axes[0].set_xlabel('x', fontsize=12); axes[0].set_ylabel('f(x)', fontsize=12)
axes[0].set_title('EVT: max and min attained on $[0, 2\\pi]$', fontsize=13)
axes[0].legend()
# Right panel: sin(x) vs a scaled parabola, contrasting uniform vs.
# merely pointwise continuity.
x2 = np.linspace(-5, 5, 400)
axes[1].plot(x2, np.sin(x2), color=COLORS['primary'], lw=2.5, label=r'$\sin x$ (uniform)')
axes[1].plot(x2, x2**2 / 10, color=COLORS['secondary'], lw=2.5, label=r'$x^2/10$ (not uniform on $\mathbb{R}$)')
axes[1].set_xlabel('x', fontsize=12)
axes[1].set_title('Uniform vs. pointwise continuity', fontsize=13)
axes[1].legend()
fig.tight_layout(); plt.show()
print('PASS: EVT and uniform continuity demonstrated')
Summary
| Concept | Key Idea | ML Application |
|---|---|---|
| Limit Definition | $\varepsilon$–$\delta$ formalism for $\lim_{x \to a} f(x) = L$ | Convergence analysis |
| One-Sided Limits | Left/right limits may differ | Step functions, ReLU |
| Fundamental Limits | $\lim_{x \to 0} \frac{\sin x}{x} = 1$, $\lim_{n \to \infty} \left(1 + \frac{1}{n}\right)^n = e$ | Gradient approximations |
| L'Hôpital's Rule | For 0/0 or ∞/∞ forms | Analyzing loss behavior |
| Continuity | No breaks, holes, jumps | Activation smoothness |
| Squeeze Theorem | Bound oscillating functions | Convergence proofs |
| Softmax Temperature | $T \to 0$: hard max | Knowledge distillation |
| Sigmoid Saturation | Limits at ±∞ | Vanishing gradients |
| Learning Rate Decay | $\sum_t \eta_t = \infty$, $\sum_t \eta_t^2 < \infty$ | SGD convergence |
| Numerical Stability | Avoid cancellation | Use expm1, log1p |