Gradient Descent
Exercises Notebook
Converted from
exercises.ipynb for web reading.
Gradient Descent - Exercises
Ten graded exercises. Each exercise has a problem, scaffold, and solution cell.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# Prefer seaborn's theme when it is installed; otherwise fall back to the
# matplotlib style sheet that mimics seaborn's "whitegrid" look.
try:
    import seaborn as sns
except ImportError:
    HAS_SNS = False
    # matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*";
    # older releases only ship "seaborn-whitegrid". Use whichever exists so
    # the setup cell does not crash on an older matplotlib.
    for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
        if _style_name in plt.style.available:
            plt.style.use(_style_name)
            break
else:
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True

# Notebook-wide figure defaults, applied after the theme so they take effect
# on top of whichever style was selected above.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})

# Fixed seed so any random draws in later cells are reproducible.
np.random.seed(42)
print("Plot setup complete.")
Exercise 1 [*]: Constant Step Size
- State the relevant definition for constant step size.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (1, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 4
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 1 scaffold: fill in the missing computation for constant step size.")
print("answer =", answer)
Code cell 5
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([1.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 1: Constant Step Size")
check_close("toy scalar computation", answer, 4.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: constant step size is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 2 [*]: Backtracking Line Search
- State the relevant definition for backtracking line search.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (2, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 7
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 2 scaffold: fill in the missing computation for backtracking line search.")
print("answer =", answer)
Code cell 8
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([2.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 2: Backtracking Line Search")
check_close("toy scalar computation", answer, 7.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: backtracking line search is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 3 [*]: Wolfe Conditions
- State the relevant definition for Wolfe conditions.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (3, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 10
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 3 scaffold: fill in the missing computation for Wolfe conditions.")
print("answer =", answer)
Code cell 11
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([3.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 3: Wolfe Conditions")
check_close("toy scalar computation", answer, 12.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: Wolfe conditions is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 4 [**]: Strongly Convex Convergence
- State the relevant definition for strongly convex convergence.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (4, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 13
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 4 scaffold: fill in the missing computation for strongly convex convergence.")
print("answer =", answer)
Code cell 14
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([4.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 4: Strongly Convex Convergence")
check_close("toy scalar computation", answer, 19.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: strongly convex convergence is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 5 [**]: PL Condition
- State the relevant definition for PL condition.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (5, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 16
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 5 scaffold: fill in the missing computation for PL condition.")
print("answer =", answer)
Code cell 17
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([5.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 5: Pl Condition")
check_close("toy scalar computation", answer, 28.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: PL condition is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 6 [**]: Polyak Momentum
- State the relevant definition for Polyak momentum.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (6, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 19
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 6 scaffold: fill in the missing computation for Polyak momentum.")
print("answer =", answer)
Code cell 20
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([6.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 6: Polyak Momentum")
check_close("toy scalar computation", answer, 39.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: Polyak momentum is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 7 [**]: Gradient Flow
- State the relevant definition for gradient flow.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (7, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 22
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 7 scaffold: fill in the missing computation for gradient flow.")
print("answer =", answer)
Code cell 23
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([7.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 7: Gradient Flow")
check_close("toy scalar computation", answer, 52.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: gradient flow is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 8 [***]: Edge of Stability Preview
- State the relevant definition for edge of stability preview.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (8, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 25
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 8 scaffold: fill in the missing computation for edge of stability preview.")
print("answer =", answer)
Code cell 26
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([8.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 8: Edge Of Stability Preview")
check_close("toy scalar computation", answer, 67.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: edge of stability preview is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 9 [***]: Linear Regression by GD
- State the relevant definition for linear regression by GD.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (9, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 28
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 9 scaffold: fill in the missing computation for linear regression by GD.")
print("answer =", answer)
Code cell 29
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([9.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 9: Linear Regression By Gd")
check_close("toy scalar computation", answer, 84.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: linear regression by GD is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)
Exercise 10 [***]: Learning-Rate Diagnostics
- State the relevant definition for learning-rate diagnostics.
- Compute the requested toy quantity: $v_1^2 + 3$ for $v = (10, 1, -1)$.
- Explain the optimization diagnostic you would log in a real model-training run.
Code cell 31
# Your Solution
# Scaffold cell: swap the ``None`` placeholder for your computed value.
answer = None
print("Exercise 10 scaffold: fill in the missing computation for learning-rate diagnostics.")
print("answer =", answer)
Code cell 32
# Solution
import numpy as np
def header(title):
    """Print ``title`` framed above and below by a 72-character ``=`` rule.

    A blank line is emitted first so consecutive sections stay separated.
    """
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` to within ``tol``.

    Prints a ``PASS``/``FAIL`` line labelled with ``name`` that shows both
    numbers to eight decimal places.

    Args:
        name: Label for the check; also used as the failure message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference still counted as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``.
    """
    # Convert once and reuse the floats in the report line as well, so inputs
    # that ``float()`` accepts but ``:.8f`` rejects (e.g. numeric strings)
    # cannot crash the message after the comparison itself has succeeded.
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)
def check_true(name, condition):
    """Report whether ``condition`` is truthy.

    Prints ``PASS - name`` or ``FAIL - name``.

    Args:
        name: Label for the check; also used as the failure message.
        condition: Any object; it is coerced with ``bool()``.

    Raises:
        AssertionError: With ``name`` as the message if ``condition`` is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    if not ok:
        raise AssertionError(name)
# Toy quantity: square of the first component of ``vector``, shifted by 3.
vector = np.array([10.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

header("Exercise 10: Learning-Rate Diagnostics")
check_close("toy scalar computation", answer, 103.0)
check_true("finite answer", np.isfinite(answer))
# One call with a newline separator prints the same two lines as before.
print(
    "Definition anchor: learning-rate diagnostics is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)