Exercises Notebook — Math for LLMs

Second Order Methods

Optimization / Second Order Methods

Run notebook
Exercises Notebook

Exercises Notebook

Converted from exercises.ipynb for web reading.

Second-Order Methods - Exercises

Ten graded exercises. Each exercise has a problem, scaffold, and solution cell.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Plot styling: use seaborn's theme when seaborn is installed, otherwise fall
# back to the seaborn-like whitegrid style sheet bundled with Matplotlib.
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    # The bundled style sheet is called "seaborn-v0_8-whitegrid" from
    # Matplotlib 3.6 onward and "seaborn-whitegrid" before that. Pick whichever
    # name this installation provides instead of failing on an unknown style.
    for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
        if _style_name in plt.style.available:
            plt.style.use(_style_name)
            break
    HAS_SNS = False

# Notebook-wide figure defaults; set after the theme/style has been chosen so
# these values are the ones left in rcParams.
mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
# Seed NumPy's global random generator so any random draws are repeatable.
np.random.seed(42)
print("Plot setup complete.")

Exercise 1 [*]: Newton Step

  1. State the relevant definition for Newton step.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 4

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 1 scaffold: fill in the missing computation for Newton step.")
print("answer =", answer)

Code cell 5

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 1: evaluate the toy scalar and self-check it.
header("Exercise 1: Newton Step")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([1.0, 1.0, -1.0])
first_component = vector[0]
answer = float(first_component ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
expected = 4.0
check_close("toy scalar computation", answer, expected)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: Newton step is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 2 [*]: Damped Newton

  1. State the relevant definition for damped Newton.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 7

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 2 scaffold: fill in the missing computation for damped Newton.")
print("answer =", answer)

Code cell 8

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 2: evaluate the toy scalar and self-check it.
header("Exercise 2: Damped Newton")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([2.0, 1.0, -1.0])
first_component = vector[0]
answer = float(first_component ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
expected = 7.0
check_close("toy scalar computation", answer, expected)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: damped Newton is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 3 [*]: Trust-Region Preview

  1. State the relevant definition for trust-region preview.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 10

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 3 scaffold: fill in the missing computation for trust-region preview.")
print("answer =", answer)

Code cell 11

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 3: evaluate the toy scalar and self-check it.
header("Exercise 3: Trust-Region Preview")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([3.0, 1.0, -1.0])
first_component = vector[0]
answer = float(first_component ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
expected = 12.0
check_close("toy scalar computation", answer, expected)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: trust-region preview is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 4 [**]: Levenberg-Marquardt

  1. State the relevant definition for Levenberg-Marquardt.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 13

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 4 scaffold: fill in the missing computation for Levenberg-Marquardt.")
print("answer =", answer)

Code cell 14

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 4: evaluate the toy scalar and self-check it.
header("Exercise 4: Levenberg-Marquardt")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([4.0, 1.0, -1.0])
first_component = vector[0]
answer = float(first_component ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
expected = 19.0
check_close("toy scalar computation", answer, expected)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: Levenberg-Marquardt is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 5 [**]: BFGS

  1. State the relevant definition for BFGS.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 16

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 5 scaffold: fill in the missing computation for BFGS.")
print("answer =", answer)

Code cell 17

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 5: evaluate the toy scalar and self-check it.
# The section title spells the acronym "BFGS" in capitals, matching the
# definition-anchor line printed below.
header("Exercise 5: BFGS")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([5.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
check_close("toy scalar computation", answer, 28.0)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: BFGS is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 6 [**]: Two-Loop Recursion

  1. State the relevant definition for two-loop recursion.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 19

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 6 scaffold: fill in the missing computation for two-loop recursion.")
print("answer =", answer)

Code cell 20

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 6: evaluate the toy scalar and self-check it.
header("Exercise 6: Two-Loop Recursion")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([6.0, 1.0, -1.0])
first_component = vector[0]
answer = float(first_component ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
expected = 39.0
check_close("toy scalar computation", answer, expected)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: two-loop recursion is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 7 [**]: Fisher Information

  1. State the relevant definition for Fisher information.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 22

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 7 scaffold: fill in the missing computation for Fisher information.")
print("answer =", answer)

Code cell 23

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 7: evaluate the toy scalar and self-check it.
header("Exercise 7: Fisher Information")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([7.0, 1.0, -1.0])
first_component = vector[0]
answer = float(first_component ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
expected = 52.0
check_close("toy scalar computation", answer, expected)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: Fisher information is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 8 [***]: K-FAC

  1. State the relevant definition for K-FAC.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 25

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 8 scaffold: fill in the missing computation for K-FAC.")
print("answer =", answer)

Code cell 26

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 8: evaluate the toy scalar and self-check it.
# The section title spells the acronym "K-FAC" in capitals, matching the
# definition-anchor line printed below.
header("Exercise 8: K-FAC")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([8.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
check_close("toy scalar computation", answer, 67.0)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: K-FAC is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 9 [***]: SOAP

  1. State the relevant definition for SOAP.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 28

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 9 scaffold: fill in the missing computation for SOAP.")
print("answer =", answer)

Code cell 29

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 9: evaluate the toy scalar and self-check it.
# The section title spells the acronym "SOAP" in capitals, matching the
# definition-anchor line printed below.
header("Exercise 9: SOAP")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([9.0, 1.0, -1.0])
answer = float(vector[0] ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
check_close("toy scalar computation", answer, 84.0)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: SOAP is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")

Exercise 10 [***]: Curvature Diagnostics

  1. State the relevant definition for curvature diagnostics.
  2. Compute the requested toy quantity.
  3. Explain the optimization diagnostic you would log in a real model-training run.

Code cell 31

# Your Solution
# Placeholder cell: replace None with your own computation before comparing
# against the solution cell.
answer = None
print("Exercise 10 scaffold: fill in the missing computation for curvature diagnostics.")
print("answer =", answer)

Code cell 32

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the cell output.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` is within ``tol`` of ``target``.

    Both numbers are converted with ``float()`` once, and the converted
    values are used for the comparison and for the printed report. Any input
    that ``float()`` accepts (for example a numeric string) is therefore
    reported as PASS/FAIL instead of failing inside the format string.

    Args:
        name: Label printed with the result and used as the error message.
        value: Computed quantity.
        target: Expected quantity.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: With ``name`` as its message when the absolute
            difference exceeds ``tol`` (a NaN difference also fails).
    """
    value_f = float(value)
    target_f = float(target)
    ok = abs(value_f - target_f) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value_f:.8f}, target={target_f:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Print PASS or FAIL for ``condition`` and raise if it is falsy.

    Args:
        name: Label printed with the result and used as the error message.
        condition: Value interpreted through ``bool()``.

    Raises:
        AssertionError: With ``name`` as its message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    if passed:
        return
    raise AssertionError(name)

# Worked solution for Exercise 10: evaluate the toy scalar and self-check it.
header("Exercise 10: Curvature Diagnostics")

# Toy quantity: square of the first component of `vector`, shifted by 3.
vector = np.array([10.0, 1.0, -1.0])
first_component = vector[0]
answer = float(first_component ** 2 + 3.0)

# Verify the value against the expected result and confirm it is finite.
expected = 103.0
check_close("toy scalar computation", answer, expected)
check_true("finite answer", np.isfinite(answer))

print("Definition anchor: curvature diagnostics is interpreted through the objective, update, or diagnostic in Second-Order Methods.")
print("\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.")