Theory Notebook — Math for LLMs

Minimax Theorem

Game Theory / Minimax Theorem

Run notebook
Theory Notebook

Theory Notebook

Converted from theory.ipynb for web reading.

Minimax Theorem

The minimax theorem explains why mixed strategies can equalize worst-case guarantees in finite zero-sum games.

This notebook is the executable companion to notes.md. It uses small payoff matrices and synthetic games so every cell runs without external data.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Prefer seaborn's whitegrid theme when seaborn is installed; otherwise fall
# back to matplotlib's bundled port of the same style so figures stay consistent.
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

# Notebook-wide plotting defaults: larger canvas, readable fonts, open spines.
mpl.rcParams.update(
    {
        "figure.figsize": (10, 6),
        "figure.dpi": 120,
        "font.size": 13,
        "axes.titlesize": 15,
        "axes.labelsize": 13,
        "xtick.labelsize": 11,
        "ytick.labelsize": 11,
        "legend.fontsize": 11,
        "legend.framealpha": 0.85,
        "lines.linewidth": 2.0,
        "axes.spines.top": False,
        "axes.spines.right": False,
        "savefig.bbox": "tight",
        "savefig.dpi": 150,
    }
)

# Fixed seed so every run of the notebook is reproducible.
np.random.seed(42)
print("Plot setup complete.")

Code cell 3


# Colorblind-safe hex palette shared by every figure in the notebook.
COLORS = dict(
    primary="#0077BB",
    secondary="#EE7733",
    tertiary="#009988",
    error="#CC3311",
    neutral="#555555",
    highlight="#EE3377",
)

def header(title):
    """Print `title` framed by 72-character rules for easy visual scanning."""
    rule = "=" * 72
    print(f"\n{rule}\n{title}\n{rule}")

def check_true(condition, name):
    """Print a PASS/FAIL line labeled `name`, then assert `condition`."""
    ok = bool(condition)
    status = "PASS" if ok else "FAIL"
    print(f"{status} - {name}")
    assert ok, name

def check_close(value, target, tol=1e-8, name="value"):
    """Assert |value - target| <= tol, printing a PASS/FAIL line first."""
    got, want = float(value), float(target)
    ok = abs(got - want) <= tol
    status = "PASS" if ok else "FAIL"
    print(f"{status} - {name}: got {got:.6f}, expected {want:.6f}")
    assert ok, name

def pure_nash(payoff_a, payoff_b):
    """Return every pure-strategy Nash equilibrium (i, j) of a bimatrix game.

    A cell is an equilibrium when neither player can gain by a unilateral
    deviation: the row payoff is a column-wise maximum and the column payoff
    is a row-wise maximum (up to a small numerical tolerance).
    """
    a = np.asarray(payoff_a, dtype=float)
    b = np.asarray(payoff_b, dtype=float)
    tol = 1e-12
    col_best = a.max(axis=0)  # best achievable row payoff per column action
    row_best = b.max(axis=1)  # best achievable column payoff per row action
    n_rows, n_cols = a.shape
    return [
        (i, j)
        for i in range(n_rows)
        for j in range(n_cols)
        if a[i, j] >= col_best[j] - tol and b[i, j] >= row_best[i] - tol
    ]

def expected_payoff(payoff, p, q):
    """Expected row-player payoff p^T M q under mixed strategies p and q."""
    matrix = np.asarray(payoff, dtype=float)
    return float(np.asarray(p) @ matrix @ np.asarray(q))

def grid_zero_sum_value(payoff, grid=101):
    """Approximate the maximin and minimax values of a 2x2 zero-sum game.

    Brute-forces `grid` evenly spaced mixing probabilities for each player
    and returns (lower, upper): the row player's best worst-case value
    (maximin) and the column player's best worst-case concession (minimax).

    The original implementation rebuilt strategy arrays and re-evaluated
    p @ A @ q inside two nested Python loops (2 * grid^2 calls); a single
    vectorized matrix product computes the same value table. It also assumed
    a 2x2 payoff matrix implicitly; that precondition is now checked.
    """
    payoff = np.asarray(payoff, dtype=float)
    if payoff.shape != (2, 2):
        raise ValueError("grid_zero_sum_value expects a 2x2 payoff matrix")
    probs = np.linspace(0, 1, grid)
    # Each row is a mixed strategy [p, 1 - p] over the two actions.
    strategies = np.column_stack([probs, 1 - probs])
    # values[i, j] = expected payoff when row mixes strategies[i], column strategies[j].
    values = strategies @ payoff @ strategies.T
    lower = float(values.min(axis=1).max())  # maximin: row guarantees at least this
    upper = float(values.max(axis=0).min())  # minimax: column concedes at most this
    return lower, upper

def fictitious_play_rps(steps=200):
    """Run fictitious play on rock-paper-scissors for `steps` rounds.

    Both players best-respond to the opponent's empirical action frequencies
    (initialized to one pseudo-count per action). Returns an array of shape
    (steps, 3): the row player's empirical mix after each round.
    """
    payoff = np.array([[0.0, -1.0, 1.0], [1.0, 0.0, -1.0], [-1.0, 1.0, 0.0]])
    counts_a = np.ones(3)
    counts_b = np.ones(3)
    history = []
    for _ in range(steps):
        opponent_mix = counts_b / counts_b.sum()
        own_mix = counts_a / counts_a.sum()
        # Row maximizes, column minimizes, against the empirical frequencies.
        best_row = int(np.argmax(payoff @ opponent_mix))
        best_col = int(np.argmin(own_mix @ payoff))
        counts_a[best_row] += 1
        counts_b[best_col] += 1
        history.append(counts_a / counts_a.sum())
    return np.array(history)

def pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5, steps=20, alpha=0.05):
    """Sign-gradient PGD attack on a 1-D linear model's squared error.

    Performs gradient *ascent* on the loss (theta * (x + delta) - y)^2 with
    respect to the perturbation delta, projecting onto [-eps, eps] after
    every step. Returns (delta, robust_loss) as plain floats.
    """
    delta = 0.0
    for _ in range(steps):
        residual = theta * (x + delta) - y
        # d(loss)/d(delta) = 2 * residual * theta; step along its sign.
        delta = np.clip(delta + alpha * np.sign(2 * residual * theta), -eps, eps)
    robust_loss = (theta * (x + delta) - y) ** 2
    return float(delta), float(robust_loss)

print("Helper functions ready.")

Demo 1: adversarial decision-making

This demo turns the approved TOC concept into a small executable game.

Code cell 5

header("Demo 1 - adversarial decision-making: pure Nash equilibria")
# Prisoner's-dilemma style bimatrix: A holds row payoffs, B column payoffs.
A = np.array([[3.0, 0.0], [5.0, 1.0]])
B = np.array([[3.0, 5.0], [0.0, 1.0]])
equilibria = pure_nash(A, B)
print("Pure Nash equilibria:", equilibria)
check_true((1, 1) in equilibria, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")

Demo 2: worst-case loss

This demo turns the approved TOC concept into a small executable game.

Code cell 7

header("Demo 2 - worst-case loss: matching pennies mixed strategy")
# Matching pennies is zero-sum; uniform mixing by both players yields value 0.
A = np.array([[1.0, -1.0], [-1.0, 1.0]])
uniform = np.full(2, 0.5)
value = expected_payoff(A, uniform, uniform)
print("Expected payoff at mixed equilibrium:", value)
check_close(value, 0.0, name="matching pennies value")
print("Game-theory lesson: randomization can remove profitable pure deviations.")

Demo 3: maximin vs minimax

This demo turns the approved TOC concept into a small executable game.

Code cell 9

header("Demo 3 - maximin vs minimax: minimax grid value")
# Brute-force both players' guarantees for matching pennies on a 101-point grid.
A = np.array([[1.0, -1.0], [-1.0, 1.0]])
maximin, minimax = grid_zero_sum_value(A, grid=101)
print("Grid maximin:", round(maximin, 4))
print("Grid minimax:", round(minimax, 4))
check_true(abs(maximin - minimax) < 0.05, "grid approximation nearly matches minimax value")
print("Game-theory lesson: zero-sum optimal attack and defense meet at one value.")

Demo 4: zero-sum games

This demo turns the approved TOC concept into a small executable game.

Code cell 11

header("Demo 4 - zero-sum games: fictitious play in rock-paper-scissors")
trajectory = fictitious_play_rps(steps=180)
final_policy = trajectory[-1]
print("Final empirical row policy:", np.round(final_policy, 3).tolist())
check_true(np.all(np.abs(final_policy - 1/3) < 0.15), "empirical policy approaches the mixed equilibrium region")
# Plot how each action's empirical frequency evolves over the run.
fig, ax = plt.subplots()
for action, (label, color_key) in enumerate(
    [("Rock", "primary"), ("Paper", "secondary"), ("Scissors", "tertiary")]
):
    ax.plot(trajectory[:, action], color=COLORS[color_key], label=label)
ax.set_title("Fictitious play empirical strategy")
ax.set_xlabel("Iteration")
ax.set_ylabel("Probability")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Game-theory lesson: repeated adaptation can approximate equilibrium behavior.")

Demo 5: why minimax appears in robust ML

This demo turns the approved TOC concept into a small executable game.

Code cell 13

header("Demo 5 - why minimax appears in robust ML: adversarial inner maximization")
adv_delta, adv_loss = pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5)
clean_loss = (0.25 - 1.0) ** 2  # squared error with no perturbation
print("Adversarial delta:", adv_delta)
print("Clean loss:", round(clean_loss, 4))
print("Adversarial loss:", round(adv_loss, 4))
check_true(adv_loss >= clean_loss, "inner maximization does not reduce loss")
print("Game-theory lesson: robust learning optimizes against an adaptive perturbation.")

Demo 6: two-player zero-sum game

This demo turns the approved TOC concept into a small executable game.

Code cell 15

header("Demo 6 - two-player zero-sum game: payoff heatmap")
A = np.array([[2.0, -1.0, 0.0], [0.0, 1.0, -2.0], [-1.0, 3.0, 1.0]])
# Visualize the row player's payoffs with a diverging colormap.
fig, ax = plt.subplots()
image = ax.imshow(A, cmap="RdBu_r")
ax.set_title("Row-player payoff matrix")
ax.set_xlabel("Column action")
ax.set_ylabel("Row action")
fig.colorbar(image, ax=ax, label="Payoff")
fig.tight_layout()
plt.show()
plt.close(fig)
best_rows = np.argmax(A, axis=0)
print("Best row responses by column action:", best_rows.tolist())
check_true(len(best_rows) == A.shape[1], "one row best response per column action")
print("Game-theory lesson: payoff geometry determines best responses.")

Demo 7: payoff matrix $A$

This demo turns the approved TOC concept into a small executable game.

Code cell 17

header("Demo 7 - payoff matrix $A$: pure Nash equilibria")
# Prisoner's-dilemma style bimatrix: A holds row payoffs, B column payoffs.
A = np.array([[3.0, 0.0], [5.0, 1.0]])
B = np.array([[3.0, 5.0], [0.0, 1.0]])
equilibria = pure_nash(A, B)
print("Pure Nash equilibria:", equilibria)
check_true((1, 1) in equilibria, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")

Demo 8: mixed strategies $\mathbf{p}, \mathbf{q}$

This demo turns the approved TOC concept into a small executable game.

Code cell 19

header("Demo 8 - mixed strategies $\\mathbf{p},\\mathbf{q}$: matching pennies mixed strategy")
# Matching pennies is zero-sum; uniform mixing by both players yields value 0.
A = np.array([[1.0, -1.0], [-1.0, 1.0]])
uniform = np.full(2, 0.5)
value = expected_payoff(A, uniform, uniform)
print("Expected payoff at mixed equilibrium:", value)
check_close(value, 0.0, name="matching pennies value")
print("Game-theory lesson: randomization can remove profitable pure deviations.")

Demo 9: game value $v$

This demo turns the approved TOC concept into a small executable game.

Code cell 21

header("Demo 9 - game value $v$: minimax grid value")
# Brute-force both players' guarantees for matching pennies on a 101-point grid.
A = np.array([[1.0, -1.0], [-1.0, 1.0]])
maximin, minimax = grid_zero_sum_value(A, grid=101)
print("Grid maximin:", round(maximin, 4))
print("Grid minimax:", round(minimax, 4))
check_true(abs(maximin - minimax) < 0.05, "grid approximation nearly matches minimax value")
print("Game-theory lesson: zero-sum optimal attack and defense meet at one value.")

Demo 10: saddle point

This demo turns the approved TOC concept into a small executable game.

Code cell 23

header("Demo 10 - saddle point: fictitious play in rock-paper-scissors")
trajectory = fictitious_play_rps(steps=180)
final_policy = trajectory[-1]
print("Final empirical row policy:", np.round(final_policy, 3).tolist())
check_true(np.all(np.abs(final_policy - 1/3) < 0.15), "empirical policy approaches the mixed equilibrium region")
# Plot how each action's empirical frequency evolves over the run.
fig, ax = plt.subplots()
for action, (label, color_key) in enumerate(
    [("Rock", "primary"), ("Paper", "secondary"), ("Scissors", "tertiary")]
):
    ax.plot(trajectory[:, action], color=COLORS[color_key], label=label)
ax.set_title("Fictitious play empirical strategy")
ax.set_xlabel("Iteration")
ax.set_ylabel("Probability")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Game-theory lesson: repeated adaptation can approximate equilibrium behavior.")

Demo 11: pure saddle points

This demo turns the approved TOC concept into a small executable game.

Code cell 25

header("Demo 11 - pure saddle points: adversarial inner maximization")
adv_delta, adv_loss = pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5)
clean_loss = (0.25 - 1.0) ** 2  # squared error with no perturbation
print("Adversarial delta:", adv_delta)
print("Clean loss:", round(clean_loss, 4))
print("Adversarial loss:", round(adv_loss, 4))
check_true(adv_loss >= clean_loss, "inner maximization does not reduce loss")
print("Game-theory lesson: robust learning optimizes against an adaptive perturbation.")

Demo 12: mixed strategies

This demo turns the approved TOC concept into a small executable game.

Code cell 27

header("Demo 12 - mixed strategies: payoff heatmap")
A = np.array([[2.0, -1.0, 0.0], [0.0, 1.0, -2.0], [-1.0, 3.0, 1.0]])
# Visualize the row player's payoffs with a diverging colormap.
fig, ax = plt.subplots()
image = ax.imshow(A, cmap="RdBu_r")
ax.set_title("Row-player payoff matrix")
ax.set_xlabel("Column action")
ax.set_ylabel("Row action")
fig.colorbar(image, ax=ax, label="Payoff")
fig.tight_layout()
plt.show()
plt.close(fig)
best_rows = np.argmax(A, axis=0)
print("Best row responses by column action:", best_rows.tolist())
check_true(len(best_rows) == A.shape[1], "one row best response per column action")
print("Game-theory lesson: payoff geometry determines best responses.")

Demo 13: row player LP

This demo turns the approved TOC concept into a small executable game.

Code cell 29

header("Demo 13 - row player LP: pure Nash equilibria")
# Prisoner's-dilemma style bimatrix: A holds row payoffs, B column payoffs.
A = np.array([[3.0, 0.0], [5.0, 1.0]])
B = np.array([[3.0, 5.0], [0.0, 1.0]])
equilibria = pure_nash(A, B)
print("Pure Nash equilibria:", equilibria)
check_true((1, 1) in equilibria, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")

Demo 14: column player dual

This demo turns the approved TOC concept into a small executable game.

Code cell 31

header("Demo 14 - column player dual: matching pennies mixed strategy")
# Matching pennies is zero-sum; uniform mixing by both players yields value 0.
A = np.array([[1.0, -1.0], [-1.0, 1.0]])
uniform = np.full(2, 0.5)
value = expected_payoff(A, uniform, uniform)
print("Expected payoff at mixed equilibrium:", value)
check_close(value, 0.0, name="matching pennies value")
print("Game-theory lesson: randomization can remove profitable pure deviations.")

Demo 15: rock-paper-scissors

This demo turns the approved TOC concept into a small executable game.

Code cell 33

header("Demo 15 - rock-paper-scissors: minimax grid value")
# Brute-force both players' guarantees for matching pennies on a 101-point grid.
A = np.array([[1.0, -1.0], [-1.0, 1.0]])
maximin, minimax = grid_zero_sum_value(A, grid=101)
print("Grid maximin:", round(maximin, 4))
print("Grid minimax:", round(minimax, 4))
check_true(abs(maximin - minimax) < 0.05, "grid approximation nearly matches minimax value")
print("Game-theory lesson: zero-sum optimal attack and defense meet at one value.")

Demo 16: theorem statement

This demo turns the approved TOC concept into a small executable game.

Code cell 35

header("Demo 16 - theorem statement: fictitious play in rock-paper-scissors")
trajectory = fictitious_play_rps(steps=180)
final_policy = trajectory[-1]
print("Final empirical row policy:", np.round(final_policy, 3).tolist())
check_true(np.all(np.abs(final_policy - 1/3) < 0.15), "empirical policy approaches the mixed equilibrium region")
# Plot how each action's empirical frequency evolves over the run.
fig, ax = plt.subplots()
for action, (label, color_key) in enumerate(
    [("Rock", "primary"), ("Paper", "secondary"), ("Scissors", "tertiary")]
):
    ax.plot(trajectory[:, action], color=COLORS[color_key], label=label)
ax.set_title("Fictitious play empirical strategy")
ax.set_xlabel("Iteration")
ax.set_ylabel("Probability")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Game-theory lesson: repeated adaptation can approximate equilibrium behavior.")

Demo 17: geometric simplex intuition

This demo turns the approved TOC concept into a small executable game.

Code cell 37

header("Demo 17 - geometric simplex intuition: adversarial inner maximization")
adv_delta, adv_loss = pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5)
clean_loss = (0.25 - 1.0) ** 2  # squared error with no perturbation
print("Adversarial delta:", adv_delta)
print("Clean loss:", round(clean_loss, 4))
print("Adversarial loss:", round(adv_loss, 4))
check_true(adv_loss >= clean_loss, "inner maximization does not reduce loss")
print("Game-theory lesson: robust learning optimizes against an adaptive perturbation.")

Demo 18: LP duality proof sketch

This demo turns the approved TOC concept into a small executable game.

Code cell 39

header("Demo 18 - LP duality proof sketch: payoff heatmap")
A = np.array([[2.0, -1.0, 0.0], [0.0, 1.0, -2.0], [-1.0, 3.0, 1.0]])
# Visualize the row player's payoffs with a diverging colormap.
fig, ax = plt.subplots()
image = ax.imshow(A, cmap="RdBu_r")
ax.set_title("Row-player payoff matrix")
ax.set_xlabel("Column action")
ax.set_ylabel("Row action")
fig.colorbar(image, ax=ax, label="Payoff")
fig.tight_layout()
plt.show()
plt.close(fig)
best_rows = np.argmax(A, axis=0)
print("Best row responses by column action:", best_rows.tolist())
check_true(len(best_rows) == A.shape[1], "one row best response per column action")
print("Game-theory lesson: payoff geometry determines best responses.")

Demo 19: relation to Nash equilibrium

This demo turns the approved TOC concept into a small executable game.

Code cell 41

header("Demo 19 - relation to Nash equilibrium: pure Nash equilibria")
# Prisoner's-dilemma style bimatrix: A holds row payoffs, B column payoffs.
A = np.array([[3.0, 0.0], [5.0, 1.0]])
B = np.array([[3.0, 5.0], [0.0, 1.0]])
equilibria = pure_nash(A, B)
print("Pure Nash equilibria:", equilibria)
check_true((1, 1) in equilibria, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")

Demo 20: finite vs continuous games preview

This demo turns the approved TOC concept into a small executable game.

Code cell 43

header("Demo 20 - finite vs continuous games preview: matching pennies mixed strategy")
# Matching pennies is zero-sum; uniform mixing by both players yields value 0.
A = np.array([[1.0, -1.0], [-1.0, 1.0]])
uniform = np.full(2, 0.5)
value = expected_payoff(A, uniform, uniform)
print("Expected payoff at mixed equilibrium:", value)
check_close(value, 0.0, name="matching pennies value")
print("Game-theory lesson: randomization can remove profitable pure deviations.")

Demo 21: linear programming solution

This demo turns the approved TOC concept into a small executable game.

Code cell 45

header("Demo 21 - linear programming solution: minimax grid value")
# Brute-force both players' guarantees for matching pennies on a 101-point grid.
A = np.array([[1.0, -1.0], [-1.0, 1.0]])
maximin, minimax = grid_zero_sum_value(A, grid=101)
print("Grid maximin:", round(maximin, 4))
print("Grid minimax:", round(minimax, 4))
check_true(abs(maximin - minimax) < 0.05, "grid approximation nearly matches minimax value")
print("Game-theory lesson: zero-sum optimal attack and defense meet at one value.")

Demo 22: no-regret dynamics

This demo turns the approved TOC concept into a small executable game.

Code cell 47

header("Demo 22 - no-regret dynamics: fictitious play in rock-paper-scissors")
trajectory = fictitious_play_rps(steps=180)
final_policy = trajectory[-1]
print("Final empirical row policy:", np.round(final_policy, 3).tolist())
check_true(np.all(np.abs(final_policy - 1/3) < 0.15), "empirical policy approaches the mixed equilibrium region")
# Plot how each action's empirical frequency evolves over the run.
fig, ax = plt.subplots()
for action, (label, color_key) in enumerate(
    [("Rock", "primary"), ("Paper", "secondary"), ("Scissors", "tertiary")]
):
    ax.plot(trajectory[:, action], color=COLORS[color_key], label=label)
ax.set_title("Fictitious play empirical strategy")
ax.set_xlabel("Iteration")
ax.set_ylabel("Probability")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)
print("Game-theory lesson: repeated adaptation can approximate equilibrium behavior.")

Demo 23: multiplicative weights preview

This demo turns the approved TOC concept into a small executable game.

Code cell 49

header("Demo 23 - multiplicative weights preview: adversarial inner maximization")
adv_delta, adv_loss = pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5)
clean_loss = (0.25 - 1.0) ** 2  # squared error with no perturbation
print("Adversarial delta:", adv_delta)
print("Clean loss:", round(clean_loss, 4))
print("Adversarial loss:", round(adv_loss, 4))
check_true(adv_loss >= clean_loss, "inner maximization does not reduce loss")
print("Game-theory lesson: robust learning optimizes against an adaptive perturbation.")

Demo 24: exploitability

This demo turns the approved TOC concept into a small executable game.

Code cell 51

header("Demo 24 - exploitability: payoff heatmap")
A = np.array([[2.0, -1.0, 0.0], [0.0, 1.0, -2.0], [-1.0, 3.0, 1.0]])
# Visualize the row player's payoffs with a diverging colormap.
fig, ax = plt.subplots()
image = ax.imshow(A, cmap="RdBu_r")
ax.set_title("Row-player payoff matrix")
ax.set_xlabel("Column action")
ax.set_ylabel("Row action")
fig.colorbar(image, ax=ax, label="Payoff")
fig.tight_layout()
plt.show()
plt.close(fig)
best_rows = np.argmax(A, axis=0)
print("Best row responses by column action:", best_rows.tolist())
check_true(len(best_rows) == A.shape[1], "one row best response per column action")
print("Game-theory lesson: payoff geometry determines best responses.")

Demo 25: approximate equilibrium

This demo turns the approved TOC concept into a small executable game.

Code cell 53

header("Demo 25 - approximate equilibrium: pure Nash equilibria")
# Prisoner's-dilemma style bimatrix: A holds row payoffs, B column payoffs.
A = np.array([[3.0, 0.0], [5.0, 1.0]])
B = np.array([[3.0, 5.0], [0.0, 1.0]])
equilibria = pure_nash(A, B)
print("Pure Nash equilibria:", equilibria)
check_true((1, 1) in equilibria, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")