Exercises Notebook — Math for LLMs

Multi Agent Systems

Game Theory / Multi Agent Systems

Run notebook
Exercises Notebook

Exercises Notebook

Converted from exercises.ipynb for web reading.

Exercises: Multi-Agent Systems

There are 10 exercises. Exercises 1-3 are mechanics, 4-6 are theory, and 7-10 connect game theory to AI systems.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    # No seaborn installed: fall back to matplotlib's port of the same theme.
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

# Shared figure defaults for every plot in this notebook.
_PLOT_DEFAULTS = {
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
}
mpl.rcParams.update(_PLOT_DEFAULTS)
np.random.seed(42)  # reproducible randomness across runs
print("Plot setup complete.")

Code cell 3


# Colorblind-friendly hex palette used consistently across all figures.
COLORS = dict(
    primary="#0077BB",
    secondary="#EE7733",
    tertiary="#009988",
    error="#CC3311",
    neutral="#555555",
    highlight="#EE3377",
)

def header(title):
    """Print *title* framed between two 72-character rules."""
    rule = "=" * 72
    print(f"\n{rule}\n{title}\n{rule}")

def check_true(condition, name):
    """Print a PASS/FAIL line for *condition*, then assert that it holds."""
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    assert passed, name

def check_close(value, target, tol=1e-8, name="value"):
    """Print PASS/FAIL for |value - target| <= tol, then assert closeness."""
    got, want = float(value), float(target)
    passed = abs(got - want) <= tol
    print(f"{'PASS' if passed else 'FAIL'} - {name}: got {got:.6f}, expected {want:.6f}")
    assert passed, name

def pure_nash(payoff_a, payoff_b):
    """Return all pure-strategy Nash equilibria of a bimatrix game.

    payoff_a / payoff_b are the row and column players' payoff matrices.
    Cell (i, j) is an equilibrium when row i is a best response to column j
    and column j is a best response to row i, within a 1e-12 tolerance.
    """
    a = np.asarray(payoff_a, dtype=float)
    b = np.asarray(payoff_b, dtype=float)
    tol = 1e-12
    best_vs_col = a.max(axis=0)  # row player's best payoff against each column
    best_vs_row = b.max(axis=1)  # column player's best payoff against each row
    return [
        (i, j)
        for i, j in np.ndindex(a.shape)
        if a[i, j] >= best_vs_col[j] - tol and b[i, j] >= best_vs_row[i] - tol
    ]

def expected_payoff(payoff, p, q):
    """Expected payoff p^T M q for mixed strategies p (row) and q (column)."""
    matrix = np.asarray(payoff, dtype=float)
    return float(np.asarray(p) @ matrix @ np.asarray(q))

def grid_zero_sum_value(payoff, grid=101):
    payoff = np.asarray(payoff, dtype=float)
    ps = np.linspace(0, 1, grid)
    qs = np.linspace(0, 1, grid)
    row_values = []
    for p0 in ps:
        p = np.array([p0, 1 - p0])
        row_values.append(min(expected_payoff(payoff, p, np.array([q0, 1 - q0])) for q0 in qs))
    col_values = []
    for q0 in qs:
        q = np.array([q0, 1 - q0])
        col_values.append(max(expected_payoff(payoff, np.array([p0, 1 - p0]), q) for p0 in ps))
    return float(max(row_values)), float(min(col_values))

def fictitious_play_rps(steps=200):
    """Run fictitious play on rock-paper-scissors for `steps` rounds.

    Both players best-respond to the opponent's empirical mixture
    (initialized with one pseudo-count per action). Returns an array of
    shape (steps, 3) holding player A's empirical policy after each round.
    """
    payoff = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]], dtype=float)
    counts_a, counts_b = np.ones(3), np.ones(3)
    history = np.empty((steps, 3))
    for t in range(steps):
        p = counts_a / counts_a.sum()
        q = counts_b / counts_b.sum()
        # A maximizes its payoff against B's mixture; B minimizes it (zero-sum).
        counts_a[int(np.argmax(payoff @ q))] += 1
        counts_b[int(np.argmin(p @ payoff))] += 1
        history[t] = counts_a / counts_a.sum()
    return history

def pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5, steps=20, alpha=0.05):
    """Sign-gradient ascent attack on the 1-D squared loss (theta*(x+d) - y)^2.

    Runs `steps` projected gradient steps of size `alpha` on the input
    perturbation d, clipping to the threat set [-eps, eps] after each step.
    Returns the final perturbation and the resulting (maximized) loss.
    """
    delta = 0.0
    for _ in range(steps):
        residual = theta * (x + delta) - y
        # d(loss)/d(delta); stepping in its sign increases the loss.
        grad = 2 * residual * theta
        delta = np.clip(delta + alpha * np.sign(grad), -eps, eps)
    robust_loss = (theta * (x + delta) - y) ** 2
    return float(delta), float(robust_loss)

print("Helper functions ready.")

Exercise 1: many learners sharing one environment (*)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 5

# Your Solution - Exercise 1
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 6

# Solution
header("Exercise 1: Multi-Agent Systems")
# Prisoner's-dilemma-style payoffs: the second action dominates for both agents.
row_payoff = np.array([[2.0, 0.0], [3.0, 1.0]])
col_payoff = np.array([[2.0, 3.0], [0.0, 1.0]])
equilibria = pure_nash(row_payoff, col_payoff)
check_true((1, 1) in equilibria, "bottom-right is stable")
print("Pure equilibria:", equilibria)
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 2: nonstationarity (*)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 8

# Your Solution - Exercise 2
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 9

# Solution
header("Exercise 2: Multi-Agent Systems")
# Matching pennies: at the uniform mixed profile the expected value is zero.
payoff = np.array([[1.0, -1.0], [-1.0, 1.0]])
uniform = np.array([0.5, 0.5])
value = expected_payoff(payoff, uniform, uniform)
check_close(value, 0.0, name="zero-sum value")
print("Value:", value)
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 3: cooperation vs competition (*)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 11

# Your Solution - Exercise 3
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 12

# Solution
header("Exercise 3: Multi-Agent Systems")
# Grid search brackets the minimax value of matching pennies from both sides.
payoff = np.array([[1.0, -1.0], [-1.0, 1.0]])
lower, upper = grid_zero_sum_value(payoff, grid=51)
check_true(abs(upper - lower) < 0.08, "grid minimax gap is small")
print("Lower and upper values:", round(lower, 3), round(upper, 3))
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 4: communication and coordination (**)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 14

# Your Solution - Exercise 4
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 15

# Solution
header("Exercise 4: Multi-Agent Systems")
# Fictitious play on RPS; the empirical policy must be a valid distribution.
trajectory = fictitious_play_rps(steps=90)
final_policy = trajectory[-1]
check_true(abs(final_policy.sum() - 1.0) < 1e-8, "policy sums to one")
print("Final policy:", np.round(final_policy, 3).tolist())
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 5: emergent behavior in AI systems (**)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 17

# Your Solution - Exercise 5
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 18

# Solution
header("Exercise 5: Multi-Agent Systems")
# PGD adversary: the perturbation must stay inside the eps-ball threat set.
delta, loss = pgd_1d(x=0.25, y=1.0, theta=1.0, eps=0.5)
check_true(abs(delta) <= 0.5 + 1e-12, "attack respects threat set")
print("Delta and robust loss:", round(delta, 3), round(loss, 4))
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 6: agent set $N$ (**)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 20

# Your Solution - Exercise 6
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 21

# Solution
header("Exercise 6: Multi-Agent Systems")
# Same dominant-strategy game as Exercise 1: defection is each agent's best reply.
row_payoff = np.array([[2.0, 0.0], [3.0, 1.0]])
col_payoff = np.array([[2.0, 3.0], [0.0, 1.0]])
equilibria = pure_nash(row_payoff, col_payoff)
check_true((1, 1) in equilibria, "bottom-right is stable")
print("Pure equilibria:", equilibria)
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 7: joint action $\mathbf{a}$ (***)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 23

# Your Solution - Exercise 7
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 24

# Solution
header("Exercise 7: Multi-Agent Systems")
# Matching pennies revisited: the uniform joint action profile yields value zero.
payoff = np.array([[1.0, -1.0], [-1.0, 1.0]])
uniform = np.array([0.5, 0.5])
value = expected_payoff(payoff, uniform, uniform)
check_close(value, 0.0, name="zero-sum value")
print("Value:", value)
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 8: reward vector $\mathbf{r}$ (***)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 26

# Your Solution - Exercise 8
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 27

# Solution
header("Exercise 8: Multi-Agent Systems")
# The grid scan's lower and upper bounds should nearly meet at the game value.
payoff = np.array([[1.0, -1.0], [-1.0, 1.0]])
lower, upper = grid_zero_sum_value(payoff, grid=51)
check_true(abs(upper - lower) < 0.08, "grid minimax gap is small")
print("Lower and upper values:", round(lower, 3), round(upper, 3))
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 9: Markov game (***)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 29

# Your Solution - Exercise 9
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 30

# Solution
header("Exercise 9: Multi-Agent Systems")
# Fictitious play again: each round's empirical policy is a probability vector.
trajectory = fictitious_play_rps(steps=90)
final_policy = trajectory[-1]
check_true(abs(final_policy.sum() - 1.0) < 1e-8, "policy sums to one")
print("Final policy:", np.round(final_policy, 3).tolist())
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")

Exercise 10: joint policy $\boldsymbol{\pi}$ (***)

State the game, compute a strategic quantity, and interpret the AI relevance.

Code cell 32

# Your Solution - Exercise 10
# Work the exercise here, then store your final result in `answer`.
answer = None
print("Your answer placeholder:", answer)

Code cell 33

# Solution
header("Exercise 10: Multi-Agent Systems")
# The PGD attack's perturbation is projected back into [-eps, eps] each step.
delta, loss = pgd_1d(x=0.25, y=1.0, theta=1.0, eps=0.5)
check_true(abs(delta) <= 0.5 + 1e-12, "attack respects threat set")
print("Delta and robust loss:", round(delta, 3), round(loss, 4))
print("\nTakeaway: game-theoretic calculations ask how policies behave when other agents adapt.")