Exercises Notebook
Converted from
exercises.ipynb for web reading.
Exercises: Embedding Space Math
There are 10 exercises. Exercises 1-3 cover lookup and similarity, 4-6 cover geometry and gradients, and 7-10 cover positions and systems cost.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Prefer seaborn theming; fall back to matplotlib's bundled seaborn style
# when seaborn is not installed.
try:
    import seaborn as sns
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
else:
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True

# Shared figure defaults applied to every plot in this notebook.
_RC_DEFAULTS = {
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
}
mpl.rcParams.update(_RC_DEFAULTS)

np.random.seed(42)  # fixed seed so reruns reproduce the same figures
print("Plot setup complete.")
Code cell 3
# Named hex colors used consistently across the notebook's figures.
COLORS = dict(
    primary="#0077BB",
    secondary="#EE7733",
    tertiary="#009988",
    error="#CC3311",
    neutral="#555555",
    highlight="#EE3377",
)
def header(title):
    """Print *title* framed between two 72-character rule lines."""
    rule = "=" * 72
    print(f"\n{rule}")
    print(title)
    print(rule)
def check_true(condition, name):
    """Print a PASS/FAIL line for *condition*, then assert that it holds."""
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    assert passed, name
def check_close(value, target, tol=1e-8, name="value"):
    """Print PASS/FAIL for |value - target| <= tol, then assert it."""
    got = float(value)
    expected = float(target)
    passed = abs(got - expected) <= tol
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}: got {got:.6f}, expected {expected:.6f}")
    assert passed, name
def normalize_rows(X):
    """Scale each row of X to unit L2 norm.

    Norms are floored at 1e-12 so all-zero rows do not divide by zero.
    """
    lengths = np.linalg.norm(X, axis=1, keepdims=True)
    safe_lengths = np.maximum(lengths, 1e-12)
    return X / safe_lengths
def cosine(u, v):
    """Return the cosine similarity of 1-D vectors u and v as a float.

    The denominator is floored at 1e-12 — the same guard normalize_rows
    uses — so a zero vector yields 0.0 instead of nan from 0/0.
    """
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    return float(np.dot(u, v) / max(denom, 1e-12))
def sinusoidal_positions(n_positions, d_model):
    """Build the (n_positions, d_model) sinusoidal position table.

    Even columns carry sin, odd columns cos, with the 10000-based
    frequency schedule shared within each (sin, cos) column pair.
    """
    positions = np.arange(n_positions)[:, None]
    dims = np.arange(d_model)[None, :]
    rates = 1 / np.power(10000, (2 * (dims // 2)) / d_model)
    angles = positions * rates
    table = np.zeros((n_positions, d_model))
    table[:, ::2] = np.sin(angles[:, ::2])
    table[:, 1::2] = np.cos(angles[:, 1::2])
    return table
def rope_rotate(x, position, base=10000.0):
    """Apply a rotary position transform to a 1-D even-length vector.

    Each consecutive (even, odd) pair is rotated in its own plane by
    position / base**(k/d) radians; the rotation preserves pair norms.
    """
    vec = np.asarray(x, dtype=float)
    assert vec.shape[0] % 2 == 0
    dim = vec.shape[0]
    rotated = vec.copy()
    for k in range(0, dim, 2):
        angle = position / (base ** (k / dim))
        cos_a, sin_a = np.cos(angle), np.sin(angle)
        even, odd = vec[k], vec[k + 1]
        rotated[k] = cos_a * even - sin_a * odd
        rotated[k + 1] = sin_a * even + cos_a * odd
    return rotated
def alibi_bias(n, slope=-0.5):
    """Return an (n, n) causal distance bias: slope * (i - j) for j <= i,
    and zero above the diagonal (future positions get no penalty here)."""
    rows = np.arange(n).reshape(-1, 1)
    cols = np.arange(n).reshape(1, -1)
    causal_distance = np.maximum(rows - cols, 0)
    return slope * causal_distance
def pca2(X):
    """Project X onto its top two principal components.

    Returns (scores, singular_values): the mean-centered rows projected
    onto the first two right-singular vectors, plus all singular values.
    """
    centered = X - X.mean(axis=0, keepdims=True)
    _, singular_values, vt = np.linalg.svd(centered, full_matrices=False)
    scores = centered @ vt[:2].T
    return scores, singular_values
def softmax(logits):
    """Numerically stable softmax over a 1-D array of logits."""
    z = np.asarray(logits, dtype=float)
    shifted = np.exp(z - z.max())  # subtract max so exp never overflows
    return shifted / shifted.sum()

print("Embedding helpers ready.")
Exercise 1: Embedding lookup (*)
Convert token ids into a batch of vectors. State the shapes, compute the result, and explain the LLM consequence.
Code cell 5
# Your Solution - Exercise 1: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 6
# Solution - Exercise 1: fancy indexing with an id array IS the lookup.
header("Exercise 1: Embedding lookup")
embedding_table = np.arange(12, dtype=float).reshape(4, 3)  # vocab 4, d = 3
token_ids = np.array([[0, 2], [3, 1]])                      # B x T batch of ids
vectors = embedding_table[token_ids]                        # one row per id
print("Shape:", vectors.shape)
check_true(vectors.shape == (2, 2, 3), "B x T ids become B x T x d vectors")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 2: One-hot equivalence (*)
Show lookup equals one-hot matrix multiplication. State the shapes, compute the result, and explain the LLM consequence.
Code cell 8
# Your Solution - Exercise 2: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 9
# Solution - Exercise 2: one-hot @ E selects the same row as direct indexing.
header("Exercise 2: One-hot equivalence")
table = np.arange(15, dtype=float).reshape(5, 3)
row = 4
one_hot = np.eye(5)[row]
product = one_hot @ table
print("Product:", product.tolist())
check_true(np.allclose(product, table[row]), "one-hot lookup matches direct row")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 3: Cosine similarity (*)
Compute a nearest neighbor by angle. State the shapes, compute the result, and explain the LLM consequence.
Code cell 11
# Your Solution - Exercise 3: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 12
# Solution - Exercise 3: cosine measures angle, so scale does not matter.
header("Exercise 3: Cosine similarity")
query = np.array([1.0, 1.0])
aligned = np.array([2.0, 2.0])    # same direction as query, larger norm
opposed = np.array([-1.0, 0.0])   # points away from query
print("cos(u,v):", cosine(query, aligned), "cos(u,w):", round(cosine(query, opposed), 4))
check_true(cosine(query, aligned) > cosine(query, opposed), "aligned vector is more similar")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 4: Analogy arithmetic (**)
Recover a synthetic relation vector. State the shapes, compute the result, and explain the LLM consequence.
Code cell 14
# Your Solution - Exercise 4: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 15
# Solution - Exercise 4: c - a + b applies the a->b offset starting from c.
header("Exercise 4: Analogy arithmetic")
vec_a = np.array([1.0, 0.0])
vec_b = np.array([0.0, 1.0])
vec_c = np.array([2.0, 0.0])
analogy = vec_c - vec_a + vec_b
print("Target:", analogy.tolist())
check_true(np.allclose(analogy, [1.0, 1.0]), "offset arithmetic is consistent")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 5: Centering anisotropy (**)
Remove a dominant mean direction. State the shapes, compute the result, and explain the LLM consequence.
Code cell 17
# Your Solution - Exercise 5: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 18
# Solution - Exercise 5: subtracting the column mean removes the shared direction.
header("Exercise 5: Centering anisotropy")
points = np.array([[3.0, 1.0], [3.0, -1.0], [4.0, 0.0]])
mean_direction = points.mean(axis=0, keepdims=True)
centered = points - mean_direction
print("Mean after centering:", centered.mean(axis=0).tolist())
check_true(np.allclose(centered.mean(axis=0), 0.0), "centering removes mean vector")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 6: Softmax row gradient (**)
Compute how output rows move under cross-entropy. State the shapes, compute the result, and explain the LLM consequence.
Code cell 20
# Your Solution - Exercise 6: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 21
# Solution - Exercise 6: cross-entropy gradient is (p - onehot) outer h.
header("Exercise 6: Softmax row gradient")
hidden = np.array([1.0, 0.0])
probs = np.array([0.2, 0.3, 0.5])
target = 1
one_hot_target = np.eye(3)[target]
grad = (probs - one_hot_target)[:, None] * hidden[None, :]
print("Gradient rows:", grad.tolist())
check_true(grad[target, 0] < 0, "target row gradient is negative along h")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 7: Sinusoidal encoding (**)
Build a tiny position table. State the shapes, compute the result, and explain the LLM consequence.
Code cell 23
# Your Solution - Exercise 7: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 24
# Solution - Exercise 7: sin(0) = 0 at position zero, first (even) column.
header("Exercise 7: Sinusoidal encoding")
position_table = sinusoidal_positions(4, 6)
print("Shape:", position_table.shape)
check_close(position_table[0, 0], 0.0, name="position-zero sine")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 8: RoPE norm preservation (***)
Apply a rotary position transform. State the shapes, compute the result, and explain the LLM consequence.
Code cell 26
# Your Solution - Exercise 8: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 27
# Solution - Exercise 8: rotation changes direction but not length.
header("Exercise 8: RoPE norm preservation")
vec = np.array([1.0, 2.0])
rotated = rope_rotate(vec, position=3)
print("Rotated:", np.round(rotated, 4).tolist())
check_close(np.linalg.norm(rotated), np.linalg.norm(vec), name="RoPE preserves pair norm")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 9: ALiBi bias (***)
Create a causal distance-bias matrix. State the shapes, compute the result, and explain the LLM consequence.
Code cell 29
# Your Solution - Exercise 9: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 30
# Solution - Exercise 9: bias[i, j] = slope * (i - j) for past tokens only.
header("Exercise 9: ALiBi bias")
bias_matrix = alibi_bias(4, slope=-0.5)
print("Bias:", bias_matrix)
check_close(bias_matrix[3, 0], -1.5, name="distance three penalty")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")
Exercise 10: Parameter count (***)
Compute embedding parameters with and without tying. State the shapes, compute the result, and explain the LLM consequence.
Code cell 32
# Your Solution - Exercise 10: fill in your own computation below.
answer = None
print(f"Your answer placeholder: {answer}")
Code cell 33
# Solution - Exercise 10: tying shares one vocab x d table for input and output.
header("Exercise 10: Parameter count")
vocab_size, d_model = 50000, 4096
untied_params = 2 * vocab_size * d_model  # separate input + output tables
tied_params = vocab_size * d_model        # one shared table
print("Untied:", untied_params, "Tied:", tied_params)
check_true(untied_params == 2 * tied_params, "tying halves input/output embedding table parameters")
print("\nTakeaway: embedding math turns token ids into geometry, and geometry controls similarity, logits, attention, and memory.")