Exercises Notebook — Math for LLMs

Positional Encodings

Math for LLMs / Positional Encodings

Run notebook
Exercises Notebook

Exercises Notebook

Converted from exercises.ipynb for web reading.

Exercises: Positional Encodings

There are 10 exercises. Exercises 1-4 cover absolute and relative basics, 5-7 cover RoPE and ALiBi, and 8-10 cover systems and long-context behavior.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Prefer seaborn's colorblind whitegrid theme; fall back to matplotlib's
# port of the same style when seaborn is not installed.
try:
    import seaborn as sns
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
else:
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True

# Shared figure defaults: readable fonts, open spines, tight high-DPI saves.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)  # reproducible randomness across notebook runs
print("Plot setup complete.")

Code cell 3


# Colorblind-safe hex palette shared by all exercise plots.
COLORS = dict(
    primary="#0077BB",
    secondary="#EE7733",
    tertiary="#009988",
    error="#CC3311",
    neutral="#555555",
    highlight="#EE3377",
)

def header(title):
    """Print *title* between 72-character '=' rules, preceded by a blank line."""
    rule = "=" * 72
    print(f"\n{rule}\n{title}\n{rule}")

def check_true(condition, name):
    """Print a PASS/FAIL line for *name*, then assert that *condition* holds."""
    ok = bool(condition)
    status = "PASS" if ok else "FAIL"
    print(f"{status} - {name}")
    assert ok, name

def check_close(value, target, tol=1e-8, name="value"):
    """Print PASS/FAIL comparing *value* to *target* within absolute *tol*,
    then assert closeness."""
    value, target = float(value), float(target)
    ok = abs(value - target) <= tol
    status = "PASS" if ok else "FAIL"
    print(f"{status} - {name}: got {value:.6f}, expected {target:.6f}")
    assert ok, name

def sinusoidal_positions(n, d):
    """Return the (n, d) sinusoidal positional-encoding table.

    Even columns hold sin(pos / 10000^(2k/d)) and odd columns the matching
    cos, following the original Transformer scheme.
    """
    positions = np.arange(n)[:, None]
    dims = np.arange(d)
    inv_freq = 1 / np.power(10000, (2 * (dims // 2)) / d)
    angles = positions * inv_freq[None, :]
    is_even = dims % 2 == 0
    # Interleave: sin where the column index is even, cos where it is odd.
    return np.where(is_even[None, :], np.sin(angles), np.cos(angles))

def relative_offsets(T):
    """Return the T x T matrix of query-minus-key offsets, R[i, j] = i - j."""
    idx = np.arange(T)
    return np.subtract.outer(idx, idx)

def rope_rotate(x, position, base=10000.0):
    """Apply RoPE: rotate each consecutive (even, odd) pair of *x* by the
    angle position / base**(k/d), where k is the pair's even index.

    Norm-preserving; dot products of rotated vectors depend only on the
    relative position difference.
    """
    x = np.asarray(x, dtype=float)
    d = len(x)
    assert d % 2 == 0
    # One angle per pair, vectorized over all pairs at once.
    theta = position / base ** (np.arange(0, d, 2) / d)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    even, odd = x[0::2], x[1::2]
    out = np.empty_like(x)
    out[0::2] = cos_t * even - sin_t * odd
    out[1::2] = sin_t * even + cos_t * odd
    return out

def alibi_bias(T, slope=-0.25):
    """Causal ALiBi-style bias table: slope * (i - j) for keys at or before
    the query (i >= j), zero for future keys."""
    i = np.arange(T)[:, None]
    j = np.arange(T)[None, :]
    return slope * np.maximum(i - j, 0)

print("Positional-encoding helpers ready.")

Exercise 1: Sinusoidal row (*)

Compute a small sinusoidal encoding row. State the scheme, compute the result, and explain the LLM consequence.

Code cell 5

# Your Solution - Exercise 1
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 6

# Solution - Exercise 1
header("Exercise 1: Sinusoidal row")
table = sinusoidal_positions(2, 4)
row_one = np.round(table[1], 4).tolist()
print("Position 1:", row_one)
check_true(table.shape == (2, 4), "table has requested shape")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 2: Additive position (*)

Add token and position vectors. State the scheme, compute the result, and explain the LLM consequence.

Code cell 8

# Your Solution - Exercise 2
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 9

# Solution - Exercise 2
header("Exercise 2: Additive position")
token_vec = np.ones(4)
pos_vec = sinusoidal_positions(1, 4)[0]
h = token_vec + pos_vec  # additive scheme: hidden state = token + position
print("h:", np.round(h, 4).tolist())
check_true(h.shape == (4,), "addition keeps width")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 3: Relative offsets (*)

Build query-minus-key offsets. State the scheme, compute the result, and explain the LLM consequence.

Code cell 11

# Your Solution - Exercise 3
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 12

# Solution - Exercise 3
header("Exercise 3: Relative offsets")
offsets = relative_offsets(3)
print(offsets)
check_true(offsets[2, 0] == 2 and offsets[0, 2] == -2, "offset signs are correct")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 4: Relative bias (**)

Convert offsets to score penalties. State the scheme, compute the result, and explain the LLM consequence.

Code cell 14

# Your Solution - Exercise 4
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 15

# Solution - Exercise 4
header("Exercise 4: Relative bias")
penalty = -np.abs(relative_offsets(3))  # symmetric distance penalty
print(penalty)
check_close(penalty[0, 2], -2.0, name="distance penalty")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 5: RoPE norm (**)

Verify rotation preserves vector norm. State the scheme, compute the result, and explain the LLM consequence.

Code cell 17

# Your Solution - Exercise 5
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 18

# Solution - Exercise 5
header("Exercise 5: RoPE norm")
vec = np.array([2.0, 1.0])
rotated = rope_rotate(vec, 5)
print("rotated:", np.round(rotated, 4).tolist())
check_close(np.linalg.norm(rotated), np.linalg.norm(vec), name="rotation norm")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 6: RoPE relative dot (**)

Check a two-dimensional relative identity. State the scheme, compute the result, and explain the LLM consequence.

Code cell 20

# Your Solution - Exercise 6
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 21

# Solution - Exercise 6
header("Exercise 6: RoPE relative dot")
query = np.array([1.0, 0.0])
key = np.array([0.0, 1.0])
# Relative identity: <R_m q, R_n k> equals <q, R_(n-m) k>.
lhs = np.dot(rope_rotate(query, 5), rope_rotate(key, 2))
rhs = np.dot(query, rope_rotate(key, -3))
print(lhs, rhs)
check_close(lhs, rhs, tol=1e-8, name="relative identity")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 7: ALiBi matrix (**)

Build a causal distance-bias table. State the scheme, compute the result, and explain the LLM consequence.

Code cell 23

# Your Solution - Exercise 7
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 24

# Solution - Exercise 7
header("Exercise 7: ALiBi matrix")
bias_table = alibi_bias(4, slope=-0.5)
print(bias_table)
check_close(bias_table[3, 0], -1.5, name="distance three")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 8: Learned table size (***)

Compute position-embedding parameters. State the scheme, compute the result, and explain the LLM consequence.

Code cell 26

# Your Solution - Exercise 8
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 27

# Solution - Exercise 8
header("Exercise 8: Learned table size")
max_positions = 2048
model_width = 1024
params = max_positions * model_width  # one learned vector per position
print("params:", params)
check_true(params == 2097152, "Tmax times width")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 9: Decode position ids (***)

Track generated token positions. State the scheme, compute the result, and explain the LLM consequence.

Code cell 29

# Your Solution - Exercise 9
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 30

# Solution - Exercise 9
header("Exercise 9: Decode position ids")
prefix_len = 8
decode_positions = prefix_len + np.arange(3)  # generated tokens continue the prefix count
print("positions:", decode_positions.tolist())
check_true(decode_positions.tolist() == [8, 9, 10], "decode positions continue prefix")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")

Exercise 10: Length interpolation (***)

Map long positions into trained range. State the scheme, compute the result, and explain the LLM consequence.

Code cell 32

# Your Solution - Exercise 10
answer = None  # TODO: replace None with your computed result for this exercise
print("Your answer placeholder:", answer)

Code cell 33

# Solution - Exercise 10
header("Exercise 10: Length interpolation")
train_len, eval_len = 2048, 8192
scale = train_len / eval_len  # shrink long positions into the trained range
mapped = (eval_len - 1) * scale
print("mapped last position:", mapped)
check_true(mapped < train_len, "interpolation keeps mapped id in training range")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")