Exercises Notebook
Converted from
exercises.ipynb for web reading.
Exercises: Positional Encodings
There are 10 exercises. Exercises 1-4 cover absolute and relative basics, 5-7 cover RoPE and ALiBi, and 8-10 cover systems and long-context behavior.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# Use seaborn's theme when the package is importable; otherwise apply the
# "seaborn-v0_8-whitegrid" matplotlib style. HAS_SNS records which path ran.
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

# Shared figure defaults, grouped by concern. Keys are applied in the same
# order as a single combined update would apply them.
# -- figure geometry
mpl.rcParams.update({"figure.figsize": (10, 6), "figure.dpi": 120})
# -- text sizes
mpl.rcParams.update({
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
})
# -- legend, line and spine appearance
mpl.rcParams.update({
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
})
# -- saved-figure output
mpl.rcParams.update({"savefig.bbox": "tight", "savefig.dpi": 150})

# Fixed seed for NumPy's global random state.
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
# Named hex colors for plots, keyed by role.
COLORS = dict(
    primary="#0077BB",
    secondary="#EE7733",
    tertiary="#009988",
    error="#CC3311",
    neutral="#555555",
    highlight="#EE3377",
)
def header(title):
    """Print *title* framed by 72-character '=' rules, preceded by a blank line."""
    rule = "=" * 72
    # One print call; sep="\n" yields the same three lines as separate prints.
    print("\n" + rule, title, rule, sep="\n")
def check_true(condition, name):
    """Print a PASS/FAIL line for *name* and assert that *condition* is truthy.

    Raises:
        AssertionError: with *name* as the message when the condition is falsy.
    """
    passed = bool(condition)
    status = "PASS" if passed else "FAIL"
    print(f"{status} - {name}")
    assert passed, name
def check_close(value, target, tol=1e-8, name="value"):
    """Print a PASS/FAIL line and assert that |value - target| <= tol.

    Both inputs are converted with ``float`` before comparison, and the
    printed line shows them with six decimal places.

    Raises:
        AssertionError: with *name* as the message when the gap exceeds *tol*.
    """
    got, expected = float(value), float(target)
    within = abs(got - expected) <= tol
    status = "PASS" if within else "FAIL"
    print(f"{status} - {name}: got {got:.6f}, expected {expected:.6f}")
    assert within, name
def sinusoidal_positions(n, d):
    """Return an (n, d) table of sinusoidal position encodings.

    Column c uses the rate 1 / 10000 ** (2 * (c // 2) / d), so each
    even/odd column pair shares one rate. Even columns hold the sine of
    position * rate and odd columns hold the cosine.
    """
    positions = np.arange(n)[:, None]
    columns = np.arange(d)[None, :]
    # Each (even, odd) column pair shares the exponent 2 * pair_index / d.
    exponent = (2 * (columns // 2)) / d
    inv_freq = 1 / np.power(10000, exponent)
    phase = positions * inv_freq
    table = np.zeros((n, d))
    table[:, 0::2] = np.sin(phase[:, 0::2])
    table[:, 1::2] = np.cos(phase[:, 1::2])
    return table
def relative_offsets(T):
    """Return the (T, T) integer matrix whose [i, j] entry is i - j."""
    idx = np.arange(T)
    # Column vector minus row vector broadcasts to the full T x T grid.
    return idx[:, None] - idx[None, :]
def rope_rotate(x, position, base=10000.0):
    """Rotate consecutive pairs of *x* by position-dependent angles.

    *x* is converted to a float array and must have even length. The pair
    starting at index k (k = 0, 2, 4, ...) is rotated by the angle
    position / base ** (k / len(x)). A new array is returned and the input
    is left untouched.

    Raises:
        AssertionError: if the length of *x* is odd.
    """
    vec = np.asarray(x, dtype=float)
    width = len(vec)
    assert width % 2 == 0
    rotated = vec.copy()
    for start in range(0, width, 2):
        angle = position / (base ** (start / width))
        cos_a, sin_a = np.cos(angle), np.sin(angle)
        # Read from the unrotated input so both outputs use the original pair.
        first, second = vec[start], vec[start + 1]
        rotated[start] = cos_a * first - sin_a * second
        rotated[start + 1] = sin_a * first + cos_a * second
    return rotated
def alibi_bias(T, slope=-0.25):
    """Return a (T, T) bias table: *slope* times the non-negative row-minus-column offset.

    Entries whose column index exceeds the row index have a negative offset,
    which is clamped to zero before scaling.
    """
    lookback = relative_offsets(T).clip(min=0)
    return slope * lookback
# Confirmation message printed once the helper cell has run to the end.
print("Positional-encoding helpers ready.")
Exercise 1: Sinusoidal row (*)
Compute a small sinusoidal encoding row. State the scheme, compute the result, and explain the LLM consequence.
Code cell 5
# Your Solution - Exercise 1
# Replace None with your own sinusoidal encoding row; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 6
# Solution - Exercise 1
header("Exercise 1: Sinusoidal row")
# Build a two-position, four-column table and show the row for position 1.
pe = sinusoidal_positions(n=2, d=4)
print("Position 1:", pe[1].round(4).tolist())
check_true(pe.shape == (2, 4), "table has requested shape")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 2: Additive position (*)
Add token and position vectors. State the scheme, compute the result, and explain the LLM consequence.
Code cell 8
# Your Solution - Exercise 2
# Replace None with your token-plus-position vector; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 9
# Solution - Exercise 2
header("Exercise 2: Additive position")
# Token vector of ones plus the position-0 row of a width-4 sinusoidal table.
x = np.ones(4)
p = sinusoidal_positions(n=1, d=4)[0]
h = np.add(x, p)
print("h:", h.round(4).tolist())
check_true(h.shape == (4,), "addition keeps width")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 3: Relative offsets (*)
Build query-minus-key offsets. State the scheme, compute the result, and explain the LLM consequence.
Code cell 11
# Your Solution - Exercise 3
# Replace None with your query-minus-key offset matrix; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 12
# Solution - Exercise 3
header("Exercise 3: Relative offsets")
R = relative_offsets(3)
print(R)
# Below the diagonal the offset is positive, above it negative.
check_true((R[2, 0], R[0, 2]) == (2, -2), "offset signs are correct")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 4: Relative bias (**)
Convert offsets to score penalties. State the scheme, compute the result, and explain the LLM consequence.
Code cell 14
# Your Solution - Exercise 4
# Replace None with your offset-to-penalty bias matrix; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 15
# Solution - Exercise 4
header("Exercise 4: Relative bias")
# Penalty is the negated absolute offset, so it is symmetric in i and j.
bias = np.negative(np.abs(relative_offsets(3)))
print(bias)
check_close(bias[0, 2], -2.0, name="distance penalty")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 5: RoPE norm (**)
Verify rotation preserves vector norm. State the scheme, compute the result, and explain the LLM consequence.
Code cell 17
# Your Solution - Exercise 5
# Replace None with your rotated vector (or its norm); the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 18
# Solution - Exercise 5
header("Exercise 5: RoPE norm")
x = np.array([2.0, 1.0])
y = rope_rotate(x, position=5)
print("rotated:", y.round(4).tolist())
# The rotated vector should have the same Euclidean length as the input.
check_close(np.linalg.norm(y), np.linalg.norm(x), name="rotation norm")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 6: RoPE relative dot (**)
Check a two-dimensional relative identity. State the scheme, compute the result, and explain the LLM consequence.
Code cell 20
# Your Solution - Exercise 6
# Replace None with your two-dimensional relative dot-product result; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 21
# Solution - Exercise 6
header("Exercise 6: RoPE relative dot")
q = np.array([1.0, 0.0])
k = np.array([0.0, 1.0])
# Left: rotate q to position 5 and k to position 2, then take the dot product.
left = rope_rotate(q, 5) @ rope_rotate(k, 2)
# Right: leave q unrotated and rotate k by the position difference 2 - 5 = -3.
right = q @ rope_rotate(k, -3)
print(left, right)
check_close(left, right, tol=1e-8, name="relative identity")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 7: ALiBi matrix (**)
Build a causal distance-bias table. State the scheme, compute the result, and explain the LLM consequence.
Code cell 23
# Your Solution - Exercise 7
# Replace None with your causal distance-bias table; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 24
# Solution - Exercise 7
header("Exercise 7: ALiBi matrix")
bias = alibi_bias(T=4, slope=-0.5)
print(bias)
# Row 3, column 0 is three steps back: 3 * -0.5 = -1.5.
check_close(bias[3, 0], -1.5, name="distance three")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 8: Learned table size (***)
Compute position-embedding parameters. State the scheme, compute the result, and explain the LLM consequence.
Code cell 26
# Your Solution - Exercise 8
# Replace None with your position-embedding parameter count; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 27
# Solution - Exercise 8
header("Exercise 8: Learned table size")
# One row per position times one column per feature.
t_max, d_model = 2048, 1024
params = t_max * d_model
print("params:", params)
check_true(params == 2_097_152, "Tmax times width")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 9: Decode position ids (***)
Track generated token positions. State the scheme, compute the result, and explain the LLM consequence.
Code cell 29
# Your Solution - Exercise 9
# Replace None with your generated-token position ids; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 30
# Solution - Exercise 9
header("Exercise 9: Decode position ids")
prefix = 8
# Three generated tokens take the ids immediately after the prefix.
pos = np.arange(prefix, prefix + 3)
print("positions:", pos.tolist())
check_true(pos.tolist() == [8, 9, 10], "decode positions continue prefix")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")
Exercise 10: Length interpolation (***)
Map long positions into trained range. State the scheme, compute the result, and explain the LLM consequence.
Code cell 32
# Your Solution - Exercise 10
# Replace None with your mapped (interpolated) position; the print below echoes it.
answer = None
print("Your answer placeholder:", answer)
Code cell 33
# Solution - Exercise 10
header("Exercise 10: Length interpolation")
trained_len, target_len = 2048, 8192
# Shrink every position by trained_len / target_len, then map the last index.
scale = trained_len / target_len
mapped = (target_len - 1) * scale
print("mapped last position:", mapped)
check_true(mapped < trained_len, "interpolation keeps mapped id in training range")
print("\nTakeaway: positional encoding choices control order, distance, extrapolation, and decode-time correctness.")