Exercises NotebookMath for LLMs

Matrix Operations

Linear Algebra Basics / Matrix Operations

Run notebook
Exercises Notebook

Exercises Notebook

Converted from exercises.ipynb for web reading.

Matrix Operations - Exercises

10 graded exercises covering the full linear algebra basics arc, from computation to ML-facing matrix workflows.

FormatDescription
ProblemMarkdown cell with task description
Your SolutionCode cell for learner work
SolutionReference solution with checks

Difficulty: straightforward -> moderate -> challenging.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)
print("Plot setup complete.")

Code cell 3

import numpy as np
import numpy.linalg as la
import scipy.linalg as sla
from scipy import stats

COLORS = {
    "primary": "#0077BB",
    "secondary": "#EE7733",
    "tertiary": "#009988",
    "error": "#CC3311",
    "neutral": "#555555",
    "highlight": "#EE3377",
}
HAS_MPL = True
np.set_printoptions(precision=8, suppress=True)
np.random.seed(42)

def header(title):
    print("\n" + "=" * len(title))
    print(title)
    print("=" * len(title))

def check_true(name, cond):
    ok = bool(cond)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    return ok

def check_close(name, got, expected, tol=1e-8):
    ok = np.allclose(got, expected, atol=tol, rtol=tol)
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {got}, expected {expected}")
    return ok

def check(name, got, expected, tol=1e-8):
    return check_close(name, got, expected, tol=tol)

def softmax(z, axis=-1, tau=1.0):
    z = np.asarray(z, dtype=float) / float(tau)
    z = z - np.max(z, axis=axis, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=axis, keepdims=True)

def cosine_similarity(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    return float(a @ b / (la.norm(a) * la.norm(b) + 1e-12))

def numerical_rank(A, tol=1e-10):
    return int(np.sum(la.svd(A, compute_uv=False) > tol))

def orthonormal_basis(A, tol=1e-10):
    Q, R = la.qr(A)
    keep = np.abs(np.diag(R)) > tol
    return Q[:, keep]

def null_space(A, tol=1e-10):
    U, S, Vt = la.svd(A)
    return Vt[S.size:,:].T if S.size < Vt.shape[0] else Vt[S <= tol,:].T



# Compatibility helpers used by the Chapter 02 theory and exercise cells.
def null_space(A, tol=1e-10):
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T

svd_null_space = null_space

def gram_schmidt(vectors, tol=1e-10):
    A = np.asarray(vectors, dtype=float)
    if A.ndim == 1:
        A = A.reshape(1, -1)
    basis = []
    for v in A:
        w = v.astype(float).copy()
        for q in basis:
            w = w - np.dot(w, q) * q
        norm = la.norm(w)
        if norm > tol:
            basis.append(w / norm)
    return np.array(basis)

def projection_matrix_from_columns(A, tol=1e-10):
    Q = orthonormal_basis(np.asarray(A, dtype=float), tol=tol)
    return Q @ Q.T


def random_unit_vectors(n, d):
    X = np.random.randn(n, d)
    return X / np.maximum(la.norm(X, axis=1, keepdims=True), 1e-12)

def pairwise_distances(X):
    X = np.asarray(X, dtype=float)
    diff = X[:, None, :] - X[None, :, :]
    return la.norm(diff, axis=-1)


def normalize(x, axis=None, tol=1e-12):
    x = np.asarray(x, dtype=float)
    norm = la.norm(x, axis=axis, keepdims=True)
    return x / np.maximum(norm, tol)

def frobenius_inner(A, B):
    return float(np.sum(np.asarray(A, dtype=float) * np.asarray(B, dtype=float)))

def outer_sum_product(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    return sum(np.outer(A[:, k], B[k, :]) for k in range(A.shape[1]))

def softmax_rows(X):
    return softmax(X, axis=1)

def col_space(A, tol=1e-10):
    return orthonormal_basis(np.asarray(A, dtype=float), tol=tol)

def row_space(A, tol=1e-10):
    return orthonormal_basis(np.asarray(A, dtype=float).T, tol=tol).T

def rref(A, tol=1e-10):
    R = np.array(A, dtype=float, copy=True)
    m, n = R.shape
    pivots = []
    row = 0
    for col in range(n):
        pivot = row + int(np.argmax(np.abs(R[row:, col]))) if row < m else row
        if row >= m or abs(R[pivot, col]) <= tol:
            continue
        if pivot != row:
            R[[row, pivot]] = R[[pivot, row]]
        R[row] = R[row] / R[row, col]
        for r in range(m):
            if r != row:
                R[r] = R[r] - R[r, col] * R[row]
        pivots.append(col)
        row += 1
        if row == m:
            break
    R[np.abs(R) < tol] = 0.0
    return R, pivots

def nullspace_basis(A, tol=1e-10):
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T, rank

print("Chapter helper setup complete.")

Exercise 1: Shapes, Rows, Columns, and Special Matrices

Task: Build fluency with the basic objects before doing any heavy algebra.

Requirements:

  • Create one rectangular matrix, one identity matrix, one diagonal matrix, and one symmetric matrix.
  • Write helpers row(A, i) and col(A, j).
  • Verify which matrices are square and which are symmetric.

Written part:

  • Explain why only square matrices can even be candidates for ordinary inversion.

Code cell 5

# Your Solution
# Exercise 1 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 1.")

Code cell 6

# Solution
# Exercise 1 - reference solution

def row(A, i):
    return np.asarray(A)[i, :]


def col(A, j):
    return np.asarray(A)[:, j]


def is_square(A):
    A = np.asarray(A)
    return A.shape[0] == A.shape[1]


def is_symmetric(A):
    A = np.asarray(A, dtype=float)
    return is_square(A) and np.allclose(A, A.T)


rect = np.array([[1., 2., 3.], [4., 5., 6.]])
I = np.eye(3)
D = np.diag([2., -1., 4.])
S = np.array([[2., 1., 3.], [1., 4., 5.], [3., 5., 6.]])

print("row(rect, 1) =", row(rect, 1))
print("col(rect, 2) =", col(rect, 2))
print("is_square(rect) =", is_square(rect))
print("is_square(I) =", is_square(I))
print("is_symmetric(D) =", is_symmetric(D))
print("is_symmetric(S) =", is_symmetric(S))

print("Exercise 1 solution complete.")

Exercise 2: Elementwise Operations, Trace, and Transpose

Task: Implement the basic non-GEMM matrix operations directly.

Requirements:

  • Implement matrix addition and Hadamard product without using + or * on whole arrays.
  • Implement trace_manual(A).
  • Verify (AB)^T = B^T A^T on one concrete example.

Written part:

  • Explain why transpose reverses order but addition does not.

Code cell 8

# Your Solution
# Exercise 2 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 2.")

Code cell 9

# Solution
# Exercise 2 - reference solution

def add_manual(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    C = np.zeros_like(A)
    for i in range(A.shape[0]):
        for j in range(A.shape[1]):
            C[i, j] = A[i, j] + B[i, j]
    return C


def hadamard_manual(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    C = np.zeros_like(A)
    for i in range(A.shape[0]):
        for j in range(A.shape[1]):
            C[i, j] = A[i, j] * B[i, j]
    return C


def trace_manual(A):
    A = np.asarray(A, dtype=float)
    total = 0.0
    for i in range(A.shape[0]):
        total += A[i, i]
    return total


A = np.array([[1., 2.], [3., 4.]])
B = np.array([[5., 6.], [7., 8.]])
print("manual A+B =\n", add_manual(A, B))
print("manual A odot B =\n", hadamard_manual(A, B))
print("trace_manual(A) =", trace_manual(A))
print("trace numpy(A) =", np.trace(A))
print("(AB)^T == B^T A^T ?", np.allclose((A @ B).T, B.T @ A.T))

print("Exercise 2 solution complete.")

Exercise 3: Matrix Multiplication by Formula and by Outer Products

Task: Implement matrix multiplication manually and then reproduce the same result as a sum of outer products.

Requirements:

  • Implement matmul_manual(A, B) with three loops.
  • Implement matmul_outer(A, B) using outer products of columns/rows.
  • Verify both agree with A @ B.

Written part:

  • Explain why rank(AB) cannot exceed either rank(A) or rank(B).

Code cell 11

# Your Solution
# Exercise 3 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 3.")

Code cell 12

# Solution
# Exercise 3 - reference solution

def matmul_manual(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    m, p = A.shape
    p2, n = B.shape
    if p != p2:
        raise ValueError("inner dimensions must match")
    C = np.zeros((m, n), dtype=float)
    for i in range(m):
        for j in range(n):
            for k in range(p):
                C[i, j] += A[i, k] * B[k, j]
    return C


def matmul_outer(A, B):
    return outer_sum_product(A, B)


A = np.array([[1., 2., 3.], [4., 5., 6.]])
B = np.array([[1., 0.], [0., 1.], [1., 1.]])
C_manual = matmul_manual(A, B)
C_outer = matmul_outer(A, B)
C_np = A @ B
print("manual =\n", C_manual)
print("outer-sum =\n", C_outer)
print("numpy =\n", C_np)
print("manual matches numpy ?", np.allclose(C_manual, C_np))
print("outer matches numpy ?", np.allclose(C_outer, C_np))
print("rank(A), rank(B), rank(AB) =", np.linalg.matrix_rank(A), np.linalg.matrix_rank(B), np.linalg.matrix_rank(C_np))

print("Exercise 3 solution complete.")

Exercise 4: Dense Layer and Attention Shape Tracking

Task: Read a neural-network computation as a chain of matrix operations.

Requirements:

  • Implement a batched dense layer Y = X W^T + b.
  • Implement a single-head attention pipeline.
  • Print the shape of every intermediate object.
  • Verify every row of the attention matrix sums to 1.

Written part:

  • Explain why Q K^T is square when Q and K have the same number of tokens.

Code cell 14

# Your Solution
# Exercise 4 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 4.")

Code cell 15

# Solution
# Exercise 4 - reference solution

def dense_layer(X, W, b):
    return X @ W.T + b


def single_head_attention(Q, K, V):
    dk = Q.shape[1]
    S = (Q @ K.T) / np.sqrt(dk)
    A = softmax_rows(S)
    O = A @ V
    return S, A, O


X = np.array([[1.0, 0.5, -1.0], [0.0, 2.0, 1.0]])
W = np.array([[1.0, -1.0, 0.5], [0.0, 2.0, 1.0]])
b = np.array([0.1, -0.2])
Y = dense_layer(X, W, b)
print("X shape", X.shape, "W shape", W.shape, "b shape", b.shape, "Y shape", Y.shape)
print("Y =\n", Y)

Q = np.array([[1.0, 0.0], [0.5, 1.0], [1.0, 1.0], [0.0, 1.0]])
K = np.array([[1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.5, 0.5]])
V = np.array([[2.0, 0.0], [1.0, 1.0], [0.0, 2.0], [1.5, 0.5]])
S, A, O = single_head_attention(Q, K, V)
print("\nQ shape", Q.shape, "K shape", K.shape, "V shape", V.shape)
print("S shape", S.shape)
print("A shape", A.shape)
print("O shape", O.shape)
print("row sums of A =", A.sum(axis=1))
print("QK^T symmetric when Q = K ?", np.allclose(Q @ Q.T, (Q @ Q.T).T))

print("Exercise 4 solution complete.")

Exercise 5: Inverse and the 2x2 Formula

Task: Implement the explicit inverse formula for a 2x2 matrix and verify it.

Requirements:

  • Implement inverse_2x2(A).
  • Raise an error or return None if the determinant is zero.
  • Verify A A^{-1} = I and A^{-1} A = I numerically.

Written part:

  • Explain why determinant zero means inverse cannot exist.

Code cell 17

# Your Solution
# Exercise 5 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 5.")

Code cell 18

# Solution
# Exercise 5 - reference solution

def inverse_2x2(A):
    A = np.asarray(A, dtype=float)
    a, b = A[0, 0], A[0, 1]
    c, d = A[1, 0], A[1, 1]
    det = a * d - b * c
    if abs(det) < 1e-12:
        raise ValueError("matrix is singular")
    return (1.0 / det) * np.array([[d, -b], [-c, a]], dtype=float)


A = np.array([[1.0, 2.0], [3.0, 4.0]])
A_inv = inverse_2x2(A)
print("A^{-1} =\n", A_inv)
print("A A^{-1} =\n", A @ A_inv)
print("A^{-1} A =\n", A_inv @ A)

singular = np.array([[1.0, 2.0], [2.0, 4.0]])
try:
    inverse_2x2(singular)
except ValueError as e:
    print("singular example correctly failed:", e)

print("Exercise 5 solution complete.")

Exercise 6: Solve vs Explicit Inverse and Conditioning

Task: Compare solving a system directly with using an explicit inverse, and inspect the role of condition number.

Requirements:

  • Solve A x = b with both np.linalg.solve and np.linalg.inv(A) @ b.
  • Compute the condition number of a well-conditioned and an ill-conditioned matrix.
  • Show that a tiny perturbation to b causes a much bigger change in the solution when the matrix is ill-conditioned.

Written part:

  • Explain why ill-conditioning is a geometry problem, not just a floating-point problem.

Code cell 20

# Your Solution
# Exercise 6 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 6.")

Code cell 21

# Solution
# Exercise 6 - reference solution

A_good = np.array([[3.0, 1.0], [1.0, 2.0]])
b_good = np.array([1.0, 0.0])
print("good matrix condition number =", np.linalg.cond(A_good))
print("solve =", np.linalg.solve(A_good, b_good))
print("inv @ b =", np.linalg.inv(A_good) @ b_good)

A_bad = np.array([[1.0, 1.0], [1.0, 1.0001]])
b_bad = np.array([2.0, 2.0001])
x_bad = np.linalg.solve(A_bad, b_bad)
x_bad_perturbed = np.linalg.solve(A_bad, b_bad + np.array([0.0, 1e-4]))
print("\nbad matrix condition number =", np.linalg.cond(A_bad))
print("x_bad =", x_bad)
print("x_bad_perturbed =", x_bad_perturbed)
print("solution change =", x_bad_perturbed - x_bad)

print("Exercise 6 solution complete.")

Exercise 7: Pseudo-Inverse and Least Squares Geometry

Task: Use the pseudo-inverse to solve an overdetermined system and verify the projection geometry.

Requirements:

  • Compute x* = A^+ b.
  • Compute the projection A x*.
  • Verify the residual is orthogonal to the column space by checking A^T (b - A x*).

Written part:

  • Explain why A A^+ behaves like a projection matrix.

Code cell 23

# Your Solution
# Exercise 7 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 7.")

Code cell 24

# Solution
# Exercise 7 - reference solution

A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
b = np.array([1.0, 2.0, 2.5])
A_pinv = np.linalg.pinv(A)
x_star = A_pinv @ b
proj = A @ x_star
residual = b - proj
P = A @ A_pinv

print("A^+ =\n", A_pinv)
print("x* =", x_star)
print("projection A x* =", proj)
print("residual =", residual)
print("A^T residual =", A.T @ residual)
print("P^2 == P ?", np.allclose(P @ P, P))
print("P^T == P ?", np.allclose(P.T, P))

print("Exercise 7 solution complete.")

Exercise 8: Determinant and Row Operations

Task: Verify determinant identities experimentally.

Requirements:

  • Compute determinant of a matrix.
  • Swap two rows and show the sign flips.
  • Add a multiple of one row to another and show the determinant is unchanged.
  • Verify det(AB) = det(A) det(B) on one example.

Written part:

  • Explain why determinant zero means the map collapses volume.

Code cell 26

# Your Solution
# Exercise 8 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 8.")

Code cell 27

# Solution
# Exercise 8 - reference solution

A = np.array([[2.0, 1.0, 1.0], [4.0, 3.0, 3.0], [8.0, 7.0, 9.0]])
det_A = np.linalg.det(A)

A_swap = A[[1, 0, 2], :]
A_row_add = A.copy()
A_row_add[1, :] = A_row_add[1, :] + 2 * A_row_add[0, :]

B = np.array([[1.0, 2.0, 0.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0]])

print("det(A) =", det_A)
print("det(row-swapped A) =", np.linalg.det(A_swap))
print("sign flipped?", np.allclose(np.linalg.det(A_swap), -det_A))
print("det(row-added A) =", np.linalg.det(A_row_add))
print("unchanged?", np.allclose(np.linalg.det(A_row_add), det_A))
print("det(AB) =", np.linalg.det(A @ B))
print("det(A)det(B) =", np.linalg.det(A) * np.linalg.det(B))

print("Exercise 8 solution complete.")

Exercise 9 (★★★): Attention as Matrix Multiplication

For query, key, and value matrices Q,K,VQ,K,V, scaled dot-product attention is

A=softmax(QKd),O=AV.A=\operatorname{softmax}\left(\frac{QK^\top}{\sqrt{d}}\right),\qquad O=AV.

Verify all shapes and show that each row of AA sums to one.

Code cell 29

# Your Solution
# Exercise 9 - learner workspace
# Compute attention scores, weights, and output shapes.
print("Learner workspace ready for Exercise 9.")

Code cell 30

# Solution
# Exercise 9 - attention matrix products
header("Exercise 9: attention matrix products")

Q = np.array([[1.0, 0.0], [0.5, 1.0], [-0.5, 0.2]])
K = np.array([[0.2, 1.0], [1.0, -0.3], [0.0, 0.5], [0.7, 0.7]])
V = np.array([[1.0, 0.0], [0.0, 2.0], [1.0, 1.0], [-1.0, 0.5]])
S = Q @ K.T / np.sqrt(Q.shape[1])
A = softmax(S, axis=1)
O = A @ V
print("scores shape:", S.shape, "weights shape:", A.shape, "output shape:", O.shape)
check_close("attention rows sum to 1", A.sum(axis=1), np.ones(Q.shape[0]), tol=1e-12)
check_true("output has query length and value width", O.shape == (Q.shape[0], V.shape[1]))
print("Takeaway: attention is a sequence of ordinary matrix products plus row-wise normalization.")

Exercise 10 (★★★): Low-Rank Adapter Update

A LoRA-style update writes

Wnew=W+BA,W_{\text{new}}=W + BA,

where BRm×rB\in\mathbb{R}^{m\times r} and ARr×nA\in\mathbb{R}^{r\times n}. Verify that the update rank is at most rr and compare parameter counts.

Code cell 32

# Your Solution
# Exercise 10 - learner workspace
# Form BA and compare rank and parameter counts.
print("Learner workspace ready for Exercise 10.")

Code cell 33

# Solution
# Exercise 10 - LoRA low-rank update
header("Exercise 10: low-rank adapter update")

m, n, r = 6, 5, 2
rng = np.random.default_rng(42)
W = rng.normal(size=(m, n))
B = rng.normal(size=(m, r))
A = rng.normal(size=(r, n))
Delta = B @ A
print("rank(Delta):", la.matrix_rank(Delta), "rank bound:", r)
print("full params:", m*n, "LoRA params:", m*r + r*n)
check_true("rank bound", la.matrix_rank(Delta) <= r)
check_true("adapter has fewer params", m*r + r*n < m*n)
print("Takeaway: low-rank matrix multiplication is a parameter-efficient update family.")