Exercises Notebook

Converted from exercises.ipynb for web reading.

Matrix Operations - Exercises

10 graded exercises covering the full linear algebra basics arc, from computation to ML-facing matrix workflows.

Format	Description
Problem	Markdown cell with task description
Your Solution	Code cell for learner work
Solution	Reference solution with checks

Difficulty: straightforward -> moderate -> challenging.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)
print("Plot setup complete.")

Code cell 3

import numpy as np
import numpy.linalg as la
import scipy.linalg as sla
from scipy import stats

COLORS = {
    "primary": "#0077BB",
    "secondary": "#EE7733",
    "tertiary": "#009988",
    "error": "#CC3311",
    "neutral": "#555555",
    "highlight": "#EE3377",
}
HAS_MPL = True
np.set_printoptions(precision=8, suppress=True)
np.random.seed(42)

def header(title):
    print("\n" + "=" * len(title))
    print(title)
    print("=" * len(title))

def check_true(name, cond):
    ok = bool(cond)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    return ok

def check_close(name, got, expected, tol=1e-8):
    ok = np.allclose(got, expected, atol=tol, rtol=tol)
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {got}, expected {expected}")
    return ok

def check(name, got, expected, tol=1e-8):
    return check_close(name, got, expected, tol=tol)

def softmax(z, axis=-1, tau=1.0):
    z = np.asarray(z, dtype=float) / float(tau)
    z = z - np.max(z, axis=axis, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=axis, keepdims=True)

def cosine_similarity(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    return float(a @ b / (la.norm(a) * la.norm(b) + 1e-12))

def numerical_rank(A, tol=1e-10):
    return int(np.sum(la.svd(A, compute_uv=False) > tol))

def orthonormal_basis(A, tol=1e-10):
    Q, R = la.qr(A)
    keep = np.abs(np.diag(R)) > tol
    return Q[:, keep]

def null_space(A, tol=1e-10):
    U, S, Vt = la.svd(A)
    return Vt[S.size:,:].T if S.size < Vt.shape[0] else Vt[S <= tol,:].T



# Compatibility helpers used by the Chapter 02 theory and exercise cells.
def null_space(A, tol=1e-10):
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T

svd_null_space = null_space

def gram_schmidt(vectors, tol=1e-10):
    A = np.asarray(vectors, dtype=float)
    if A.ndim == 1:
        A = A.reshape(1, -1)
    basis = []
    for v in A:
        w = v.astype(float).copy()
        for q in basis:
            w = w - np.dot(w, q) * q
        norm = la.norm(w)
        if norm > tol:
            basis.append(w / norm)
    return np.array(basis)

def projection_matrix_from_columns(A, tol=1e-10):
    Q = orthonormal_basis(np.asarray(A, dtype=float), tol=tol)
    return Q @ Q.T


def random_unit_vectors(n, d):
    X = np.random.randn(n, d)
    return X / np.maximum(la.norm(X, axis=1, keepdims=True), 1e-12)

def pairwise_distances(X):
    X = np.asarray(X, dtype=float)
    diff = X[:, None, :] - X[None, :, :]
    return la.norm(diff, axis=-1)


def normalize(x, axis=None, tol=1e-12):
    x = np.asarray(x, dtype=float)
    norm = la.norm(x, axis=axis, keepdims=True)
    return x / np.maximum(norm, tol)

def frobenius_inner(A, B):
    return float(np.sum(np.asarray(A, dtype=float) * np.asarray(B, dtype=float)))

def outer_sum_product(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    return sum(np.outer(A[:, k], B[k, :]) for k in range(A.shape[1]))

def softmax_rows(X):
    return softmax(X, axis=1)

def col_space(A, tol=1e-10):
    return orthonormal_basis(np.asarray(A, dtype=float), tol=tol)

def row_space(A, tol=1e-10):
    return orthonormal_basis(np.asarray(A, dtype=float).T, tol=tol).T

def rref(A, tol=1e-10):
    R = np.array(A, dtype=float, copy=True)
    m, n = R.shape
    pivots = []
    row = 0
    for col in range(n):
        pivot = row + int(np.argmax(np.abs(R[row:, col]))) if row < m else row
        if row >= m or abs(R[pivot, col]) <= tol:
            continue
        if pivot != row:
            R[[row, pivot]] = R[[pivot, row]]
        R[row] = R[row] / R[row, col]
        for r in range(m):
            if r != row:
                R[r] = R[r] - R[r, col] * R[row]
        pivots.append(col)
        row += 1
        if row == m:
            break
    R[np.abs(R) < tol] = 0.0
    return R, pivots

def nullspace_basis(A, tol=1e-10):
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T, rank

print("Chapter helper setup complete.")

Exercise 1: Shapes, Rows, Columns, and Special Matrices

Task: Build fluency with the basic objects before doing any heavy algebra.

Requirements:

Create one rectangular matrix, one identity matrix, one diagonal matrix, and one symmetric matrix.
Write helpers row(A, i) and col(A, j).
Verify which matrices are square and which are symmetric.

Written part:

Explain why only square matrices can even be candidates for ordinary inversion.

Code cell 5

# Your Solution
# Exercise 1 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 1.")

Code cell 6

# Solution
# Exercise 1 - reference solution

def row(A, i):
    return np.asarray(A)[i, :]


def col(A, j):
    return np.asarray(A)[:, j]


def is_square(A):
    A = np.asarray(A)
    return A.shape[0] == A.shape[1]


def is_symmetric(A):
    A = np.asarray(A, dtype=float)
    return is_square(A) and np.allclose(A, A.T)


rect = np.array([[1., 2., 3.], [4., 5., 6.]])
I = np.eye(3)
D = np.diag([2., -1., 4.])
S = np.array([[2., 1., 3.], [1., 4., 5.], [3., 5., 6.]])

print("row(rect, 1) =", row(rect, 1))
print("col(rect, 2) =", col(rect, 2))
print("is_square(rect) =", is_square(rect))
print("is_square(I) =", is_square(I))
print("is_symmetric(D) =", is_symmetric(D))
print("is_symmetric(S) =", is_symmetric(S))

print("Exercise 1 solution complete.")

Exercise 2: Elementwise Operations, Trace, and Transpose

Task: Implement the basic non-GEMM matrix operations directly.

Requirements:

Implement matrix addition and Hadamard product without using + or * on whole arrays.
Implement trace_manual(A).
Verify (AB)^T = B^T A^T on one concrete example.

Written part:

Explain why transpose reverses order but addition does not.

Code cell 8

# Your Solution
# Exercise 2 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 2.")

Code cell 9

# Solution
# Exercise 2 - reference solution

def add_manual(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    C = np.zeros_like(A)
    for i in range(A.shape[0]):
        for j in range(A.shape[1]):
            C[i, j] = A[i, j] + B[i, j]
    return C


def hadamard_manual(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    C = np.zeros_like(A)
    for i in range(A.shape[0]):
        for j in range(A.shape[1]):
            C[i, j] = A[i, j] * B[i, j]
    return C


def trace_manual(A):
    A = np.asarray(A, dtype=float)
    total = 0.0
    for i in range(A.shape[0]):
        total += A[i, i]
    return total


A = np.array([[1., 2.], [3., 4.]])
B = np.array([[5., 6.], [7., 8.]])
print("manual A+B =\n", add_manual(A, B))
print("manual A odot B =\n", hadamard_manual(A, B))
print("trace_manual(A) =", trace_manual(A))
print("trace numpy(A) =", np.trace(A))
print("(AB)^T == B^T A^T ?", np.allclose((A @ B).T, B.T @ A.T))

print("Exercise 2 solution complete.")

Exercise 3: Matrix Multiplication by Formula and by Outer Products

Task: Implement matrix multiplication manually and then reproduce the same result as a sum of outer products.

Requirements:

Implement matmul_manual(A, B) with three loops.
Implement matmul_outer(A, B) using outer products of columns/rows.
Verify both agree with A @ B.

Written part:

Explain why rank(AB) cannot exceed either rank(A) or rank(B).

Code cell 11

# Your Solution
# Exercise 3 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 3.")

Code cell 12

# Solution
# Exercise 3 - reference solution

def matmul_manual(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    m, p = A.shape
    p2, n = B.shape
    if p != p2:
        raise ValueError("inner dimensions must match")
    C = np.zeros((m, n), dtype=float)
    for i in range(m):
        for j in range(n):
            for k in range(p):
                C[i, j] += A[i, k] * B[k, j]
    return C


def matmul_outer(A, B):
    return outer_sum_product(A, B)


A = np.array([[1., 2., 3.], [4., 5., 6.]])
B = np.array([[1., 0.], [0., 1.], [1., 1.]])
C_manual = matmul_manual(A, B)
C_outer = matmul_outer(A, B)
C_np = A @ B
print("manual =\n", C_manual)
print("outer-sum =\n", C_outer)
print("numpy =\n", C_np)
print("manual matches numpy ?", np.allclose(C_manual, C_np))
print("outer matches numpy ?", np.allclose(C_outer, C_np))
print("rank(A), rank(B), rank(AB) =", np.linalg.matrix_rank(A), np.linalg.matrix_rank(B), np.linalg.matrix_rank(C_np))

print("Exercise 3 solution complete.")

Exercise 4: Dense Layer and Attention Shape Tracking

Task: Read a neural-network computation as a chain of matrix operations.

Requirements:

Implement a batched dense layer Y = X W^T + b.
Implement a single-head attention pipeline.
Print the shape of every intermediate object.
Verify every row of the attention matrix sums to 1.

Written part:

Explain why Q K^T is square when Q and K have the same number of tokens.

Code cell 14

# Your Solution
# Exercise 4 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 4.")

Code cell 15

# Solution
# Exercise 4 - reference solution

def dense_layer(X, W, b):
    return X @ W.T + b


def single_head_attention(Q, K, V):
    dk = Q.shape[1]
    S = (Q @ K.T) / np.sqrt(dk)
    A = softmax_rows(S)
    O = A @ V
    return S, A, O


X = np.array([[1.0, 0.5, -1.0], [0.0, 2.0, 1.0]])
W = np.array([[1.0, -1.0, 0.5], [0.0, 2.0, 1.0]])
b = np.array([0.1, -0.2])
Y = dense_layer(X, W, b)
print("X shape", X.shape, "W shape", W.shape, "b shape", b.shape, "Y shape", Y.shape)
print("Y =\n", Y)

Q = np.array([[1.0, 0.0], [0.5, 1.0], [1.0, 1.0], [0.0, 1.0]])
K = np.array([[1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.5, 0.5]])
V = np.array([[2.0, 0.0], [1.0, 1.0], [0.0, 2.0], [1.5, 0.5]])
S, A, O = single_head_attention(Q, K, V)
print("\nQ shape", Q.shape, "K shape", K.shape, "V shape", V.shape)
print("S shape", S.shape)
print("A shape", A.shape)
print("O shape", O.shape)
print("row sums of A =", A.sum(axis=1))
print("QK^T symmetric when Q = K ?", np.allclose(Q @ Q.T, (Q @ Q.T).T))

print("Exercise 4 solution complete.")

Exercise 5: Inverse and the 2x2 Formula

Task: Implement the explicit inverse formula for a 2x2 matrix and verify it.

Requirements:

Implement inverse_2x2(A).
Raise an error or return None if the determinant is zero.
Verify A A^{-1} = I and A^{-1} A = I numerically.

Written part:

Explain why determinant zero means inverse cannot exist.

Code cell 17

# Your Solution
# Exercise 5 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 5.")

Code cell 18

# Solution
# Exercise 5 - reference solution

def inverse_2x2(A):
    A = np.asarray(A, dtype=float)
    a, b = A[0, 0], A[0, 1]
    c, d = A[1, 0], A[1, 1]
    det = a * d - b * c
    if abs(det) < 1e-12:
        raise ValueError("matrix is singular")
    return (1.0 / det) * np.array([[d, -b], [-c, a]], dtype=float)


A = np.array([[1.0, 2.0], [3.0, 4.0]])
A_inv = inverse_2x2(A)
print("A^{-1} =\n", A_inv)
print("A A^{-1} =\n", A @ A_inv)
print("A^{-1} A =\n", A_inv @ A)

singular = np.array([[1.0, 2.0], [2.0, 4.0]])
try:
    inverse_2x2(singular)
except ValueError as e:
    print("singular example correctly failed:", e)

print("Exercise 5 solution complete.")

Exercise 6: Solve vs Explicit Inverse and Conditioning

Task: Compare solving a system directly with using an explicit inverse, and inspect the role of condition number.

Requirements:

Solve A x = b with both np.linalg.solve and np.linalg.inv(A) @ b.
Compute the condition number of a well-conditioned and an ill-conditioned matrix.
Show that a tiny perturbation to b causes a much bigger change in the solution when the matrix is ill-conditioned.

Written part:

Explain why ill-conditioning is a geometry problem, not just a floating-point problem.

Code cell 20

# Your Solution
# Exercise 6 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 6.")

Code cell 21

# Solution
# Exercise 6 - reference solution

A_good = np.array([[3.0, 1.0], [1.0, 2.0]])
b_good = np.array([1.0, 0.0])
print("good matrix condition number =", np.linalg.cond(A_good))
print("solve =", np.linalg.solve(A_good, b_good))
print("inv @ b =", np.linalg.inv(A_good) @ b_good)

A_bad = np.array([[1.0, 1.0], [1.0, 1.0001]])
b_bad = np.array([2.0, 2.0001])
x_bad = np.linalg.solve(A_bad, b_bad)
x_bad_perturbed = np.linalg.solve(A_bad, b_bad + np.array([0.0, 1e-4]))
print("\nbad matrix condition number =", np.linalg.cond(A_bad))
print("x_bad =", x_bad)
print("x_bad_perturbed =", x_bad_perturbed)
print("solution change =", x_bad_perturbed - x_bad)

print("Exercise 6 solution complete.")

Exercise 7: Pseudo-Inverse and Least Squares Geometry

Task: Use the pseudo-inverse to solve an overdetermined system and verify the projection geometry.

Requirements:

Compute x* = A^+ b.
Compute the projection A x*.
Verify the residual is orthogonal to the column space by checking A^T (b - A x*).

Written part:

Explain why A A^+ behaves like a projection matrix.

Code cell 23

# Your Solution
# Exercise 7 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 7.")

Code cell 24

# Solution
# Exercise 7 - reference solution

A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
b = np.array([1.0, 2.0, 2.5])
A_pinv = np.linalg.pinv(A)
x_star = A_pinv @ b
proj = A @ x_star
residual = b - proj
P = A @ A_pinv

print("A^+ =\n", A_pinv)
print("x* =", x_star)
print("projection A x* =", proj)
print("residual =", residual)
print("A^T residual =", A.T @ residual)
print("P^2 == P ?", np.allclose(P @ P, P))
print("P^T == P ?", np.allclose(P.T, P))

print("Exercise 7 solution complete.")

Exercise 8: Determinant and Row Operations

Task: Verify determinant identities experimentally.

Requirements:

Compute determinant of a matrix.
Swap two rows and show the sign flips.
Add a multiple of one row to another and show the determinant is unchanged.
Verify det(AB) = det(A) det(B) on one example.

Written part:

Explain why determinant zero means the map collapses volume.

Code cell 26

# Your Solution
# Exercise 8 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 8.")

Code cell 27

# Solution
# Exercise 8 - reference solution

A = np.array([[2.0, 1.0, 1.0], [4.0, 3.0, 3.0], [8.0, 7.0, 9.0]])
det_A = np.linalg.det(A)

A_swap = A[[1, 0, 2], :]
A_row_add = A.copy()
A_row_add[1, :] = A_row_add[1, :] + 2 * A_row_add[0, :]

B = np.array([[1.0, 2.0, 0.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0]])

print("det(A) =", det_A)
print("det(row-swapped A) =", np.linalg.det(A_swap))
print("sign flipped?", np.allclose(np.linalg.det(A_swap), -det_A))
print("det(row-added A) =", np.linalg.det(A_row_add))
print("unchanged?", np.allclose(np.linalg.det(A_row_add), det_A))
print("det(AB) =", np.linalg.det(A @ B))
print("det(A)det(B) =", np.linalg.det(A) * np.linalg.det(B))

print("Exercise 8 solution complete.")

Exercise 9 (★★★): Attention as Matrix Multiplication

For query, key, and value matrices $Q,K,V$ , scaled dot-product attention is

A=\operatorname{softmax}\left(\frac{QK^\top}{\sqrt{d}}\right),\qquad O=AV.

Verify all shapes and show that each row of $A$ sums to one.

Code cell 29

# Your Solution
# Exercise 9 - learner workspace
# Compute attention scores, weights, and output shapes.
print("Learner workspace ready for Exercise 9.")

Code cell 30

# Solution
# Exercise 9 - attention matrix products
header("Exercise 9: attention matrix products")

Q = np.array([[1.0, 0.0], [0.5, 1.0], [-0.5, 0.2]])
K = np.array([[0.2, 1.0], [1.0, -0.3], [0.0, 0.5], [0.7, 0.7]])
V = np.array([[1.0, 0.0], [0.0, 2.0], [1.0, 1.0], [-1.0, 0.5]])
S = Q @ K.T / np.sqrt(Q.shape[1])
A = softmax(S, axis=1)
O = A @ V
print("scores shape:", S.shape, "weights shape:", A.shape, "output shape:", O.shape)
check_close("attention rows sum to 1", A.sum(axis=1), np.ones(Q.shape[0]), tol=1e-12)
check_true("output has query length and value width", O.shape == (Q.shape[0], V.shape[1]))
print("Takeaway: attention is a sequence of ordinary matrix products plus row-wise normalization.")

Exercise 10 (★★★): Low-Rank Adapter Update

A LoRA-style update writes

W_{\text{new}}=W + BA,

where $B\in\mathbb{R}^{m\times r}$ and $A\in\mathbb{R}^{r\times n}$ . Verify that the update rank is at most $r$ and compare parameter counts.

Code cell 32

# Your Solution
# Exercise 10 - learner workspace
# Form BA and compare rank and parameter counts.
print("Learner workspace ready for Exercise 10.")

Code cell 33

# Solution
# Exercise 10 - LoRA low-rank update
header("Exercise 10: low-rank adapter update")

m, n, r = 6, 5, 2
rng = np.random.default_rng(42)
W = rng.normal(size=(m, n))
B = rng.normal(size=(m, r))
A = rng.normal(size=(r, n))
Delta = B @ A
print("rank(Delta):", la.matrix_rank(Delta), "rank bound:", r)
print("full params:", m*n, "LoRA params:", m*r + r*n)
check_true("rank bound", la.matrix_rank(Delta) <= r)
check_true("adapter has fewer params", m*r + r*n < m*n)
print("Takeaway: low-rank matrix multiplication is a parameter-efficient update family.")