Systems of Equations

Linear Algebra Basics

Exercises Notebook

Converted from exercises.ipynb for web reading.

Systems of Equations - Exercises

10 graded exercises covering the full linear algebra basics arc, from computation to ML-facing matrix workflows.

Each exercise has three parts:

  • Problem: markdown cell with the task description
  • Your Solution: code cell for learner work
  • Solution: reference solution with checks

Difficulty: * straightforward -> ** moderate -> *** challenging.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)
print("Plot setup complete.")

Code cell 3

import numpy as np
import numpy.linalg as la
import scipy.linalg as sla
from scipy import stats

COLORS = {
    "primary": "#0077BB",
    "secondary": "#EE7733",
    "tertiary": "#009988",
    "error": "#CC3311",
    "neutral": "#555555",
    "highlight": "#EE3377",
}
HAS_MPL = True
np.set_printoptions(precision=8, suppress=True)
np.random.seed(42)

def header(title):
    print("\n" + "=" * len(title))
    print(title)
    print("=" * len(title))

def check_true(name, cond):
    ok = bool(cond)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    return ok

def check_close(name, got, expected, tol=1e-8):
    ok = np.allclose(got, expected, atol=tol, rtol=tol)
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {got}, expected {expected}")
    return ok

def check(name, got, expected, tol=1e-8):
    return check_close(name, got, expected, tol=tol)

def softmax(z, axis=-1, tau=1.0):
    z = np.asarray(z, dtype=float) / float(tau)
    z = z - np.max(z, axis=axis, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=axis, keepdims=True)

def cosine_similarity(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    return float(a @ b / (la.norm(a) * la.norm(b) + 1e-12))

def numerical_rank(A, tol=1e-10):
    return int(np.sum(la.svd(A, compute_uv=False) > tol))

def orthonormal_basis(A, tol=1e-10):
    Q, R = la.qr(A)
    keep = np.abs(np.diag(R)) > tol
    return Q[:, keep]

# Compatibility helpers used by the Chapter 02 theory and exercise cells.
def null_space(A, tol=1e-10):
    # Rows of Vt beyond the numerical rank span null(A); return them as columns.
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T

svd_null_space = null_space

def gram_schmidt(vectors, tol=1e-10):
    A = np.asarray(vectors, dtype=float)
    if A.ndim == 1:
        A = A.reshape(1, -1)
    basis = []
    for v in A:
        w = v.astype(float).copy()
        for q in basis:
            w = w - np.dot(w, q) * q
        norm = la.norm(w)
        if norm > tol:
            basis.append(w / norm)
    return np.array(basis)

def projection_matrix_from_columns(A, tol=1e-10):
    Q = orthonormal_basis(np.asarray(A, dtype=float), tol=tol)
    return Q @ Q.T


def random_unit_vectors(n, d):
    X = np.random.randn(n, d)
    return X / np.maximum(la.norm(X, axis=1, keepdims=True), 1e-12)

def pairwise_distances(X):
    X = np.asarray(X, dtype=float)
    diff = X[:, None, :] - X[None, :, :]
    return la.norm(diff, axis=-1)


def normalize(x, axis=None, tol=1e-12):
    x = np.asarray(x, dtype=float)
    norm = la.norm(x, axis=axis, keepdims=True)
    return x / np.maximum(norm, tol)

def frobenius_inner(A, B):
    return float(np.sum(np.asarray(A, dtype=float) * np.asarray(B, dtype=float)))

def outer_sum_product(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    return sum(np.outer(A[:, k], B[k, :]) for k in range(A.shape[1]))

def softmax_rows(X):
    return softmax(X, axis=1)

def col_space(A, tol=1e-10):
    return orthonormal_basis(np.asarray(A, dtype=float), tol=tol)

def row_space(A, tol=1e-10):
    return orthonormal_basis(np.asarray(A, dtype=float).T, tol=tol).T

def rref(A, tol=1e-10):
    R = np.array(A, dtype=float, copy=True)
    m, n = R.shape
    pivots = []
    row = 0
    for col in range(n):
        pivot = row + int(np.argmax(np.abs(R[row:, col]))) if row < m else row
        if row >= m or abs(R[pivot, col]) <= tol:
            continue
        if pivot != row:
            R[[row, pivot]] = R[[pivot, row]]
        R[row] = R[row] / R[row, col]
        for r in range(m):
            if r != row:
                R[r] = R[r] - R[r, col] * R[row]
        pivots.append(col)
        row += 1
        if row == m:
            break
    R[np.abs(R) < tol] = 0.0
    return R, pivots

def nullspace_basis(A, tol=1e-10):
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T, rank

print("Chapter helper setup complete.")

Exercise 1: Classify the System Type *

For a system $Ax = b$, classify it as unique (one solution), infinite (infinitely many solutions), or inconsistent (no solution) using the rank conditions.

Task:

  • Implement classify_system(A, b) using rank logic
  • Test it on the three canonical cases

What this is training: reading solution structure from algebra before trying to solve numerically.
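For reference, the rank test that the reference solution implements can be stated compactly (with $n$ the number of unknowns):

$$\operatorname{rank}(A) < \operatorname{rank}([A \mid b]) \Rightarrow \text{inconsistent}$$

$$\operatorname{rank}(A) = \operatorname{rank}([A \mid b]) = n \Rightarrow \text{unique solution}$$

$$\operatorname{rank}(A) = \operatorname{rank}([A \mid b]) < n \Rightarrow \text{infinitely many solutions}$$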

Code cell 5

# Your Solution
# Exercise 1 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 1.")

Code cell 6

# Solution
# Exercise 1 - reference solution

A_unique = np.array([[2.0, 1.0], [1.0, -1.0]])
b_unique = np.array([5.0, 1.0])
A_inf = np.array([[1.0, 2.0], [2.0, 4.0]])
b_inf = np.array([3.0, 6.0])
A_none = np.array([[1.0, 2.0], [2.0, 4.0]])
b_none = np.array([3.0, 7.0])

def classify_system(A, b):
    A = np.array(A, dtype=float)
    b = np.array(b, dtype=float).reshape(-1, 1)
    aug = np.hstack([A, b])
    rank_A = np.linalg.matrix_rank(A)
    rank_aug = np.linalg.matrix_rank(aug)
    n = A.shape[1]

    if rank_A < rank_aug:
        return 'inconsistent'
    if rank_A == n:
        return 'unique'
    return 'infinite'


header('Exercise 1 checks')
check_true('unique case', classify_system(A_unique, b_unique) == 'unique')
check_true('infinite case', classify_system(A_inf, b_inf) == 'infinite')
check_true('inconsistent case', classify_system(A_none, b_none) == 'inconsistent')
print('\nTakeaway: rank(A) versus rank([A|b]) is the full classification rule.')

print("Exercise 1 solution complete.")

Exercise 2: Row Reduction to RREF *

Implement a small RREF routine for augmented matrices.

Task:

  • write rref(M)
  • return both the reduced matrix and the pivot columns
  • apply it to an underdetermined system and identify free columns

Why this matters: RREF turns an equation system into an explicit map from pivots to free variables.
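To make that map concrete, here is a minimal sketch (assuming an rref routine like the one in the reference solution below, which returns the reduced augmented matrix and its pivot columns) that reads one particular solution off RREF([A | b]) by setting every free variable to zero:

# Sketch: recover a particular solution from RREF([A | b]) with free variables set to 0.
# Assumes R and pivots come from an rref routine like the reference solution below.
def particular_from_rref(R, pivots, n_vars):
    x = np.zeros(n_vars)
    for row_idx, col in enumerate(pivots):
        x[col] = R[row_idx, -1]   # pivot variable equals the augmented-column entry
    return x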

Code cell 8

# Your Solution
# Exercise 2 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 2.")

Code cell 9

# Solution
# Exercise 2 - reference solution

A = np.array([
    [1.0, 2.0, 0.0, 1.0],
    [0.0, 0.0, 1.0, -1.0],
    [0.0, 0.0, 0.0, 0.0],
])
b = np.array([3.0, 2.0, 0.0])
M = np.column_stack([A, b])

def rref(M, tol=1e-12):
    M = np.array(M, dtype=float).copy()
    rows, cols = M.shape
    pivot_cols = []
    r = 0

    for c in range(cols):
        if r >= rows:
            break
        pivot = r + np.argmax(np.abs(M[r:, c]))
        if abs(M[pivot, c]) < tol:
            continue
        if pivot != r:
            M[[r, pivot]] = M[[pivot, r]]
        M[r] = M[r] / M[r, c]
        for i in range(rows):
            if i != r and abs(M[i, c]) > tol:
                M[i] -= M[i, c] * M[r]
        M[np.abs(M) < tol] = 0.0
        pivot_cols.append(c)
        r += 1

    return M, pivot_cols


R, pivots = rref(M)
free_cols = [j for j in range(M.shape[1] - 1) if j not in pivots]

header('Exercise 2 checks')
print('RREF([A|b]) =\n', R)
print('pivot columns:', pivots)
print('free columns :', free_cols)
check_true('two pivot columns', pivots == [0, 2])
check_true('free columns are 1 and 3', free_cols == [1, 3])
print('\nTakeaway: pivot structure, not raw shape, determines free-variable count.')

print("Exercise 2 solution complete.")

Exercise 3: Particular Solution Plus Null Space *

Use the system from Exercise 2 and recover:

  • one particular solution $x_p$
  • a basis for $\operatorname{null}(A)$
  • one other exact solution of the form $x_p + x_h$

Goal: make the formula general solution = particular + homogeneous concrete.
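In symbols: if $x_p$ is any particular solution and the columns of $N$ form a basis of $\operatorname{null}(A)$, then every exact solution can be written as

$$x = x_p + N c, \qquad A(x_p + Nc) = Ax_p + 0 = b,$$

for some coefficient vector $c$.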

Code cell 11

# Your Solution
# Exercise 3 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 3.")

Code cell 12

# Solution
# Exercise 3 - reference solution

A = np.array([
    [1.0, 2.0, 0.0, 1.0],
    [0.0, 0.0, 1.0, -1.0],
    [0.0, 0.0, 0.0, 0.0],
])
b = np.array([3.0, 2.0, 0.0])

def nullspace(A, tol=1e-12):
    U, s, Vt = np.linalg.svd(A)
    rank = np.sum(s > tol)
    return Vt[rank:].T


x_p = np.linalg.lstsq(A, b, rcond=None)[0]
N = nullspace(A)
coeffs = np.array([0.5, -1.25])[:N.shape[1]]
x_alt = x_p + N @ coeffs

header('Exercise 3 checks')
print('particular solution x_p =', x_p)
print('nullspace basis =\n', N)
print('alternative exact solution =', x_alt)
check_close('A x_p = b', A @ x_p, b)
check_close('A x_alt = b', A @ x_alt, b)
check_true('rank + nullity = 4', np.linalg.matrix_rank(A) + N.shape[1] == 4)
print('\nTakeaway: all exact solutions differ by a null-space direction.')

print("Exercise 3 solution complete.")

Exercise 4: Forward and Back Substitution *

Implement the two cheap inner-loop solvers used everywhere in LU and Cholesky workflows.

Task:

  • forward_substitution(L, b) for lower triangular systems
  • back_substitution(U, b) for upper triangular systems

Why this matters: factorization is the expensive part; triangular solves are the reusable part.
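As a companion sketch (assuming SciPy is available; it is imported above as sla), the factor-once / solve-many pattern looks like this, with the hypothetical matrix A_demo standing in for any system that gets reused:

# Sketch: factor once, then reuse the cheap triangular solves for several right-hand sides.
# A_demo is a hypothetical example matrix; sla is scipy.linalg from the setup cell.
A_demo = np.array([[4.0, 2.0, 1.0],
                   [2.0, 5.0, 3.0],
                   [1.0, 3.0, 6.0]])
lu, piv = sla.lu_factor(A_demo)            # O(n^3) factorization, done once
for rhs in (np.array([1.0, 0.0, 0.0]), np.array([0.0, 1.0, 2.0])):
    x = sla.lu_solve((lu, piv), rhs)       # O(n^2) triangular solves, reused per rhs
    print("residual:", np.linalg.norm(A_demo @ x - rhs))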

Code cell 14

# Your Solution
# Exercise 4 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 4.")

Code cell 15

# Solution
# Exercise 4 - reference solution

L = np.array([
    [2.0, 0.0, 0.0],
    [1.0, 3.0, 0.0],
    [-1.0, 2.0, 4.0],
])
U = np.array([
    [3.0, -1.0, 2.0],
    [0.0, 2.0, 1.0],
    [0.0, 0.0, -4.0],
])
b1 = L @ np.array([1.0, -2.0, 0.5])
b2 = U @ np.array([2.0, -1.0, 0.25])

def forward_substitution(L, b):
    L = np.array(L, dtype=float)
    b = np.array(b, dtype=float)
    x = np.zeros_like(b)
    for i in range(len(b)):
        x[i] = (b[i] - L[i, :i] @ x[:i]) / L[i, i]
    return x


def back_substitution(U, b):
    U = np.array(U, dtype=float)
    b = np.array(b, dtype=float)
    x = np.zeros_like(b)
    for i in range(len(b) - 1, -1, -1):
        x[i] = (b[i] - U[i, i + 1:] @ x[i + 1:]) / U[i, i]
    return x


x_forward = forward_substitution(L, b1)
x_back = back_substitution(U, b2)

header('Exercise 4 checks')
check_close('forward substitution', L @ x_forward, b1)
check_close('back substitution', U @ x_back, b2)
print('x_forward =', x_forward)
print('x_back    =', x_back)
print('\nTakeaway: direct solvers are built from these small triangular kernels.')

print("Exercise 4 solution complete.")

Exercise 5: Gaussian Elimination with Partial Pivoting **

Write a solver for a square system using elimination plus back substitution.

Task:

  • perform partial pivoting in each column
  • eliminate below the pivot
  • recover the solution by back substitution

AI connection: this is the direct-solve primitive behind many exact solves of medium-size linear systems.
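To see that pivoting really happens on this matrix, here is a small check (a sketch using scipy.linalg.lu, imported above as sla): the leading entry of the matrix in the solution below is zero, so the factorization must swap rows.

# Sketch: SciPy's pivoted LU reorders rows because A[0, 0] == 0.
A_demo = np.array([[0.0, 2.0, 1.0],
                   [3.0, -1.0, 2.0],
                   [1.0, 1.0, 1.0]])
P, L_, U_ = sla.lu(A_demo)                 # A_demo = P @ L_ @ U_
print("permutation matrix:\n", P)          # not the identity, so rows were reordered
print("reconstruction ok:", np.allclose(P @ L_ @ U_, A_demo))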

Code cell 17

# Your Solution
# Exercise 5 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 5.")

Code cell 18

# Solution
# Exercise 5 - reference solution

A5 = np.array([
    [0.0, 2.0, 1.0],
    [3.0, -1.0, 2.0],
    [1.0, 1.0, 1.0],
])
b5 = A5 @ np.array([1.0, -2.0, 3.0])

def gaussian_elimination(A, b, tol=1e-12):
    A = np.array(A, dtype=float).copy()
    b = np.array(b, dtype=float).copy()
    n = len(b)

    for k in range(n - 1):
        pivot = k + np.argmax(np.abs(A[k:, k]))
        if abs(A[pivot, k]) < tol:
            raise ValueError('matrix is singular to working precision')
        if pivot != k:
            A[[k, pivot]] = A[[pivot, k]]
            b[[k, pivot]] = b[[pivot, k]]
        for i in range(k + 1, n):
            m = A[i, k] / A[k, k]
            A[i, k:] -= m * A[k, k:]
            b[i] -= m * b[k]

    return back_substitution(A, b)


x5 = gaussian_elimination(A5, b5)
header('Exercise 5 checks')
print('solution =', x5)
check_close('A x = b', A5 @ x5, b5)
check_close('matches np.linalg.solve', x5, np.linalg.solve(A5, b5))
print('\nTakeaway: elimination becomes a practical solver only once pivoting is handled carefully.')

print("Exercise 5 solution complete.")

Exercise 6: Least Squares and Ridge **

Fit a line to noisy data with both ordinary least squares and ridge regression.

Task:

  • solve the normal equations
  • compute the residual
  • verify orthogonality of the residual to the column space
  • compare with ridge regularization

What to notice: least squares is projection; ridge changes both geometry and conditioning.
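A quick numerical check of the conditioning claim (a sketch; the cubic design matrix below is a hypothetical example, not the data used in the solution): adding $\lambda I$ to $X^\top X$ shifts every eigenvalue up by $\lambda$, which lowers the condition number.

# Sketch: ridge regularization improves the conditioning of the normal equations.
t_demo = np.linspace(0.0, 1.0, 20)
X_demo = np.column_stack([t_demo**k for k in range(4)])   # hypothetical cubic design matrix
XtX_demo = X_demo.T @ X_demo
for lam_demo in (0.0, 0.1, 1.0):
    print(f"lambda = {lam_demo:4.1f}   cond(XtX + lam*I) = "
          f"{np.linalg.cond(XtX_demo + lam_demo * np.eye(4)):.3e}")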

Code cell 20

# Your Solution
# Exercise 6 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 6.")

Code cell 21

# Solution
# Exercise 6 - reference solution

t = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
X = np.column_stack([np.ones_like(t), t])
y = np.array([1.0, 2.2, 2.9, 4.1, 4.8])
lam = 0.5

XtX = X.T @ X
Xty = X.T @ y
w_ols = np.linalg.solve(XtX, Xty)
residual = y - X @ w_ols
w_ridge = np.linalg.solve(XtX + lam * np.eye(X.shape[1]), Xty)

header('Exercise 6 checks')
print('OLS solution   =', w_ols)
print('Ridge solution =', w_ridge)
print('Residual       =', residual)
check_close('X^T residual = 0', X.T @ residual, np.zeros(X.shape[1]))
check_true('ridge shrinks parameter norm', np.linalg.norm(w_ridge) < np.linalg.norm(w_ols))
print('\nTakeaway: OLS finds the projection; ridge trades exact projection for better-controlled parameters.')

print("Exercise 6 solution complete.")

Exercise 7: Conditioning and Sensitivity **

A system can be exactly solvable and still be numerically fragile.

Task:

  • compute the condition number of an ill-conditioned matrix
  • solve with one right-hand side
  • perturb the right-hand side slightly
  • measure the amplification in the solution

AI connection: this is the same phenomenon behind unstable Hessian solves and fragile kernel systems.
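The amplification measured below is bounded by the condition number: for a perturbation of the right-hand side only,

$$\frac{\|\delta x\|}{\|x\|} \le \kappa(A)\,\frac{\|\delta b\|}{\|b\|}, \qquad \kappa(A) = \|A\|\,\|A^{-1}\|,$$

so a condition number near $10^5$ can turn a $10^{-6}$ relative change in $b$ into a relative change in $x$ as large as $10^{-1}$.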

Code cell 23

# Your Solution
# Exercise 7 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 7.")

Code cell 24

# Solution
# Exercise 7 - reference solution

A = np.array([[1.0, 1.0], [1.0, 1.00001]])
b = np.array([2.0, 2.00001])
b2 = np.array([2.0, 2.00002])

condA = np.linalg.cond(A)
x = np.linalg.solve(A, b)
x2 = np.linalg.solve(A, b2)
rhs_rel = np.linalg.norm(b2 - b) / np.linalg.norm(b)
x_rel = np.linalg.norm(x2 - x) / np.linalg.norm(x)

header('Exercise 7 checks')
print('det(A)     =', np.linalg.det(A))
print('cond_2(A)  =', condA)
print('x          =', x)
print('x perturbed=', x2)
print('relative rhs change =', rhs_rel)
print('relative x change   =', x_rel)
print('amplification       =', x_rel / rhs_rel)
check_true('system is ill-conditioned', condA > 1e5)
print('\nTakeaway: exact solvability says nothing about numerical safety.')

print("Exercise 7 solution complete.")

Exercise 8: Jacobi and Gauss-Seidel **

Compare two stationary iterative methods on a small SPD system.

Task:

  • implement jacobi_steps and gauss_seidel_steps
  • run 3 iterations from the zero vector
  • record the residual after each step

What to notice: Gauss-Seidel uses freshly updated components within each sweep, which usually improves convergence.
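Convergence speed is governed by the spectral radius of each method's iteration matrix. Here is a minimal sketch for the 2x2 SPD system used in the solution below (restated here as A_demo):

# Sketch: spectral radii of the Jacobi and Gauss-Seidel iteration matrices
# for the SPD system used in the reference solution below.
A_demo = np.array([[4.0, 1.0], [1.0, 3.0]])
D = np.diag(np.diag(A_demo))
Lo = np.tril(A_demo, k=-1)                 # strictly lower triangle
Up = np.triu(A_demo, k=1)                  # strictly upper triangle
T_jacobi = np.linalg.solve(D, -(Lo + Up))
T_gauss_seidel = np.linalg.solve(D + Lo, -Up)
spectral_radius = lambda T: float(np.max(np.abs(np.linalg.eigvals(T))))
print("rho(Jacobi)      :", spectral_radius(T_jacobi))        # ~0.29
print("rho(Gauss-Seidel):", spectral_radius(T_gauss_seidel))  # ~0.08, so it converges faster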

Code cell 26

# Your Solution
# Exercise 8 - learner workspace
# Write your solution here, then run the reference solution below to compare.
print("Learner workspace ready for Exercise 8.")

Code cell 27

# Solution
# Exercise 8 - reference solution

A = np.array([[4.0, 1.0], [1.0, 3.0]])
b = np.array([1.0, 2.0])
x0 = np.zeros_like(b)

def jacobi_steps(A, b, x0, steps):
    D = np.diag(np.diag(A))
    R = A - D
    x = x0.copy()
    hist = []
    for _ in range(steps):
        x = np.linalg.solve(D, b - R @ x)
        hist.append((x.copy(), np.linalg.norm(b - A @ x)))
    return hist


def gauss_seidel_steps(A, b, x0, steps):
    x = x0.copy()
    hist = []
    for _ in range(steps):
        for i in range(len(b)):
            x[i] = (b[i] - A[i, :i] @ x[:i] - A[i, i+1:] @ x[i+1:]) / A[i, i]
        hist.append((x.copy(), np.linalg.norm(b - A @ x)))
    return hist


jac_hist = jacobi_steps(A, b, x0, 3)
gs_hist = gauss_seidel_steps(A, b, x0, 3)

header('Exercise 8 checks')
print('Jacobi history:')
for k, (xk, rk) in enumerate(jac_hist, start=1):
    print(f'  iter {k}: x = {xk}, residual = {rk:.6e}')
print('\nGauss-Seidel history:')
for k, (xk, rk) in enumerate(gs_hist, start=1):
    print(f'  iter {k}: x = {xk}, residual = {rk:.6e}')
check_true('Gauss-Seidel residual is smaller by iter 3', gs_hist[-1][1] < jac_hist[-1][1])
print('\nTakeaway: structure-aware reuse of fresh information speeds convergence.')

print("Exercise 8 solution complete.")

Exercise 9: Least Squares as an Overdetermined System ***

For $Ax \approx b$ with more equations than unknowns, solve

$$\min_x \|Ax - b\|_2^2$$

using both the normal equations and np.linalg.lstsq, then verify that the residual is orthogonal to the columns of A.
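The two routes agree because minimizing $\|Ax - b\|_2^2$ sets the gradient to zero, which is exactly the normal equations and the orthogonality condition checked below:

$$A^\top A\, x = A^\top b \quad\Longleftrightarrow\quad A^\top (Ax - b) = 0.$$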

Code cell 29

# Your Solution
# Exercise 9 - learner workspace
# Solve least squares and check A.T @ residual = 0.
print("Learner workspace ready for Exercise 9.")

Code cell 30

# Solution
# Exercise 9 - least squares system
header("Exercise 9: least squares")

A = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
b = np.array([1.0, 2.0, 1.9, 3.2])
x_normal = la.solve(A.T @ A, A.T @ b)
x_lstsq = la.lstsq(A, b, rcond=None)[0]
resid = A @ x_lstsq - b
print("normal equations:", x_normal)
print("lstsq:", x_lstsq)
check_close("solutions agree", x_normal, x_lstsq, tol=1e-10)
check_close("normal-equation residual orthogonal", A.T @ resid, np.zeros(A.shape[1]), tol=1e-10)
print("Takeaway: least squares solves the nearest consistent system in column space.")

Exercise 10: Iterative Refinement for a Linear System ***

Solve an ill-conditioned system $Ax = b$, compute the residual $r = b - Ax$, then solve $A\,\Delta x = r$ and update $x \leftarrow x + \Delta x$.

Show that refinement can reduce residual error.

Code cell 32

# Your Solution
# Exercise 10 - learner workspace
# Implement one step of iterative refinement.
print("Learner workspace ready for Exercise 10.")

Code cell 33

# Solution
# Exercise 10 - iterative refinement
header("Exercise 10: iterative refinement")

A = np.array([[1.0, 0.999], [0.999, 0.998]])
x_true = np.array([1.0, -1.0])
b = A @ x_true
x0 = la.solve(A.astype(np.float32), b.astype(np.float32)).astype(float)
r0 = b - A @ x0
dx = la.solve(A, r0)
x1 = x0 + dx
r1 = b - A @ x1
print("initial x:", x0, "residual norm:", la.norm(r0))
print("refined x:", x1, "residual norm:", la.norm(r1))
check_true("residual decreases", la.norm(r1) < la.norm(r0))
check_close("refined solution near truth", x1, x_true, tol=1e-6)
print("Takeaway: residual correction can recover accuracy lost to finite precision.")