Math for LLMs: Theory Notebook
Converted from theory.ipynb for web reading.

Determinants — From Volume Scaling to Log-Det in AI

A determinant is not just a formula for small matrices. It is the scalar that tells you whether a linear map preserves dimension, how it rescales volume, and why flow models and Gaussian likelihoods need log-determinants.

This notebook is the interactive companion to notes.md. It follows a teaching-first path rather than mirroring the note section-by-section.

Block  Topic                          What you will build
1      Geometry first                 signed area, volume scaling, and orientation
2      Computation                    direct formulas, elimination-based determinant computation
3      Invertibility                  determinant, rank, and condition number together
4      Spectral bridge                characteristic polynomial, eigenvalues, Cayley-Hamilton
5      Cofactors                      adjugate identity and inverse recovery
6      Geometry in higher dimension   Gram determinants and special transformations
7      Special classes                SPD, orthogonal, and Vandermonde determinants
8      Determinant identities         matrix determinant lemma, Sylvester, Schur complement
9      ML log-det                     coupling-layer Jacobians, Gaussian log-likelihood, tiny GP term
10     Diversity and low rank         DPP-style diversity scores and LoRA-style determinant updates

This notebook was shaped using the repository skill guide, the local Number Systems and Systems of Equations notebook style, and standard public references such as MIT 18.06, Stanford EE263, Real NVP, Glow, FFJORD, GPyTorch, and the DPP monograph by Kulesza and Taskar.


Code cell 3

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)
print("Plot setup complete.")

Code cell 4

import numpy as np
import numpy.linalg as la
import scipy.linalg as sla
from scipy import stats

COLORS = {
    "primary": "#0077BB",
    "secondary": "#EE7733",
    "tertiary": "#009988",
    "error": "#CC3311",
    "neutral": "#555555",
    "highlight": "#EE3377",
}
HAS_MPL = True
np.set_printoptions(precision=8, suppress=True)
np.random.seed(42)

def header(title):
    print("\n" + "=" * len(title))
    print(title)
    print("=" * len(title))

def check_true(name, cond):
    ok = bool(cond)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    return ok

def check_close(name, got, expected, tol=1e-8):
    ok = np.allclose(got, expected, atol=tol, rtol=tol)
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {got}, expected {expected}")
    return ok

def check(name, got, expected, tol=1e-8):
    return check_close(name, got, expected, tol=tol)

def softmax(z, axis=-1, tau=1.0):
    z = np.asarray(z, dtype=float) / float(tau)
    z = z - np.max(z, axis=axis, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=axis, keepdims=True)

def cosine_similarity(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    return float(a @ b / (la.norm(a) * la.norm(b) + 1e-12))

def numerical_rank(A, tol=1e-10):
    return int(np.sum(la.svd(A, compute_uv=False) > tol))

def orthonormal_basis(A, tol=1e-10):
    Q, R = la.qr(A)
    keep = np.abs(np.diag(R)) > tol
    return Q[:, keep]

# Compatibility helpers used by the Chapter 02 theory and exercise cells.
def null_space(A, tol=1e-10):
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T

svd_null_space = null_space

def gram_schmidt(vectors, tol=1e-10):
    A = np.asarray(vectors, dtype=float)
    if A.ndim == 1:
        A = A.reshape(1, -1)
    basis = []
    for v in A:
        w = v.astype(float).copy()
        for q in basis:
            w = w - np.dot(w, q) * q
        norm = la.norm(w)
        if norm > tol:
            basis.append(w / norm)
    return np.array(basis)

def projection_matrix_from_columns(A, tol=1e-10):
    Q = orthonormal_basis(np.asarray(A, dtype=float), tol=tol)
    return Q @ Q.T


def random_unit_vectors(n, d):
    X = np.random.randn(n, d)
    return X / np.maximum(la.norm(X, axis=1, keepdims=True), 1e-12)

def pairwise_distances(X):
    X = np.asarray(X, dtype=float)
    diff = X[:, None, :] - X[None, :, :]
    return la.norm(diff, axis=-1)


def normalize(x, axis=None, tol=1e-12):
    x = np.asarray(x, dtype=float)
    norm = la.norm(x, axis=axis, keepdims=True)
    return x / np.maximum(norm, tol)

def frobenius_inner(A, B):
    return float(np.sum(np.asarray(A, dtype=float) * np.asarray(B, dtype=float)))

def outer_sum_product(A, B):
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    return sum(np.outer(A[:, k], B[k, :]) for k in range(A.shape[1]))

def softmax_rows(X):
    return softmax(X, axis=1)

def col_space(A, tol=1e-10):
    return orthonormal_basis(np.asarray(A, dtype=float), tol=tol)

def row_space(A, tol=1e-10):
    return orthonormal_basis(np.asarray(A, dtype=float).T, tol=tol).T

def rref(A, tol=1e-10):
    R = np.array(A, dtype=float, copy=True)
    m, n = R.shape
    pivots = []
    row = 0
    for col in range(n):
        pivot = row + int(np.argmax(np.abs(R[row:, col]))) if row < m else row
        if row >= m or abs(R[pivot, col]) <= tol:
            continue
        if pivot != row:
            R[[row, pivot]] = R[[pivot, row]]
        R[row] = R[row] / R[row, col]
        for r in range(m):
            if r != row:
                R[r] = R[r] - R[r, col] * R[row]
        pivots.append(col)
        row += 1
        if row == m:
            break
    R[np.abs(R) < tol] = 0.0
    return R, pivots

def nullspace_basis(A, tol=1e-10):
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T, rank

print("Chapter helper setup complete.")

1. Signed Area, Orientation, and the 2x2 Determinant

The fastest way to understand determinants is to stop thinking of them as mysterious scalar outputs and start thinking of them as signed area / volume scaling factors.

For two vectors $u, v \in \mathbb{R}^2$, the matrix $A = [u \; v]$ has determinant

$$\det(A) = u_1 v_2 - u_2 v_1.$$

Its absolute value is the area of the parallelogram spanned by $u$ and $v$. Its sign tells you orientation:

  • positive: orientation preserved
  • negative: orientation reversed
  • zero: the vectors are dependent, so the parallelogram collapses

1. Intuition — Signed Area and Orientation

Code cell 7

# ======================================================================
# 1.1 Signed area and orientation in 2D
# ======================================================================

def signed_area_2d(u, v):
    A = np.column_stack([u, v])
    return np.linalg.det(A)


examples = {
    'positive orientation': (np.array([3.0, 1.0]), np.array([1.0, 2.0])),
    'negative orientation': (np.array([3.0, 1.0]), np.array([1.0, -2.0])),
    'collapsed / dependent': (np.array([2.0, 1.0]), np.array([4.0, 2.0])),
}

for name, (u, v) in examples.items():
    area = signed_area_2d(u, v)
    print(f"{name:>22}: det([u v]) = {area: .6f} | area = {abs(area):.6f}")

u = np.array([3.0, 1.0])
v = np.array([1.0, 2.0])
A = np.column_stack([u, v])
unit_square = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
mapped = unit_square @ A.T

print("\nMatrix A = [u v]:\n", A)
print("Mapped unit-square corners:\n", mapped)
print("Interpretation: |det(A)| is the area scaling factor from the unit square to the image parallelogram.")

2. From Formula to Computation

For tiny matrices, explicit formulas are useful. For real computation, elimination wins.

Three levels matter:

  1. closed form for $2 \times 2$ and $3 \times 3$
  2. cofactor expansion for theory and hand work on small sparse matrices
  3. row reduction / LU-style elimination for anything practical

2. Computing Determinants — Small Matrices

Code cell 10

# ======================================================================
# 2.1 Small-matrix formulas
# ======================================================================

def det_2x2(A):
    A = np.array(A, dtype=float)
    return A[0, 0] * A[1, 1] - A[0, 1] * A[1, 0]


def det_3x3_sarrus(A):
    A = np.array(A, dtype=float)
    a, b, c = A[0]
    d, e, f = A[1]
    g, h, i = A[2]
    return a * e * i + b * f * g + c * d * h - c * e * g - b * d * i - a * f * h


A2 = np.array([[5.0, 3.0], [2.0, 4.0]])
A3 = np.array([[1.0, 2.0, 3.0], [0.0, 4.0, 1.0], [2.0, 1.0, 0.0]])

print("2x2 formula vs NumPy:")
print(det_2x2(A2), np.linalg.det(A2))

print("\n3x3 Sarrus vs NumPy:")
print(det_3x3_sarrus(A3), np.linalg.det(A3))

assert np.isclose(det_2x2(A2), np.linalg.det(A2))
assert np.isclose(det_3x3_sarrus(A3), np.linalg.det(A3))
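
The second level from the list above, cofactor expansion, is easy to write as a recursion. A minimal sketch, assuming the helper name det_cofactor_expansion is our own and not part of the shared helper cell; its cost grows factorially, so it is for teaching and very small matrices only.

def det_cofactor_expansion(A):
    # Laplace expansion along the first row; exponential cost, teaching use only.
    A = np.array(A, dtype=float)
    n = A.shape[0]
    if n == 1:
        return A[0, 0]
    total = 0.0
    for j in range(n):
        sub = np.delete(np.delete(A, 0, axis=0), j, axis=1)
        total += ((-1) ** j) * A[0, j] * det_cofactor_expansion(sub)
    return total

for M in (A2, A3):
    assert np.isclose(det_cofactor_expansion(M), np.linalg.det(M))
print("Cofactor expansion matches NumPy on the small examples above.")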

2. Computing Determinants — Via Elimination

Code cell 12

# ======================================================================
# 2.2 Determinant via elimination (practical path)
# ======================================================================

def det_via_elimination(A):
    A = np.array(A, dtype=float).copy()
    n = A.shape[0]
    sign = 1.0

    for k in range(n):
        pivot = k + np.argmax(np.abs(A[k:, k]))
        if np.isclose(A[pivot, k], 0.0):
            return 0.0, A

        if pivot != k:
            A[[k, pivot]] = A[[pivot, k]]
            sign *= -1.0

        for i in range(k + 1, n):
            factor = A[i, k] / A[k, k]
            A[i, k:] -= factor * A[k, k:]

    return sign * np.prod(np.diag(A)), A


A = np.array([[1.0, 2.0, 0.0, 1.0], [2.0, 1.0, 1.0, 0.0], [0.0, 3.0, 2.0, 1.0], [1.0, 0.0, 1.0, 2.0]])
det_elim, U = det_via_elimination(A)
det_np = np.linalg.det(A)

print("Upper-triangular form produced by elimination:\n", U)
print(f"\nDeterminant via elimination: {det_elim:.6f}")
print(f"Determinant via NumPy:       {det_np:.6f}")
print("\nKey idea: after elimination, determinant is the signed product of pivots.")

assert np.isclose(det_elim, det_np)
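
A hedged practical aside: for larger matrices the raw product of pivots can overflow or underflow long before the log-determinant becomes awkward. The sketch below (the well-conditioned SPD test matrix is our own construction) compares the sum of log pivot magnitudes with np.linalg.slogdet, which returns the sign and the log of the absolute determinant separately.

big = np.random.randn(80, 80)
big = big @ big.T + 80.0 * np.eye(80)  # SPD and well conditioned, but with a very large determinant
det_big, U_big = det_via_elimination(big)
sign_np, logdet_np = np.linalg.slogdet(big)
logdet_pivots = np.sum(np.log(np.abs(np.diag(U_big))))

print(f"raw determinant (already huge):  {det_big:.3e}")
print(f"log|det| from pivots:            {logdet_pivots:.6f}")
print(f"log|det| from np.linalg.slogdet: {logdet_np:.6f}")

assert np.isclose(logdet_pivots, logdet_np)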

3. Determinant, Rank, Invertibility, and Conditioning

A determinant tells you whether a square matrix is invertible. It does not tell you whether that matrix is numerically well-conditioned.

This distinction matters in ML because tiny determinants can arise simply from scale, while bad conditioning comes from a large singular-value ratio.


3. Determinant, Rank, and Conditioning

Code cell 15

# ======================================================================
# 3.1 det, rank, and condition number are related but different
# ======================================================================

matrices = {
    'invertible': np.array([[2.0, 1.0], [1.0, 3.0]]),
    'singular': np.array([[1.0, 2.0], [2.0, 4.0]]),
    'tiny determinant, well-conditioned': 0.1 * np.eye(4),
    'ill-conditioned': np.array([[1.0, 1.0], [1.0, 1.000001]]),
}

for name, M in matrices.items():
    det = np.linalg.det(M)
    rank = np.linalg.matrix_rank(M)
    try:
        cond = np.linalg.cond(M)
    except np.linalg.LinAlgError:
        cond = np.inf
    print(f"{name:>32}: det = {det: .6e}, rank = {rank}, cond = {cond: .6e}")

print("\nTakeaway:")
print("- det = 0 is the exact symbolic singularity test for square matrices")
print("- condition number diagnoses numerical fragility")
print("- a small determinant alone does not imply bad conditioning")

4. The Spectral Bridge: Characteristic Polynomial and Cayley-Hamilton

Determinant theory becomes spectral theory when we ask for which values of $\lambda$ the matrix $\lambda I - A$ is singular:

$$\det(\lambda I - A) = 0.$$

The roots of that polynomial are the eigenvalues. This is the transition point from scalar matrix summaries to full decomposition theory.


4. Characteristic Polynomial and Eigenvalues

Code cell 18

# ======================================================================
# 4.1 Characteristic polynomial, eigenvalues, and Cayley-Hamilton
# ======================================================================

A = np.array([[4.0, 2.0], [1.0, 3.0]])
trace_A = np.trace(A)
det_A = np.linalg.det(A)

def p(lam):
    return lam**2 - trace_A * lam + det_A

eigvals, eigvecs = np.linalg.eig(A)
resolvent_point = 6.0  # pick a lambda outside the spectrum so (lambda I - A) is invertible
resolvent = np.linalg.inv(resolvent_point * np.eye(2) - A)

print("A =\n", A)
print(f"trace(A) = {trace_A:.6f}")
print(f"det(A)   = {det_A:.6f}")
print("eigenvalues:", eigvals)
print("check p(lambda_i):", [p(lam) for lam in eigvals])

cayley_hamilton = A @ A - trace_A * A + det_A * np.eye(2)
print("\nCayley-Hamilton residual A^2 - tr(A)A + det(A)I =\n", cayley_hamilton)
print(f"\nResolvent at lambda = {resolvent_point:g}:")
print(resolvent)

assert np.allclose(cayley_hamilton, np.zeros((2, 2)))
assert np.allclose([p(lam) for lam in eigvals], [0.0, 0.0])
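
One concrete payoff of Cayley-Hamilton, sketched as an aside: for an invertible $2 \times 2$ matrix, rearranging $A^2 - \operatorname{tr}(A)A + \det(A)I = 0$ gives $A^{-1} = (\operatorname{tr}(A)I - A)/\det(A)$, so the inverse drops out of the trace and determinant alone.

A_inv_ch = (trace_A * np.eye(2) - A) / det_A
print("Inverse from Cayley-Hamilton:\n", A_inv_ch)
print("NumPy inverse:\n", np.linalg.inv(A))

assert np.allclose(A_inv_ch, np.linalg.inv(A))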

5. Cofactors, Adjugate, and Explicit Inverses

Cofactors are usually introduced as a hand-computation device. But they also encode a deep identity:

$$A \operatorname{adj}(A) = \det(A) I.$$

That identity explains both the inverse formula and the derivative of the determinant.


5. Cofactor Matrix and the Adjugate

Code cell 21

# ======================================================================
# 5.1 Cofactor matrix and adjugate identity
# ======================================================================

def minor(A, i, j):
    mask_rows = [r for r in range(A.shape[0]) if r != i]
    mask_cols = [c for c in range(A.shape[1]) if c != j]
    return np.linalg.det(A[np.ix_(mask_rows, mask_cols)])


def cofactor_matrix(A):
    A = np.array(A, dtype=float)
    n = A.shape[0]
    C = np.zeros_like(A)
    for i in range(n):
        for j in range(n):
            C[i, j] = ((-1) ** (i + j)) * minor(A, i, j)
    return C


A = np.array([[1.0, 2.0, 0.0], [3.0, 1.0, 1.0], [0.0, 2.0, 1.0]])
C = cofactor_matrix(A)
adj = C.T
det_A = np.linalg.det(A)
lhs = A @ adj
rhs = det_A * np.eye(3)
A_inv_from_adj = adj / det_A

print("A =\n", A)
print("\nCofactor matrix =\n", C)
print("\nAdjugate =\n", adj)
print("\nA @ adj(A) =\n", lhs)
print("\ndet(A) I =\n", rhs)
print("\nInverse from adjugate =\n", A_inv_from_adj)
print("\nNumPy inverse =\n", np.linalg.inv(A))

assert np.allclose(lhs, rhs)
assert np.allclose(A_inv_from_adj, np.linalg.inv(A))
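
The cell above verifies the inverse half of the identity. The derivative half is Jacobi's formula: $\partial \det(A) / \partial A_{ij} = C_{ij}$, the $(i,j)$ cofactor, equivalently $\nabla_A \det(A) = \det(A)\,A^{-\top}$ for invertible $A$. A finite-difference sketch, with the step size and helper name being our own choices:

def det_grad_numerical(A, eps=1e-6):
    # Central finite differences of det(A) with respect to each entry.
    A = np.array(A, dtype=float)
    G = np.zeros_like(A)
    for i in range(A.shape[0]):
        for j in range(A.shape[1]):
            E = np.zeros_like(A)
            E[i, j] = eps
            G[i, j] = (np.linalg.det(A + E) - np.linalg.det(A - E)) / (2.0 * eps)
    return G

grad_numeric = det_grad_numerical(A)
grad_cofactor = C                           # Jacobi: entry (i, j) is the cofactor C_ij
grad_inverse = det_A * np.linalg.inv(A).T   # same matrix written with the inverse

print("Numerical gradient of det(A):\n", grad_numeric)
print("Cofactor matrix C =\n", grad_cofactor)

assert np.allclose(grad_numeric, grad_cofactor, atol=1e-5)
assert np.allclose(grad_cofactor, grad_inverse)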

6. Higher-Dimensional Geometry: Gram Determinants and Special Transformations

In dimensions beyond 2 and 3, direct geometric pictures are harder to draw. Gram determinants give a clean workaround: they measure the intrinsic volume spanned by vectors even when those vectors live inside a larger ambient space.


6. Geometric Interpretation — Volume and Orientation

Code cell 24

# ======================================================================
# 6.1 Gram determinant volume and orientation examples
# ======================================================================

vectors = np.array([
    [1.0, 0.0, 0.0],
    [1.0, 1.0, 0.0],
    [1.0, 1.0, 1.0],
]).T
G = vectors.T @ vectors
gram_det = np.linalg.det(G)
volume = np.sqrt(max(gram_det, 0.0))

theta = np.pi / 4
R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
F = np.array([[1.0, 0.0], [0.0, -1.0]])
S = 3.0 * np.eye(2)

print("Vector matrix V =\n", vectors)
print("\nGram matrix G = V^T V =\n", G)
print(f"det(G) = {gram_det:.6f}")
print(f"Intrinsic volume = sqrt(det(G)) = {volume:.6f}")

print("\nSpecial transforms in 2D:")
print(f"det(rotation)   = {np.linalg.det(R):.6f}")
print(f"det(reflection) = {np.linalg.det(F):.6f}")
print(f"det(3I)         = {np.linalg.det(S):.6f}")

assert np.isclose(np.linalg.det(R), 1.0)
assert np.isclose(np.linalg.det(F), -1.0)
assert np.isclose(np.linalg.det(S), 9.0)
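
The three vectors above span all of $\mathbb{R}^3$, so a plain determinant would have worked too. The Gram determinant earns its keep when the spanning vectors sit inside a larger ambient space. A quick check with two vectors of our own choosing in $\mathbb{R}^3$: $\sqrt{\det(G)}$ must equal the cross-product area.

a = np.array([1.0, 2.0, 0.0])
b = np.array([0.0, 1.0, 3.0])
V2 = np.column_stack([a, b])   # 3x2, so det(V2) itself is undefined
G2 = V2.T @ V2
area_gram = np.sqrt(np.linalg.det(G2))
area_cross = np.linalg.norm(np.cross(a, b))

print(f"Parallelogram area via sqrt(det(G)) = {area_gram:.6f}")
print(f"Parallelogram area via ||a x b||    = {area_cross:.6f}")

assert np.isclose(area_gram, area_cross)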

7. Special Matrix Classes: SPD, Orthogonal, and Vandermonde

Structured matrices make determinant computation and interpretation easier.

  • orthogonal matrices have determinant $\pm 1$
  • SPD matrices have positive determinant and stable log-det via Cholesky
  • Vandermonde matrices expose determinant as a product of pairwise differences

7. Log-Determinants and Special Matrix Classes

Code cell 27

# ======================================================================
# 7.1 SPD log-det and Vandermonde formula
# ======================================================================

Sigma = np.array([[4.0, 2.0, 0.0], [2.0, 5.0, 1.0], [0.0, 1.0, 3.0]])
L = np.linalg.cholesky(Sigma)
logdet_chol = 2.0 * np.sum(np.log(np.diag(L)))
eigvals = np.linalg.eigvalsh(Sigma)
logdet_eigs = np.sum(np.log(eigvals))

nodes = np.array([1.0, 2.0, 4.0])
V = np.vander(nodes, N=3, increasing=True)
vand_formula = np.prod([nodes[j] - nodes[i] for i in range(len(nodes)) for j in range(i + 1, len(nodes))])

print("Sigma =\n", Sigma)
print("\nCholesky factor L =\n", L)
print(f"\nlog det(Sigma) via Cholesky = {logdet_chol:.6f}")
print(f"log det(Sigma) via eigenvalues = {logdet_eigs:.6f}")

print("\nVandermonde matrix V =\n", V)
print(f"det(V) via NumPy   = {np.linalg.det(V):.6f}")
print(f"det(V) via formula = {vand_formula:.6f}")

assert np.allclose(logdet_chol, logdet_eigs)
assert np.isclose(np.linalg.det(V), vand_formula)
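
The remaining bullet from the list above, $\det(Q) = \pm 1$ for orthogonal $Q$, is easy to confirm numerically. A short sketch using the Q factor of a random matrix; the construction is our own.

Q, _ = np.linalg.qr(np.random.randn(5, 5))
print("Q^T Q close to identity:", np.allclose(Q.T @ Q, np.eye(5)))
print(f"det(Q) = {np.linalg.det(Q):.6f}")

assert np.isclose(abs(np.linalg.det(Q)), 1.0)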

8. Determinant Identities That Matter in Practice

The most useful determinant identities are not just elegant. They reduce expensive determinant recomputation to smaller auxiliary problems.

This is the same structural trick that shows up repeatedly in ML engineering: exploit low-rank or block structure instead of treating every matrix as dense and generic.


8. Determinantal Identities (MDL, Sylvester, Schur)

Code cell 30

# ======================================================================
# 8.1 Matrix determinant lemma, Sylvester, and Schur complement
# ======================================================================

A = np.diag([2.0, 3.0, 4.0])
u = np.array([[1.0], [0.0], [1.0]])
v = np.array([[1.0], [0.0], [1.0]])
lhs_lemma = np.linalg.det(A + u @ v.T)
rhs_lemma = (1.0 + (v.T @ np.linalg.inv(A) @ u).item()) * np.linalg.det(A)

A_rect = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
B_rect = A_rect.T
lhs_syl = np.linalg.det(np.eye(3) + A_rect @ B_rect)
rhs_syl = np.linalg.det(np.eye(2) + B_rect @ A_rect)

A_block = np.array([[4.0, 2.0], [2.0, 3.0]])
B_block = np.array([[1.0], [0.0]])
D_block = np.array([[2.0]])
M = np.block([[A_block, B_block], [B_block.T, D_block]])
schur = D_block - B_block.T @ np.linalg.inv(A_block) @ B_block
lhs_schur = np.linalg.det(M)
rhs_schur = np.linalg.det(A_block) * np.linalg.det(schur)

print(f"Matrix determinant lemma: lhs = {lhs_lemma:.6f}, rhs = {rhs_lemma:.6f}")
print(f"Sylvester theorem:       lhs = {lhs_syl:.6f}, rhs = {rhs_syl:.6f}")
print(f"Schur complement:        lhs = {lhs_schur:.6f}, rhs = {rhs_schur:.6f}")

assert np.isclose(lhs_lemma, rhs_lemma)
assert np.isclose(lhs_syl, rhs_syl)
assert np.isclose(lhs_schur, rhs_schur)
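
To make the "smaller auxiliary problem" point concrete, here is a hedged sketch of the matrix determinant lemma used as a log-det update; the 500x500 diagonal matrix and the rank-1 vectors are our own choices. After a rank-1 change, the new log-determinant needs only one scalar correction instead of a fresh factorization.

n = 500
d = np.linspace(1.0, 3.0, n)                  # diagonal of a large, already-factored matrix
u_vec = np.random.randn(n) / np.sqrt(n)
v_vec = np.random.randn(n) / np.sqrt(n)

logdet_base = np.sum(np.log(d))
# Lemma: det(D + u v^T) = (1 + v^T D^{-1} u) det(D), so the update is a single dot product.
correction = 1.0 + v_vec @ (u_vec / d)
logdet_lemma = logdet_base + np.log(abs(correction))

sign_full, logdet_full = np.linalg.slogdet(np.diag(d) + np.outer(u_vec, v_vec))

print(f"log|det| via the lemma:    {logdet_lemma:.8f}")
print(f"log|det| via full slogdet: {logdet_full:.8f}")

assert np.isclose(logdet_lemma, logdet_full)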

9. Log-Determinants in Machine Learning

This is the operational core of the chapter for AI.

  • coupling and autoregressive flows are engineered to make Jacobian log-determinants cheap
  • Gaussian log-likelihoods require stable covariance log-det terms
  • Gaussian processes combine linear solves with log-determinants in one objective

9. AI Applications — Log-Det in ML

Code cell 33

# ======================================================================
# 9.1 Coupling-layer Jacobian, Gaussian log-likelihood, and tiny GP term
# ======================================================================

def s1(z1):
    return 0.5 * z1


def t1(z1):
    return z1**2


def s2(z1, z2):
    return 0.25 * (z1 + z2)


def t2(z1, z2):
    return z1 - z2


def coupling_jacobian(z1, z2, z3):
    e1 = np.exp(s1(z1))
    e2 = np.exp(s2(z1, z2))
    J = np.array([
        [1.0, 0.0, 0.0],
        [0.5 * z2 * e1 + 2.0 * z1, e1, 0.0],
        [0.25 * z3 * e2 + 1.0, 0.25 * z3 * e2 - 1.0, e2],
    ])
    return J


z = np.array([0.7, -0.2, 1.1])
J = coupling_jacobian(*z)
logdet_direct = np.log(abs(np.linalg.det(J)))
logdet_triangular = s1(z[0]) + s2(z[0], z[1])

Sigma = np.array([[4.0, 2.0, 0.0], [2.0, 5.0, 1.0], [0.0, 1.0, 3.0]])
mu = np.array([0.0, 0.0, 0.0])
x = np.array([1.0, -1.0, 2.0])
L = np.linalg.cholesky(Sigma)
alpha = np.linalg.solve(L.T, np.linalg.solve(L, x - mu))
gaussian_loglik = -0.5 * (
    len(x) * np.log(2 * np.pi)
    + 2.0 * np.sum(np.log(np.diag(L)))
    + (x - mu) @ alpha
)

K = np.array([[1.0, 0.8, 0.2], [0.8, 1.0, 0.6], [0.2, 0.6, 1.0]]) + 1e-4 * np.eye(3)
y = np.array([1.0, 2.0, 3.0])
Lk = np.linalg.cholesky(K)
alpha_gp = np.linalg.solve(Lk.T, np.linalg.solve(Lk, y))
gp_log_marginal = -0.5 * (
    y @ alpha_gp
    + 2.0 * np.sum(np.log(np.diag(Lk)))
    + len(y) * np.log(2 * np.pi)
)

print("Coupling-layer Jacobian J =\n", J)
print(f"\nlog|det J| direct      = {logdet_direct:.6f}")
print(f"log|det J| triangular = {logdet_triangular:.6f}")

print(f"\nGaussian log-likelihood = {gaussian_loglik:.6f}")
print(f"Tiny GP log marginal    = {gp_log_marginal:.6f}")

assert np.isclose(logdet_direct, logdet_triangular)
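
To connect the Jacobian term back to densities, a small self-contained check (the affine map, base distribution, and test point are our own, separate from the coupling example above): for $x = Az + b$ with $z \sim \mathcal{N}(0, I)$, the change-of-variables formula $\log p_x(x) = \log p_z(z) - \log|\det A|$ must agree with the density of $\mathcal{N}(b, AA^\top)$.

A_map = np.array([[1.5, 0.3], [0.0, 0.8]])
b_map = np.array([0.5, -1.0])
z0 = np.array([0.4, -0.7])
x0 = A_map @ z0 + b_map

log_px_pushforward = stats.multivariate_normal(mean=b_map, cov=A_map @ A_map.T).logpdf(x0)
log_px_changevar = (
    stats.multivariate_normal(mean=np.zeros(2), cov=np.eye(2)).logpdf(z0)
    - np.log(abs(np.linalg.det(A_map)))
)

print(f"log p_x(x) from the pushforward covariance: {log_px_pushforward:.8f}")
print(f"log p_x(x) from change of variables:        {log_px_changevar:.8f}")

assert np.isclose(log_px_pushforward, log_px_changevar)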

10. Diversity and Low-Rank Structure

Two modern ML uses of determinants are especially worth seeing numerically:

  1. DPP-style diversity: determinant rewards sets of vectors that span large volume rather than repeating the same direction.
  2. Low-rank updates: determinant lemmas let us update global quantities cheaply when the matrix change is structured.

10. AI Applications — DPP and LoRA

Code cell 36

# ======================================================================
# 10.1 DPP-style diversity score and LoRA-style low-rank update
# ======================================================================

embeddings = np.array([
    [1.0, 0.0],
    [0.95, 0.05],
    [0.0, 1.0],
])
K = embeddings @ embeddings.T

pair_redundant = K[np.ix_([0, 1], [0, 1])]
pair_diverse = K[np.ix_([0, 2], [0, 2])]

print("Kernel matrix from item embeddings =\n", K)
print(f"\nDet(redundant pair kernel) = {np.linalg.det(pair_redundant):.6f}")
print(f"Det(diverse pair kernel)   = {np.linalg.det(pair_diverse):.6f}")

W = np.diag([2.0, 1.5, 1.2, 0.8])
B = np.array([[1.0, 0.0], [0.5, 1.0], [0.0, 1.0], [1.0, -0.5]])
A = np.array([[0.2, -0.1], [0.1, 0.3], [0.0, 0.2], [-0.2, 0.1]])
update = B @ A.T
lhs = np.linalg.det(W + update)
rhs = np.linalg.det(W) * np.linalg.det(np.eye(2) + A.T @ np.linalg.inv(W) @ B)

print(f"\nLoRA-style update determinant lhs = {lhs:.6f}")
print(f"LoRA-style update determinant rhs = {rhs:.6f}")

assert np.linalg.det(pair_diverse) > np.linalg.det(pair_redundant)
assert np.isclose(lhs, rhs)
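
A slightly larger sketch of the same diversity idea (the brute-force subset search is our own addition): among all item pairs, the determinant of the kernel submatrix is maximized by the least redundant pair.

from itertools import combinations

pair_scores = {
    pair: np.linalg.det(K[np.ix_(pair, pair)])
    for pair in combinations(range(K.shape[0]), 2)
}
best_pair = max(pair_scores, key=pair_scores.get)

for pair, score in sorted(pair_scores.items(), key=lambda kv: -kv[1]):
    print(f"items {pair}: det = {score:.6f}")
print(f"\nMost diverse pair under the determinant score: {best_pair}")

assert best_pair == (0, 2)  # the pair that avoids combining item 0 with its near-duplicate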

What to Notice

  • Determinants start as geometry before they become formulas.
  • Elimination is the practical determinant algorithm; cofactor expansion is mainly a theoretical and hand-computation tool.
  • The determinant is an exact invertibility test, but condition number is the numerical stability test.
  • The characteristic polynomial turns determinant theory into eigenvalue theory.
  • The adjugate identity shows determinants are tightly connected to inverses and derivatives.
  • Log-determinants, not raw determinants, are what modern ML usually optimizes.
  • Structure is everything: triangular, SPD, block, and low-rank matrices make determinant computation tractable.

References used for this notebook: MIT 18.06, Stanford EE263, the Real NVP, Glow, and FFJORD papers, GPyTorch, and the DPP monograph by Kulesza and Taskar, as named in the introduction.

Next step: build the exercise notebook so the computational intuition here turns into retained fluency.