Theory Notebook
Converted from theory.ipynb for web reading.
Determinants — From Volume Scaling to Log-Det in AI
A determinant is not just a formula for small matrices. It is the scalar that tells you whether a linear map preserves dimension, how it rescales volume, and why flow models and Gaussian likelihoods need log-determinants.
This notebook is the interactive companion to notes.md. It follows a teaching-first path rather than mirroring the note section-by-section.
| Block | Topic | What you will build |
|---|---|---|
| 1 | Geometry first | signed area, volume scaling, and orientation |
| 2 | Computation | direct formulas, elimination-based determinant computation |
| 3 | Invertibility | determinant, rank, and condition number together |
| 4 | Spectral bridge | characteristic polynomial, eigenvalues, Cayley-Hamilton |
| 5 | Cofactors | adjugate identity and inverse recovery |
| 6 | Geometry in higher dimension | Gram determinants and special transformations |
| 7 | Special classes | SPD, orthogonal, and Vandermonde determinants |
| 8 | Determinant identities | matrix determinant lemma, Sylvester, Schur complement |
| 9 | ML log-det | coupling-layer Jacobians, Gaussian log-likelihood, tiny GP term |
| 10 | Diversity and low rank | DPP-style diversity scores and LoRA-style determinant updates |
This notebook was shaped using the repository skill guide, the local Number Systems and Systems of Equations notebook style, and standard public references such as MIT 18.06, Stanford EE263, Real NVP, Glow, FFJORD, GPyTorch, and the DPP monograph by Kulesza and Taskar.
Code cell 3
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style="whitegrid", palette="colorblind")
HAS_SNS = True
except ImportError:
plt.style.use("seaborn-v0_8-whitegrid")
HAS_SNS = False
mpl.rcParams.update({
"figure.figsize": (10, 6),
"figure.dpi": 120,
"font.size": 13,
"axes.titlesize": 15,
"axes.labelsize": 13,
"xtick.labelsize": 11,
"ytick.labelsize": 11,
"legend.fontsize": 11,
"legend.framealpha": 0.85,
"lines.linewidth": 2.0,
"axes.spines.top": False,
"axes.spines.right": False,
"savefig.bbox": "tight",
"savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 4
import numpy as np
import numpy.linalg as la
import scipy.linalg as sla
from scipy import stats
COLORS = {
"primary": "#0077BB",
"secondary": "#EE7733",
"tertiary": "#009988",
"error": "#CC3311",
"neutral": "#555555",
"highlight": "#EE3377",
}
HAS_MPL = True
np.set_printoptions(precision=8, suppress=True)
np.random.seed(42)
def header(title):
print("\n" + "=" * len(title))
print(title)
print("=" * len(title))
def check_true(name, cond):
ok = bool(cond)
print(f"{'PASS' if ok else 'FAIL'} - {name}")
return ok
def check_close(name, got, expected, tol=1e-8):
ok = np.allclose(got, expected, atol=tol, rtol=tol)
print(f"{'PASS' if ok else 'FAIL'} - {name}: got {got}, expected {expected}")
return ok
def check(name, got, expected, tol=1e-8):
return check_close(name, got, expected, tol=tol)
def softmax(z, axis=-1, tau=1.0):
z = np.asarray(z, dtype=float) / float(tau)
z = z - np.max(z, axis=axis, keepdims=True)
e = np.exp(z)
return e / np.sum(e, axis=axis, keepdims=True)
def cosine_similarity(a, b):
a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
return float(a @ b / (la.norm(a) * la.norm(b) + 1e-12))
def numerical_rank(A, tol=1e-10):
return int(np.sum(la.svd(A, compute_uv=False) > tol))
def orthonormal_basis(A, tol=1e-10):
Q, R = la.qr(A)
keep = np.abs(np.diag(R)) > tol
return Q[:, keep]
# Compatibility helpers used by the Chapter 02 theory and exercise cells.
def null_space(A, tol=1e-10):
    A = np.asarray(A, dtype=float)
    U, S, Vt = la.svd(A, full_matrices=True)
    rank = int(np.sum(S > tol))
    return Vt[rank:].T
svd_null_space = null_space
def gram_schmidt(vectors, tol=1e-10):
A = np.asarray(vectors, dtype=float)
if A.ndim == 1:
A = A.reshape(1, -1)
basis = []
for v in A:
w = v.astype(float).copy()
for q in basis:
w = w - np.dot(w, q) * q
norm = la.norm(w)
if norm > tol:
basis.append(w / norm)
return np.array(basis)
def projection_matrix_from_columns(A, tol=1e-10):
Q = orthonormal_basis(np.asarray(A, dtype=float), tol=tol)
return Q @ Q.T
def random_unit_vectors(n, d):
X = np.random.randn(n, d)
return X / np.maximum(la.norm(X, axis=1, keepdims=True), 1e-12)
def pairwise_distances(X):
X = np.asarray(X, dtype=float)
diff = X[:, None, :] - X[None, :, :]
return la.norm(diff, axis=-1)
def normalize(x, axis=None, tol=1e-12):
x = np.asarray(x, dtype=float)
norm = la.norm(x, axis=axis, keepdims=True)
return x / np.maximum(norm, tol)
def frobenius_inner(A, B):
return float(np.sum(np.asarray(A, dtype=float) * np.asarray(B, dtype=float)))
def outer_sum_product(A, B):
A = np.asarray(A, dtype=float)
B = np.asarray(B, dtype=float)
return sum(np.outer(A[:, k], B[k, :]) for k in range(A.shape[1]))
def softmax_rows(X):
return softmax(X, axis=1)
def col_space(A, tol=1e-10):
return orthonormal_basis(np.asarray(A, dtype=float), tol=tol)
def row_space(A, tol=1e-10):
return orthonormal_basis(np.asarray(A, dtype=float).T, tol=tol).T
def rref(A, tol=1e-10):
R = np.array(A, dtype=float, copy=True)
m, n = R.shape
pivots = []
row = 0
for col in range(n):
pivot = row + int(np.argmax(np.abs(R[row:, col]))) if row < m else row
if row >= m or abs(R[pivot, col]) <= tol:
continue
if pivot != row:
R[[row, pivot]] = R[[pivot, row]]
R[row] = R[row] / R[row, col]
for r in range(m):
if r != row:
R[r] = R[r] - R[r, col] * R[row]
pivots.append(col)
row += 1
if row == m:
break
R[np.abs(R) < tol] = 0.0
return R, pivots
def nullspace_basis(A, tol=1e-10):
A = np.asarray(A, dtype=float)
U, S, Vt = la.svd(A, full_matrices=True)
rank = int(np.sum(S > tol))
return Vt[rank:].T, rank
print("Chapter helper setup complete.")
1. Signed Area, Orientation, and the 2x2 Determinant
The fastest way to understand determinants is to stop thinking of them as mysterious scalar outputs and start thinking of them as signed area / volume scaling factors.
For two vectors $u, v \in \mathbb{R}^2$, the matrix $A = [\,u \;\; v\,]$ has determinant
$$\det(A) = u_1 v_2 - u_2 v_1.$$
Its absolute value is the area of the parallelogram spanned by $u$ and $v$. Its sign tells you orientation:
- positive: orientation preserved
- negative: orientation reversed
- zero: the vectors are dependent, so the parallelogram collapses
1. Intuition — Signed Area and Orientation
Code cell 7
# ======================================================================
# 1.1 Signed area and orientation in 2D
# ======================================================================
def signed_area_2d(u, v):
A = np.column_stack([u, v])
return np.linalg.det(A)
examples = {
'positive orientation': (np.array([3.0, 1.0]), np.array([1.0, 2.0])),
'negative orientation': (np.array([3.0, 1.0]), np.array([1.0, -2.0])),
'collapsed / dependent': (np.array([2.0, 1.0]), np.array([4.0, 2.0])),
}
for name, (u, v) in examples.items():
area = signed_area_2d(u, v)
print(f"{name:>22}: det([u v]) = {area: .6f} | area = {abs(area):.6f}")
u = np.array([3.0, 1.0])
v = np.array([1.0, 2.0])
A = np.column_stack([u, v])
unit_square = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
mapped = unit_square @ A.T
print("\nMatrix A = [u v]:\n", A)
print("Mapped unit-square corners:\n", mapped)
print("Interpretation: |det(A)| is the area scaling factor from the unit square to the image parallelogram.")
2. From Formula to Computation
For tiny matrices, explicit formulas are useful. For real computation, elimination wins.
Three levels matter:
- closed forms for $2 \times 2$ and $3 \times 3$ matrices
- cofactor expansion for theory and hand work on small sparse matrices
- row reduction / LU-style elimination for anything practical
2. Computing Determinants — Small Matrices
Code cell 10
# ======================================================================
# 2.1 Small-matrix formulas
# ======================================================================
def det_2x2(A):
A = np.array(A, dtype=float)
return A[0, 0] * A[1, 1] - A[0, 1] * A[1, 0]
def det_3x3_sarrus(A):
A = np.array(A, dtype=float)
a, b, c = A[0]
d, e, f = A[1]
g, h, i = A[2]
return a * e * i + b * f * g + c * d * h - c * e * g - b * d * i - a * f * h
A2 = np.array([[5.0, 3.0], [2.0, 4.0]])
A3 = np.array([[1.0, 2.0, 3.0], [0.0, 4.0, 1.0], [2.0, 1.0, 0.0]])
print("2x2 formula vs NumPy:")
print(det_2x2(A2), np.linalg.det(A2))
print("\n3x3 Sarrus vs NumPy:")
print(det_3x3_sarrus(A3), np.linalg.det(A3))
assert np.isclose(det_2x2(A2), np.linalg.det(A2))
assert np.isclose(det_3x3_sarrus(A3), np.linalg.det(A3))
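Cofactor (Laplace) expansion generalizes these closed forms to any size, at factorial cost. A minimal recursive sketch, where laplace_det is a name introduced here purely for illustration:
# Recursive Laplace expansion along row 0 (O(n!) cost, illustration only).
def laplace_det(A):
    A = np.asarray(A, dtype=float)
    n = A.shape[0]
    if n == 1:
        return A[0, 0]
    total = 0.0
    for j in range(n):
        minor = np.delete(np.delete(A, 0, axis=0), j, axis=1)
        total += ((-1) ** j) * A[0, j] * laplace_det(minor)
    return total
print("Laplace expansion:", laplace_det(A3))
print("NumPy:            ", np.linalg.det(A3))
assert np.isclose(laplace_det(A3), np.linalg.det(A3))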
2. Computing Determinants — Via Elimination
Code cell 12
# ======================================================================
# 2.2 Determinant via elimination (practical path)
# ======================================================================
def det_via_elimination(A):
A = np.array(A, dtype=float).copy()
n = A.shape[0]
sign = 1.0
for k in range(n):
pivot = k + np.argmax(np.abs(A[k:, k]))
if np.isclose(A[pivot, k], 0.0):
return 0.0, A
if pivot != k:
A[[k, pivot]] = A[[pivot, k]]
sign *= -1.0
for i in range(k + 1, n):
factor = A[i, k] / A[k, k]
A[i, k:] -= factor * A[k, k:]
return sign * np.prod(np.diag(A)), A
A = np.array([[1.0, 2.0, 0.0, 1.0], [2.0, 1.0, 1.0, 0.0], [0.0, 3.0, 2.0, 1.0], [1.0, 0.0, 1.0, 2.0]])
det_elim, U = det_via_elimination(A)
det_np = np.linalg.det(A)
print("Upper-triangular form produced by elimination:\n", U)
print(f"\nDeterminant via elimination: {det_elim:.6f}")
print(f"Determinant via NumPy: {det_np:.6f}")
print("\nKey idea: after elimination, determinant is the signed product of pivots.")
assert np.isclose(det_elim, det_np)
3. Determinant, Rank, Invertibility, and Conditioning
A determinant tells you whether a square matrix is invertible. It does not tell you whether that matrix is numerically well-conditioned.
This distinction matters in ML because tiny determinants can arise simply from scale, while bad conditioning comes from a large singular-value ratio.
3. Determinant, Rank, and Conditioning
Code cell 15
# ======================================================================
# 3.1 det, rank, and condition number are related but different
# ======================================================================
matrices = {
'invertible': np.array([[2.0, 1.0], [1.0, 3.0]]),
'singular': np.array([[1.0, 2.0], [2.0, 4.0]]),
'tiny determinant, well-conditioned': 0.1 * np.eye(4),
'ill-conditioned': np.array([[1.0, 1.0], [1.0, 1.000001]]),
}
for name, M in matrices.items():
det = np.linalg.det(M)
rank = np.linalg.matrix_rank(M)
try:
cond = np.linalg.cond(M)
except np.linalg.LinAlgError:
cond = np.inf
print(f"{name:>32}: det = {det: .6e}, rank = {rank}, cond = {cond: .6e}")
print("\nTakeaway:")
print("- det = 0 is the exact symbolic singularity test for square matrices")
print("- condition number diagnoses numerical fragility")
print("- a small determinant alone does not imply bad conditioning")
4. The Spectral Bridge: Characteristic Polynomial and Cayley-Hamilton
Determinant theory becomes spectral theory when we ask: for which $\lambda$ does $A - \lambda I$ become singular? That question is encoded by the characteristic polynomial $p(\lambda) = \det(\lambda I - A)$.
The roots of that polynomial are the eigenvalues. This is the transition point from scalar matrix summaries to full decomposition theory.
4. Characteristic Polynomial and Eigenvalues
Code cell 18
# ======================================================================
# 4.1 Characteristic polynomial, eigenvalues, and Cayley-Hamilton
# ======================================================================
A = np.array([[4.0, 2.0], [1.0, 3.0]])
trace_A = np.trace(A)
det_A = np.linalg.det(A)
def p(lam):
return lam**2 - trace_A * lam + det_A
eigvals, eigvecs = np.linalg.eig(A)
resolvent_point = 6.0 # pick a lambda outside the spectrum so (lambda I - A) is invertible
resolvent = np.linalg.inv(resolvent_point * np.eye(2) - A)
print("A =\n", A)
print(f"trace(A) = {trace_A:.6f}")
print(f"det(A) = {det_A:.6f}")
print("eigenvalues:", eigvals)
print("check p(lambda_i):", [p(lam) for lam in eigvals])
cayley_hamilton = A @ A - trace_A * A + det_A * np.eye(2)
print("\nCayley-Hamilton residual A^2 - tr(A)A + det(A)I =\n", cayley_hamilton)
print(f"\nResolvent at lambda = {resolvent_point:g}:")
print(resolvent)
assert np.allclose(cayley_hamilton, np.zeros((2, 2)))
assert np.allclose([p(lam) for lam in eigvals], [0.0, 0.0])
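A closely related pair of identities, checked with the same matrix: the determinant is the product of the eigenvalues and the trace is their sum, exactly the two coefficients of the $2 \times 2$ characteristic polynomial. A small follow-up reusing eigvals, det_A, and trace_A from the cell above:
# det(A) = product of eigenvalues, trace(A) = sum of eigenvalues.
prod_eigs = float(np.prod(eigvals).real)
sum_eigs = float(np.sum(eigvals).real)
print(f"product of eigenvalues = {prod_eigs:.6f}, det(A)   = {det_A:.6f}")
print(f"sum of eigenvalues     = {sum_eigs:.6f}, trace(A) = {trace_A:.6f}")
assert np.isclose(prod_eigs, det_A)
assert np.isclose(sum_eigs, trace_A)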
5. Cofactors, Adjugate, and Explicit Inverses
Cofactors are usually introduced as a hand-computation device. But they also encode a deep identity:
$$A \,\operatorname{adj}(A) = \operatorname{adj}(A)\, A = \det(A)\, I.$$
That identity explains both the inverse formula $A^{-1} = \operatorname{adj}(A)/\det(A)$ and the derivative of the determinant (a finite-difference check of the derivative claim follows the code cell below).
5. Cofactor Matrix and the Adjugate
Code cell 21
# ======================================================================
# 5.1 Cofactor matrix and adjugate identity
# ======================================================================
def minor(A, i, j):
mask_rows = [r for r in range(A.shape[0]) if r != i]
mask_cols = [c for c in range(A.shape[1]) if c != j]
return np.linalg.det(A[np.ix_(mask_rows, mask_cols)])
def cofactor_matrix(A):
A = np.array(A, dtype=float)
n = A.shape[0]
C = np.zeros_like(A)
for i in range(n):
for j in range(n):
C[i, j] = ((-1) ** (i + j)) * minor(A, i, j)
return C
A = np.array([[1.0, 2.0, 0.0], [3.0, 1.0, 1.0], [0.0, 2.0, 1.0]])
C = cofactor_matrix(A)
adj = C.T
det_A = np.linalg.det(A)
lhs = A @ adj
rhs = det_A * np.eye(3)
A_inv_from_adj = adj / det_A
print("A =\n", A)
print("\nCofactor matrix =\n", C)
print("\nAdjugate =\n", adj)
print("\nA @ adj(A) =\n", lhs)
print("\ndet(A) I =\n", rhs)
print("\nInverse from adjugate =\n", A_inv_from_adj)
print("\nNumPy inverse =\n", np.linalg.inv(A))
assert np.allclose(lhs, rhs)
assert np.allclose(A_inv_from_adj, np.linalg.inv(A))
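The derivative claim is Jacobi's formula: the gradient of det(A) with respect to the entries of A is adj(A)^T. A minimal finite-difference sketch, reusing A and adj from the cell above:
# Jacobi's formula: the gradient of det at A equals adj(A)^T (finite-difference check).
eps = 1e-6
grad_fd = np.zeros_like(A)
for i in range(3):
    for j in range(3):
        E = np.zeros_like(A)
        E[i, j] = eps
        grad_fd[i, j] = (np.linalg.det(A + E) - np.linalg.det(A - E)) / (2 * eps)
print("Finite-difference gradient of det(A):\n", grad_fd)
print("\nadj(A)^T:\n", adj.T)
assert np.allclose(grad_fd, adj.T, atol=1e-5)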
6. Higher-Dimensional Geometry: Gram Determinants and Special Transformations
In dimensions beyond 2 and 3, direct geometric pictures are harder to draw. Gram determinants give a clean workaround: they measure the intrinsic volume spanned by vectors even when those vectors live inside a larger ambient space.
6. Geometric Interpretation — Volume and Orientation
Code cell 24
# ======================================================================
# 6.1 Gram determinant volume and orientation examples
# ======================================================================
vectors = np.array([
[1.0, 0.0, 0.0],
[1.0, 1.0, 0.0],
[1.0, 1.0, 1.0],
]).T
G = vectors.T @ vectors
gram_det = np.linalg.det(G)
volume = np.sqrt(max(gram_det, 0.0))
theta = np.pi / 4
R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
F = np.array([[1.0, 0.0], [0.0, -1.0]])
S = 3.0 * np.eye(2)
print("Vector matrix V =\n", vectors)
print("\nGram matrix G = V^T V =\n", G)
print(f"det(G) = {gram_det:.6f}")
print(f"Intrinsic volume = sqrt(det(G)) = {volume:.6f}")
print("\nSpecial transforms in 2D:")
print(f"det(rotation) = {np.linalg.det(R):.6f}")
print(f"det(reflection) = {np.linalg.det(F):.6f}")
print(f"det(3I) = {np.linalg.det(S):.6f}")
assert np.isclose(np.linalg.det(R), 1.0)
assert np.isclose(np.linalg.det(F), -1.0)
assert np.isclose(np.linalg.det(S), 9.0)
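The same Gram trick works when the vectors sit inside a larger ambient space, where no square matrix of the vectors even exists. A minimal sketch with two vectors in R^4, cross-checked against a base-times-height area computation:
# Two vectors in R^4: sqrt(det(Gram)) still gives the 2D parallelogram area.
u4 = np.array([1.0, 2.0, 0.0, 1.0])
w4 = np.array([0.0, 1.0, 1.0, 1.0])
V2 = np.column_stack([u4, w4])
G2 = V2.T @ V2
area_gram = np.sqrt(np.linalg.det(G2))
w_perp = w4 - (w4 @ u4) / (u4 @ u4) * u4          # component of w4 orthogonal to u4
area_direct = np.linalg.norm(u4) * np.linalg.norm(w_perp)
print(f"sqrt(det(G)) area in R^4 = {area_gram:.6f}")
print(f"base-times-height area   = {area_direct:.6f}")
assert np.isclose(area_gram, area_direct)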
7. Special Matrix Classes: SPD, Orthogonal, and Vandermonde
Structured matrices make determinant computation and interpretation easier.
- orthogonal matrices have determinant $\pm 1$ (checked numerically after the code cell below)
- SPD matrices have positive determinant and stable log-det via Cholesky
- Vandermonde matrices expose determinant as a product of pairwise differences
7. Log-Determinants and Special Matrix Classes
Code cell 27
# ======================================================================
# 7.1 SPD log-det and Vandermonde formula
# ======================================================================
Sigma = np.array([[4.0, 2.0, 0.0], [2.0, 5.0, 1.0], [0.0, 1.0, 3.0]])
L = np.linalg.cholesky(Sigma)
logdet_chol = 2.0 * np.sum(np.log(np.diag(L)))
eigvals = np.linalg.eigvalsh(Sigma)
logdet_eigs = np.sum(np.log(eigvals))
nodes = np.array([1.0, 2.0, 4.0])
V = np.vander(nodes, N=3, increasing=True)
vand_formula = np.prod([nodes[j] - nodes[i] for i in range(len(nodes)) for j in range(i + 1, len(nodes))])
print("Sigma =\n", Sigma)
print("\nCholesky factor L =\n", L)
print(f"\nlog det(Sigma) via Cholesky = {logdet_chol:.6f}")
print(f"log det(Sigma) via eigenvalues = {logdet_eigs:.6f}")
print("\nVandermonde matrix V =\n", V)
print(f"det(V) via NumPy = {np.linalg.det(V):.6f}")
print(f"det(V) via formula = {vand_formula:.6f}")
assert np.allclose(logdet_chol, logdet_eigs)
assert np.isclose(np.linalg.det(V), vand_formula)
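The orthogonal case from the list above can be checked the same way: the Q factor of a QR decomposition of a random matrix is orthogonal, so its determinant is +1 or -1 and the map preserves volume exactly. A small sketch:
# Orthogonal matrices have determinant +1 or -1 (they preserve volume).
rng = np.random.default_rng(0)
Q, _ = np.linalg.qr(rng.standard_normal((5, 5)))
print(f"det(Q) for a random 5x5 orthogonal Q = {np.linalg.det(Q):.6f}")
print("max |Q^T Q - I| =", np.max(np.abs(Q.T @ Q - np.eye(5))))
assert np.isclose(abs(np.linalg.det(Q)), 1.0)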
8. Determinant Identities That Matter in Practice
The most useful determinant identities are not just elegant. They reduce expensive determinant recomputation to smaller auxiliary problems.
This is the same structural trick that shows up repeatedly in ML engineering: exploit low-rank or block structure instead of treating every matrix as dense and generic.
8. Determinantal Identities (MDL, Sylvester, Schur)
Code cell 30
# ======================================================================
# 8.1 Matrix determinant lemma, Sylvester, and Schur complement
# ======================================================================
A = np.diag([2.0, 3.0, 4.0])
u = np.array([[1.0], [0.0], [1.0]])
v = np.array([[1.0], [0.0], [1.0]])
lhs_lemma = np.linalg.det(A + u @ v.T)
rhs_lemma = (1.0 + (v.T @ np.linalg.inv(A) @ u).item()) * np.linalg.det(A)
A_rect = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
B_rect = A_rect.T
lhs_syl = np.linalg.det(np.eye(3) + A_rect @ B_rect)
rhs_syl = np.linalg.det(np.eye(2) + B_rect @ A_rect)
A_block = np.array([[4.0, 2.0], [2.0, 3.0]])
B_block = np.array([[1.0], [0.0]])
D_block = np.array([[2.0]])
M = np.block([[A_block, B_block], [B_block.T, D_block]])
schur = D_block - B_block.T @ np.linalg.inv(A_block) @ B_block
lhs_schur = np.linalg.det(M)
rhs_schur = np.linalg.det(A_block) * np.linalg.det(schur)
print(f"Matrix determinant lemma: lhs = {lhs_lemma:.6f}, rhs = {rhs_lemma:.6f}")
print(f"Sylvester theorem: lhs = {lhs_syl:.6f}, rhs = {rhs_syl:.6f}")
print(f"Schur complement: lhs = {lhs_schur:.6f}, rhs = {rhs_schur:.6f}")
assert np.isclose(lhs_lemma, rhs_lemma)
assert np.isclose(lhs_syl, rhs_syl)
assert np.isclose(lhs_schur, rhs_schur)
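The practical payoff of the lemma is incremental updating: a running log-determinant can be maintained under rank-1 changes by touching only a scalar correction per step. A minimal sketch (the inverse is refreshed naively here for clarity; a production version would pair this with Sherman-Morrison):
# Maintain log|det| under rank-1 updates via the matrix determinant lemma.
rng = np.random.default_rng(1)
dim = 6
A_run = np.diag(rng.uniform(1.0, 3.0, size=dim))
logdet_run = float(np.sum(np.log(np.diag(A_run))))
A_run_inv = np.linalg.inv(A_run)
for step in range(3):
    u_vec = 0.3 * rng.standard_normal((dim, 1))
    v_vec = 0.3 * rng.standard_normal((dim, 1))
    # Lemma: det(A + u v^T) = det(A) * (1 + v^T A^{-1} u) -> one scalar per update.
    logdet_run += np.log(abs(1.0 + (v_vec.T @ A_run_inv @ u_vec).item()))
    A_run = A_run + u_vec @ v_vec.T
    A_run_inv = np.linalg.inv(A_run)   # naive refresh; Sherman-Morrison avoids this solve
    direct = np.linalg.slogdet(A_run)[1]
    print(f"step {step}: incremental log|det| = {logdet_run: .6f}, direct = {direct: .6f}")
    assert np.isclose(logdet_run, direct)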
9. Log-Determinants in Machine Learning
This is the operational core of the chapter for AI.
- coupling and autoregressive flows are engineered to make Jacobian log-determinants cheap
- Gaussian log-likelihoods require stable covariance log-det terms
- Gaussian processes combine linear solves with log-determinants in one objective
9. AI Applications — Log-Det in ML
Code cell 33
# ======================================================================
# 9.1 Coupling-layer Jacobian, Gaussian log-likelihood, and tiny GP term
# ======================================================================
def s1(z1):
return 0.5 * z1
def t1(z1):
return z1**2
def s2(z1, z2):
return 0.25 * (z1 + z2)
def t2(z1, z2):
return z1 - z2
def coupling_jacobian(z1, z2, z3):
e1 = np.exp(s1(z1))
e2 = np.exp(s2(z1, z2))
J = np.array([
[1.0, 0.0, 0.0],
[0.5 * z2 * e1 + 2.0 * z1, e1, 0.0],
[0.25 * z3 * e2 + 1.0, 0.25 * z3 * e2 - 1.0, e2],
])
return J
z = np.array([0.7, -0.2, 1.1])
J = coupling_jacobian(*z)
logdet_direct = np.log(abs(np.linalg.det(J)))
logdet_triangular = s1(z[0]) + s2(z[0], z[1])
Sigma = np.array([[4.0, 2.0, 0.0], [2.0, 5.0, 1.0], [0.0, 1.0, 3.0]])
mu = np.array([0.0, 0.0, 0.0])
x = np.array([1.0, -1.0, 2.0])
L = np.linalg.cholesky(Sigma)
alpha = np.linalg.solve(L.T, np.linalg.solve(L, x - mu))
gaussian_loglik = -0.5 * (
len(x) * np.log(2 * np.pi)
+ 2.0 * np.sum(np.log(np.diag(L)))
+ (x - mu) @ alpha
)
K = np.array([[1.0, 0.8, 0.2], [0.8, 1.0, 0.6], [0.2, 0.6, 1.0]]) + 1e-4 * np.eye(3)
y = np.array([1.0, 2.0, 3.0])
Lk = np.linalg.cholesky(K)
alpha_gp = np.linalg.solve(Lk.T, np.linalg.solve(Lk, y))
gp_log_marginal = -0.5 * (
y @ alpha_gp
+ 2.0 * np.sum(np.log(np.diag(Lk)))
+ len(y) * np.log(2 * np.pi)
)
print("Coupling-layer Jacobian J =\n", J)
print(f"\nlog|det J| direct = {logdet_direct:.6f}")
print(f"log|det J| triangular = {logdet_triangular:.6f}")
print(f"\nGaussian log-likelihood = {gaussian_loglik:.6f}")
print(f"Tiny GP log marginal = {gp_log_marginal:.6f}")
assert np.isclose(logdet_direct, logdet_triangular)
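A complementary change-of-variables check (a sketch with an assumed affine flow, not part of the coupling example above): for x = A_f z + b with z ~ N(0, I), the flow density p_z(A_f^{-1}(x - b)) / |det A_f| must agree with the exact N(b, A_f A_f^T) density, which scipy's multivariate_normal can confirm.
# Change of variables for an affine flow x = A_f z + b with z ~ N(0, I).
A_f = np.array([[1.5, 0.3], [-0.2, 0.8]])
b_f = np.array([0.5, -1.0])
x_test = np.array([1.2, 0.4])
z_test = np.linalg.solve(A_f, x_test - b_f)
log_pz = stats.multivariate_normal(mean=np.zeros(2), cov=np.eye(2)).logpdf(z_test)
log_px_flow = log_pz - np.linalg.slogdet(A_f)[1]
log_px_exact = stats.multivariate_normal(mean=b_f, cov=A_f @ A_f.T).logpdf(x_test)
print(f"log p_x(x) via change of variables = {log_px_flow:.6f}")
print(f"log p_x(x) via exact Gaussian      = {log_px_exact:.6f}")
assert np.isclose(log_px_flow, log_px_exact)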
10. Diversity and Low-Rank Structure
Two modern ML uses of determinants are especially worth seeing numerically:
- DPP-style diversity: determinant rewards sets of vectors that span large volume rather than repeating the same direction.
- Low-rank updates: determinant lemmas let us update global quantities cheaply when the matrix change is structured.
10. AI Applications — DPP and LoRA
Code cell 36
# ======================================================================
# 10.1 DPP-style diversity score and LoRA-style low-rank update
# ======================================================================
embeddings = np.array([
[1.0, 0.0],
[0.95, 0.05],
[0.0, 1.0],
])
K = embeddings @ embeddings.T
pair_redundant = K[np.ix_([0, 1], [0, 1])]
pair_diverse = K[np.ix_([0, 2], [0, 2])]
print("Kernel matrix from item embeddings =\n", K)
print(f"\nDet(redundant pair kernel) = {np.linalg.det(pair_redundant):.6f}")
print(f"Det(diverse pair kernel) = {np.linalg.det(pair_diverse):.6f}")
W = np.diag([2.0, 1.5, 1.2, 0.8])
B = np.array([[1.0, 0.0], [0.5, 1.0], [0.0, 1.0], [1.0, -0.5]])
A = np.array([[0.2, -0.1], [0.1, 0.3], [0.0, 0.2], [-0.2, 0.1]])
update = B @ A.T
lhs = np.linalg.det(W + update)
rhs = np.linalg.det(W) * np.linalg.det(np.eye(2) + A.T @ np.linalg.inv(W) @ B)
print(f"\nLoRA-style update determinant lhs = {lhs:.6f}")
print(f"LoRA-style update determinant rhs = {rhs:.6f}")
assert np.linalg.det(pair_diverse) > np.linalg.det(pair_redundant)
assert np.isclose(lhs, rhs)
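One more way to use the diversity score (a hypothetical greedy routine named greedy_logdet_selection, in the spirit of DPP MAP inference rather than taken from the notebook): repeatedly add the item that most increases the log-determinant of the selected kernel submatrix, which naturally skips near-duplicates.
# Greedy log-det selection (DPP MAP style): choose 2 of the 3 items above.
def greedy_logdet_selection(K, k):
    selected = []
    for _ in range(k):
        best_item, best_logdet = None, -np.inf
        for j in range(K.shape[0]):
            if j in selected:
                continue
            idx = selected + [j]
            sign, logdet = np.linalg.slogdet(K[np.ix_(idx, idx)])
            if sign > 0 and logdet > best_logdet:   # skip items that collapse the volume
                best_item, best_logdet = j, logdet
        selected.append(best_item)
    return selected
chosen = greedy_logdet_selection(K + 1e-9 * np.eye(3), k=2)
print("Greedy log-det selection picks items:", chosen)
print("Item 0 pairs with the orthogonal item 2; the near-duplicate item 1 is skipped.")
assert set(chosen) == {0, 2}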
What to Notice
- Determinants start as geometry before they become formulas.
- Elimination is the practical determinant algorithm; cofactor expansion is mainly a theoretical and hand-computation tool.
- The determinant is an exact invertibility test, but condition number is the numerical stability test.
- The characteristic polynomial turns determinant theory into eigenvalue theory.
- The adjugate identity shows determinants are tightly connected to inverses and derivatives.
- Log-determinants, not raw determinants, are what modern ML usually optimizes.
- Structure is everything: triangular, SPD, block, and low-rank matrices make determinant computation tractable.
References used for this notebook
- notes.md in this chapter
- MIT 18.06 Linear Algebra
- Stanford EE263
- Rezende and Mohamed (2015), Variational Inference with Normalizing Flows
- Dinh, Sohl-Dickstein, and Bengio (2017), Real NVP
- Kingma and Dhariwal (2018), Glow
- Grathwohl et al. (2018), FFJORD
- Gardner et al. (2018), GPyTorch
- Kulesza and Taskar (2012), Determinantal Point Processes for Machine Learning
Next step: build the exercise notebook so the computational intuition here turns into retained fluency.