Probabilistic Models: Theory Notebook

Converted from theory.ipynb for web reading.

This notebook makes probabilistic modeling executable: likelihoods, Bayes updates, naive Bayes, mixture responsibilities, EM, HMM forward recursion, Monte Carlo estimates, and calibration.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize":    (10, 6),
    "figure.dpi":         120,
    "font.size":           13,
    "axes.titlesize":      15,
    "axes.labelsize":      13,
    "xtick.labelsize":     11,
    "ytick.labelsize":     11,
    "legend.fontsize":     11,
    "legend.framealpha":   0.85,
    "lines.linewidth":      2.0,
    "axes.spines.top":     False,
    "axes.spines.right":   False,
    "savefig.bbox":       "tight",
    "savefig.dpi":         150,
})
np.random.seed(42)
print("Plot setup complete.")

1. Normalize probabilities

Code cell 4

scores = np.array([2.0, 1.0, 0.5])
p = scores / scores.sum()
print("p:", p)
print("sum:", p.sum())

2. Bernoulli MLE

Code cell 6

y = np.array([1, 0, 1, 1, 1, 0, 1])
p_mle = y.mean()
loglik = np.sum(y * np.log(p_mle) + (1 - y) * np.log(1 - p_mle))
print("MLE p:", p_mle)
print("log likelihood:", loglik)

3. Beta-Bernoulli posterior

Code cell 8

alpha, beta = 2, 2
heads, tails = 7, 3
post_alpha = alpha + heads
post_beta = beta + tails
posterior_mean = post_alpha / (post_alpha + post_beta)
print("posterior Beta:", (post_alpha, post_beta))
print("posterior mean:", posterior_mean)

4. Naive Bayes classification

Code cell 10

class_prior = np.array([0.6, 0.4])      # P(class c)
feature_probs = np.array([              # P(feature_j = 1 | class c), one row per class
    [0.8, 0.3, 0.5],
    [0.2, 0.7, 0.6],
])
x = np.array([1, 0, 1])                 # observed binary feature vector
log_scores = np.log(class_prior)
for c in range(2):
    log_scores[c] += np.sum(x*np.log(feature_probs[c]) + (1-x)*np.log(1-feature_probs[c]))
posterior = np.exp(log_scores - log_scores.max())
posterior = posterior / posterior.sum()
print("posterior class probabilities:", posterior)

5. Gaussian mixture responsibilities

Code cell 12

def normal_pdf(x, mean, std):
    return np.exp(-0.5*((x-mean)/std)**2) / (std*np.sqrt(2*np.pi))

x = np.array([-2.0, -1.0, 0.2, 2.0, 3.0])   # data points
pi = np.array([0.45, 0.55])                  # mixing weights
mu = np.array([-1.0, 2.0])                   # component means
std = np.array([0.8, 0.9])                   # component standard deviations
joint = np.stack([pi[k] * normal_pdf(x, mu[k], std[k]) for k in range(2)], axis=1)
gamma = joint / joint.sum(axis=1, keepdims=True)
print("responsibilities:\n", np.round(gamma, 3))

6. One EM mean update

Code cell 14

Nk = gamma.sum(axis=0)
mu_new = (gamma * x[:, None]).sum(axis=0) / Nk
print("old means:", mu)
print("new means:", np.round(mu_new, 3))

7. HMM forward recursion

Code cell 16

start = np.array([0.6, 0.4])                 # P(z_0 = i)
trans = np.array([[0.7, 0.3], [0.2, 0.8]])   # trans[i, j] = P(z_t = j | z_{t-1} = i)
emit = np.array([[0.9, 0.1], [0.3, 0.7]])    # emit[i, k] = P(obs_t = k | z_t = i)
obs = [0, 1, 1]
alpha = start * emit[:, obs[0]]
print("t=0", np.round(alpha, 4))
for o in obs[1:]:
    alpha = (alpha @ trans) * emit[:, o]
    print("next", np.round(alpha, 4), "normed", np.round(alpha/alpha.sum(), 4))
print("sequence likelihood:", alpha.sum())

8. Viterbi-style max recursion

Code cell 18

delta = start * emit[:, obs[0]]
for o in obs[1:]:
    delta = np.max(delta[:, None] * trans, axis=0) * emit[:, o]
    print("delta:", np.round(delta, 5))
print("best path probability proxy:", delta.max())

9. Monte Carlo expectation

Code cell 20

rng = np.random.default_rng(0)
samples = rng.normal(loc=2.0, scale=3.0, size=20000)
estimate = np.mean(samples**2)
truth = 3.0**2 + 2.0**2
print("MC estimate E[X^2]:", estimate)
print("truth:", truth)

10. ELBO identity intuition

Code cell 22

p_xz = np.array([0.2, 0.3, 0.1])   # joint p(x, z) for each value of the latent z
q = np.array([0.3, 0.4, 0.3])      # variational distribution q(z)
log_px = np.log(p_xz.sum())
elbo = np.sum(q * (np.log(p_xz + 1e-12) - np.log(q + 1e-12)))
kl_gap = log_px - elbo
print("log p(x):", log_px)
print("ELBO:", elbo)
print("gap:", kl_gap)

11. Calibration bin check

Code cell 24

conf = np.array([0.9, 0.8, 0.6, 0.4, 0.3])
correct = np.array([1, 0, 1, 0, 0])
bins = [(0,0.5),(0.5,0.75),(0.75,1.0)]
for lo, hi in bins:
    m = (conf > lo) & (conf <= hi)
    if m.any():
        print((lo, hi), "acc", correct[m].mean(), "conf", conf[m].mean())
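
The same bins collapse into a single number, the expected calibration error: the sample-weighted average of |accuracy - confidence| across bins. A minimal sketch:

ece = 0.0
for lo, hi in bins:
    m = (conf > lo) & (conf <= hi)
    if m.any():
        ece += m.mean() * abs(correct[m].mean() - conf[m].mean())
print("expected calibration error:", round(ece, 4))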

12. Posterior predictive simulation

Code cell 26

rng = np.random.default_rng(1)
theta_samples = rng.beta(post_alpha, post_beta, size=5000)
future_heads = rng.binomial(n=10, p=theta_samples)
print("posterior predictive mean heads in 10 flips:", future_heads.mean())
print("90% interval:", np.percentile(future_heads, [5, 95]))

13. Final checklist

Code cell 28

checks = [
    "probabilities normalize and log probabilities are finite",
    "likelihood and posterior are not confused",
    "latent responsibilities sum to one",
    "held-out log likelihood is tracked",
    "posterior predictive simulations resemble real data",
    "calibration is evaluated when probabilities drive decisions",
]
for i, check in enumerate(checks, 1):
    print(f"{i}. {check}")