Theory Notebook
Converted from theory.ipynb for web reading.
Probabilistic Models: Theory Notebook
This notebook makes probabilistic modeling executable: likelihoods, Bayes updates, naive Bayes, mixture responsibilities, EM, HMM forward recursion, Monte Carlo estimates, and calibration.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
1. Normalize probabilities
Code cell 4
scores = np.array([2.0, 1.0, 0.5])
p = scores / scores.sum()
print("p:", p)
print("sum:", p.sum())
2. Bernoulli MLE
Code cell 6
y = np.array([1, 0, 1, 1, 1, 0, 1])
p_mle = y.mean()
loglik = np.sum(y * np.log(p_mle) + (1 - y) * np.log(1 - p_mle))
print("MLE p:", p_mle)
print("log likelihood:", loglik)
3. Beta-Bernoulli posterior
Code cell 8
alpha, beta = 2, 2
heads, tails = 7, 3
post_alpha = alpha + heads
post_beta = beta + tails
posterior_mean = post_alpha / (post_alpha + post_beta)
print("posterior Beta:", (post_alpha, post_beta))
print("posterior mean:", posterior_mean)
4. Naive Bayes classification
Code cell 10
class_prior = np.array([0.6, 0.4])
feature_probs = np.array([
    [0.8, 0.3, 0.5],  # P(feature_j = 1 | class 0)
    [0.2, 0.7, 0.6],  # P(feature_j = 1 | class 1)
])
x = np.array([1, 0, 1])
log_scores = np.log(class_prior)
for c in range(2):
    # Bernoulli likelihood: sum the per-feature log probabilities (independence assumption)
    log_scores[c] += np.sum(x * np.log(feature_probs[c]) + (1 - x) * np.log(1 - feature_probs[c]))
posterior = np.exp(log_scores - log_scores.max())  # subtract max before exponentiating
posterior = posterior / posterior.sum()
print("posterior class probabilities:", posterior)
5. Gaussian mixture responsibilities
Code cell 12
def normal_pdf(x, mean, std):
    return np.exp(-0.5 * ((x - mean) / std) ** 2) / (std * np.sqrt(2 * np.pi))
x = np.array([-2.0, -1.0, 0.2, 2.0, 3.0])
pi = np.array([0.45, 0.55])
mu = np.array([-1.0, 2.0])
std = np.array([0.8, 0.9])
joint = np.stack([pi[k] * normal_pdf(x, mu[k], std[k]) for k in range(2)], axis=1)
gamma = joint / joint.sum(axis=1, keepdims=True)
print("responsibilities:\n", np.round(gamma, 3))
6. One EM mean update
Code cell 14
Nk = gamma.sum(axis=0)
mu_new = (gamma * x[:, None]).sum(axis=0) / Nk
print("old means:", mu)
print("new means:", np.round(mu_new, 3))
7. HMM forward recursion
Code cell 16
start = np.array([0.6, 0.4])
trans = np.array([[0.7, 0.3], [0.2, 0.8]])
emit = np.array([[0.9, 0.1], [0.3, 0.7]])
obs = [0, 1, 1]
alpha = start * emit[:, obs[0]]
print("t=0", np.round(alpha, 4))
for o in obs[1:]:
    alpha = (alpha @ trans) * emit[:, o]
    print("next", np.round(alpha, 4), "normed", np.round(alpha / alpha.sum(), 4))
print("sequence likelihood:", alpha.sum())
8. Viterbi-style max recursion
Code cell 18
delta = start * emit[:, obs[0]]
for o in obs[1:]:
    delta = np.max(delta[:, None] * trans, axis=0) * emit[:, o]
print("delta:", np.round(delta, 5))
print("best path probability proxy:", delta.max())
9. Monte Carlo expectation
Code cell 20
rng = np.random.default_rng(0)
samples = rng.normal(loc=2.0, scale=3.0, size=20000)
estimate = np.mean(samples**2)
truth = 3.0**2 + 2.0**2
print("MC estimate E[X^2]:", estimate)
print("truth:", truth)
10. ELBO identity intuition
Code cell 22
p_xz = np.array([0.2, 0.3, 0.1])  # joint p(x, z) evaluated at three values of z
q = np.array([0.3, 0.4, 0.3])
log_px = np.log(p_xz.sum())
elbo = np.sum(q * (np.log(p_xz + 1e-12) - np.log(q + 1e-12)))
kl_gap = log_px - elbo
print("log p(x):", log_px)
print("ELBO:", elbo)
print("gap:", kl_gap)
11. Calibration bin check
Code cell 24
conf = np.array([0.9, 0.8, 0.6, 0.4, 0.3])
correct = np.array([1, 0, 1, 0, 0])
bins = [(0.0, 0.5), (0.5, 0.75), (0.75, 1.0)]
for lo, hi in bins:
    m = (conf > lo) & (conf <= hi)
    if m.any():
        print((lo, hi), "acc", correct[m].mean(), "conf", conf[m].mean())
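The per-bin gaps can be summarized into a single expected calibration error (ECE): a bin-frequency-weighted average of |accuracy − confidence|. A sketch reusing conf, correct, and bins:
ece = 0.0
for lo, hi in bins:
    m = (conf > lo) & (conf <= hi)
    if m.any():
        ece += m.mean() * abs(correct[m].mean() - conf[m].mean())
print("expected calibration error:", round(ece, 3))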
12. Posterior predictive simulation
Code cell 26
rng = np.random.default_rng(1)
theta_samples = rng.beta(post_alpha, post_beta, size=5000)
future_heads = rng.binomial(n=10, p=theta_samples)
print("posterior predictive mean heads in 10 flips:", future_heads.mean())
print("90% interval:", np.percentile(future_heads, [5, 95]))
13. Final checklist
Code cell 28
checks = [
    "probabilities normalize and log probabilities are finite",
    "likelihood and posterior are not confused",
    "latent responsibilities sum to one",
    "held-out log likelihood is tracked",
    "posterior predictive simulations resemble real data",
    "calibration is evaluated when probabilities drive decisions",
]
for i, check in enumerate(checks, 1):
    print(f"{i}. {check}")