Theory Notebook
Converted from theory.ipynb for web reading.
RAG Math and Retrieval: Theory Notebook
This notebook makes RAG math concrete: similarity scores, sparse retrieval intuition, contrastive loss, recall metrics, MMR, reranking, and context packing.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
1. Cosine retrieval
Code cell 4
def normalize(X):
    X = np.asarray(X, dtype=float)
    return X / np.linalg.norm(X, axis=-1, keepdims=True)

query = normalize(np.array([[1.0, 1.0, 0.0]]))[0]
docs = normalize(np.array([
    [1.0, 0.9, 0.1],
    [-1.0, 0.0, 0.0],
    [0.2, 0.1, 1.0],
    [0.8, 0.7, 0.0],
]))
scores = docs @ query
print("cosine scores:", np.round(scores, 3))
print("top-2 docs:", np.argsort(scores)[-2:][::-1])
2. Norm effects
Code cell 6
q = np.array([1.0, 0.0])
d1 = np.array([1.0, 0.0])
d2 = np.array([5.0, 1.0])
dot_scores = np.array([q @ d1, q @ d2])
cos_scores = np.array([
    q @ d1 / (np.linalg.norm(q) * np.linalg.norm(d1)),
    q @ d2 / (np.linalg.norm(q) * np.linalg.norm(d2)),
])
print("dot scores:", dot_scores)
print("cosine scores:", np.round(cos_scores, 3))
3. BM25-style lexical intuition
Code cell 8
query_terms = ["vector", "search"]
docs_terms = [
    ["vector", "search", "index", "vector"],
    ["language", "model", "generation"],
    ["search", "engine", "retrieval", "search"],
]

idf = {"vector": 1.3, "search": 0.9}
k1 = 1.2

scores = []
for doc in docs_terms:
    score = 0.0
    for term in query_terms:
        tf = doc.count(term)
        score += idf[term] * (tf * (k1 + 1)) / (tf + k1) if tf else 0.0
    scores.append(score)
print("BM25-style scores:", np.round(scores, 3))
4. Dense contrastive loss
Code cell 10
query = normalize(np.array([[1.0, 0.5, 0.0]]))[0]
doc_mat = normalize(np.array([
    [1.0, 0.4, 0.0],
    [0.0, 1.0, 0.2],
    [-1.0, 0.0, 0.1],
]))
scores = doc_mat @ query
exp_scores = np.exp(scores)
loss = -np.log(exp_scores[0] / exp_scores.sum())
print("scores:", np.round(scores, 3))
print("contrastive loss:", loss)
5. Recall@k and MRR
Code cell 12
ranked = ["d3", "d7", "d2", "d4", "d1"]
relevant = {"d2", "d5"}
for k in [1, 3, 5]:
    recall = len(set(ranked[:k]) & relevant) / len(relevant)
    print(f"Recall@{k}:", recall)

rr = 0.0
for rank, doc in enumerate(ranked, 1):
    if doc in relevant:
        rr = 1 / rank
        break
print("MRR for this query:", rr)
6. MMR diversity selection
Code cell 14
rel = np.array([0.95, 0.90, 0.88, 0.70])
sim = np.array([
    [1.0, 0.9, 0.2, 0.1],
    [0.9, 1.0, 0.3, 0.2],
    [0.2, 0.3, 1.0, 0.8],
    [0.1, 0.2, 0.8, 1.0],
])
lam = 0.7

selected = []
candidates = set(range(len(rel)))
while len(selected) < 3:
    best, best_score = None, -1e9
    for c in candidates:
        diversity_penalty = 0 if not selected else max(sim[c, s] for s in selected)
        score = lam * rel[c] - (1 - lam) * diversity_penalty
        if score > best_score:
            best, best_score = c, score
    selected.append(best)
    candidates.remove(best)
print("MMR selected docs:", selected)
7. Context packing
Code cell 16
chunks = [
    {"id": "a", "score": 0.95, "tokens": 120},
    {"id": "b", "score": 0.85, "tokens": 300},
    {"id": "c", "score": 0.80, "tokens": 180},
    {"id": "d", "score": 0.70, "tokens": 90},
]
budget = 400

packed, used = [], 0
for c in sorted(chunks, key=lambda x: x["score"] / x["tokens"], reverse=True):
    if used + c["tokens"] <= budget:
        packed.append(c["id"])
        used += c["tokens"]
print("packed:", packed, "tokens used:", used)
8. Reciprocal rank fusion
Code cell 18
rank_dense = ["a", "b", "c", "d"]
rank_sparse = ["c", "a", "e", "b"]
k = 60
scores = {}
for ranking in [rank_dense, rank_sparse]:
    for r, doc in enumerate(ranking, 1):
        scores[doc] = scores.get(doc, 0) + 1 / (k + r)
print("RRF scores:", {d: round(s, 4) for d, s in sorted(scores.items(), key=lambda x: -x[1])})
9. ANN recall toy
Code cell 20
exact_top = {"d1", "d2", "d3", "d4", "d5"}
ann_top = {"d1", "d2", "d4", "d8", "d9"}
recall = len(exact_top & ann_top) / len(exact_top)
print("ANN recall against exact top-5:", recall)
10. Failure decomposition
Code cell 22
cases = [
    {"retrieved": False, "used": False, "correct": False},
    {"retrieved": True, "used": False, "correct": False},
    {"retrieved": True, "used": True, "correct": True},
]

for i, c in enumerate(cases, 1):
    if not c["retrieved"]:
        reason = "retrieval miss"
    elif not c["used"]:
        reason = "generation ignored evidence"
    elif not c["correct"]:
        reason = "generation error"
    else:
        reason = "success"
    print(i, reason)
11. RAG trace checklist
Code cell 24
checks = [
    "query text and normalized query embedding norm",
    "top-k ids, scores, and chunk text",
    "reranker scores and final selected chunks",
    "full prompt after context packing",
    "answer claims mapped to supporting chunks",
    "retrieval metrics and answer metrics logged separately",
]

for i, check in enumerate(checks, 1):
    print(f"{i}. {check}")