Exercises Notebook
Converted from
exercises.ipynb for web reading.
Exercises: Experiment Tracking and Reproducibility
There are 10 exercises. Exercises 1-3 are mechanics, 4-6 are theory, and 7-10 are production AI applications.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# Plot styling: use seaborn's theme when it is installed, otherwise fall back
# to the seaborn-like style sheet bundled with Matplotlib.
try:
    import seaborn as sns
except ImportError:
    HAS_SNS = False
    # Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*";
    # older releases only know the unprefixed name.
    _fallback_style = "seaborn-v0_8-whitegrid"
    if _fallback_style not in plt.style.available:
        _fallback_style = "seaborn-whitegrid"
    plt.style.use(_fallback_style)
else:
    HAS_SNS = True
    sns.set_theme(style="whitegrid", palette="colorblind")

# Figure defaults applied on top of whichever base style was selected above.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})

# Seed NumPy's global random generator so random draws repeat across runs.
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
# Named hex color codes, keyed by the semantic role each color plays.
COLORS = dict(
    primary="#0077BB",
    secondary="#EE7733",
    tertiary="#009988",
    error="#CC3311",
    neutral="#555555",
    highlight="#EE3377",
)
def header(title, width=72):
    """Print *title* framed above and below by a rule of ``=`` characters.

    Args:
        title: Text shown between the two rules.
        width: Number of ``=`` characters in each rule (default 72).
    """
    rule = "=" * width
    # The leading newline separates the banner from any preceding output.
    print("\n" + rule)
    print(title)
    print(rule)
def check_true(condition, name):
    """Report a boolean check as PASS/FAIL and raise when it does not hold.

    Args:
        condition: Any value; interpreted through ``bool()``.
        name: Label printed with the result and used as the error message.

    Raises:
        AssertionError: If *condition* is falsy.
    """
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    # Raise explicitly instead of using ``assert`` so the check still fires
    # when Python runs with -O, which strips assert statements.
    if not ok:
        raise AssertionError(name)
def check_close(value, target, tol=1e-8, name="value"):
    """Report whether *value* is within *tol* of *target* and raise if not.

    Both inputs are converted with ``float()`` and compared by absolute
    difference; the bound is inclusive (``<= tol``).

    Args:
        value: Observed number.
        target: Expected number.
        tol: Maximum allowed absolute difference.
        name: Label printed with the result and used as the error message.

    Raises:
        AssertionError: If the absolute difference exceeds *tol*.
    """
    got = float(value)
    expected = float(target)
    ok = abs(got - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: got {got:.6f}, expected {expected:.6f}")
    # Raise explicitly instead of using ``assert`` so the check still fires
    # when Python runs with -O, which strips assert statements.
    if not ok:
        raise AssertionError(name)
def softmax(z, axis=None):
    """Return the softmax of *z* as a float array.

    Args:
        z: Array-like of scores; converted to a float NumPy array.
        axis: Axis along which to normalize. The default ``None`` normalizes
            over all elements at once.

    Returns:
        Array with the same shape as *z* whose entries sum to 1 along *axis*
        (or overall when *axis* is ``None``).
    """
    z = np.asarray(z, dtype=float)
    # Subtract the maximum before exponentiating so large scores do not
    # overflow; the shift cancels in the ratio below.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=axis, keepdims=True)
def psi(ref, cur, eps=1e-8):
    """Return the population stability index between two binned distributions.

    Computes ``sum((cur - ref) * log(cur / ref))`` with the natural log, after
    adding *eps* to every bin and renormalizing each input to sum to 1.

    Args:
        ref: Array-like of reference bin weights (counts or proportions).
        cur: Array-like of current bin weights, same shape as *ref*.
        eps: Constant added to every bin so empty bins do not produce
            ``log(0)`` or division by zero.

    Returns:
        The index as a Python float.

    Raises:
        ValueError: If *ref* and *cur* do not have the same shape.
    """
    ref = np.asarray(ref, dtype=float)
    cur = np.asarray(cur, dtype=float)
    # Without this check, inputs such as shapes (4,) and (1,) would broadcast
    # silently and yield a meaningless score.
    if ref.shape != cur.shape:
        raise ValueError(
            f"ref and cur must have the same shape, got {ref.shape} and {cur.shape}"
        )
    ref = ref + eps
    cur = cur + eps
    ref = ref / ref.sum()
    cur = cur / cur.sum()
    return float(np.sum((cur - ref) * np.log(cur / ref)))
def js_divergence(p, q, eps=1e-8):
    """Return the Jensen-Shannon divergence between two distributions.

    Uses the natural log. Each input has *eps* added to every entry and is
    renormalized to sum to 1 before the divergence is computed against the
    midpoint distribution ``m = 0.5 * (p + q)``.

    Args:
        p: Array-like of weights for the first distribution.
        q: Array-like of weights for the second distribution, same shape as *p*.
        eps: Constant added to every entry so zero entries do not produce
            ``log(0)``.

    Returns:
        The divergence as a Python float.

    Raises:
        ValueError: If *p* and *q* do not have the same shape.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    # Without this check, inputs such as shapes (4,) and (1,) would broadcast
    # silently and yield a meaningless score.
    if p.shape != q.shape:
        raise ValueError(
            f"p and q must have the same shape, got {p.shape} and {q.shape}"
        )
    p = p + eps
    q = q + eps
    p = p / p.sum()
    q = q / q.sum()
    m = 0.5 * (p + q)
    return float(0.5 * np.sum(p * np.log(p / m)) + 0.5 * np.sum(q * np.log(q / m)))
def percentile(values, q):
    """Return the *q*-th percentile of *values* as a Python float.

    Args:
        values: Array-like of numbers; converted to a float NumPy array.
        q: Percentile to compute, passed through to ``np.percentile``.
    """
    data = np.asarray(values, dtype=float)
    return float(np.percentile(data, q))
# Status line printed once the helper definitions in this cell have run.
print("Helper functions ready.")
Exercise 1: experiments as scientific records (*)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 5
# Your Solution - Exercise 1
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 6
# Solution: position-weighted checksum of a small array of values.
header("Exercise 1: Experiment Tracking and Reproducibility")
values = np.array([10, 12, 12, 15], dtype=float)
# Weights are the 1-based positions 1..len(values).
weights = np.arange(1, values.size + 1)
fingerprint = float(np.dot(values, weights))
check_close(fingerprint, 130.0, name="weighted fingerprint")
print("Artifact values:", values.tolist())
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 2: why metrics alone are not enough (*)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 8
# Your Solution - Exercise 2
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 9
# Solution: mean element-wise difference between candidate and baseline scores.
header("Exercise 2: Experiment Tracking and Reproducibility")
baseline = np.array([0.80, 0.81, 0.79, 0.82])
candidate = np.array([0.82, 0.83, 0.81, 0.84])
delta = float((candidate - baseline).mean())
# The check passes only when the candidate is better on average.
check_true(delta > 0.0, "candidate mean is better")
print("Mean improvement:", round(delta, 4))
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 3: reproducibility versus repeatability (*)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 11
# Your Solution - Exercise 3
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 12
# Solution: JS divergence between a uniform reference and a skewed distribution.
header("Exercise 3: Experiment Tracking and Reproducibility")
ref = np.full(4, 0.25)
cur = np.array([0.10, 0.20, 0.30, 0.40])
score = js_divergence(ref, cur)
check_true(0.0 <= score, "JS divergence is nonnegative")
print("JS divergence:", round(score, 6))
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 4: comparison tables (**)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 14
# Your Solution - Exercise 4
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 15
# Solution: utilization rho = arrivals / service, required to lie in [0, 1).
header("Exercise 4: Experiment Tracking and Reproducibility")
arrivals, service = 30.0, 50.0
rho = arrivals / service
check_true(rho >= 0.0 and rho < 1.0, "queue utilization is stable")
print("Utilization:", round(rho, 3))
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 5: experiment debt (**)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 17
# Your Solution - Exercise 5
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 18
# Solution: flag every score at or above the threshold and count the flags.
header("Exercise 5: Experiment Tracking and Reproducibility")
scores = np.array([0.15, 0.66, 0.71, 0.88])
threshold = 0.70
blocked = np.greater_equal(scores, threshold)
check_true(int(np.count_nonzero(blocked)) == 2, "two events exceed threshold")
print("Blocked mask:", blocked.tolist())
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 6: run (**)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 20
# Your Solution - Exercise 6
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 21
# Solution: position-weighted checksum of a small array of values.
header("Exercise 6: Experiment Tracking and Reproducibility")
values = np.array([10, 12, 12, 15], dtype=float)
# Weights are the 1-based positions 1..len(values).
weights = np.arange(1, values.size + 1)
fingerprint = float(np.dot(values, weights))
check_close(fingerprint, 130.0, name="weighted fingerprint")
print("Artifact values:", values.tolist())
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 7: parameter vector (***)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 23
# Your Solution - Exercise 7
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 24
# Solution: mean element-wise difference between candidate and baseline scores.
header("Exercise 7: Experiment Tracking and Reproducibility")
baseline = np.array([0.80, 0.81, 0.79, 0.82])
candidate = np.array([0.82, 0.83, 0.81, 0.84])
delta = float((candidate - baseline).mean())
# The check passes only when the candidate is better on average.
check_true(delta > 0.0, "candidate mean is better")
print("Mean improvement:", round(delta, 4))
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 8: metric vector (***)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 26
# Your Solution - Exercise 8
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 27
# Solution: JS divergence between a uniform reference and a skewed distribution.
header("Exercise 8: Experiment Tracking and Reproducibility")
ref = np.full(4, 0.25)
cur = np.array([0.10, 0.20, 0.30, 0.40])
score = js_divergence(ref, cur)
check_true(0.0 <= score, "JS divergence is nonnegative")
print("JS divergence:", round(score, 6))
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 9: artifact set (***)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 29
# Your Solution - Exercise 9
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 30
# Solution: utilization rho = arrivals / service, required to lie in [0, 1).
header("Exercise 9: Experiment Tracking and Reproducibility")
arrivals, service = 30.0, 50.0
rho = arrivals / service
check_true(rho >= 0.0 and rho < 1.0, "queue utilization is stable")
print("Utilization:", round(rho, 3))
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")
Exercise 10: reproducibility envelope (***)
Define the production object, compute a small check, and explain what action the system should take.
Code cell 32
# Your Solution - Exercise 10
# Placeholder cell: `answer` starts as None and is echoed below; replace None
# with your own answer.
answer = None
print("Your answer placeholder:", answer)
Code cell 33
# Solution: flag every score at or above the threshold and count the flags.
header("Exercise 10: Experiment Tracking and Reproducibility")
scores = np.array([0.15, 0.66, 0.71, 0.88])
threshold = 0.70
blocked = np.greater_equal(scores, threshold)
check_true(int(np.count_nonzero(blocked)) == 2, "two events exceed threshold")
print("Blocked mask:", blocked.tolist())
print("\nTakeaway: production ML decisions should be backed by explicit objects, checks, and logged evidence.")