Exercises Notebook
Converted from exercises.ipynb for web reading.
Normalization Techniques - Exercises
Ten graded exercises covering the main normalization axes (batch, layer, RMS, group), weight and spectral normalization, and a broadcasting diagnostic.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import numpy as np
def header(title):
    print("\n" + "=" * 72 + "\n" + title + "\n" + "=" * 72)

def check_close(name, value, expected, tol=1e-7):
    ok = np.allclose(value, expected, atol=tol, rtol=tol)
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value}, expected={expected}")
    return ok

def check_true(name, condition):
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    return ok
print("Exercise helpers ready.")
Exercise 1: Normalize a vector (*)
Compute the mean and variance of a vector, then use them to normalize it.
Code cell 5
# Your Solution
x=np.array([1.,2.,3.])
y=None
print(y)
Code cell 6
# Solution
header("Exercise 1: Normalize a vector")
x = np.array([1., 2., 3.])
mu, var = x.mean(), x.var()
y = (x - mu) / np.sqrt(var + 1e-5)
check_close("mean zero", y.mean(), 0.0, tol=1e-6)
print("\nTakeaway: normalization subtracts a chosen mean and divides by a stabilized scale.")
Exercise 2: BatchNorm (*)
Normalize a B x D matrix over the batch axis.
Code cell 8
# Your Solution
X=np.array([[1.,2.],[3.,4.]])
Y=None
print(Y)
Code cell 9
# Solution
header("Exercise 2: BatchNorm")
X = np.array([[1., 2.], [3., 4.]])
Y = (X - X.mean(axis=0, keepdims=True)) / np.sqrt(X.var(axis=0, keepdims=True) + 1e-5)
check_close("feature means zero", Y.mean(axis=0), np.zeros(2), tol=1e-5)
print("\nTakeaway: BatchNorm couples examples through feature statistics.")
Exercise 3: LayerNorm (*)
Normalize a B x D matrix over the feature axis.
Code cell 11
# Your Solution
X=np.array([[1.,2.],[3.,7.]])
Y=None
print(Y)
Code cell 12
# Solution
header("Exercise 3: LayerNorm")
X = np.array([[1., 2.], [3., 7.]])
Y = (X - X.mean(axis=1, keepdims=True)) / np.sqrt(X.var(axis=1, keepdims=True) + 1e-5)
check_close("row means zero", Y.mean(axis=1), np.zeros(2), tol=1e-5)
print("\nTakeaway: LayerNorm couples features inside each example.")
Exercise 4: Batch dependence (**)
Show that the same sample's BatchNorm output changes when the rest of the batch changes.
Code cell 14
# Your Solution
sample=np.array([[1.,2.]])
out=None
print(out)
Code cell 15
# Solution
header("Exercise 4: Batch dependence")
sample = np.array([[1., 2.]])
A = np.vstack([sample, [[2., 3.], [3., 4.]]])
B = np.vstack([sample, [[100., 200.], [110., 210.]]])
YA = (A - A.mean(0, keepdims=True)) / np.sqrt(A.var(0, keepdims=True) + 1e-5)
YB = (B - B.mean(0, keepdims=True)) / np.sqrt(B.var(0, keepdims=True) + 1e-5)
check_true("same sample differs", np.linalg.norm(YA[0] - YB[0]) > 0.1)
print("\nTakeaway: BatchNorm output depends on other examples in the mini-batch.")
Exercise 5: Layer independence (**)
Show that LayerNorm's output for a sample is unaffected by the other examples in the batch.
Code cell 17
# Your Solution
sample=np.array([[1.,2.,4.]])
out=None
print(out)
Code cell 18
# Solution
header("Exercise 5: Layer independence")
sample = np.array([[1., 2., 4.]])
A = np.vstack([sample, np.zeros((2, 3))])
B = np.vstack([sample, 100 * np.ones((2, 3))])
def ln(X):
    return (X - X.mean(1, keepdims=True)) / np.sqrt(X.var(1, keepdims=True) + 1e-5)
check_close("same sample same LN", ln(A)[0], ln(B)[0])
print("\nTakeaway: LayerNorm is stable across batch composition.")
Exercise 6: RMSNorm (**)
Implement RMSNorm and confirm that it rescales the vector without centering its mean.
Code cell 20
# Your Solution
X=np.array([[1.,2.,3.]])
Y=None
print(Y)
Code cell 21
# Solution
header("Exercise 6: RMSNorm")
X = np.array([[1., 2., 3.]])
rms = np.sqrt(np.mean(X**2, axis=1, keepdims=True) + 1e-5)
Y = X / rms
check_close("unit RMS", np.sqrt(np.mean(Y**2,axis=1)), np.ones(1), tol=1e-5)
check_true("mean not zero", abs(Y.mean())>0.1)
print("\nTakeaway: RMSNorm controls scale without centering.")
Exercise 7: GroupNorm (**)
Split each example's features into two groups and normalize within each group.
Code cell 23
# Your Solution
X=np.array([[1.,2.,10.,12.]])
Y=None
print(Y)
Code cell 24
# Solution
header("Exercise 7: GroupNorm")
X = np.array([[1., 2., 10., 12.]])
G = X.reshape(1, 2, 2)
Y = ((G - G.mean(2, keepdims=True)) / np.sqrt(G.var(2, keepdims=True) + 1e-5)).reshape(1, 4)
check_close("group means", Y.reshape(1,2,2).mean(2), np.zeros((1,2)), tol=1e-5)
print("\nTakeaway: GroupNorm normalizes channel groups within each example.")
Exercise 8: WeightNorm (***)
Verify that the reparameterized weight's norm equals the learned scale g.
Code cell 26
# Your Solution
v=np.array([3.,4.]); g=2.
w=None
print(w)
Code cell 27
# Solution
header("Exercise 8: WeightNorm")
v = np.array([3., 4.])
g = 2.
w = g * v / np.linalg.norm(v)
check_close("norm equals g", np.linalg.norm(w), g)
print("\nTakeaway: WeightNorm separates direction from magnitude.")
Exercise 9: SpectralNorm (***)
Normalize a matrix by its largest singular value.
Code cell 29
# Your Solution
W=np.array([[3.,0.],[0.,1.]])
Wn=None
print(Wn)
Code cell 30
# Solution
header("Exercise 9: SpectralNorm")
W = np.array([[3., 0.], [0., 1.]])
sigma = np.linalg.svd(W, compute_uv=False)[0]
Wn = W / sigma
check_close("spectral norm one", np.linalg.svd(Wn,compute_uv=False)[0], 1.0)
print("\nTakeaway: SpectralNorm controls operator scale, not activation statistics.")
Exercise 10: Broadcasting bug (***)
Check that a correctly shaped gamma broadcasts over a B x T x D tensor.
Code cell 32
# Your Solution
X=np.zeros((2,3,4)); gamma=np.ones((4,))
ok=None
print(ok)
Code cell 33
# Solution
header("Exercise 10: Broadcasting bug")
X = np.zeros((2, 3, 4))
gamma = np.ones((4,))
Y = X + gamma
check_true("broadcasts across B and T", Y.shape==X.shape)
check_close("last dimension gamma", Y[0,0], gamma)
print("\nTakeaway: normalization parameters must align with the intended feature axis.")