Exercises Notebook
Converted from exercises.ipynb for web reading.
Activation Functions - Exercises
Ten graded exercises covering values, derivatives, gates, softmax, and diagnostics.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import numpy as np
def header(title):
    print("\n" + "=" * 72)
    print(title)
    print("=" * 72)
def check_close(name, value, expected, tol=1e-7):
    ok = np.allclose(value, expected, atol=tol, rtol=tol)
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={value}, expected={expected}")
    return ok
def check_true(name, condition):
    ok = bool(condition)
    print(f"{'PASS' if ok else 'FAIL'} - {name}")
    return ok
def sigmoid(x): return 1/(1+np.exp(-x))
def softmax(z):
    shifted = z - np.max(z)
    e = np.exp(shifted)
    return e/e.sum()
print("Exercise helpers ready.")
Exercise 1: Sigmoid derivative (*)
Derive and compute the sigmoid derivative.
Code cell 5
# Your Solution
x=np.array([-1.0,0.0,1.0])
deriv=None
print(deriv)
Code cell 6
# Solution
header("Exercise 1: Sigmoid derivative")
x=np.array([-1.0,0.0,1.0])
s=sigmoid(x)
deriv=s*(1-s)
check_close("center derivative", deriv[1], 0.25)
print("\nTakeaway: sigmoid saturates because its derivative approaches zero in both tails.")
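A quick cross-check, outside the graded cells: with the helpers and variables from the cells above still in scope, the analytic derivative should match a centered finite difference of sigmoid (the step size h is an arbitrary choice for this sketch).
# Hedged sketch: compare s*(1-s) against a centered finite difference.
h = 1e-5
numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
check_close("finite-difference match", deriv, numeric, tol=1e-6)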
Exercise 2: Tanh derivative (*)
Compute the tanh derivative.
Code cell 8
# Your Solution
x=np.array([-1.0,0.0,1.0])
deriv=None
print(deriv)
Code cell 9
# Solution
header("Exercise 2: Tanh derivative")
x=np.array([-1.0,0.0,1.0])
deriv=1-np.tanh(x)**2
check_close("center derivative", deriv[1], 1.0)
print("\nTakeaway: tanh is zero-centered but still saturates.")
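An optional identity check, using the names from the cell above: since tanh(x) = 2*sigmoid(2x) - 1, its derivative equals 4*sigmoid'(2x), which ties Exercises 1 and 2 together.
# Hedged aside: tanh'(x) = 4 * sigmoid(2x) * (1 - sigmoid(2x)).
s2 = sigmoid(2 * x)
check_close("tanh' equals 4*sigmoid'(2x)", deriv, 4 * s2 * (1 - s2))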
Exercise 3: ReLU family (*)
Compute ReLU and Leaky ReLU.
Code cell 11
# Your Solution
x=np.array([-2.0,0.0,3.0])
y=None
print(y)
Code cell 12
# Solution
header("Exercise 3: ReLU family")
x=np.array([-2.0,0.0,3.0])
relu=np.maximum(0,x)
leaky=np.where(x>0,x,0.1*x)
check_close("relu", relu, np.array([0.0,0.0,3.0]))
check_true("leaky keeps negative signal", leaky[0]<0)
print("\nTakeaway: Leaky ReLU reduces the dead-neuron zero-gradient region.")
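For reference, a sketch of the gradients behind the takeaway (the 0.1 slope matches the cell above; the value at exactly x = 0 is a convention):
# Hedged sketch: the piecewise derivatives that explain dead neurons.
relu_grad = (x > 0).astype(float)        # 0 for x <= 0: no gradient flows back
leaky_grad = np.where(x > 0, 1.0, 0.1)   # small nonzero slope keeps negatives trainable
print("ReLU grad:", relu_grad, "Leaky grad:", leaky_grad)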
Exercise 4: Affine collapse (**)
Show that two affine layers collapse into a single affine map when no activation sits between them.
Code cell 14
# Your Solution
W1=np.eye(2); W2=2*np.eye(2); x=np.ones(2)
out=None
print(out)
Code cell 15
# Solution
header("Exercise 4: Affine collapse")
W1=np.array([[1.,2.],[0.,1.]]); W2=np.array([[2.,0.],[1.,1.]])
b1=np.array([1.,-1.]); b2=np.array([0.5,0.5]); x=np.array([2.,3.])
out=W2@(W1@x+b1)+b2
A=W2@W1; c=W2@b1+b2
check_close("collapsed affine", out, A@x+c)
print("\nTakeaway: activations are required for nonlinear depth.")
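The same collapse holds at any depth. A sketch with a third, made-up layer (W3 and b3 are illustrative values, not part of the exercise):
# Hedged extension: three stacked affine layers still collapse to one affine map.
W3 = np.array([[1., 1.], [0., 2.]]); b3 = np.array([-1., 0.])
out3 = W3 @ (W2 @ (W1 @ x + b1) + b2) + b3
check_close("three-layer collapse", out3, (W3 @ W2 @ W1) @ x + W3 @ (W2 @ b1 + b2) + b3)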
Exercise 5: Stable softmax (**)
Implement stable softmax and test shift invariance.
Code cell 17
# Your Solution
z=np.array([1000.,999.,998.])
p=None
print(p)
Code cell 18
# Solution
header("Exercise 5: Stable softmax")
z=np.array([1000.,999.,998.])
p=softmax(z)
check_close("sums to one", p.sum(), 1.0)
check_close("shift invariant", p, softmax(z-1000))
print("\nTakeaway: subtracting the max protects softmax from overflow.")
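To see why the shift matters, an optional contrast with the naive formula on the same logits (warnings are silenced because the overflow is exactly the failure being demonstrated):
# Hedged illustration: exp(1000) overflows, so the unshifted softmax returns NaN here.
with np.errstate(over="ignore", invalid="ignore"):
    naive = np.exp(z) / np.exp(z).sum()
check_true("naive softmax breaks on large logits", np.isnan(naive).any())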
Exercise 6: Softmax Jacobian (**)
Compute the softmax Jacobian.
Code cell 20
# Your Solution
s=np.array([0.2,0.3,0.5])
J=None
print(J)
Code cell 21
# Solution
header("Exercise 6: Softmax Jacobian")
s=np.array([0.2,0.3,0.5])
J=np.diag(s)-np.outer(s,s)
check_close("row sums zero", J.sum(axis=1), np.zeros(3))
check_true("PSD", np.linalg.eigvalsh(J).min()>-1e-10)
print("\nTakeaway: softmax derivatives are coupled across classes.")
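An optional numerical check of J = diag(s) - s s^T: pick any logits whose softmax equals s (log(s) works because s sums to one) and compare against finite differences. The names z_probe, h, and J_num are introduced only for this sketch.
# Hedged sketch: finite-difference Jacobian of softmax at z_probe = log(s).
z_probe = np.log(s)
h = 1e-6
J_num = np.zeros((3, 3))
for j in range(3):
    e_j = np.zeros(3); e_j[j] = h
    J_num[:, j] = (softmax(z_probe + e_j) - softmax(z_probe - e_j)) / (2 * h)
check_close("finite-difference Jacobian matches", J_num, J, tol=1e-5)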
Exercise 7: GELU vs SiLU (**)
Compute smooth activations at sample inputs.
Code cell 23
# Your Solution
x=np.array([-1.,0.,1.])
gel=None
print(gel)
Code cell 24
# Solution
header("Exercise 7: GELU vs SiLU")
x=np.array([-1.,0.,1.])
gel=0.5*x*(1+np.tanh(np.sqrt(2/np.pi)*(x+0.044715*x**3)))
silu=x*sigmoid(x)
check_true("finite smooth activations", np.isfinite(gel).all() and np.isfinite(silu).all())
print("GELU", gel, "SiLU", silu)
print("\nTakeaway: smooth activations keep small negative outputs instead of hard zeroing.")
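The cell above uses the common tanh approximation of GELU. As an optional comparison, the exact form uses the Gaussian CDF; math.erf keeps this sketch dependency-free (the 1e-3 tolerance reflects that it is only an approximation):
# Hedged comparison: exact GELU via erf vs. the tanh approximation computed above.
from math import erf
exact_gelu = np.array([0.5 * v * (1 + erf(v / np.sqrt(2))) for v in x])
check_close("tanh approximation tracks exact GELU", gel, exact_gelu, tol=1e-3)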
Exercise 8: GLU (***)
Compute the GLU output and its local derivatives.
Code cell 26
# Your Solution
a=np.array([2.,-1.]); b=np.array([0.,2.])
out=None
print(out)
Code cell 27
# Solution
header("Exercise 8: GLU")
a=np.array([2.,-1.]); b=np.array([0.,2.])
g=sigmoid(b); out=a*g
check_close("GLU output", out, np.array([1.0, -sigmoid(2.0)]))
check_close("dy/da equals the gate", g, np.array([0.5, sigmoid(2.0)]))
print("\nTakeaway: gated activations create separate content and gate gradient paths.")
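For the other gradient path mentioned in the takeaway: with y = a * sigmoid(b), the gate-side derivative is dy/db = a * g * (1 - g). A quick check at b = 0:
# Hedged aside: the gate-path derivative of the GLU output.
dy_db = a * g * (1 - g)
check_close("dy/db at b=0", dy_db[0], a[0] * 0.25)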
Exercise 9: He variance (***)
Compute Xavier and He variances.
Code cell 29
# Your Solution
n_in=128; n_out=64
var=None
print(var)
Code cell 30
# Solution
header("Exercise 9: He variance")
n_in=128; n_out=64
xavier=2/(n_in+n_out); he=2/n_in
check_true("He greater than Xavier here", he>xavier)
print("xavier", xavier, "he", he)
print("\nTakeaway: initialization depends on activation statistics.")
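An optional Monte Carlo illustration of why the He factor is 2/n_in: with He-scaled weights, preactivation variance stays roughly constant across a ReLU layer (about 2 at both depths here). The names rng, W1_, and W2_ are local to this sketch, and the estimates are noisy.
# Hedged sanity check: He scaling keeps preactivation variance roughly constant through ReLU.
rng = np.random.default_rng(0)
X = rng.standard_normal((10000, n_in))
W1_ = rng.standard_normal((n_in, n_in)) * np.sqrt(2 / n_in)
W2_ = rng.standard_normal((n_in, n_in)) * np.sqrt(2 / n_in)
z1 = X @ W1_
z2 = np.maximum(0, z1) @ W2_
print("var(z1) ~", round(z1.var(), 3), " var(z2) ~", round(z2.var(), 3))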
Exercise 10: Dead ReLU diagnostic (***)
Detect dead units from preactivation statistics.
Code cell 32
# Your Solution
preacts=np.array([[-1.,2.],[-2.,3.]])
dead=None
print(dead)
Code cell 33
# Solution
header("Exercise 10: Dead ReLU diagnostic")
preacts=np.array([[-2.,1.],[-1.,2.],[-3.,3.]])
active_fraction=np.mean(preacts>0, axis=0)
dead=active_fraction<0.01
check_true("first unit dead", dead[0])
print("\nTakeaway: dead ReLUs are diagnosed by persistently inactive preactivations.")
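A slightly larger illustration of the same diagnostic, assuming a unit whose preactivations have drifted far negative (the batch here is synthetic, generated only for this sketch):
# Hedged extension: on a 1000-example batch, a unit centered at -6 is essentially never active.
rng = np.random.default_rng(1)
batch = rng.standard_normal((1000, 2)) + np.array([-6.0, 0.0])
frac = np.mean(batch > 0, axis=0)
print("active fractions:", frac, "-> dead mask:", frac < 0.01)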