Theory Notebook
Converted from theory.ipynb for web reading.
CNN and Convolution Math: Theory Notebook
This notebook makes convolution math executable: indexing, shape formulas, parameter counts, pooling, receptive fields, im2col, gradient checks, and patch embeddings.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
1. 1D cross-correlation
Code cell 4
x = np.array([1, 2, 3, 4, 5], dtype=float)
w = np.array([1, 0, -1], dtype=float)
y = np.array([np.sum(x[i:i+len(w)] * w) for i in range(len(x)-len(w)+1)])
print("output:", y)
2. 2D cross-correlation
Code cell 6
X = np.arange(1, 10, dtype=float).reshape(3, 3)
K = np.array([[1, 0], [0, -1]], dtype=float)
Y = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        Y[i, j] = np.sum(X[i:i+2, j:j+2] * K)
print("X:\n", X)
print("Y:\n", Y)
3. Output shape formula
Code cell 8
def conv_out(size, kernel, padding=0, stride=1, dilation=1):
    return int(np.floor((size + 2 * padding - dilation * (kernel - 1) - 1) / stride) + 1)
for args in [(32, 3, 1, 1, 1), (32, 5, 0, 2, 1), (64, 3, 2, 1, 2)]:
    print(args, "->", conv_out(*args))
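As a sanity check (a small sketch assuming the conv_out helper above), counting valid window start positions directly should agree with the formula for padding=0 and dilation=1.
def count_windows(size, kernel, stride=1):
    # start positions i with i + kernel <= size, stepping by stride
    return len(range(0, size - kernel + 1, stride))
for size, kernel, stride in [(32, 3, 1), (32, 5, 2), (64, 3, 2)]:
    assert count_windows(size, kernel, stride) == conv_out(size, kernel, 0, stride, 1)
print("conv_out matches direct window counting for padding=0, dilation=1.")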
4. Parameter counts
Code cell 10
C_in, C_out, K = 64, 128, 3
conv_params = C_out * C_in * K * K + C_out
dense_params = (224 * 224 * C_in) * C_out + C_out
print("conv params:", conv_params)
print("dense params if flatten image to 128 units:", dense_params)
print("dense/conv ratio:", dense_params / conv_params)
5. Pooling
Code cell 12
X = np.array([[1, 3, 2, 0], [4, 6, 5, 1], [2, 1, 0, 2], [3, 4, 1, 5]], dtype=float)
max_pool = np.zeros((2, 2))
avg_pool = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        patch = X[2*i:2*i+2, 2*j:2*j+2]
        max_pool[i, j] = patch.max()
        avg_pool[i, j] = patch.mean()
print("max pool:\n", max_pool)
print("avg pool:\n", avg_pool)
6. Receptive field through layers
Code cell 14
layers = [
    {"kernel": 3, "stride": 1},
    {"kernel": 3, "stride": 2},
    {"kernel": 3, "stride": 1},
    {"kernel": 3, "stride": 2},
]
R, J = 1, 1
for idx, layer in enumerate(layers, 1):
    R = R + (layer["kernel"] - 1) * J
    J = J * layer["stride"]
    print(f"layer {idx}: receptive_field={R}, jump={J}")
7. Depthwise separable convolution savings
Code cell 16
C_in, C_out, K = 64, 128, 3
standard = C_in * C_out * K * K
depthwise_sep = C_in * K * K + C_in * C_out
print("standard params:", standard)
print("depthwise separable params:", depthwise_sep)
print("saving factor:", standard / depthwise_sep)
8. im2col for a tiny input
Code cell 18
X = np.arange(1, 10).reshape(3, 3)
cols = []
for i in range(2):
    for j in range(2):
        cols.append(X[i:i+2, j:j+2].reshape(-1))
X_col = np.stack(cols, axis=1)
print("im2col matrix:\n", X_col)
9. Finite-difference kernel gradient
Code cell 20
X = np.array([[1., 2.], [3., 4.]])
K = np.array([[0.5, -1.0], [0.2, 0.3]])
def loss(Kmat):
    return np.sum(X * Kmat) ** 2
analytic = 2 * np.sum(X * K) * X
eps = 1e-5
numeric = np.zeros_like(K)
for i in range(2):
    for j in range(2):
        E = np.zeros_like(K)
        E[i, j] = eps
        numeric[i, j] = (loss(K + E) - loss(K - E)) / (2 * eps)
print("max grad error:", np.max(np.abs(analytic - numeric)))
10. Residual block
Code cell 22
x = np.array([1.0, -0.5, 0.25])
F = np.array([-0.2, 0.1, 0.4])
y = x + F
print("residual output:", y)
11. Patch embedding as convolution
Code cell 24
image_h, image_w, patch = 224, 224, 16
num_patches = (image_h // patch) * (image_w // patch)
embed_dim = 768
patch_vector = patch * patch * 3
params = patch_vector * embed_dim + embed_dim
print("num patches:", num_patches)
print("patch projection params:", params)
12. Activation statistics
Code cell 26
acts = np.random.normal(size=(8, 16, 14, 14))
channel_mean = acts.mean(axis=(0, 2, 3))
channel_std = acts.std(axis=(0, 2, 3))
print("mean range:", channel_mean.min(), channel_mean.max())
print("std range:", channel_std.min(), channel_std.max())
13. CNN checklist
Code cell 28
checks = [
    "track N,C,H,W axes explicitly",
    "verify output sizes after every stride, padding, and dilation choice",
    "count parameters and FLOPs separately",
    "test receptive field with gradients or controlled inputs",
    "inspect activation statistics and dead channels",
    "compare standard, 1x1, and depthwise separable convolutions when optimizing",
]
for i, check in enumerate(checks, 1):
    print(f"{i}. {check}")