Theory Notebook
Converted from theory.ipynb for web reading.
CNN and Convolution Math: Theory Notebook
This notebook makes convolution math executable: indexing, shape formulas, parameter counts, pooling, receptive fields, im2col, gradient checks, and patch embeddings.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
1. 1D cross-correlation
Code cell 4
x = np.array([1, 2, 3, 4, 5], dtype=float)
w = np.array([1, 0, -1], dtype=float)
y = np.array([np.sum(x[i:i+len(w)] * w) for i in range(len(x)-len(w)+1)])
print("output:", y)
2. 2D cross-correlation
Code cell 6
X = np.arange(1, 10, dtype=float).reshape(3, 3)
K = np.array([[1, 0], [0, -1]], dtype=float)
Y = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        Y[i, j] = np.sum(X[i:i+2, j:j+2] * K)
print("X:\n", X)
print("Y:\n", Y)
3. Output shape formula
Code cell 8
def conv_out(size, kernel, padding=0, stride=1, dilation=1):
    return int(np.floor((size + 2 * padding - dilation * (kernel - 1) - 1) / stride) + 1)
for args in [(32, 3, 1, 1, 1), (32, 5, 0, 2, 1), (64, 3, 2, 1, 2)]:
    print(args, "->", conv_out(*args))
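As a sanity check (a small sketch assuming the conv_out helper above), counting valid window start positions directly should agree with the formula for padding=0 and dilation=1.
def count_windows(size, kernel, stride=1):
    # start positions i with i + kernel <= size, stepping by stride
    return len(range(0, size - kernel + 1, stride))
for size, kernel, stride in [(32, 3, 1), (32, 5, 2), (64, 3, 2)]:
    assert count_windows(size, kernel, stride) == conv_out(size, kernel, 0, stride, 1)
print("conv_out matches direct window counting for padding=0, dilation=1.")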
4. Parameter counts
Code cell 10
C_in, C_out, K = 64, 128, 3
conv_params = C_out * C_in * K * K + C_out
dense_params = (224 * 224 * C_in) * C_out + C_out
print("conv params:", conv_params)
print("dense params if flatten image to 128 units:", dense_params)
print("dense/conv ratio:", dense_params / conv_params)
5. Pooling
Code cell 12
X = np.array([[1, 3, 2, 0], [4, 6, 5, 1], [2, 1, 0, 2], [3, 4, 1, 5]], dtype=float)
max_pool = np.zeros((2, 2))
avg_pool = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        patch = X[2*i:2*i+2, 2*j:2*j+2]
        max_pool[i, j] = patch.max()
        avg_pool[i, j] = patch.mean()
print("max pool:\n", max_pool)
print("avg pool:\n", avg_pool)
6. Receptive field through layers
Code cell 14
layers = [
    {"kernel": 3, "stride": 1},
    {"kernel": 3, "stride": 2},
    {"kernel": 3, "stride": 1},
    {"kernel": 3, "stride": 2},
]
R, J = 1, 1
for idx, layer in enumerate(layers, 1):
    R = R + (layer["kernel"] - 1) * J
    J = J * layer["stride"]
    print(f"layer {idx}: receptive_field={R}, jump={J}")
7. Depthwise separable convolution savings
Code cell 16
C_in, C_out, K = 64, 128, 3
standard = C_in * C_out * K * K
depthwise_sep = C_in * K * K + C_in * C_out
print("standard params:", standard)
print("depthwise separable params:", depthwise_sep)
print("saving factor:", standard / depthwise_sep)
8. im2col for a tiny input
Code cell 18
X = np.arange(1, 10).reshape(3, 3)
cols = []
for i in range(2):
    for j in range(2):
        cols.append(X[i:i+2, j:j+2].reshape(-1))
X_col = np.stack(cols, axis=1)
print("im2col matrix:\n", X_col)
9. Finite-difference kernel gradient
Code cell 20
X = np.array([[1., 2.], [3., 4.]])
K = np.array([[0.5, -1.0], [0.2, 0.3]])
def loss(Kmat):
    return np.sum(X * Kmat) ** 2
analytic = 2 * np.sum(X * K) * X
eps = 1e-5
numeric = np.zeros_like(K)
for i in range(2):
    for j in range(2):
        E = np.zeros_like(K)
        E[i, j] = eps
        numeric[i, j] = (loss(K + E) - loss(K - E)) / (2 * eps)
print("max grad error:", np.max(np.abs(analytic - numeric)))
10. Residual block
Code cell 22
x = np.array([1.0, -0.5, 0.25])
F = np.array([-0.2, 0.1, 0.4])
y = x + F
print("residual output:", y)
11. Patch embedding as convolution
Code cell 24
image_h, image_w, patch = 224, 224, 16
num_patches = (image_h // patch) * (image_w // patch)
embed_dim = 768
patch_vector = patch * patch * 3
params = patch_vector * embed_dim + embed_dim
print("num patches:", num_patches)
print("patch projection params:", params)
12. Activation statistics
Code cell 26
acts = np.random.normal(size=(8, 16, 14, 14))
channel_mean = acts.mean(axis=(0, 2, 3))
channel_std = acts.std(axis=(0, 2, 3))
print("mean range:", channel_mean.min(), channel_mean.max())
print("std range:", channel_std.min(), channel_std.max())
13. CNN checklist
Code cell 28
checks = [
    "track N,C,H,W axes explicitly",
    "verify output sizes after every stride, padding, and dilation choice",
    "count parameters and FLOPs separately",
    "test receptive field with gradients or controlled inputs",
    "inspect activation statistics and dead channels",
    "compare standard, 1x1, and depthwise separable convolutions when optimizing",
]
for i, check in enumerate(checks, 1):
    print(f"{i}. {check}")