ml exercises
Converted from
05_ml_exercises.ipynb for web reading.
Code cell 1
import numpy as np
from typing import List, Tuple, Dict, Any, Optional
Exercise 1: NumPy Array Operations
Create functions for common array operations.
Code cell 3
def create_identity_matrix(n: int) -> np.ndarray:
    """
    Create an n x n identity matrix.

    Exercise stub: the body is intentionally left for the reader, so the
    function returns None until it is implemented.

    Args:
        n: Number of rows (and columns) of the square matrix.

    Returns:
        The n x n identity matrix: ones on the main diagonal, zeros elsewhere.

    Example:
        >>> create_identity_matrix(3)
        array([[1., 0., 0.],
               [0., 1., 0.],
               [0., 0., 1.]])
    """
    # YOUR CODE HERE
    pass


# Test
create_identity_matrix(3)
Code cell 4
def normalize_array(arr: np.ndarray) -> np.ndarray:
    """
    Normalize array to have mean=0 and std=1 (z-score normalization).

    Formula: (x - mean) / std

    Exercise stub: the body is intentionally left for the reader, so the
    function returns None until it is implemented.

    Args:
        arr: Numeric array to normalize.

    Returns:
        Array of the same shape holding the z-score of every element.
    """
    # YOUR CODE HERE
    pass


# Test: the sample mean is 30, so the middle element should map to 0 and the
# remaining values should be symmetric around it.
arr = np.array([10, 20, 30, 40, 50])
normalize_array(arr)
Code cell 5
def min_max_scale(arr: np.ndarray) -> np.ndarray:
    """
    Scale array to range [0, 1].

    Formula: (x - min) / (max - min)

    Exercise stub: the body is intentionally left for the reader, so the
    function returns None until it is implemented.

    Args:
        arr: Numeric array to rescale.

    Returns:
        Array of the same shape where the smallest input maps to 0 and the
        largest maps to 1.
    """
    # YOUR CODE HERE
    pass


# Test: expected result once implemented is [0., 0.25, 0.5, 0.75, 1.].
arr = np.array([10, 20, 30, 40, 50])
min_max_scale(arr)
Exercise 2: Array Statistics
Code cell 7
def compute_statistics(arr: np.ndarray) -> Dict[str, float]:
    """
    Compute common statistics for an array.

    Exercise stub: the body is intentionally left for the reader, so the
    function returns None until it is implemented.

    Args:
        arr: Numeric array to summarize.

    Returns:
        Dictionary with the keys 'mean', 'median', 'std', 'var', 'min',
        'max' and 'range' (where range = max - min).
    """
    # YOUR CODE HERE
    pass


# Test: for 1..10 the mean and median are both 5.5, min is 1, max is 10 and
# the range is 9.
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
compute_statistics(arr)
Exercise 3: Data Cleaning
Code cell 9
def find_missing_values(data: np.ndarray) -> Tuple[int, np.ndarray]:
    """
    Find missing values (NaN) in array.

    Exercise stub: the body is intentionally left for the reader, so the
    function returns None until it is implemented.

    Args:
        data: Array that may contain NaN entries.

    Returns:
        A tuple (count of missing, indices of missing values).
    """
    # YOUR CODE HERE
    pass


# Test: positions 1 and 3 are NaN, so the expected count is 2.
data = np.array([1, np.nan, 3, np.nan, 5])
find_missing_values(data)
Code cell 10
def fill_missing_with_mean(data: np.ndarray) -> np.ndarray:
    """
    Replace NaN values with the mean of non-NaN values.

    Exercise stub: the body is intentionally left for the reader, so the
    function returns None until it is implemented.

    Args:
        data: Array that may contain NaN entries.

    Returns:
        Array in which every NaN has been replaced by the mean of the
        remaining (non-NaN) values.
    """
    # YOUR CODE HERE
    pass


# Test: the non-NaN values 1, 3, 5 average to 3, so the expected result is
# [1., 3., 3., 3., 5.].
data = np.array([1, np.nan, 3, np.nan, 5])
fill_missing_with_mean(data)
Exercise 4: Simple Linear Regression from Scratch
Code cell 12
class SimpleLinearRegression:
    """
    Simple linear regression using ordinary least squares.

    Formula: y = mx + b
        m = sum((x - x_mean)(y - y_mean)) / sum((x - x_mean)^2)
        b = y_mean - m * x_mean

    Exercise stub: `fit`, `predict` and `r_squared` are intentionally left
    for the reader and return None until they are implemented.

    Attributes:
        slope: The coefficient m; None until the model has been fitted.
        intercept: The offset b; None until the model has been fitted.
    """

    def __init__(self):
        # Both parameters start unset; fit() is meant to populate them.
        self.slope = None
        self.intercept = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fit the model to training data.

        Args:
            X: Input values (the x in y = mx + b).
            y: Target values, one per entry of X.
        """
        # YOUR CODE HERE
        pass

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Make predictions.

        Args:
            X: Input values to predict targets for.

        Returns:
            The predicted targets, following y = mx + b.
        """
        # YOUR CODE HERE
        pass

    def r_squared(self, X: np.ndarray, y: np.ndarray) -> float:
        """Calculate R² score.

        Args:
            X: Input values.
            y: True target values for X.

        Returns:
            The R² score of the model's predictions on X against y.
        """
        # YOUR CODE HERE
        pass
Code cell 13
# Test Linear Regression
# The sample points lie exactly on y = 2x, so a correct least-squares fit
# should report a slope of 2 and an intercept of 0, and predict 12 and 14
# for the inputs 6 and 7.
X = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 6, 8, 10])
model = SimpleLinearRegression()
model.fit(X, y)
print(f"Slope: {model.slope}")
print(f"Intercept: {model.intercept}")
print(f"Predictions: {model.predict(np.array([6, 7]))}")
Exercise 5: K-Nearest Neighbors from Scratch
Code cell 15
class SimpleKNN:
    """
    K-Nearest Neighbors classifier.

    Exercise stub: `fit`, `_euclidean_distance`, `_predict_single` and
    `predict` are intentionally left for the reader and return None until
    they are implemented.

    Attributes:
        k: Number of neighbours to consult (defaults to 3).
        X_train: Stored training samples; None until fit() is called.
        y_train: Stored training labels; None until fit() is called.
    """

    def __init__(self, k: int = 3):
        self.k = k
        # Training data is unset until fit() stores it.
        self.X_train = None
        self.y_train = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Store training data.

        Args:
            X: Training samples.
            y: Class label for each training sample.
        """
        # YOUR CODE HERE
        pass

    def _euclidean_distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """Calculate Euclidean distance between two points."""
        # YOUR CODE HERE
        pass

    def _predict_single(self, x: np.ndarray) -> int:
        """Predict class for a single sample."""
        # YOUR CODE HERE
        pass

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict classes for multiple samples.

        Args:
            X: Samples to classify.

        Returns:
            The predicted class for each sample in X.
        """
        # YOUR CODE HERE
        pass
Code cell 16
# Test KNN
# NOTE(review): the query [2.5, 2.5] is equidistant from [2, 2] and [3, 3],
# and also equidistant from [1, 1] and [4, 4]. With k=3 the third neighbour
# is therefore a tie between a class-0 and a class-1 point, so the printed
# label depends on how the implementation breaks ties.
X_train = np.array([[1, 1], [2, 2], [3, 3], [4, 4]])
y_train = np.array([0, 0, 1, 1])
knn = SimpleKNN(k=3)
knn.fit(X_train, y_train)
print(f"Prediction for [2.5, 2.5]: {knn.predict(np.array([[2.5, 2.5]]))}")
Exercise 6: Confusion Matrix and Metrics
Code cell 18
def confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Compute confusion matrix for binary classification.

    Exercise stub: the body is intentionally left for the reader, so the
    function returns None until it is implemented.

    Args:
        y_true: Ground-truth binary labels (0 or 1).
        y_pred: Predicted binary labels, one per entry of y_true.

    Returns:
        2x2 array: [[TN, FP], [FN, TP]]
    """
    # YOUR CODE HERE
    pass


# Test: for these labels TN=1, FP=1, FN=1 and TP=2, so the expected matrix
# is [[1, 1], [1, 2]].
y_true = np.array([0, 0, 1, 1, 1])
y_pred = np.array([0, 1, 0, 1, 1])
confusion_matrix(y_true, y_pred)
Code cell 19
def precision_recall_f1(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """
    Calculate precision, recall, and F1 score.

    Precision = TP / (TP + FP)
    Recall = TP / (TP + FN)
    F1 = 2 * (precision * recall) / (precision + recall)

    Exercise stub: the body is intentionally left for the reader, so the
    function returns None until it is implemented.

    Args:
        y_true: Ground-truth binary labels (0 or 1).
        y_pred: Predicted binary labels, one per entry of y_true.

    Returns:
        Dictionary holding the precision, recall and F1 score.
    """
    # YOUR CODE HERE
    pass
# Test
# Reuses y_true and y_pred from the confusion-matrix test cell. For those
# labels TP=2, FP=1 and FN=1, so precision, recall and F1 should all be 2/3.
precision_recall_f1(y_true, y_pred)
Solutions
Uncomment to check your work
Code cell 21
# # Solution 1: Array Operations
# def create_identity_matrix(n: int) -> np.ndarray:
#     return np.eye(n)
#
# def normalize_array(arr: np.ndarray) -> np.ndarray:
#     return (arr - arr.mean()) / arr.std()
#
# def min_max_scale(arr: np.ndarray) -> np.ndarray:
#     return (arr - arr.min()) / (arr.max() - arr.min())
# # Solution 2: Statistics
# def compute_statistics(arr: np.ndarray) -> Dict[str, float]:
#     return {
#         'mean': float(np.mean(arr)),
#         'median': float(np.median(arr)),
#         'std': float(np.std(arr)),
#         'var': float(np.var(arr)),
#         'min': float(np.min(arr)),
#         'max': float(np.max(arr)),
#         'range': float(np.max(arr) - np.min(arr))
#     }
# # Solution 3: Data Cleaning
# def find_missing_values(data: np.ndarray) -> Tuple[int, np.ndarray]:
#     mask = np.isnan(data)
#     return int(mask.sum()), np.where(mask)[0]
#
# def fill_missing_with_mean(data: np.ndarray) -> np.ndarray:
#     result = data.copy()
#     mean_val = np.nanmean(data)
#     result[np.isnan(result)] = mean_val
#     return result