PYTHONPython

hashing

real world projects / file sync / sync / utils

PYTHON
hashing.py🐍
"""
File hashing utilities.
"""

import hashlib
from pathlib import Path
from typing import Literal


# Algorithm names advertised by the ``algorithm`` parameters in this module.
# This is a static type hint only: nothing here checks the value at runtime,
# the name is handed straight to ``hashlib.new``.
HashAlgorithm = Literal["md5", "sha256", "sha512"]


def calculate_hash(
    file_path: Path,
    algorithm: HashAlgorithm = "md5",
    chunk_size: int = 8192
) -> str:
    """
    Calculate hash of a file, reading it chunk by chunk.

    The file is opened in binary mode and fed to the hash object one chunk
    at a time, so the whole content is not held in memory at once.

    Args:
        file_path: Path to file
        algorithm: Hash algorithm name passed to ``hashlib.new``
            (md5, sha256, sha512). Note that md5, the default, is not
            collision-resistant; it suits change detection, not security.
        chunk_size: Number of bytes requested per read. Must not be 0.
            A negative value makes the first read return the whole file.

    Returns:
        Hex digest of hash

    Raises:
        ValueError: If chunk_size is 0, or if hashlib does not support
            the requested algorithm
        OSError: If the file cannot be opened or read
    """
    if chunk_size == 0:
        # read(0) always returns b"", which would end the loop below at once
        # and silently return the digest of empty input instead of the file's.
        raise ValueError("chunk_size must not be 0")

    hash_func = hashlib.new(algorithm)

    with open(file_path, "rb") as f:
        # An empty bytes object marks end of file and stops the loop.
        while chunk := f.read(chunk_size):
            hash_func.update(chunk)

    return hash_func.hexdigest()


def verify_hash(
    file_path: Path,
    expected_hash: str,
    algorithm: HashAlgorithm = "md5"
) -> bool:
    """
    Verify file hash matches expected value.

    The comparison ignores letter case and any whitespace surrounding
    the expected value.

    Args:
        file_path: Path to file
        expected_hash: Expected hex digest; may be upper- or lower-case and
            may carry leading/trailing whitespace (e.g. a trailing newline)
        algorithm: Hash algorithm used

    Returns:
        True if hash matches
    """
    actual_hash = calculate_hash(file_path, algorithm)
    # The computed hex digest is lowercase, so bring the expected value to the
    # same form. Whitespace is stripped because digests read from checksum
    # files or command output commonly end with a newline.
    normalized_expected = expected_hash.strip().lower()
    return actual_hash == normalized_expected


def calculate_hash_streaming(
    data: bytes,
    algorithm: HashAlgorithm = "md5"
) -> str:
    """
    Hash an in-memory bytes value.

    Despite the name, the data is fed to the hash object in one update
    call rather than in pieces.

    Args:
        data: Bytes whose digest is wanted
        algorithm: Name of the hash algorithm, passed to ``hashlib.new``

    Returns:
        The digest as a hex string
    """
    digest_builder = hashlib.new(algorithm)
    digest_builder.update(data)

    hex_value = digest_builder.hexdigest()
    return hex_value
PreviousNext