Python

state

real world projects / file sync / sync

PYTHON
state.py🐍
"""
Sync state management for tracking file changes.
"""

import hashlib
import json
import logging
from dataclasses import dataclass, asdict, field, fields
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)


@dataclass
class FileState:
    """Recorded state of one tracked file.

    Attributes:
        path: Key under which the file is tracked. ``SyncState`` stores the
            path relative to its source directory here.
        checksum: Hex digest of the file contents.
        size: File size in bytes (``st_size``).
        modified_time: Modification time as a POSIX timestamp (``st_mtime``).
        synced_time: Optional string; presumably when the file was last
            synced. Nothing in this module sets it -- confirm with callers.
        remote_checksum: Optional string; presumably the checksum of the
            remote copy. Nothing in this module sets it -- confirm with
            callers.
    """
    path: str
    checksum: str
    size: int
    modified_time: float
    synced_time: Optional[str] = None
    remote_checksum: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the state as a plain dict keyed by field name."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "FileState":
        """Build a ``FileState`` from a dict such as ``to_dict`` produces.

        Keys that do not correspond to a field are ignored, so an entry
        carrying extra keys (for example one written by a version of the
        program with more fields) still loads instead of raising.

        Raises:
            TypeError: If a required field (one without a default) is missing.
        """
        known = {f.name for f in fields(cls)}
        return cls(**{key: value for key, value in data.items() if key in known})


@dataclass
class SyncStateData:
    """In-memory container for everything persisted in the state file.

    Attributes:
        files: Per-file records keyed by tracked path; each value is the
            dict form of a ``FileState``.
        last_sync: ISO-8601 timestamp string of the last sync, or ``None``
            if none has been recorded.
        conflicts: Paths currently flagged as conflicting.
        version: Version string stored alongside the data. Nothing in this
            module inspects it.
    """
    # Mutable containers use factories so instances never share state.
    files: Dict[str, dict] = field(default_factory=dict)
    last_sync: Optional[str] = field(default=None)
    conflicts: List[str] = field(default_factory=list)
    version: str = field(default="1.0")


class SyncState:
    """Load, query, update and persist sync state for one source directory.

    The state lives in a JSON file named ``STATE_FILE`` inside the source
    directory. It is read once on construction; all mutations happen in
    memory and are written back only by an explicit call to ``save``.
    """

    STATE_FILE = ".sync_state.json"

    def __init__(self, source_dir: str):
        """Bind to *source_dir* and load its state file if one exists."""
        self.source_dir = Path(source_dir)
        self.state_file = self.source_dir / self.STATE_FILE
        self._state: "SyncStateData" = SyncStateData()
        self._load()

    def _load(self):
        """Load state from the state file, if present.

        Best-effort: an unreadable, malformed or wrongly shaped file is
        logged and replaced by an empty in-memory state instead of raising.
        """
        if not self.state_file.exists():
            return

        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Validate the shape now so later accessors do not fail with
            # confusing errors on hand-edited or truncated files.
            if not isinstance(data, dict):
                raise ValueError("state file root is not a JSON object")
            files = data.get("files", {})
            conflicts = data.get("conflicts", [])
            if not isinstance(files, dict):
                raise ValueError("'files' is not a JSON object")
            if not isinstance(conflicts, list):
                raise ValueError("'conflicts' is not a JSON array")

            self._state = SyncStateData(
                files=files,
                last_sync=data.get("last_sync"),
                conflicts=conflicts,
                version=data.get("version", "1.0"),
            )
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            logger.error(f"Error loading state: {e}")
            self._state = SyncStateData()
        else:
            logger.debug(f"Loaded state: {len(self._state.files)} files")

    def save(self):
        """Write the in-memory state to the state file.

        The data is written to a sibling ``<name>.tmp`` file and then moved
        over the real file, so an interrupted write cannot leave a truncated
        state file behind. Failures are logged, not raised.
        """
        payload = {
            "files": self._state.files,
            "last_sync": self._state.last_sync,
            "conflicts": self._state.conflicts,
            "version": self._state.version,
        }
        tmp_file = self.state_file.with_name(self.state_file.name + ".tmp")

        try:
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2)
            tmp_file.replace(self.state_file)
        except (OSError, TypeError, ValueError) as e:
            logger.error(f"Error saving state: {e}")
            # Do not leave a partial temp file next to the real state file.
            try:
                tmp_file.unlink()
            except OSError:
                pass
        else:
            logger.debug("State saved")

    def get_file_state(self, path: str) -> Optional["FileState"]:
        """Return the recorded state for *path*, or ``None`` if untracked."""
        record = self._state.files.get(path)
        if record is None:
            return None
        return FileState.from_dict(record)

    def update_file_state(self, file_state: "FileState"):
        """Insert or overwrite the record stored under ``file_state.path``."""
        self._state.files[file_state.path] = file_state.to_dict()

    def remove_file_state(self, path: str):
        """Forget *path*; a no-op if it is not tracked."""
        self._state.files.pop(path, None)

    def get_all_files(self) -> List["FileState"]:
        """Return a ``FileState`` for every tracked file."""
        return [FileState.from_dict(record) for record in self._state.files.values()]

    def set_last_sync(self, timestamp: Optional[datetime] = None):
        """Record the last sync time as an ISO-8601 string.

        Args:
            timestamp: Time to record. Defaults to the current UTC time as a
                naive ``datetime`` (no ``tzinfo``), which keeps the stored
                format identical to what ``datetime.utcnow()`` produced.
        """
        if timestamp is None:
            # datetime.utcnow() is deprecated since Python 3.12; this yields
            # the same naive-UTC value.
            timestamp = datetime.now(timezone.utc).replace(tzinfo=None)
        self._state.last_sync = timestamp.isoformat()

    def get_last_sync(self) -> Optional[datetime]:
        """Return the last sync time, or ``None`` if none is recorded."""
        if self._state.last_sync:
            return datetime.fromisoformat(self._state.last_sync)
        return None

    def add_conflict(self, path: str):
        """Flag *path* as conflicting; duplicates are ignored."""
        if path not in self._state.conflicts:
            self._state.conflicts.append(path)

    def remove_conflict(self, path: str):
        """Clear the conflict flag for *path*; a no-op if not flagged."""
        if path in self._state.conflicts:
            self._state.conflicts.remove(path)

    def get_conflicts(self) -> List[str]:
        """Return a copy of the conflicting paths."""
        return self._state.conflicts.copy()

    def clear_conflicts(self):
        """Remove every conflict flag."""
        self._state.conflicts.clear()

    @staticmethod
    def calculate_checksum(file_path: Path, algorithm: str = "md5") -> str:
        """Return the hex digest of the file's contents.

        Args:
            file_path: File to hash; it is read in binary mode in chunks.
            algorithm: Any name accepted by ``hashlib.new``.
        """
        hash_func = hashlib.new(algorithm)

        with open(file_path, "rb") as f:
            while chunk := f.read(8192):
                hash_func.update(chunk)

        return hash_func.hexdigest()

    def has_file_changed(self, path: Path) -> bool:
        """Report whether *path* differs from its recorded state.

        Untracked and missing files count as changed. For tracked files a
        size mismatch is conclusive; otherwise the checksum is recomputed
        only when the modification time differs from the recorded one.

        Raises:
            ValueError: If *path* is not located under the source directory.
        """
        rel_path = str(path.relative_to(self.source_dir))
        state = self.get_file_state(rel_path)

        if state is None:
            return True  # New file

        # A single stat() call avoids the race between an existence check
        # and reading the metadata.
        try:
            stat = path.stat()
        except (FileNotFoundError, NotADirectoryError):
            return True  # Deleted

        if stat.st_size != state.size:
            return True  # Different length means different content

        if stat.st_mtime != state.modified_time:
            # The timestamp alone is not proof (e.g. the file was touched),
            # so confirm with the slower checksum.
            return self.calculate_checksum(path) != state.checksum

        return False

    def create_file_state(self, path: Path) -> "FileState":
        """Build a ``FileState`` describing *path* as it is on disk now.

        The result is not stored; pass it to ``update_file_state`` to track it.

        Raises:
            ValueError: If *path* is not located under the source directory.
        """
        rel_path = str(path.relative_to(self.source_dir))
        stat = path.stat()

        return FileState(
            path=rel_path,
            checksum=self.calculate_checksum(path),
            size=stat.st_size,
            modified_time=stat.st_mtime,
        )

    def get_changed_files(self, all_files: List[Path]) -> List[Path]:
        """Return the members of *all_files* that ``has_file_changed`` flags."""
        return [path for path in all_files if self.has_file_changed(path)]

    def get_deleted_files(self, all_files: List[Path]) -> List[str]:
        """Return tracked paths that are absent from *all_files*.

        *all_files* is taken as the set of files currently on disk; each is
        converted to a path relative to the source directory for comparison.
        """
        current_paths = {str(p.relative_to(self.source_dir)) for p in all_files}
        return [tracked for tracked in self._state.files if tracked not in current_paths]
Previous | Next