# state.py
"""
Sync state management for tracking file changes.
"""
import json
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional
from dataclasses import dataclass, asdict, field
import hashlib
import logging
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class FileState:
    """State of a tracked file.

    Attributes:
        path: Path identifying the file; also used as its key in the
            persisted state.
        checksum: Hex digest of the file contents.
        size: File size in bytes.
        modified_time: Modification time as a float timestamp (``st_mtime``).
        synced_time: Optional string recording when the file was synced.
        remote_checksum: Optional checksum of the remote copy.
    """

    path: str
    checksum: str
    size: int
    modified_time: float
    synced_time: Optional[str] = None
    remote_checksum: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the state as a plain dict, suitable for JSON serialization."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "FileState":
        """Build a FileState from a dict such as one produced by ``to_dict``.

        Keys that are not FileState fields are ignored, so an entry carrying
        additional keys still loads instead of raising ``TypeError``.

        Args:
            data: Mapping of field names to values.

        Returns:
            A new FileState populated from the recognised keys.

        Raises:
            TypeError: If a required field (``path``, ``checksum``, ``size``
                or ``modified_time``) is missing from ``data``.
        """
        known_fields = cls.__dataclass_fields__
        return cls(**{key: value for key, value in data.items() if key in known_fields})
@dataclass
class SyncStateData:
    """Full sync state data.

    In-memory container for the values that SyncState reads from and writes
    to the JSON state file.
    """
    # Tracked files: maps a file's path key to the plain-dict form of its state.
    files: Dict[str, dict] = field(default_factory=dict)
    # ISO-format timestamp string of the last sync, or None if never set.
    last_sync: Optional[str] = None
    # Paths currently recorded as conflicts.
    conflicts: List[str] = field(default_factory=list)
    # Version tag stored alongside the state; defaults to "1.0".
    version: str = "1.0"
class SyncState:
    """Manages sync state persistence.

    State is held in memory as a ``SyncStateData`` and stored as JSON in
    ``STATE_FILE`` inside the source directory. Changes made through the
    mutator methods stay in memory until ``save()`` is called.
    """

    STATE_FILE = ".sync_state.json"

    def __init__(self, source_dir: str):
        """Bind to ``source_dir`` and load any existing state file from it.

        Args:
            source_dir: Directory whose files are tracked; the state file
                lives directly inside it.
        """
        self.source_dir = Path(source_dir)
        self.state_file = self.source_dir / self.STATE_FILE
        self._state: SyncStateData = SyncStateData()
        self._load()

    def _load(self):
        """Load state from file.

        A missing state file leaves the current (empty) state untouched. Any
        failure to read, parse or validate the file is logged and results in
        a fresh empty state rather than an exception.
        """
        if not self.state_file.exists():
            return
        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Reject well-formed JSON of the wrong shape here, so that later
            # methods can rely on ``files`` being a dict and ``conflicts``
            # being a list.
            if not isinstance(data, dict):
                raise ValueError("state file root must be a JSON object")
            files = data.get("files", {})
            conflicts = data.get("conflicts", [])
            if not isinstance(files, dict):
                raise ValueError("'files' must be a JSON object")
            if not isinstance(conflicts, list):
                raise ValueError("'conflicts' must be a JSON array")
            self._state = SyncStateData(
                files=files,
                last_sync=data.get("last_sync"),
                conflicts=conflicts,
                version=data.get("version", "1.0"),
            )
            logger.debug(f"Loaded state: {len(self._state.files)} files")
        except Exception as e:
            # Deliberately broad: an unreadable state file falls back to an
            # empty state instead of propagating.
            logger.error(f"Error loading state: {e}")
            self._state = SyncStateData()

    def save(self):
        """Save state to file.

        The JSON is written to a temporary sibling file which then replaces
        the real state file, so a failure part-way through writing does not
        destroy the previously saved state. Errors are logged, not raised.
        """
        data = {
            "files": self._state.files,
            "last_sync": self._state.last_sync,
            "conflicts": self._state.conflicts,
            "version": self._state.version,
        }
        tmp_file = self.state_file.with_name(self.state_file.name + ".tmp")
        try:
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            tmp_file.replace(self.state_file)
            logger.debug("State saved")
        except Exception as e:
            # Deliberately broad: saving is best-effort and only logs.
            logger.error(f"Error saving state: {e}")
            try:
                tmp_file.unlink()
            except OSError:
                # The temp file may never have been created, or may already
                # have been moved into place; nothing left to clean up.
                pass

    def get_file_state(self, path: str) -> Optional[FileState]:
        """Get state for a specific file.

        Args:
            path: Key of the file in the state (as stored in ``FileState.path``).

        Returns:
            The stored FileState, or None if the file is not tracked.
        """
        entry = self._state.files.get(path)
        if entry is None:
            return None
        return FileState.from_dict(entry)

    def update_file_state(self, file_state: FileState):
        """Insert or overwrite the stored state for ``file_state.path``."""
        self._state.files[file_state.path] = file_state.to_dict()

    def remove_file_state(self, path: str):
        """Remove state for a file; does nothing if the file is not tracked."""
        self._state.files.pop(path, None)

    def get_all_files(self) -> List[FileState]:
        """Get all tracked files as FileState objects."""
        return [FileState.from_dict(entry) for entry in self._state.files.values()]

    def set_last_sync(self, timestamp: Optional[datetime] = None):
        """Update last sync timestamp.

        Args:
            timestamp: Time to record; defaults to the current UTC time as a
                naive datetime. Stored in ISO format.
        """
        if timestamp is None:
            # NOTE: datetime.utcnow() is deprecated since Python 3.12, but an
            # aware datetime would change the persisted string and the value
            # returned by get_last_sync(), so the naive form is kept.
            timestamp = datetime.utcnow()
        self._state.last_sync = timestamp.isoformat()

    def get_last_sync(self) -> Optional[datetime]:
        """Get last sync timestamp.

        Returns:
            The stored timestamp parsed from ISO format, or None if no sync
            has been recorded.
        """
        if self._state.last_sync:
            return datetime.fromisoformat(self._state.last_sync)
        return None

    def add_conflict(self, path: str):
        """Record ``path`` as a conflict unless it is already recorded."""
        if path not in self._state.conflicts:
            self._state.conflicts.append(path)

    def remove_conflict(self, path: str):
        """Remove ``path`` from the conflicts; does nothing if it is absent."""
        if path in self._state.conflicts:
            self._state.conflicts.remove(path)

    def get_conflicts(self) -> List[str]:
        """Return a copy of the conflict list (safe for the caller to mutate)."""
        return self._state.conflicts.copy()

    def clear_conflicts(self):
        """Clear all conflicts."""
        self._state.conflicts.clear()

    @staticmethod
    def calculate_checksum(file_path: Path, algorithm: str = "md5") -> str:
        """Calculate file checksum.

        Args:
            file_path: File to hash.
            algorithm: Any algorithm name accepted by ``hashlib.new``.

        Returns:
            Hex digest of the file contents.

        Raises:
            ValueError: If ``algorithm`` is not supported by hashlib.
            OSError: If the file cannot be opened or read.
        """
        hash_func = hashlib.new(algorithm)
        with open(file_path, "rb") as f:
            # Read in fixed-size chunks so large files are never held in
            # memory in full.
            while chunk := f.read(8192):
                hash_func.update(chunk)
        return hash_func.hexdigest()

    def has_file_changed(self, path: Path) -> bool:
        """Check if file has changed since last sync.

        Args:
            path: Path of the file; must be located under ``source_dir``.

        Returns:
            True if the file is untracked, missing on disk, or its contents
            differ from the stored state; False otherwise.

        Raises:
            ValueError: If ``path`` is not under ``source_dir``.
        """
        rel_path = str(path.relative_to(self.source_dir))
        state = self.get_file_state(rel_path)
        if state is None:
            return True  # New file

        # A single stat() both detects deletion and supplies mtime and size,
        # avoiding a race between an exists() check and a later stat().
        try:
            current = path.stat()
        except (FileNotFoundError, NotADirectoryError):
            return True  # Deleted

        # Check modification time first (fast).
        if current.st_mtime == state.modified_time:
            return False
        # A different size means different content; no need to hash.
        if current.st_size != state.size:
            return True
        # Verify with checksum (slower but accurate).
        return self.calculate_checksum(path) != state.checksum

    def create_file_state(self, path: Path) -> FileState:
        """Create a FileState object for a file.

        The returned state is not stored; pass it to ``update_file_state``
        to track it.

        Args:
            path: Path of an existing file under ``source_dir``.

        Returns:
            A FileState with the relative path, checksum, size and
            modification time of the file.
        """
        rel_path = str(path.relative_to(self.source_dir))
        stat = path.stat()
        return FileState(
            path=rel_path,
            checksum=self.calculate_checksum(path),
            size=stat.st_size,
            modified_time=stat.st_mtime,
        )

    def get_changed_files(self, all_files: List[Path]) -> List[Path]:
        """Get list of files that have changed.

        Args:
            all_files: Paths to examine, each under ``source_dir``.

        Returns:
            The subset of ``all_files`` for which ``has_file_changed`` is
            True, in the original order.
        """
        return [path for path in all_files if self.has_file_changed(path)]

    def get_deleted_files(self, all_files: List[Path]) -> List[str]:
        """Get list of files that were deleted (in state but not on disk).

        Args:
            all_files: Paths currently present, each under ``source_dir``.

        Returns:
            State keys of tracked files that do not appear in ``all_files``.
        """
        current_paths = {str(p.relative_to(self.source_dir)) for p in all_files}
        return [path for path in self._state.files if path not in current_paths]