|
| 1 | +"""AES-256-GCM file encryption helpers. |
| 2 | +
|
| 3 | +``encrypt_file(source, target, key)`` writes a self-describing envelope:: |
| 4 | +
|
| 5 | + magic = b"FA-AESG" 7 bytes |
| 6 | + version = 0x01 1 byte |
| 7 | + flags = 0x00 1 byte (reserved) |
| 8 | + aad_len = uint32 BE 4 bytes |
| 9 | + nonce = 12 bytes |
| 10 | + aad = <aad_len> |
| 11 | + ciphertext + tag (rest — GCM tag is the trailing 16 bytes) |
| 12 | +
|
| 13 | +``decrypt_file`` reads the same format, verifies the tag, and writes the |
| 14 | +plaintext to ``target``. Tampering (bit flips anywhere in the envelope |
| 15 | +except ``aad_len``) surfaces as :class:`CryptoException`. |
| 16 | +
|
| 17 | +GCM has a hard plaintext limit of roughly 64 GiB per ``(key, nonce)`` |
| 18 | +pair; each encrypt generates a fresh nonce, so that cap is per-file. In |
| 19 | +practice the one-shot ``AESGCM`` API accepts at most 2**31 - 1 bytes per |
| 20 | +call, so split files approaching 2 GiB before calling ``encrypt_file``. |
| 21 | +""" |
| 22 | + |
| 23 | +from __future__ import annotations |
| 24 | + |
| 25 | +import os |
| 26 | +from pathlib import Path |
| 27 | + |
| 28 | +from cryptography.exceptions import InvalidTag |
| 29 | +from cryptography.hazmat.primitives import hashes |
| 30 | +from cryptography.hazmat.primitives.ciphers.aead import AESGCM |
| 31 | +from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC |
| 32 | + |
| 33 | +from automation_file.exceptions import FileAutomationException |
| 34 | +from automation_file.logging_config import file_automation_logger |
| 35 | + |
# Envelope layout constants (the byte layout is described in the module docstring).
_MAGIC = b"FA-AESG"  # 7-byte file signature at offset 0
_VERSION = 0x01  # the only envelope version written and accepted
_NONCE_SIZE = 12  # bytes of GCM nonce stored right after the fixed header
# magic, then one byte each for version and flags, then the 4-byte aad_len
_HEADER_FIXED_SIZE = len(_MAGIC) + 1 + 1 + 4
_VALID_KEY_SIZES = frozenset((16, 24, 32))  # accepted AES key lengths, in bytes
_DEFAULT_PBKDF2_ITERATIONS = 200_000  # default work factor for key_from_password
| 42 | + |
| 43 | + |
class CryptoException(FileAutomationException):
    """Signal an encryption or decryption failure, including a failed tag check."""
| 46 | + |
| 47 | + |
def generate_key(*, bits: int = 256) -> bytes:
    """Return ``bits // 8`` bytes from the OS CSPRNG for use as an AES-GCM key.

    :param bits: key size in bits; must be 128, 192 or 256.
    :raises CryptoException: if ``bits`` is not one of the supported sizes.
    """
    if bits in (128, 192, 256):
        return os.urandom(bits // 8)
    raise CryptoException(f"bits must be 128 / 192 / 256, got {bits}")
| 53 | + |
| 54 | + |
def key_from_password(
    password: str,
    salt: bytes,
    *,
    iterations: int = _DEFAULT_PBKDF2_ITERATIONS,
    bits: int = 256,
) -> bytes:
    """Derive a symmetric key from ``password`` via PBKDF2-HMAC-SHA256.

    :param password: non-empty passphrase; it is UTF-8 encoded before derivation.
    :param salt: salt of at least 16 bytes (``bytes`` or ``bytearray``).
    :param iterations: PBKDF2 iteration count; must be a positive integer.
    :param bits: derived key size in bits (128, 192 or 256).
    :returns: ``bits // 8`` bytes of derived key material.
    :raises CryptoException: if any argument fails validation.
    """
    if not password:
        raise CryptoException("password must be non-empty")
    if not isinstance(password, str):
        raise CryptoException("password must be str")
    # Type-check before len() so a non-bytes salt fails with the module's
    # exception type, mirroring how keys are validated.
    if not isinstance(salt, (bytes, bytearray)):
        raise CryptoException("salt must be bytes")
    if len(salt) < 16:
        raise CryptoException("salt must be at least 16 bytes")
    if bits not in (128, 192, 256):
        raise CryptoException(f"bits must be 128 / 192 / 256, got {bits}")
    # Reject a non-positive or non-integer work factor up front instead of
    # letting the KDF backend fail with an unrelated exception type.
    if not isinstance(iterations, int) or iterations < 1:
        raise CryptoException(
            f"iterations must be a positive integer, got {iterations!r}",
        )
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=bits // 8,
        salt=bytes(salt),
        iterations=iterations,
    )
    return kdf.derive(password.encode("utf-8"))
| 76 | + |
| 77 | + |
def encrypt_file(
    source: str | os.PathLike[str],
    target: str | os.PathLike[str],
    key: bytes,
    *,
    associated_data: bytes = b"",
) -> dict[str, int]:
    """Encrypt ``source`` to ``target`` under AES-GCM. Returns a size summary.

    The whole source file is read into memory, sealed with a freshly generated
    random nonce, and written to ``target`` as header + ciphertext + tag.
    Missing parent directories of ``target`` are created.

    :param source: path of the plaintext file to read.
    :param target: path the envelope is written to.
    :param key: AES key of 16, 24 or 32 bytes.
    :param associated_data: optional bytes stored in clear in the header and
        bound to the ciphertext by the GCM tag.
    :returns: ``{"plaintext_bytes": ..., "ciphertext_bytes": ...}`` where the
        second value is the size of the complete envelope.
    :raises CryptoException: on an invalid key, non-bytes ``associated_data``,
        a missing source file, or input too large for a single AES-GCM call.
    """
    _validate_key(key)
    if not isinstance(associated_data, (bytes, bytearray)):
        raise CryptoException("associated_data must be bytes")
    src = Path(source)
    if not src.is_file():
        raise CryptoException(f"source file not found: {src}")

    plaintext = src.read_bytes()
    aad = bytes(associated_data)
    nonce = os.urandom(_NONCE_SIZE)
    aesgcm = AESGCM(bytes(key))
    try:
        # Empty AAD is passed as None; decrypt_file applies the same mapping.
        ciphertext = aesgcm.encrypt(nonce, plaintext, aad or None)
    except OverflowError as err:
        # The one-shot AESGCM API rejects inputs above 2**31 - 1 bytes; report
        # that through the module's own exception type.
        raise CryptoException(
            "plaintext or associated_data is too large to encrypt in one call",
        ) from err

    # A successful encrypt implies the AAD length fits the 4-byte aad_len field.
    envelope = _build_header(aad, nonce) + ciphertext
    dst = Path(target)
    dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_bytes(envelope)
    file_automation_logger.info(
        "encrypt_file: %s -> %s (%d -> %d bytes)",
        src,
        dst,
        len(plaintext),
        len(envelope),
    )
    return {"plaintext_bytes": len(plaintext), "ciphertext_bytes": len(envelope)}
| 110 | + |
| 111 | + |
def decrypt_file(
    source: str | os.PathLike[str],
    target: str | os.PathLike[str],
    key: bytes,
) -> dict[str, int]:
    """Decrypt the envelope at ``source`` and write the plaintext to ``target``.

    The nonce and associated data are read from the envelope header. Missing
    parent directories of ``target`` are created.

    :returns: ``{"ciphertext_bytes": ..., "plaintext_bytes": ...}`` where the
        first value is the size of the complete envelope.
    :raises CryptoException: on an invalid key, a missing source file, a
        malformed header, or a failed authentication tag check.
    """
    _validate_key(key)
    in_path = Path(source)
    if not in_path.is_file():
        raise CryptoException(f"source file not found: {in_path}")

    blob = in_path.read_bytes()
    nonce, aad, sealed = _parse_envelope(blob)
    cipher = AESGCM(bytes(key))
    try:
        # Empty AAD is passed as None, matching what encrypt_file does.
        recovered = cipher.decrypt(nonce, sealed, aad if aad else None)
    except InvalidTag as err:
        raise CryptoException("authentication failed: wrong key or tampered data") from err

    out_path = Path(target)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_bytes(recovered)

    summary = {"ciphertext_bytes": len(blob), "plaintext_bytes": len(recovered)}
    file_automation_logger.info(
        "decrypt_file: %s -> %s (%d -> %d bytes)",
        in_path,
        out_path,
        summary["ciphertext_bytes"],
        summary["plaintext_bytes"],
    )
    return summary
| 141 | + |
| 142 | + |
def _validate_key(key: bytes) -> None:
    """Raise :class:`CryptoException` unless ``key`` is 16, 24 or 32 bytes long."""
    if isinstance(key, (bytes, bytearray)):
        size = len(key)
        if size in _VALID_KEY_SIZES:
            return
        raise CryptoException(
            f"key length must be 16 / 24 / 32 bytes, got {size}",
        )
    raise CryptoException("key must be bytes")
| 150 | + |
| 151 | + |
def _build_header(aad: bytes, nonce: bytes) -> bytes:
    """Serialize the envelope prefix: magic, version, flags, aad_len, nonce, aad."""
    fields = (
        _MAGIC,
        bytes((_VERSION, 0x00)),  # version byte followed by the reserved flags byte
        len(aad).to_bytes(4, "big"),  # aad_len as unsigned 32-bit big-endian
        nonce,
        bytes(aad),
    )
    return b"".join(fields)
| 155 | + |
| 156 | + |
def _parse_envelope(envelope: bytes) -> tuple[bytes, bytes, bytes]:
    """Split an envelope into ``(nonce, aad, ciphertext_with_tag)``.

    Checks the magic, version, reserved flags byte and length fields before
    slicing. The tag itself is not verified here; that happens when the
    caller decrypts.

    :raises CryptoException: if the envelope is too short, has the wrong
        magic, an unsupported version, non-zero reserved flags, or an
        ``aad_len`` that leaves no room for the AAD or the trailing GCM tag.
    """
    tag_size = 16  # GCM tag appended to the ciphertext
    if len(envelope) < _HEADER_FIXED_SIZE + _NONCE_SIZE + tag_size:
        raise CryptoException("ciphertext envelope is shorter than the fixed header")
    if not envelope.startswith(_MAGIC):
        raise CryptoException("not an AES-GCM envelope (bad magic)")
    version = envelope[len(_MAGIC)]
    if version != _VERSION:
        raise CryptoException(f"unsupported envelope version {version}")
    # The flags byte is not covered by the GCM tag, so it must be checked
    # here; version 1 envelopes always carry 0x00 in this position.
    flags = envelope[len(_MAGIC) + 1]
    if flags != 0x00:
        raise CryptoException(f"unsupported envelope flags 0x{flags:02x}")
    aad_len = int.from_bytes(envelope[_HEADER_FIXED_SIZE - 4 : _HEADER_FIXED_SIZE], "big")
    nonce_start = _HEADER_FIXED_SIZE
    nonce_end = nonce_start + _NONCE_SIZE
    aad_end = nonce_end + aad_len
    if aad_end > len(envelope):
        raise CryptoException("envelope truncated before aad end")
    # A corrupted aad_len can swallow the ciphertext; require room for the tag.
    if len(envelope) - aad_end < tag_size:
        raise CryptoException("envelope truncated before GCM tag")
    nonce = envelope[nonce_start:nonce_end]
    aad = envelope[nonce_end:aad_end]
    ciphertext = envelope[aad_end:]
    return nonce, aad, ciphertext
0 commit comments