Skip to content

Commit 775871d

Browse files
committed
more chunking renames
1 parent ddb8925 commit 775871d

4 files changed

Lines changed: 26 additions & 24 deletions

File tree

prime_backup/action/helpers/blob_pre_calc_result.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import dataclasses
22
from pathlib import Path
3-
from typing import List, IO
3+
from typing import List, IO, Optional
44

55
from prime_backup.types.chunk_method import ChunkMethod
66
from prime_backup.types.chunker import PrettyChunk
@@ -16,20 +16,20 @@ class SizeMismatched(Exception):
1616
size: int
1717
hash: str
1818
should_be_chunked: bool
19-
chunks: List[PrettyChunk]
19+
chunks: Optional[List[PrettyChunk]]
2020

2121
def simple_repr(self) -> str:
2222
return misc_utils.represent(self, attrs={
2323
'size': self.size,
2424
'hash': self.hash,
2525
'should_be_chunked': self.should_be_chunked,
26-
'chunks_len': len(self.chunks),
26+
'chunks_len': len(self.chunks) if self.chunks is not None else None,
2727
})
2828

2929
@classmethod
3030
def from_stream(cls, stream: IO[bytes], rel_path: Path, size: int) -> 'BlobPrecalculateResult':
3131
chunk_method = ChunkMethod.get_for_file(rel_path, size)
32-
chunks: List[PrettyChunk] = []
32+
chunks: Optional[List[PrettyChunk]] = None
3333
if chunk_method is not None:
3434
chunker = chunk_method.create_stream_chunker(stream, need_entire_file_hash=True)
3535
chunks = chunker.cut_all()
@@ -49,8 +49,8 @@ def from_stream(cls, stream: IO[bytes], rel_path: Path, size: int) -> 'BlobPreca
4949
@classmethod
5050
def from_file(cls, path: Path, rel_path: Path, size: int) -> 'BlobPrecalculateResult':
5151
chunk_method = ChunkMethod.get_for_file(rel_path, size)
52-
chunks: List[PrettyChunk] = []
53-
if chunk_method is not None:
52+
chunks: Optional[List[PrettyChunk]] = None
53+
if chunk_method is not None and chunk_method.needs_precalculation():
5454
chunker = chunk_method.create_file_chunker(path, need_entire_file_hash=True)
5555
chunks = chunker.cut_all()
5656
sah = SizeAndHash(chunker.get_read_file_size(), chunker.get_entire_file_hash())

prime_backup/types/chunk_method.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,23 @@
33
from typing import Optional, IO, TYPE_CHECKING
44

55
from prime_backup.types.chunker import Chunker
6-
from prime_backup.types.chunker_definition import ChunkerDefinition, CDCChunkerDefinition, FixedSizeChunkerDefinition
6+
from prime_backup.types.chunker_definition import ChunkerDefinition, FastCDCChunkerDefinition, FixedSizeChunkerDefinition
77
from prime_backup.utils.path_like import PathLike
88

99

1010
class ChunkMethod(enum.Enum):
11-
# Content-Defined Chunking (CDC)
12-
cdc_32k = CDCChunkerDefinition(avg_size=32 * 1024, min_size=8 * 1024, max_size=256 * 1024)
13-
cdc_128k = CDCChunkerDefinition(avg_size=128 * 1024, min_size=64 * 1024, max_size=1024 * 1024)
14-
cdc = cdc_32k
11+
# Content-Defined Chunking with FastCDC
12+
fastcdc_32k = FastCDCChunkerDefinition(avg_size=32 * 1024, min_size=8 * 1024, max_size=256 * 1024)
13+
fastcdc_128k = FastCDCChunkerDefinition(avg_size=128 * 1024, min_size=64 * 1024, max_size=1024 * 1024)
1514

1615
# Fixed-Size Chunking
1716
fixed_4k = FixedSizeChunkerDefinition(4 * 1024)
1817
fixed_32k = FixedSizeChunkerDefinition(32 * 1024)
1918
fixed_128k = FixedSizeChunkerDefinition(128 * 1024)
2019

20+
# Common Alias
21+
cdc = fastcdc_32k
22+
2123
if TYPE_CHECKING:
2224
value: ChunkerDefinition
2325

prime_backup/types/chunker.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,17 @@ def get_read_file_size(self) -> int:
8282
return self.__file_size_sum
8383

8484

85-
# ======================== CDC Chunker ========================
85+
# ======================== FastCDC Chunker ========================
8686

8787
@dataclasses.dataclass(frozen=True)
88-
class CDCChunkerConfig:
88+
class FastCDCChunkerConfig:
8989
avg_size: int
9090
min_size: int
9191
max_size: int
9292

9393

9494
class _CDCChunker(Chunker, ABC):
95-
def __init__(self, cfg: CDCChunkerConfig, need_entire_file_hash: bool):
95+
def __init__(self, cfg: FastCDCChunkerConfig, need_entire_file_hash: bool):
9696
super().__init__(need_entire_file_hash)
9797
self.cfg = cfg
9898

@@ -107,8 +107,8 @@ def _create_cdc_engine(self) -> 'pyfastcdc.FastCDC':
107107
)
108108

109109

110-
class CDCFileChunker(_CDCChunker):
111-
def __init__(self, cfg: CDCChunkerConfig, file_path: Path, need_entire_file_hash: bool = False):
110+
class FastCDCFileChunker(_CDCChunker):
111+
def __init__(self, cfg: FastCDCChunkerConfig, file_path: Path, need_entire_file_hash: bool = False):
112112
super().__init__(cfg, need_entire_file_hash)
113113
self.file_path = file_path
114114

@@ -119,8 +119,8 @@ def _iter_raw_chunks(self) -> Iterable[_RawChunk]:
119119
yield _RawChunk(offset=c.offset, length=c.length, data=c.data)
120120

121121

122-
class CDCStreamChunker(_CDCChunker):
123-
def __init__(self, cfg: CDCChunkerConfig, stream: 'pyfastcdc.BinaryStreamReader', need_entire_file_hash: bool = False):
122+
class FastCDCStreamChunker(_CDCChunker):
123+
def __init__(self, cfg: FastCDCChunkerConfig, stream: 'pyfastcdc.BinaryStreamReader', need_entire_file_hash: bool = False):
124124
super().__init__(cfg, need_entire_file_hash)
125125
self.stream = stream
126126

prime_backup/types/chunker_definition.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from typing_extensions import override
77

8-
from prime_backup.types.chunker import Chunker, CDCFileChunker, CDCStreamChunker, FixedSizeFileChunker, FixedSizeStreamChunker, CDCChunkerConfig
8+
from prime_backup.types.chunker import Chunker, FastCDCFileChunker, FastCDCStreamChunker, FixedSizeFileChunker, FixedSizeStreamChunker, FastCDCChunkerConfig
99

1010

1111
class ChunkerDefinition(ABC):
@@ -19,22 +19,22 @@ def create_stream_chunker(self, stream, need_entire_file_hash: bool) -> Chunker:
1919

2020

2121
@dataclasses.dataclass(frozen=True)
22-
class CDCChunkerDefinition(ChunkerDefinition):
22+
class FastCDCChunkerDefinition(ChunkerDefinition):
2323
avg_size: int
2424
min_size: int
2525
max_size: int
26-
_config: CDCChunkerConfig = dataclasses.field(init=False, repr=False, compare=False)
26+
_config: FastCDCChunkerConfig = dataclasses.field(init=False, repr=False, compare=False)
2727

2828
def __post_init__(self):
29-
object.__setattr__(self, '_config', CDCChunkerConfig(self.avg_size, self.min_size, self.max_size))
29+
object.__setattr__(self, '_config', FastCDCChunkerConfig(self.avg_size, self.min_size, self.max_size))
3030

3131
@override
3232
def create_file_chunker(self, file_path: Path, need_entire_file_hash: bool) -> Chunker:
33-
return CDCFileChunker(self._config, file_path, need_entire_file_hash)
33+
return FastCDCFileChunker(self._config, file_path, need_entire_file_hash)
3434

3535
@override
3636
def create_stream_chunker(self, stream: IO[bytes], need_entire_file_hash: bool) -> Chunker:
37-
return CDCStreamChunker(self._config, stream, need_entire_file_hash)
37+
return FastCDCStreamChunker(self._config, stream, need_entire_file_hash)
3838

3939

4040
@dataclasses.dataclass(frozen=True)

0 commit comments

Comments
 (0)