Skip to content

Commit e70bb18

Browse files
committed
add CRC validation for bluefile metadata
1 parent ce8f266 commit e70bb18

1 file changed

Lines changed: 84 additions & 13 deletions

File tree

sigmf/convert/blue.py

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import struct
1919
import tempfile
2020
from datetime import datetime, timezone
21+
from functools import lru_cache
2122
from pathlib import Path
2223
from typing import Optional, Tuple
2324

@@ -62,7 +63,6 @@
6263
]
6364
# fmt: on
6465

65-
HEADER_SIZE_BYTES = 512
6666
BLOCK_SIZE_BYTES = 512
6767

6868
TYPE_MAP = {
@@ -143,10 +143,9 @@ def detect_endian(data: bytes) -> str:
143143
endianness = data[8:12].decode("ascii")
144144
if endianness == "EEEI":
145145
return "<"
146-
elif endianness == "IEEE":
146+
if endianness == "IEEE":
147147
return ">"
148-
else:
149-
raise SigMFConversionError(f"Unsupported endianness: {endianness}")
148+
raise SigMFConversionError(f"Unsupported endianness: {endianness}")
150149

151150

152151
def read_hcb(file_path: Path) -> (dict, dict):
@@ -183,8 +182,8 @@ def read_hcb(file_path: Path) -> (dict, dict):
183182
raw = header_bytes[offset : offset + size]
184183
try:
185184
val = struct.unpack(endian + fmt, raw)[0]
186-
except struct.error:
187-
raise SigMFConversionError(f"Failed to unpack field {key} with endian {endian}")
185+
except struct.error as err:
186+
raise SigMFConversionError(f"Failed to unpack field {key} with endian {endian}") from err
188187
if isinstance(val, bytes):
189188
val = val.decode("ascii", errors="replace")
190189
h_fixed[key] = val
@@ -222,6 +221,20 @@ def read_hcb(file_path: Path) -> (dict, dict):
222221
raw_adjunct = handle.read(256)
223222
h_adjunct = {"raw_base64": base64.b64encode(raw_adjunct).decode("ascii")}
224223

224+
if h_fixed["keywords"].get("CRC") is not None:
225+
# CRC calculated on first 160 bytes of fixed header + full extended header
226+
handle.seek(0)
227+
buffer = handle.read(160)
228+
handle.seek(int(h_fixed["ext_start"]) * BLOCK_SIZE_BYTES)
229+
buffer += handle.read(int(h_fixed["ext_size"]))
230+
target_crc = h_fixed["keywords"]["CRC"].lower()
231+
if target_crc == _crc32_broken(buffer):
232+
log.debug("CRC ok (BLUE implementation)")
233+
elif target_crc == _crc32_posix(buffer):
234+
log.debug("CRC ok (POSIX implementation)")
235+
else:
236+
log.warning("CRC mismatch in BLUE metadata!")
237+
225238
validate_fixed(h_fixed)
226239
validate_adjunct(h_adjunct)
227240

@@ -322,11 +335,6 @@ def data_loopback(blue_path: Path, data_path: Path, h_fixed: dict) -> None:
322335
Destination path for the SigMF dataset (.sigmf-data).
323336
h_fixed : dict
324337
Header Control Block dictionary.
325-
326-
Returns
327-
-------
328-
numpy.ndarray
329-
Parsed samples. Empty array for zero-sample files.
330338
"""
331339
header_bytes, data_bytes, _ = _get_blue_boundaries(blue_path, h_fixed)
332340
blue_format = h_fixed.get("format")
@@ -371,6 +379,70 @@ def data_loopback(blue_path: Path, data_path: Path, h_fixed: dict) -> None:
371379
log.info("wrote SigMF dataset to %s", data_path)
372380

373381

382+
@lru_cache
383+
def _generate_crc_table(poly: int):
384+
"""generate lookup table for given polynomial"""
385+
table = []
386+
for idx in range(256):
387+
crc = idx << 24
388+
for _ in range(8):
389+
if crc & 0x80000000:
390+
crc = (crc << 1) ^ poly
391+
else:
392+
crc = crc << 1
393+
crc &= 0xFFFFFFFF
394+
table.append(crc)
395+
return table
396+
397+
398+
def _crc32_posix(data: bytes) -> str:
    """
    POSIX.2 CRC-32 (cksum style) where the buffer length is part of the sum.

    Supposed BLUE standard implementation.

    Parameters
    ----------
    data : bytes
        Buffer to checksum.

    Returns
    -------
    str
        Complemented CRC as 8 lowercase, zero-padded hexadecimal digits.
    """
    lookup = _generate_crc_table(0x04C11DB7)
    mask = 0xFFFFFFFF

    # the length is appended least-significant octet first, using only as
    # many octets as are needed to represent it (none for an empty buffer)
    size = len(data)
    length_octets = size.to_bytes((size.bit_length() + 7) // 8, "little")

    register = 0
    for stream in (data, length_octets):
        for octet in stream:
            register = ((register << 8) & mask) ^ lookup[(register >> 24) ^ octet]

    # final one's complement of the 32-bit register
    return format(register ^ mask, "08x")
420+
421+
422+
def _crc32_broken(data: bytes) -> str:
    """
    POSIX-like CRC-32 variant with a faulty length stage.

    Used in many BLUE files. Differs from the POSIX routine in two ways
    visible here: the register starts at 0xFFFFFFFF, and the length stage
    feeds ``length >> 24`` (not the low octet) on every iteration.

    Parameters
    ----------
    data : bytes
        Buffer to checksum.

    Returns
    -------
    str
        Complemented CRC as 8 lowercase, zero-padded hexadecimal digits.
    """
    lookup = _generate_crc_table(0x04C11DB7)
    mask = 0xFFFFFFFF
    register = mask

    # data stage
    for octet in data:
        register = ((register << 8) & mask) ^ lookup[(register >> 24) ^ octet]

    # faulty length stage: one round per octet needed to hold the length, but
    # each round mixes in the bits above bit 23 of the remaining length, which
    # is zero for any buffer shorter than 2**24 bytes
    size = len(data)
    for shift in range(0, size.bit_length(), 8):
        fed = size >> (shift + 24)
        register = ((register << 8) & mask) ^ lookup[(register >> 24) ^ fed]

    # final one's complement of the 32-bit register
    return format(register ^ mask, "08x")
444+
445+
374446
def _get_blue_boundaries(blue_path: Path, h_fixed: dict) -> (int, int):
375447
"""
376448
Extract data boundaries from fixed header.
@@ -395,7 +467,6 @@ def _description(h_fixed: dict) -> str:
395467
spec_str = f"Platinum {version}"
396468
except InvalidVersion:
397469
log.warning("Could not parse BLUE specification from VER keyword.")
398-
pass
399470
# h_fixed will contain number e.g. 1000, 1001, 2000, 2001
400471
description = (
401472
f"Read {h_fixed['version']} type {h_fixed['type']} {h_fixed['format']} using {spec_str} specification."
@@ -460,7 +531,7 @@ def get_tag(tag):
460531
global_info = {
461532
SigMFFile.AUTHOR_KEY: getpass.getuser(),
462533
SigMFFile.DATATYPE_KEY: datatype,
463-
SigMFFile.RECORDER_KEY: f"Official SigMF BLUE converter",
534+
SigMFFile.RECORDER_KEY: "Official SigMF BLUE converter",
464535
SigMFFile.NUM_CHANNELS_KEY: num_channels,
465536
SigMFFile.SAMPLE_RATE_KEY: sample_rate_hz,
466537
SigMFFile.EXTENSIONS_KEY: [{"name": "blue", "version": "0.0.1", "optional": True}],

0 commit comments

Comments
 (0)