-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathedac.c
More file actions
417 lines (380 loc) · 14.8 KB
/
Copy pathedac.c
File metadata and controls
417 lines (380 loc) · 14.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
// SPDX-License-Identifier: GPL-2.0-only
/*
* beamfs - EDAC layer: CRC32 + Reed-Solomon FEC
* Author: Aurelien DESBRIERES <aurelien@hackers.camp>
*
* Reed-Solomon encoding/decoding uses the kernel's lib/reed_solomon
* library (RS(255,239) over GF(2^8), primitive polynomial 0x187).
* This avoids duplicating well-tested RS code already present in the
* kernel (used by NAND MTD, DVB, etc.) and addresses the concern raised
* during review (Eric Biggers, linux-fsdevel, April 2026).
*
* RS parameters:
* BEAMFS_RS_SYMSIZE = 8 (GF(2^8))
* BEAMFS_RS_FCR = 0 (first consecutive root)
* BEAMFS_RS_PRIM = 1 (primitive element)
* BEAMFS_RS_NROOTS = BEAMFS_RS_PARITY = 16 (parity symbols)
* data per subblock: BEAMFS_SUBBLOCK_DATA = 239 bytes
* codeword length: 255 bytes (BEAMFS_SUBBLOCK_TOTAL)
*/
#include <linux/kernel.h>
#include <linux/crc32.h>
#include <linux/slab.h>
#include <linux/rslib.h>
#include "beamfs.h"
/* RS codec handle - allocated once at module init */
static struct rs_control *beamfs_rs_ctrl;
/*
* beamfs_rs_init_tables - initialize the RS codec
* Called once from beamfs_init() before any mount.
*/
void beamfs_rs_init_tables(void)
{
/*
* init_rs(symsize, gfpoly, fcr, prim, nroots)
* symsize = 8 -> GF(2^8)
* gfpoly = 0x187 -> primitive polynomial x^8+x^7+x^2+x+1
* fcr = 0 -> first consecutive root
* prim = 1 -> primitive element alpha
* nroots = 16 -> 16 parity symbols, corrects up to 8 errors
*/
beamfs_rs_ctrl = init_rs(8, 0x187, 0, 1, BEAMFS_RS_PARITY);
if (!beamfs_rs_ctrl)
pr_err("beamfs: failed to initialize RS codec\n");
else
pr_debug("beamfs: RS codec initialized (RS(%d,%d))\n",
BEAMFS_SUBBLOCK_TOTAL, BEAMFS_SUBBLOCK_DATA);
}
/*
* beamfs_rs_exit - release the RS codec at module exit
*/
void beamfs_rs_exit_tables(void)
{
if (beamfs_rs_ctrl) {
free_rs(beamfs_rs_ctrl);
beamfs_rs_ctrl = NULL;
}
}
/*
* Shannon entropy term LUT in Q16.16 fixed point.
*
* Auto-generated by tools/gen_entropy_lut.py -- DO NOT EDIT.
*
* LUT[N][k] = round(-(k/N) * log2(k/N) * 2^16) for N in [1,8],
* = 0 for k == 0 (or N == 0, unused).
*
* To regenerate after a parameter change:
* ./tools/gen_entropy_lut.py
* The output between the SENTINEL markers below should match
* byte-for-byte.
*/
/* SENTINEL_LUT_BEGIN -- generator hash 1f5f8831e91b0318 */
static const __u32 beamfs_rs_entropy_term_q16_16[9][9] = {
[0] = { 0, }, /* [0] unused */
[1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 },
[2] = { 0, 32768, 0, 0, 0, 0, 0, 0, 0 },
[3] = { 0, 34624, 25557, 0, 0, 0, 0, 0, 0 },
[4] = { 0, 32768, 32768, 20400, 0, 0, 0, 0, 0 },
[5] = { 0, 30434, 34654, 28979, 16878, 0, 0, 0, 0 },
[6] = { 0, 28235, 34624, 32768, 25557, 14365, 0, 0, 0 },
[7] = { 0, 26283, 33842, 34333, 30235, 22724, 12493, 0, 0 },
[8] = { 0, 24576, 32768, 34776, 32768, 27774, 20400, 11047, 0 },
};
/* SENTINEL_LUT_END */
/*
* beamfs_rs_compute_entropy_q16_16 -- Shannon entropy estimator over the
* spatial distribution of corrected byte positions in an RS codeword.
*
* @positions: byte positions corrected (output of beamfs_rs_decode)
* @n_positions: number of valid entries; MUST be in [1, BEAMFS_RS_PARITY/2]
* @code_len_bytes: data length of the codeword, in [n_positions, 239]
*
* Returns Shannon H = sum_{bin} LUT[N][k_bin] in Q16.16 fixed point,
* range [0, 3*65536). Deterministic, no FPU; one u32 division per
* position for bin index computation.
*
* Pre: positions != NULL, 1 <= n_positions <= BEAMFS_RS_PARITY/2,
* code_len_bytes >= n_positions
*
* Caller decides whether to set BEAMFS_RS_EVENT_FLAG_ENTROPY_VALID:
* a single sample (n_positions == 1) yields H = 0 mathematically
* but is forensically non-significant; the caller (beamfs_log_rs_event)
* clears the flag in that case.
*/
__u32 beamfs_rs_compute_entropy_q16_16(const int *positions,
unsigned int n_positions,
size_t code_len_bytes)
{
unsigned int bin_count[BEAMFS_RS_ENTROPY_BINS];
__u32 h = 0;
unsigned int i, b;
if (WARN_ON_ONCE(!positions))
return 0;
if (WARN_ON_ONCE(n_positions == 0 ||
n_positions > BEAMFS_RS_PARITY / 2))
return 0;
if (WARN_ON_ONCE(code_len_bytes < n_positions ||
code_len_bytes > BEAMFS_SUBBLOCK_DATA))
return 0;
memset(bin_count, 0, sizeof(bin_count));
for (i = 0; i < n_positions; i++) {
/* bin = pos * BINS / code_len. u32 product fits since
* pos < 239 and BINS = 8, max product = 1912. */
unsigned int idx = (unsigned int)positions[i];
unsigned int bin;
if (WARN_ON_ONCE(idx >= code_len_bytes))
return 0;
bin = (idx * BEAMFS_RS_ENTROPY_BINS) /
(unsigned int)code_len_bytes;
if (WARN_ON_ONCE(bin >= BEAMFS_RS_ENTROPY_BINS))
return 0;
bin_count[bin]++;
}
/* H = sum over bins of LUT[N][k_bin]. Bins with k=0 contribute 0
* by LUT convention. Total guaranteed < 3 * 65536. */
for (b = 0; b < BEAMFS_RS_ENTROPY_BINS; b++)
h += beamfs_rs_entropy_term_q16_16[n_positions][bin_count[b]];
return h;
}
/*
* beamfs_rs_encode - encode @len data bytes, produce BEAMFS_RS_PARITY parity
* @data: input data (@len bytes, must be <= BEAMFS_SUBBLOCK_DATA)
* @len: number of data bytes (BEAMFS_SUBBLOCK_DATA for the bitmap path,
* BEAMFS_INODE_RS_DATA for inodes, etc.)
* @parity: output parity (BEAMFS_RS_PARITY bytes)
*
* lib/reed_solomon supports shortened RS codes natively: passing a
* length less than BEAMFS_SUBBLOCK_DATA produces a valid codeword,
* mathematically equivalent to padding the data with zeros up to the
* full subblock length. The same length must be passed to the matching
* beamfs_rs_decode call.
*
* The kernel encode_rs8 API takes uint8_t *data directly; parity is
* returned via a uint16_t *par buffer (low byte holds the parity symbol).
*/
int beamfs_rs_encode(uint8_t *data, size_t len, uint8_t *parity)
{
uint16_t par[BEAMFS_RS_PARITY];
int i;
if (!beamfs_rs_ctrl)
return -EINVAL;
if (len > BEAMFS_SUBBLOCK_DATA)
return -EINVAL;
memset(par, 0, sizeof(par));
encode_rs8(beamfs_rs_ctrl, data, len, par, 0);
for (i = 0; i < BEAMFS_RS_PARITY; i++)
parity[i] = (uint8_t)par[i];
return 0;
}
/*
* beamfs_rs_decode -- decode and correct a shortened RS codeword in place,
* optionally exposing the list of corrected byte
* positions for forensic entropy logging.
*
* @data: data bytes (@len bytes), corrected in place on success
* @len: number of data bytes (must match the length passed to encode)
* @parity: parity bytes (BEAMFS_RS_PARITY)
* @positions: optional output, BEAMFS_RS_PARITY/2 = 8 entries; on a
* positive return holds the corrected DATA byte positions
* in [0, len). May be NULL if no entropy logging is needed.
* @max_positions: capacity of @positions; ignored if @positions is NULL.
*
* Returns:
* < 0 uncorrectable (-EBADMSG) or invalid input (-EINVAL)
* = 0 no errors detected
* > 0 number of symbol errors corrected in place (data + parity)
*
* Implementation note (Option 2): decode_rs8 cannot both correct in place
* AND report eras_pos in a single call (its API is mutually exclusive on
* those modes; see lib/reed_solomon/decode_rs.c). To avoid a second RS
* decode pass, we snapshot @data before the call and reconstruct the
* corrected data positions by byte diff afterwards. Parity-side
* corrections are not reflected in @positions: only data positions are
* forensically meaningful for the journal, since parity-only flips are
* an artefact of the codec's internal recovery rather than an indicator
* of physical media damage at a specific data byte.
*
* Stack cost: BEAMFS_SUBBLOCK_DATA = 239 bytes for the snapshot (per call).
*/
int beamfs_rs_decode(u8 *data, size_t len, u8 *parity,
int *positions, unsigned int max_positions)
{
u16 par[BEAMFS_RS_PARITY];
u8 data_orig[BEAMFS_SUBBLOCK_DATA];
int i, nerr;
unsigned int n_data_corrected = 0;
if (!beamfs_rs_ctrl)
return -EINVAL;
if (len > BEAMFS_SUBBLOCK_DATA)
return -EINVAL;
if (positions && max_positions == 0)
return -EINVAL;
for (i = 0; i < BEAMFS_RS_PARITY; i++)
par[i] = parity[i];
/* Snapshot data for post-decode position reconstruction. */
if (positions)
memcpy(data_orig, data, len);
/* Mode "apply": corrects data and parity in place. eras_pos = NULL
* forces the kernel into the apply branch (decode_rs.c line 315). */
nerr = decode_rs8(beamfs_rs_ctrl, data, par, len,
NULL, 0, NULL, 0, NULL);
if (nerr < 0) {
pr_err_ratelimited("beamfs: RS block uncorrectable (len=%zu)\n",
len);
return -EBADMSG;
}
/* Reconstruct DATA-side corrected positions by byte diff. nerr is
* the total count (data + parity); n_data_corrected is the subset
* relevant for the entropy estimator. */
if (positions && nerr > 0) {
for (i = 0; i < (int)len &&
n_data_corrected < max_positions; i++) {
if (data[i] != data_orig[i])
positions[n_data_corrected++] = i;
}
}
/* The journal logs n_data_corrected as the entropy sample size,
* even though nerr (total) is the value returned upward; callers
* that need the entropy-aligned count must derive it from
* compute_entropy on the positions buffer they passed in. */
return nerr;
}
/*
* beamfs_rs_encode_region -- encode N RS(255,239) shortened subblocks
* across a region.
*
* @data_buf: base pointer of the data region
* @data_stride: distance in bytes between the start of two
* consecutive data subblocks (e.g.
* BEAMFS_SUBBLOCK_TOTAL=255 for the bitmap)
* @parity_buf: base pointer of the parity region (may equal
* data_buf + data_len for the interleaved case)
* @parity_stride: distance between the start of two consecutive
* parity blobs (e.g. BEAMFS_SUBBLOCK_TOTAL=255 for
* the bitmap, BEAMFS_RS_PARITY=16 for contiguous
* parity placement)
* @data_len: number of data bytes per subblock (e.g.
* BEAMFS_SUBBLOCK_DATA=239 for a full subblock)
* @n_subblocks: number of subblocks to encode
*
* Returns 0 on success, negative on the first encode failure.
* On failure the buffer is in an indeterminate state.
*/
int beamfs_rs_encode_region(u8 *data_buf, size_t data_stride,
u8 *parity_buf, size_t parity_stride,
size_t data_len, unsigned int n_subblocks)
{
unsigned int i;
if (!data_buf || !parity_buf)
return -EINVAL;
for (i = 0; i < n_subblocks; i++) {
u8 *d = data_buf + (size_t)i * data_stride;
u8 *p = parity_buf + (size_t)i * parity_stride;
int rc = beamfs_rs_encode(d, data_len, p);
if (rc < 0)
return rc;
}
return 0;
}
/*
* beamfs_rs_decode_region -- decode N RS(255,239) shortened subblocks
* across a region. Symmetric to beamfs_rs_encode_region.
*
* @results: optional, n_subblocks entries. On exit each
* entry holds the return value of beamfs_rs_decode
* for the corresponding subblock: < 0 uncorrectable,
* = 0 no errors, > 0 number of corrected symbols.
* Pass NULL to skip per-subblock reporting.
* @positions_buf: optional, flat array of n_subblocks * positions_stride
* int entries. On a positive results[i], the first
* results[i] entries of positions_buf[i*stride ..] hold
* the corrected DATA positions for subblock i. May be
* NULL (then positions_stride must be 0).
* @positions_stride: slot count per subblock in positions_buf; should be
* >= BEAMFS_RS_PARITY/2. Ignored if positions_buf NULL.
*
* Returns 0 if every subblock decoded successfully, the first
* negative error otherwise. Decoding does not stop on error: all
* subblocks are processed, results[] reflects the per-subblock
* outcome, and the worst negative error is returned.
*/
int beamfs_rs_decode_region(u8 *data_buf, size_t data_stride,
u8 *parity_buf, size_t parity_stride,
size_t data_len, unsigned int n_subblocks,
int *results,
int *positions_buf,
unsigned int positions_stride)
{
unsigned int i;
int worst = 0;
if (!data_buf || !parity_buf)
return -EINVAL;
if (positions_buf && positions_stride == 0)
return -EINVAL;
if (!positions_buf && positions_stride != 0)
return -EINVAL;
for (i = 0; i < n_subblocks; i++) {
u8 *d = data_buf + (size_t)i * data_stride;
u8 *p = parity_buf + (size_t)i * parity_stride;
int *pos = positions_buf
? positions_buf + (size_t)i * positions_stride
: NULL;
int rc = beamfs_rs_decode(d, data_len, p,
pos, positions_stride);
if (results)
results[i] = rc;
if (rc < 0 && worst >= 0)
worst = rc;
}
return worst;
}
/*
* beamfs_crc32 - compute CRC32 checksum
* @buf: data buffer
* @len: length in bytes
*
* Uses the kernel's hardware-accelerated crc32_le (same as ext4/btrfs).
* Seed 0xFFFFFFFF, final XOR 0xFFFFFFFF (standard CRC-32/ISO-HDLC).
*/
__u32 beamfs_crc32(const void *buf, size_t len)
{
return crc32_le(0xFFFFFFFF, buf, len) ^ 0xFFFFFFFF;
}
/*
* beamfs_crc32_sb -- compute CRC32 over the meaningful regions of the
* superblock, excluding s_crc32 itself and s_pad.
*
* Coverage (computed at compile time from struct layout):
* region A: [0, offsetof(s_crc32)) -- magic, counters,
* version, flags
* region B: [offsetof(s_uuid), offsetof(s_pad)) -- uuid, label,
* RS journal,
* bitmap_blk, features,
* protection scheme
* Total coverage: BEAMFS_SB_RS_COVERAGE_BYTES, asserted at build time
* by BUILD_BUG_ON below.
*
* Chained via crc32_le without intermediate XOR. Must match the
* userspace mkfs.beamfs::crc32_sb() byte-for-byte so that superblocks
* formatted by mkfs validate at mount time.
*
* On-disk format compatibility: superblocks produced by an mkfs whose
* struct layout differs from this kernel's (e.g. older v2 images
* predating the v3 feature-field extension) are correctly rejected by
* the resulting CRC mismatch.
*/
__u32 beamfs_crc32_sb(const struct beamfs_super_block *fsb)
{
const u8 *base = (const u8 *)fsb;
const size_t off_crc32 = offsetof(struct beamfs_super_block, s_crc32);
const size_t off_uuid = offsetof(struct beamfs_super_block, s_uuid);
const size_t off_pad = offsetof(struct beamfs_super_block, s_pad);
u32 c;
BUILD_BUG_ON(sizeof_field(struct beamfs_super_block, s_crc32) != 4);
BUILD_BUG_ON(off_uuid != off_crc32 + 4);
BUILD_BUG_ON(off_crc32 + (off_pad - off_uuid) !=
BEAMFS_SB_RS_COVERAGE_BYTES);
c = crc32_le(0xFFFFFFFF, base, off_crc32);
c = crc32_le(c, base + off_uuid, off_pad - off_uuid);
return c ^ 0xFFFFFFFF;
}