Skip to content

Commit 6817f66

Browse files
committed
refactor: deepen architecture by consolidating frequency and streaming code
- Consolidate frequency table serialization to codec package:
- Add WriteFrequencies/ReadFrequencies with expectedCount validation
- Add WriteFrequenciesToBytes/ReadFrequenciesFromBytes for byte slice APIs
- Add WriteU32LE/ReadU32LE helper functions
- Algorithms now use codec.ScaleFrequencies with maxTotal parameter
- Merge streaming.go thin wrappers into main algorithm files:
- Remove separate streaming.go files from all algorithms
- Add NewStreamingEncoder/NewStreamingDecoder directly to algorithm files
- Add countRun internal function to RLE for better testability
- Formalize state machine specification in OpenSpec:
- Add state transition table to REQ-ARCH-009
- Document all valid operations per state
- Clarify error transition behavior
- Add KindIO error kind for I/O related errors

Benefits:
- Locality: frequency table format changes in one place
- Leverage: algorithms focus on encoding logic
- Reduced file count: 173 lines of thin wrappers removed

All tests pass, including cross-language conformance (144 cases).
1 parent e673775 commit 6817f66

15 files changed

Lines changed: 263 additions & 436 deletions

File tree

algorithms/arithmetic/go/arithmetic.go

Lines changed: 48 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ package arithmetic
33

44
import (
55
"bufio"
6-
"encoding/binary"
6+
"bytes"
77
"fmt"
88
"io"
99
"os"
@@ -179,44 +179,31 @@ func (d *ArithmeticDecoder) DecodeSymbol(cumulative []uint32) uint32 {
179179
}
180180

181181
// ScaleFrequencies normalizes frequencies to fit within MaxTotal.
182+
// This is an alias for codec.ScaleFrequencies for backward compatibility.
182183
func ScaleFrequencies(freq []uint32) {
183-
var total uint64
184-
for _, f := range freq {
185-
total += uint64(f)
186-
}
187-
if total == 0 {
188-
for i := range freq {
189-
freq[i] = 1
190-
}
191-
return
192-
}
193-
if total <= uint64(MaxTotal) {
194-
return
195-
}
196-
var newTotal uint64
197-
for i, f := range freq {
198-
if f == 0 {
199-
continue
200-
}
201-
scaled := uint64(f) * uint64(MaxTotal) / total
202-
if scaled == 0 {
203-
scaled = 1
204-
}
205-
freq[i] = uint32(scaled)
206-
newTotal += scaled
207-
}
208-
if newTotal == 0 {
209-
base := MaxTotal / uint32(len(freq))
210-
if base == 0 {
211-
base = 1
212-
}
213-
for i := range freq {
214-
freq[i] = base
215-
}
216-
}
184+
codec.ScaleFrequencies(freq, MaxTotal)
185+
}
186+
187+
// BuildCumulative builds a cumulative frequency table from frequencies.
188+
// This is an alias for codec.BuildCumulative for backward compatibility.
189+
func BuildCumulative(freq []uint32) []uint32 {
190+
return codec.BuildCumulative(freq)
191+
}
192+
193+
// WriteFrequencies serializes a frequency table to the writer.
194+
// This is an alias for codec.WriteFrequencies for backward compatibility.
195+
func WriteFrequencies(w io.Writer, freq []uint32) error {
196+
return codec.WriteFrequencies(w, freq)
197+
}
198+
199+
// ReadFrequencies deserializes a frequency table from the reader.
200+
// This is an alias for codec.ReadFrequencies for backward compatibility.
201+
func ReadFrequencies(r io.Reader) ([]uint32, error) {
202+
return codec.ReadFrequencies(r, SymbolLimit)
217203
}
218204

219205
// BuildFrequenciesFromFile reads a file and counts byte frequencies.
206+
// The frequencies are scaled to fit within MaxTotal.
220207
func BuildFrequenciesFromFile(path string) ([]uint32, error) {
221208
freq := make([]uint32, SymbolLimit)
222209
f, err := os.Open(path)
@@ -246,50 +233,6 @@ func BuildFrequenciesFromFile(path string) ([]uint32, error) {
246233
return freq, nil
247234
}
248235

249-
// BuildCumulative builds a cumulative frequency table from frequencies.
250-
func BuildCumulative(freq []uint32) []uint32 {
251-
cum := make([]uint32, len(freq)+1)
252-
for i, f := range freq {
253-
cum[i+1] = cum[i] + f
254-
}
255-
if cum[len(cum)-1] == 0 {
256-
for i := range freq {
257-
cum[i+1] = uint32(i + 1)
258-
}
259-
}
260-
return cum
261-
}
262-
263-
// WriteFrequencies serializes a frequency table to the writer.
264-
func WriteFrequencies(w io.Writer, freq []uint32) error {
265-
count := uint32(len(freq))
266-
if err := binary.Write(w, binary.LittleEndian, count); err != nil {
267-
return err
268-
}
269-
for _, v := range freq {
270-
if err := binary.Write(w, binary.LittleEndian, v); err != nil {
271-
return err
272-
}
273-
}
274-
return nil
275-
}
276-
277-
// ReadFrequencies deserializes a frequency table from the reader.
278-
func ReadFrequencies(r io.Reader) ([]uint32, error) {
279-
var count uint32
280-
if err := binary.Read(r, binary.LittleEndian, &count); err != nil {
281-
return nil, codec.WrapError(codec.KindTruncated, "failed to read frequency table", err)
282-
}
283-
if count != uint32(SymbolLimit) {
284-
return nil, codec.NewError(codec.KindCorrupt, fmt.Sprintf("invalid frequency table size: %d", count))
285-
}
286-
freq := make([]uint32, count)
287-
if err := binary.Read(r, binary.LittleEndian, freq); err != nil {
288-
return nil, codec.WrapError(codec.KindTruncated, "failed to read frequency table", err)
289-
}
290-
return freq, nil
291-
}
292-
293236
// Encode reads from input and writes the arithmetic encoded output to w.
294237
func Encode(input io.Reader, w io.Writer) error {
295238
data, err := io.ReadAll(input)
@@ -366,6 +309,32 @@ func Decode(r io.Reader, w io.Writer) error {
366309
return bw.Flush()
367310
}
368311

312+
// NewStreamingEncoder creates a new streaming Arithmetic encoder.
313+
// It uses a buffered encoder that collects all input and encodes in one pass
314+
// during Finish(), since Arithmetic encoding requires complete input for frequency analysis.
315+
func NewStreamingEncoder() codec.Encoder {
316+
return codec.NewBufferedEncoder(func(input []byte) ([]byte, error) {
317+
var outBuf bytes.Buffer
318+
if err := Encode(bytes.NewReader(input), &outBuf); err != nil {
319+
return nil, err
320+
}
321+
return outBuf.Bytes(), nil
322+
})
323+
}
324+
325+
// NewStreamingDecoder creates a new streaming Arithmetic decoder.
326+
// It uses a buffered decoder that collects all input and decodes in one pass
327+
// during Finish().
328+
func NewStreamingDecoder() codec.Decoder {
329+
return codec.NewBufferedDecoder(func(input []byte) ([]byte, error) {
330+
var outBuf bytes.Buffer
331+
if err := Decode(bytes.NewReader(input), &outBuf); err != nil {
332+
return nil, err
333+
}
334+
return outBuf.Bytes(), nil
335+
})
336+
}
337+
369338
// EncodeFile is a convenience function for file-based encoding.
370339
func EncodeFile(inputPath, outputPath string) error {
371340
return codec.EncodeFile(NewStreamingEncoder(), inputPath, outputPath)

algorithms/arithmetic/go/streaming.go

Lines changed: 0 additions & 41 deletions
This file was deleted.

algorithms/arithmetic/go/streaming_test.go

Lines changed: 0 additions & 33 deletions
This file was deleted.

algorithms/huffman/go/huffman.go

Lines changed: 31 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@ package huffman
33

44
import (
55
"bufio"
6+
"bytes"
67
"container/heap"
7-
"encoding/binary"
88
"fmt"
99
"io"
1010
"os"
@@ -130,33 +130,15 @@ func BuildFrequenciesFromFile(path string) ([]uint32, error) {
130130
}
131131

132132
// WriteFrequencies serializes a frequency table to the writer.
133+
// This is an alias for codec.WriteFrequencies for backward compatibility.
133134
func WriteFrequencies(w io.Writer, freq []uint32) error {
134-
count := uint32(len(freq))
135-
if err := binary.Write(w, binary.LittleEndian, count); err != nil {
136-
return err
137-
}
138-
for _, v := range freq {
139-
if err := binary.Write(w, binary.LittleEndian, v); err != nil {
140-
return err
141-
}
142-
}
143-
return nil
135+
return codec.WriteFrequencies(w, freq)
144136
}
145137

146138
// ReadFrequencies deserializes a frequency table from the reader.
139+
// This is an alias for codec.ReadFrequencies for backward compatibility.
147140
func ReadFrequencies(r io.Reader) ([]uint32, error) {
148-
var count uint32
149-
if err := binary.Read(r, binary.LittleEndian, &count); err != nil {
150-
return nil, codec.WrapError(codec.KindTruncated, "failed to read frequency table", err)
151-
}
152-
if count != uint32(SymbolLimit) {
153-
return nil, codec.NewError(codec.KindCorrupt, fmt.Sprintf("invalid frequency table size: %d", count))
154-
}
155-
freq := make([]uint32, count)
156-
if err := binary.Read(r, binary.LittleEndian, freq); err != nil {
157-
return nil, codec.WrapError(codec.KindTruncated, "failed to read frequency table", err)
158-
}
159-
return freq, nil
141+
return codec.ReadFrequencies(r, SymbolLimit)
160142
}
161143

162144
// BuildCodes generates Huffman codes for each symbol by traversing the tree.
@@ -294,6 +276,32 @@ func Decode(r io.Reader, w io.Writer) error {
294276
return bw.Flush()
295277
}
296278

279+
// NewStreamingEncoder creates a new streaming Huffman encoder.
280+
// It uses a buffered encoder that collects all input and encodes in one pass
281+
// during Finish(), since Huffman encoding requires complete input for frequency analysis.
282+
func NewStreamingEncoder() codec.Encoder {
283+
return codec.NewBufferedEncoder(func(input []byte) ([]byte, error) {
284+
var outBuf bytes.Buffer
285+
if err := Encode(bytes.NewReader(input), &outBuf); err != nil {
286+
return nil, err
287+
}
288+
return outBuf.Bytes(), nil
289+
})
290+
}
291+
292+
// NewStreamingDecoder creates a new streaming Huffman decoder.
293+
// It uses a buffered decoder that collects all input and decodes in one pass
294+
// during Finish().
295+
func NewStreamingDecoder() codec.Decoder {
296+
return codec.NewBufferedDecoder(func(input []byte) ([]byte, error) {
297+
var outBuf bytes.Buffer
298+
if err := Decode(bytes.NewReader(input), &outBuf); err != nil {
299+
return nil, err
300+
}
301+
return outBuf.Bytes(), nil
302+
})
303+
}
304+
297305
// EncodeFile is a convenience function for file-based encoding.
298306
func EncodeFile(inputPath, outputPath string) error {
299307
return codec.EncodeFile(NewStreamingEncoder(), inputPath, outputPath)

algorithms/huffman/go/streaming.go

Lines changed: 0 additions & 41 deletions
This file was deleted.

algorithms/huffman/go/streaming_test.go

Lines changed: 0 additions & 36 deletions
This file was deleted.

0 commit comments

Comments
 (0)