diff --git a/sio/csvio/reader.go b/sio/csvio/reader.go
index 4581a47015..5693acb0ad 100644
--- a/sio/csvio/reader.go
+++ b/sio/csvio/reader.go
@@ -8,6 +8,9 @@ import (
 	"strconv"
 	"unicode"
 
+	utext "golang.org/x/text/encoding/unicode"
+	"golang.org/x/text/transform"
+
 	"github.com/brimdata/super"
 	"github.com/brimdata/super/sup"
 )
@@ -35,7 +38,8 @@ type ReaderOpts struct {
 //}
 
 func NewReader(sctx *super.Context, r io.Reader, opts ReaderOpts) *Reader {
-	preprocess := newPreprocess(r, opts.Delim)
+	utf8Reader := transform.NewReader(r, utext.UTF8BOM.NewDecoder())
+	preprocess := newPreprocess(utf8Reader, opts.Delim)
 	reader := csv.NewReader(preprocess)
 	if opts.Delim != 0 {
 		reader.Comma = opts.Delim
diff --git a/sio/csvio/ztests/bom.yaml b/sio/csvio/ztests/bom.yaml
new file mode 100644
index 0000000000..5f8e97f961
--- /dev/null
+++ b/sio/csvio/ztests/bom.yaml
@@ -0,0 +1,15 @@
+# The input data leads with the UTF-8 representation of byte-order mark (BOM).
+# See https://en.wikipedia.org/wiki/Byte_order_mark
+
+script: |
+  head -c 3 with-bom.csv | xxd
+  super -s -i csv -c "cut a" with-bom.csv
+
+inputs:
+  - name: with-bom.csv
+
+outputs:
+  - name: stdout
+    data: |
+      00000000: efbb bf                                  ...
+      {a:"foo"}
diff --git a/sio/csvio/ztests/with-bom.csv b/sio/csvio/ztests/with-bom.csv
new file mode 100644
index 0000000000..9ba8d37dcf
--- /dev/null
+++ b/sio/csvio/ztests/with-bom.csv
@@ -0,0 +1,2 @@
+﻿a,b
+foo,bar