Skip to content

Commit 1834882

Browse files
committed
Add UnsafeAppend to dict builder
This commit adds a proper implementation of `UnsafeAppend` and `UnsafeAppendBoolToBitmap` to the dictionary-encoded array builder. This makes it possible to call `Reserve(n)` once and then make `n` calls to `UnsafeAppend` or `UnsafeAppendBoolToBitmap`. I also added a test for it.
1 parent ae1f7f5 commit 1834882

2 files changed

Lines changed: 99 additions & 2 deletions

File tree

arrow/array/dictionary.go

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,8 @@ func arrayApproxEqualDict(l, r *Dictionary, opt equalOption) bool {
314314
// helper for building the properly typed indices of the dictionary builder
315315
type IndexBuilder struct {
316316
Builder
317-
Append func(int)
317+
Append func(int)
318+
UnsafeAppend func(int)
318319
}
319320

320321
func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret IndexBuilder, err error) {
@@ -324,34 +325,58 @@ func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret
324325
ret.Append = func(idx int) {
325326
ret.Builder.(*Int8Builder).Append(int8(idx))
326327
}
328+
ret.UnsafeAppend = func(idx int) {
329+
ret.Builder.(*Int8Builder).UnsafeAppend(int8(idx))
330+
}
327331
case arrow.UINT8:
328332
ret.Append = func(idx int) {
329333
ret.Builder.(*Uint8Builder).Append(uint8(idx))
330334
}
335+
ret.UnsafeAppend = func(idx int) {
336+
ret.Builder.(*Uint8Builder).UnsafeAppend(uint8(idx))
337+
}
331338
case arrow.INT16:
332339
ret.Append = func(idx int) {
333340
ret.Builder.(*Int16Builder).Append(int16(idx))
334341
}
342+
ret.UnsafeAppend = func(idx int) {
343+
ret.Builder.(*Int16Builder).UnsafeAppend(int16(idx))
344+
}
335345
case arrow.UINT16:
336346
ret.Append = func(idx int) {
337347
ret.Builder.(*Uint16Builder).Append(uint16(idx))
338348
}
349+
ret.UnsafeAppend = func(idx int) {
350+
ret.Builder.(*Uint16Builder).UnsafeAppend(uint16(idx))
351+
}
339352
case arrow.INT32:
340353
ret.Append = func(idx int) {
341354
ret.Builder.(*Int32Builder).Append(int32(idx))
342355
}
356+
ret.UnsafeAppend = func(idx int) {
357+
ret.Builder.(*Int32Builder).UnsafeAppend(int32(idx))
358+
}
343359
case arrow.UINT32:
344360
ret.Append = func(idx int) {
345361
ret.Builder.(*Uint32Builder).Append(uint32(idx))
346362
}
363+
ret.UnsafeAppend = func(idx int) {
364+
ret.Builder.(*Uint32Builder).UnsafeAppend(uint32(idx))
365+
}
347366
case arrow.INT64:
348367
ret.Append = func(idx int) {
349368
ret.Builder.(*Int64Builder).Append(int64(idx))
350369
}
370+
ret.UnsafeAppend = func(idx int) {
371+
ret.Builder.(*Int64Builder).UnsafeAppend(int64(idx))
372+
}
351373
case arrow.UINT64:
352374
ret.Append = func(idx int) {
353375
ret.Builder.(*Uint64Builder).Append(uint64(idx))
354376
}
377+
ret.UnsafeAppend = func(idx int) {
378+
ret.Builder.(*Uint64Builder).UnsafeAppend(uint64(idx))
379+
}
355380
default:
356381
debug.Assert(false, "dictionary index type must be integral")
357382
err = fmt.Errorf("dictionary index type must be integral, not %s", dt)
@@ -647,7 +672,11 @@ func (b *dictionaryBuilder) AppendEmptyValues(n int) {
647672
}
648673

649674
func (b *dictionaryBuilder) UnsafeAppendBoolToBitmap(v bool) {
650-
panic("Calling UnsafeAppendBoolToBitmap on dictionaryBuilder would leave it in inconsistent state. Use AppendIndices instead.")
675+
if !v {
676+
b.nulls += 1
677+
}
678+
b.length += 1
679+
b.idxBuilder.UnsafeAppendBoolToBitmap(v)
651680
}
652681

653682
func (b *dictionaryBuilder) Reserve(n int) {
@@ -785,6 +814,13 @@ func (b *dictionaryBuilder) insertDictBytes(val []byte) error {
785814
return err
786815
}
787816

817+
func (b *dictionaryBuilder) unsafeAppendValue(val interface{}) error {
818+
idx, _, err := b.memoTable.GetOrInsert(val)
819+
b.idxBuilder.UnsafeAppend(idx)
820+
b.length += 1
821+
return err
822+
}
823+
788824
func (b *dictionaryBuilder) appendValue(val interface{}) error {
789825
idx, _, err := b.memoTable.GetOrInsert(val)
790826
b.idxBuilder.Append(idx)
@@ -994,6 +1030,26 @@ type dictBuilder[T arrow.ValueType] struct {
9941030
dictionaryBuilder
9951031
}
9961032

1033+
func (b *dictBuilder[T]) UnsafeAppend(v T) error {
1034+
switch val := any(v).(type) {
1035+
case arrow.Duration:
1036+
return b.unsafeAppendValue(int64(val))
1037+
case arrow.Timestamp:
1038+
return b.unsafeAppendValue(int64(val))
1039+
case arrow.Time32:
1040+
return b.unsafeAppendValue(int32(val))
1041+
case arrow.Time64:
1042+
return b.unsafeAppendValue(int64(val))
1043+
case arrow.Date32:
1044+
return b.unsafeAppendValue(int32(val))
1045+
case arrow.Date64:
1046+
return b.unsafeAppendValue(int64(val))
1047+
case arrow.MonthInterval:
1048+
return b.unsafeAppendValue(int32(val))
1049+
}
1050+
return b.unsafeAppendValue(v)
1051+
}
1052+
9971053
func (b *dictBuilder[T]) Append(v T) error {
9981054
switch val := any(v).(type) {
9991055
case arrow.Duration:

arrow/array/dictionary_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,47 @@ func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderInit() {
145145
p.True(array.Equal(expected, arr))
146146
}
147147

148+
func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderReserveAndAppend() {
149+
expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
150+
bldr := array.NewDictionaryBuilder(p.mem, expectedType)
151+
defer bldr.Release()
152+
153+
builder := reflect.ValueOf(bldr)
154+
appendFn := builder.MethodByName("UnsafeAppend")
155+
validFn := builder.MethodByName("UnsafeAppendBoolToBitmap")
156+
157+
bldr.Reserve(7)
158+
validFn.Call([]reflect.Value{reflect.ValueOf(true)})
159+
validFn.Call([]reflect.Value{reflect.ValueOf(false)})
160+
appendFn.Call([]reflect.Value{reflect.ValueOf(0).Convert(p.reftyp)})
161+
appendFn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})
162+
validFn.Call([]reflect.Value{reflect.ValueOf(false)})
163+
appendFn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})
164+
appendFn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})
165+
166+
p.EqualValues(7, bldr.Len())
167+
p.EqualValues(2, bldr.NullN())
168+
169+
p.EqualValues(3, bldr.DictionarySize())
170+
171+
arr := bldr.NewArray().(*array.Dictionary)
172+
defer arr.Release()
173+
174+
p.True(arrow.TypeEqual(expectedType, arr.DataType()))
175+
expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[0, 1, 2]"))
176+
p.NoError(err)
177+
defer expectedDict.Release()
178+
179+
expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, null, 0, 1, null, 1, 2]"))
180+
p.NoError(err)
181+
defer expectedIndices.Release()
182+
183+
expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict)
184+
defer expected.Release()
185+
186+
p.True(array.Equal(expected, arr))
187+
}
188+
148189
func (p *PrimitiveDictionaryTestSuite) TestDictionaryNewBuilder() {
149190
valueType := p.typ
150191
dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]"))

0 commit comments

Comments
 (0)