Skip to content

Commit

Permalink
apache/arrow GH-39013: [Go][Integration] Support cABI import/export of StringView in Go
Browse files Browse the repository at this point in the history
  • Loading branch information
bkietz committed Dec 1, 2023
1 parent 92fe831 commit 7de4b26
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 10 deletions.
1 change: 0 additions & 1 deletion dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1855,7 +1855,6 @@ def _temp_path():

generate_binary_view_case()
.skip_tester('C#')
.skip_tester('Go')
.skip_tester('Java')
.skip_tester('JS')
.skip_tester('Rust'),
Expand Down
46 changes: 46 additions & 0 deletions go/arrow/cdata/cdata.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ var formatToSimpleType = map[string]arrow.DataType{
"Z": arrow.BinaryTypes.LargeBinary,
"u": arrow.BinaryTypes.String,
"U": arrow.BinaryTypes.LargeString,
"vz": arrow.BinaryTypes.BinaryView,
"vu": arrow.BinaryTypes.StringView,
"tdD": arrow.FixedWidthTypes.Date32,
"tdm": arrow.FixedWidthTypes.Date64,
"tts": arrow.FixedWidthTypes.Time32s,
Expand Down Expand Up @@ -485,6 +487,10 @@ func (imp *cimporter) doImport() error {
return imp.importStringLike(int64(arrow.Int64SizeBytes))
case *arrow.LargeBinaryType:
return imp.importStringLike(int64(arrow.Int64SizeBytes))
case *arrow.StringViewType:
return imp.importBinaryViewLike()
case *arrow.BinaryViewType:
return imp.importBinaryViewLike()
case *arrow.ListType:
return imp.importListLike()
case *arrow.LargeListType:
Expand Down Expand Up @@ -654,6 +660,46 @@ func (imp *cimporter) importStringLike(offsetByteWidth int64) (err error) {
return
}

// importBinaryViewLike imports a BinaryView or StringView array from the C
// buffers held by imp and stores the result in imp.data.
//
// The C buffers are consumed in this order:
//
//	[0]       validity bitmap
//	[1]       view headers (arrow.ViewHeaderSizeBytes per element)
//	[2..n-2]  variadic data buffers
//	[n-1]     one int64 byte length per variadic data buffer
//
// where n == len(imp.cbuffers). The trailing sizes buffer is only read during
// import; it is not part of the resulting array data, which therefore has
// n-1 buffers.
//
// NOTE(review): fewer than three C buffers is not rejected here and will
// panic on the make/index below — confirm that callers validate n_buffers.
func (imp *cimporter) importBinaryViewLike() (err error) {
	if err = imp.checkNoChildren(); err != nil {
		return
	}

	buffers := make([]*memory.Buffer, len(imp.cbuffers)-1)
	// Drop the importer's local references on every return path. On success
	// the array data built by array.NewData is expected to hold its own
	// references (array.NewData retains the buffers it is given), so the
	// buffers stay alive for as long as imp.data does.
	defer func() {
		for _, buf := range buffers {
			if buf != nil {
				buf.Release()
			}
		}
	}()

	if buffers[0], err = imp.importNullBitmap(0); err != nil {
		return
	}

	if buffers[1], err = imp.importFixedSizeBuffer(1, int64(arrow.ViewHeaderSizeBytes)); err != nil {
		return
	}

	// The sizes live in the LAST C buffer (index len(buffers)), not in
	// buffer 1, and there is exactly one int64 per variadic data buffer
	// regardless of the array's length or offset.
	sizesIdx := len(buffers)
	numDataBuffers := len(buffers) - 2
	sizesNumBytes := int64(numDataBuffers) * int64(arrow.Int64SizeBytes)

	var dataBufferSizes *memory.Buffer
	if dataBufferSizes, err = imp.importVariableValuesBuffer(sizesIdx, 1, sizesNumBytes); err != nil {
		return
	}
	defer dataBufferSizes.Release()

	for i, size := range arrow.Int64Traits.CastFromBytes(dataBufferSizes.Bytes()) {
		if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil {
			return
		}
	}

	imp.data = array.NewData(imp.dt, int(imp.arr.length), buffers, nil, int(imp.arr.null_count), int(imp.arr.offset))
	return
}

func (imp *cimporter) importListLike() (err error) {
if err = imp.checkNumChildren(1); err != nil {
return err
Expand Down
52 changes: 43 additions & 9 deletions go/arrow/cdata/cdata_exports.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string {
return "u"
case *arrow.LargeStringType:
return "U"
case *arrow.BinaryViewType:
return "vz"
case *arrow.StringViewType:
return "vu"
case *arrow.Date32Type:
return "tdD"
case *arrow.Date64Type:
Expand Down Expand Up @@ -328,6 +332,15 @@ func allocateBufferPtrArr(n int) (out []*C.void) {
return
}

// allocateBufferSizeArr returns a slice of n C.int64_t values whose backing
// storage comes from C.calloc rather than from the Go heap, so the memory is
// zero-initialised and its address can be handed to C code.
//
// The slice header is filled in by hand so that len and cap both equal n.
// This function never frees the allocation.
// NOTE(review): presumably the array's release callback frees it — confirm.
func allocateBufferSizeArr(n int) (out []C.int64_t) {
	const elemSize = unsafe.Sizeof(int64(0))
	mem := C.calloc(C.size_t(n), C.size_t(elemSize))

	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&out))
	hdr.Data = uintptr(mem)
	hdr.Len = n
	hdr.Cap = n

	return out
}

func (exp *schemaExporter) finish(out *CArrowSchema) {
out.dictionary = nil
if exp.dict != nil {
Expand Down Expand Up @@ -369,14 +382,14 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) {
}

nbuffers := len(arr.Data().Buffers())
buf_offset := 0
bufs := arr.Data().Buffers()
// Some types don't have validity bitmaps, but we keep them shifted
// to make processing easier in other contexts. This means that
// we have to adjust when exporting.
has_validity_bitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID())
if nbuffers > 0 && !has_validity_bitmap {
nbuffers--
buf_offset++
bufs = bufs[1:]
}

out.dictionary = nil
Expand All @@ -386,26 +399,47 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) {
out.n_buffers = C.int64_t(nbuffers)
out.buffers = nil

needBufferSizes := func() bool {
switch arr.(type) {
case *array.BinaryView:
return true
case *array.StringView:
return true
default:
return false
}
}()
if needBufferSizes {
nbuffers++
}

if nbuffers > 0 {
bufs := arr.Data().Buffers()
buffers := allocateBufferPtrArr(nbuffers)
for i, buf := range bufs[buf_offset:] {
cBufs := allocateBufferPtrArr(nbuffers)
for i, buf := range bufs {
if buf == nil || buf.Len() == 0 {
if i > 0 || !has_validity_bitmap {
// apache/arrow#33936: export a dummy buffer to be friendly to
// implementations that don't import NULL properly
buffers[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion))
cBufs[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion))
} else {
// null pointer permitted for the validity bitmap
// (assuming null count is 0)
buffers[i] = nil
cBufs[i] = nil
}
continue
}

buffers[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0]))
cBufs[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0]))
}

if needBufferSizes {
sizes := allocateBufferSizeArr(len(bufs[2:]))
for i, buf := range bufs[2:] {
sizes[i] = C.int64_t(buf.Len())
}
cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0]))
}
out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&buffers[0]))
out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cBufs[0]))
}

arr.Data().Retain()
Expand Down

0 comments on commit 7de4b26

Please sign in to comment.