Skip to content

Commit 9e79dc9

Browse files
eric-wang-1990colin-rogers-dbt
authored andcommitted
feat(csharp/src/Drivers/Apache): enhance GetColumns with BASE_TYPE_NAME column (apache#2695)
## Summary This PR enhances the C# ADBC driver's `GetColumns` functionality by adding a new `BASE_TYPE_NAME` column to the result set. This column provides the base type name without parameters or decorations, making it easier for clients to identify the fundamental data type regardless of its parameterization. ## Proposed Changes - Added a new `BASE_TYPE_NAME` column to the `GetColumns` result schema - Enhanced the `HiveServer2Statement` class to process and populate the `BASE_TYPE_NAME` column - Changed visibility modifiers for several methods from `protected` to `public` or `internal` to enable the enhancement - Added support for the `VARIANT` SQL type in the `SqlTypeNameParser` - Created comprehensive unit tests to verify the functionality with various data types ## Implementation Details The implementation: 1. Intercepts `GetColumns` query results 2. Processes each row to extract the base type name using the existing `SetPrecisionScaleAndTypeName` method 3. Adds the base type name as a new column in the result set 4. Preserves and potentially enhances precision and scale information for parameterized types ## Benefits - Simplifies client code that needs to identify base types (e.g., all DECIMAL types regardless of precision/scale) - Maintains compatibility with existing code as this is an additive change - Improves developer experience by providing clearer type information ## Testing Added comprehensive unit tests that verify: - The presence of the BASE_TYPE_NAME column in GetColumns results - Correct base type extraction for simple types - Correct base type extraction for complex types (DECIMAL, INTERVAL, MAP, ARRAY, STRUCT) - Precision and scale preservation for DECIMAL types - Proper handling of type aliases (INT vs INTEGER)
1 parent ac266b8 commit 9e79dc9

7 files changed

Lines changed: 354 additions & 13 deletions

File tree

csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ protected static Uri GetBaseAddress(string? uri, string? hostName, string? path,
768768
return baseAddress;
769769
}
770770

771-
protected IReadOnlyDictionary<string, int> GetColumnIndexMap(List<TColumnDesc> columns) => columns
771+
internal IReadOnlyDictionary<string, int> GetColumnIndexMap(List<TColumnDesc> columns) => columns
772772
.Select(t => new { Index = t.Position - ColumnMapIndexOffset, t.ColumnName })
773773
.ToDictionary(t => t.ColumnName, t => t.Index);
774774

@@ -1242,12 +1242,7 @@ private static StructArray GetColumnSchema(TableInfo tableInfo)
12421242
nullBitmapBuffer.Build());
12431243
}
12441244

1245-
protected abstract void SetPrecisionScaleAndTypeName(
1246-
short colType,
1247-
string typeName,
1248-
TableInfo? tableInfo,
1249-
int columnSize,
1250-
int decimalDigits);
1245+
internal abstract void SetPrecisionScaleAndTypeName(short columnType, string typeName, TableInfo? tableInfo, int columnSize, int decimalDigits);
12511246

12521247
public override Schema GetTableSchema(string? catalog, string? dbSchema, string? tableName)
12531248
{
@@ -1364,7 +1359,7 @@ private static IArrowType GetArrowType(int columnTypeId, string typeName, bool i
13641359
}
13651360
}
13661361

1367-
protected async Task<TRowSet> FetchResultsAsync(TOperationHandle operationHandle, long batchSize = BatchSizeDefault, CancellationToken cancellationToken = default)
1362+
internal async Task<TRowSet> FetchResultsAsync(TOperationHandle operationHandle, long batchSize = BatchSizeDefault, CancellationToken cancellationToken = default)
13681363
{
13691364
await PollForResponseAsync(operationHandle, Client, PollTimeMillisecondsDefault, cancellationToken);
13701365

csharp/src/Drivers/Apache/Hive2/HiveServer2HttpConnection.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ protected override TOpenSessionReq CreateSessionRequest()
224224
return req;
225225
}
226226

227-
protected override void SetPrecisionScaleAndTypeName(
227+
internal override void SetPrecisionScaleAndTypeName(
228228
short colType,
229229
string typeName,
230230
TableInfo? tableInfo,

csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs

Lines changed: 127 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
using System.Threading;
2222
using System.Threading.Tasks;
2323
using Apache.Arrow.Ipc;
24+
using Apache.Arrow.Types;
2425
using Apache.Hive.Service.Rpc.Thrift;
2526
using Thrift.Transport;
2627

@@ -406,7 +407,39 @@ private async Task<QueryResult> GetColumnsAsync(CancellationToken cancellationTo
406407
cancellationToken);
407408
OperationHandle = resp.OperationHandle;
408409

409-
return await GetQueryResult(resp.DirectResults, cancellationToken);
410+
// Common variables declared upfront
411+
TGetResultSetMetadataResp metadata;
412+
Schema schema;
413+
TRowSet rowSet;
414+
415+
// For GetColumns, we need to enhance the result with BASE_TYPE_NAME
416+
if (Connection.AreResultsAvailableDirectly() && resp.DirectResults?.ResultSet?.Results != null)
417+
{
418+
// Get data from direct results
419+
metadata = resp.DirectResults.ResultSetMetadata;
420+
schema = Connection.SchemaParser.GetArrowSchema(metadata.Schema, Connection.DataTypeConversion);
421+
rowSet = resp.DirectResults.ResultSet.Results;
422+
}
423+
else
424+
{
425+
// Poll and fetch results
426+
await HiveServer2Connection.PollForResponseAsync(OperationHandle!, Connection.Client, PollTimeMilliseconds, cancellationToken);
427+
428+
// Get metadata
429+
metadata = await HiveServer2Connection.GetResultSetMetadataAsync(OperationHandle!, Connection.Client, cancellationToken);
430+
schema = Connection.SchemaParser.GetArrowSchema(metadata.Schema, Connection.DataTypeConversion);
431+
432+
// Fetch the results
433+
rowSet = await Connection.FetchResultsAsync(OperationHandle!, BatchSize, cancellationToken);
434+
}
435+
436+
// Common processing for both paths
437+
int columnCount = HiveServer2Reader.GetColumnCount(rowSet);
438+
int rowCount = HiveServer2Reader.GetRowCount(rowSet, columnCount);
439+
IReadOnlyList<IArrowArray> data = HiveServer2Reader.GetArrowArrayData(rowSet, columnCount, schema, Connection.DataTypeConversion);
440+
441+
// Return the enhanced result with added BASE_TYPE_NAME column
442+
return EnhanceGetColumnsResult(schema, data, rowCount, metadata, rowSet);
410443
}
411444

412445
private async Task<Schema> GetResultSetSchemaAsync(TOperationHandle operationHandle, TCLIService.IAsync client, CancellationToken cancellationToken = default)
@@ -426,12 +459,105 @@ private async Task<QueryResult> GetQueryResult(TSparkDirectResults? directResult
426459
int columnCount = HiveServer2Reader.GetColumnCount(rowSet);
427460
int rowCount = HiveServer2Reader.GetRowCount(rowSet, columnCount);
428461
IReadOnlyList<IArrowArray> data = HiveServer2Reader.GetArrowArrayData(rowSet, columnCount, schema, Connection.DataTypeConversion);
462+
429463
return new QueryResult(rowCount, new HiveServer2Connection.HiveInfoArrowStream(schema, data));
430464
}
431465

432466
await HiveServer2Connection.PollForResponseAsync(OperationHandle!, Connection.Client, PollTimeMilliseconds, cancellationToken);
433467
schema = await GetResultSetSchemaAsync(OperationHandle!, Connection.Client, cancellationToken);
468+
434469
return new QueryResult(-1, Connection.NewReader(this, schema));
435470
}
471+
472+
protected internal QueryResult EnhanceGetColumnsResult(Schema originalSchema, IReadOnlyList<IArrowArray> originalData,
473+
int rowCount, TGetResultSetMetadataResp metadata, TRowSet rowSet)
474+
{
475+
// Create a column map using Connection's GetColumnIndexMap method
476+
var columnMap = Connection.GetColumnIndexMap(metadata.Schema.Columns);
477+
478+
// Get column indices - we know these columns always exist
479+
int typeNameIndex = columnMap["TYPE_NAME"];
480+
int dataTypeIndex = columnMap["DATA_TYPE"];
481+
int columnSizeIndex = columnMap["COLUMN_SIZE"];
482+
int decimalDigitsIndex = columnMap["DECIMAL_DIGITS"];
483+
484+
// Extract the existing arrays
485+
StringArray typeNames = (StringArray)originalData[typeNameIndex];
486+
Int32Array originalColumnSizes = (Int32Array)originalData[columnSizeIndex];
487+
Int32Array originalDecimalDigits = (Int32Array)originalData[decimalDigitsIndex];
488+
489+
// Create enhanced schema with BASE_TYPE_NAME column
490+
var enhancedFields = originalSchema.FieldsList.ToList();
491+
enhancedFields.Add(new Field("BASE_TYPE_NAME", StringType.Default, true));
492+
Schema enhancedSchema = new Schema(enhancedFields, originalSchema.Metadata);
493+
494+
// Pre-allocate arrays to store our values
495+
int length = typeNames.Length;
496+
List<string> baseTypeNames = new List<string>(length);
497+
List<int> columnSizeValues = new List<int>(length);
498+
List<int> decimalDigitsValues = new List<int>(length);
499+
500+
// Process each row
501+
for (int i = 0; i < length; i++)
502+
{
503+
string? typeName = typeNames.GetString(i);
504+
short colType = (short)rowSet.Columns[dataTypeIndex].I32Val.Values.Values[i];
505+
int columnSize = originalColumnSizes.GetValue(i).GetValueOrDefault();
506+
int decimalDigits = originalDecimalDigits.GetValue(i).GetValueOrDefault();
507+
508+
// Create a TableInfo for this row
509+
var tableInfo = new HiveServer2Connection.TableInfo(string.Empty);
510+
511+
// Process all types through SetPrecisionScaleAndTypeName
512+
Connection.SetPrecisionScaleAndTypeName(colType, typeName ?? string.Empty, tableInfo, columnSize, decimalDigits);
513+
514+
// Get base type name
515+
string baseTypeName;
516+
if (tableInfo.BaseTypeName.Count > 0)
517+
{
518+
string? baseTypeNameValue = tableInfo.BaseTypeName[0];
519+
baseTypeName = baseTypeNameValue ?? string.Empty;
520+
}
521+
else
522+
{
523+
baseTypeName = typeName ?? string.Empty;
524+
}
525+
baseTypeNames.Add(baseTypeName);
526+
527+
// Get precision/scale values
528+
if (tableInfo.Precision.Count > 0)
529+
{
530+
int? precisionValue = tableInfo.Precision[0];
531+
columnSizeValues.Add(precisionValue.GetValueOrDefault(columnSize));
532+
}
533+
else
534+
{
535+
columnSizeValues.Add(columnSize);
536+
}
537+
538+
if (tableInfo.Scale.Count > 0)
539+
{
540+
int? scaleValue = tableInfo.Scale[0];
541+
decimalDigitsValues.Add(scaleValue.GetValueOrDefault(decimalDigits));
542+
}
543+
else
544+
{
545+
decimalDigitsValues.Add(decimalDigits);
546+
}
547+
}
548+
549+
// Create the Arrow arrays directly from our data arrays
550+
StringArray baseTypeNameArray = new StringArray.Builder().AppendRange(baseTypeNames).Build();
551+
Int32Array columnSizeArray = new Int32Array.Builder().AppendRange(columnSizeValues).Build();
552+
Int32Array decimalDigitsArray = new Int32Array.Builder().AppendRange(decimalDigitsValues).Build();
553+
554+
// Create enhanced data with modified columns
555+
var enhancedData = new List<IArrowArray>(originalData);
556+
enhancedData[columnSizeIndex] = columnSizeArray;
557+
enhancedData[decimalDigitsIndex] = decimalDigitsArray;
558+
enhancedData.Add(baseTypeNameArray);
559+
560+
return new QueryResult(rowCount, new HiveServer2Connection.HiveInfoArrowStream(enhancedSchema, enhancedData));
561+
}
436562
}
437563
}

csharp/src/Drivers/Apache/Hive2/SqlTypeNameParser.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,11 @@ internal abstract class SqlTypeNameParser<T> : ISqlTypeNameParser where T : SqlT
8989

9090
// Note: the INTERVAL sql type does not have an associated column type id.
9191
private static readonly HashSet<ISqlTypeNameParser> s_parsers = new HashSet<ISqlTypeNameParser>(s_parserMap.Values
92-
.Concat([SqlIntervalTypeParser.Default, SqlSimpleTypeParser.Default("VOID")]));
92+
.Concat([
93+
SqlIntervalTypeParser.Default,
94+
SqlSimpleTypeParser.Default("VOID"),
95+
SqlSimpleTypeParser.Default("VARIANT"),
96+
]));
9397

9498
/// <summary>
9599
/// Gets the base SQL type name without decoration or sub clauses

csharp/src/Drivers/Apache/Impala/ImpalaConnection.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ protected override Task<TRowSet> GetRowSetAsync(TGetSchemasResp response, Cancel
8686
protected internal override Task<TRowSet> GetRowSetAsync(TGetPrimaryKeysResp response, CancellationToken cancellationToken = default) =>
8787
FetchResultsAsync(response.OperationHandle, cancellationToken: cancellationToken);
8888

89-
protected override void SetPrecisionScaleAndTypeName(
89+
internal override void SetPrecisionScaleAndTypeName(
9090
short colType,
9191
string typeName,
9292
TableInfo? tableInfo,

csharp/src/Drivers/Apache/Spark/SparkConnection.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public override AdbcStatement CreateStatement()
6363

6464
protected internal override int PositionRequiredOffset => 1;
6565

66-
protected override void SetPrecisionScaleAndTypeName(
66+
internal override void SetPrecisionScaleAndTypeName(
6767
short colType,
6868
string typeName,
6969
TableInfo? tableInfo,

0 commit comments

Comments
 (0)