diff --git a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/AvroSerdeUtils.java b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/AvroSerdeUtils.java
new file mode 100644
index 000000000..3bad95745
--- /dev/null
+++ b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/AvroSerdeUtils.java
@@ -0,0 +1,51 @@
+/**
+ * Copyright 2021 LinkedIn Corporation. All rights reserved.
+ * Licensed under the BSD-2 Clause license.
+ * See LICENSE in the project root for license information.
+ */
+package com.linkedin.coral.schema.avro;
+
+import java.util.List;
+
+import org.apache.avro.Schema;
+
+
+/**
+ * Utilities useful only to the Hive AvroSerde itself.
+ * Please refer to {@link org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils} for the original implementation.
+ */
+final class AvroSerdeUtils {
+
+  public static final String AVRO_SCHEMA_LITERAL = "avro.schema.literal";
+
+  private AvroSerdeUtils() {
+    // Utility class; not meant to be instantiated.
+  }
+
+  /**
+   * Determine if an Avro schema is of type Union[T, NULL]. Avro supports nullable
+   * types via a union of type T and null. This is a very common use case.
+   * As such, we want to silently convert it to just T and allow the value to be null.
+   *
+   * @param schema the Avro schema to inspect
+   * @return true if type represents Union[T, Null], false otherwise
+   */
+  public static boolean isNullableType(Schema schema) {
+    return schema.getType().equals(Schema.Type.UNION) && schema.getTypes().size() == 2
+        && (schema.getTypes().get(0).getType().equals(Schema.Type.NULL)
+            || schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
+    // [null, null] not allowed, so this check is ok.
+  }
+
+  /**
+   * In a nullable type, get the schema for the non-nullable type. This method
+   * does no checking that the provided Schema is nullable.
+   *
+   * @param schema the nullable (union) schema to unwrap
+   * @return the second branch when the first branch is of type NULL, otherwise the first branch
+   */
+  public static Schema getOtherTypeFromNullableType(Schema schema) {
+    List<Schema> types = schema.getTypes();
+    return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0);
+  }
+}
diff --git a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/SchemaUtilities.java b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/SchemaUtilities.java
index 3132b1122..74f6d64cc 100644
--- a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/SchemaUtilities.java
+++ b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/SchemaUtilities.java
@@ -26,7 +26,6 @@
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.codehaus.jackson.JsonNode;
@@ -674,15 +673,14 @@ private static List getPartitionCols(@Nonnull Table tableOrView) {
   private static String readSchemaFromSchemaLiteral(@Nonnull Table table) {
     Preconditions.checkNotNull(table);
 
-    String schemaStr = table.getParameters().get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName());
+    String schemaStr = table.getParameters().get(AvroSerdeUtils.AVRO_SCHEMA_LITERAL);
     if (Strings.isNullOrEmpty(schemaStr)) {
-      schemaStr = table.getSd().getSerdeInfo().getParameters()
-          .get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName());
+      schemaStr = table.getSd().getSerdeInfo().getParameters().get(AvroSerdeUtils.AVRO_SCHEMA_LITERAL);
     }
 
     if (Strings.isNullOrEmpty(schemaStr)) {
       LOG.debug("No avro schema defined under table or serde property {} for table {}",
-          AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), getCompleteName(table));
+          AvroSerdeUtils.AVRO_SCHEMA_LITERAL, getCompleteName(table));
     }
 
     return schemaStr;
diff --git a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/TypeInfoToAvroSchemaConverter.java b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/TypeInfoToAvroSchemaConverter.java
index 3f821952e..e2cd58a55 100644
--- a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/TypeInfoToAvroSchemaConverter.java
+++ b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/TypeInfoToAvroSchemaConverter.java
@@ -12,7 +12,6 @@
 import org.apache.avro.Schema;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.serde2.avro.AvroSerDe;
-import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;