Refine search
/**
 * Tells whether a schema is an "optional primitive": a union of exactly two members in which
 * one member is NULL and the other is accepted by {@code isPrimitive} or
 * {@code isPrimitiveArray}. The NULL member may come first or second.
 *
 * @param schema the Avro schema to inspect
 * @return {@code true} if the schema is such a two-member union, {@code false} otherwise
 */
public static boolean isOptionalPrimitive(Schema schema) {
  // Anything that is not a two-member union is rejected before the members are inspected.
  if (!schema.getType().equals(Schema.Type.UNION) || schema.getTypes().size() != 2) {
    return false;
  }

  Schema first = schema.getTypes().get(0);
  Schema second = schema.getTypes().get(1);

  // Layout [NULL, T]
  if (first.getType().equals(Schema.Type.NULL)
      && (isPrimitive(second) || isPrimitiveArray(second))) {
    return true;
  }

  // Layout [T, NULL]
  return second.getType().equals(Schema.Type.NULL)
      && (isPrimitive(first) || isPrimitiveArray(first));
}
/**
 * Builds the union {@code TypeInfo} for an Avro union schema by converting every member, in
 * declaration order, with {@code generateTypeInfo}.
 *
 * @param schema      the schema to convert; asserted to be of type UNION
 * @param seenSchemas handed through as-is to {@code generateTypeInfo} for each member
 * @return the result of {@code TypeInfoFactory.getUnionTypeInfo} over the member type infos
 * @throws AvroSerdeException if converting any member fails
 */
private static TypeInfo generateUnionTypeInfo(Schema schema, Set<Schema> seenSchemas)
    throws AvroSerdeException {
  assert schema.getType().equals(Schema.Type.UNION);

  final List<Schema> members = schema.getTypes();
  final int memberCount = members.size();
  final List<TypeInfo> memberTypeInfos = new ArrayList<TypeInfo>(memberCount);

  for (int i = 0; i < memberCount; i++) {
    memberTypeInfos.add(generateTypeInfo(members.get(i), seenSchemas));
  }

  return TypeInfoFactory.getUnionTypeInfo(memberTypeInfos);
}
/** * If the union schema is a nullable union, get the schema for the non-nullable type. * This method does no checking that the provided Schema is nullable. If the provided * union schema is non-nullable, it simply returns the union schema */ public static Schema getOtherTypeFromNullableType(Schema unionSchema) { final List<Schema> types = unionSchema.getTypes(); if (types.size() == 2) { // most common scenario if (types.get(0).getType() == Schema.Type.NULL) { return types.get(1); } if (types.get(1).getType() == Schema.Type.NULL) { return types.get(0); } // not a nullable union return unionSchema; } final List<Schema> itemSchemas = new ArrayList<>(); for (Schema itemSchema : types) { if (!Schema.Type.NULL.equals(itemSchema.getType())) { itemSchemas.add(itemSchema); } } if (itemSchemas.size() > 1) { return Schema.createUnion(itemSchemas); } else { return itemSchemas.get(0); } }
/**
 * Produces a {@code CREATE EXTERNAL TABLE IF NOT EXISTS ... STORED AS ORC} statement whose
 * columns mirror the fields of the given Avro record schema. Each column is rendered as
 * {@code <field name> <type>}, where the type text comes from {@code getHiveTypeFromAvroType}.
 *
 * @param avroSchema the Avro schema describing the table; must be of type RECORD
 * @param tableName  the table name, inserted into the statement verbatim
 * @return the DDL statement
 * @throws IllegalArgumentException if the schema is not of type RECORD
 */
public static String generateHiveDDL(Schema avroSchema, String tableName) {
  final Schema.Type schemaType = avroSchema.getType();
  if (!Schema.Type.RECORD.equals(schemaType)) {
    throw new IllegalArgumentException("Avro schema is of type " + schemaType.getName() + ", not RECORD");
  }

  final List<Schema.Field> fields = avroSchema.getFields();
  // A missing field list yields an empty column list rather than an error.
  final String columnList = (fields == null)
      ? ""
      : fields.stream()
          .map(field -> field.name() + " " + getHiveTypeFromAvroType(field.schema()))
          .collect(Collectors.joining(", "));

  return "CREATE EXTERNAL TABLE IF NOT EXISTS " + tableName + " (" + columnList + ") STORED AS ORC";
}
/**
 * Resolves the record schema behind a nested field's schema. A RECORD schema is returned
 * unchanged; for a UNION of at most two members, the first non-NULL member is returned,
 * provided it is a RECORD.
 *
 * @param inputSchema a RECORD schema, or a UNION of NULL and RECORD
 * @return the record schema
 * @throws IllegalArgumentException if the schema is neither RECORD nor UNION, if the union has
 *         more than two members, if its first non-NULL member is not a RECORD, or if it has
 *         no non-NULL member
 */
private static Schema getActualRecord(Schema inputSchema) {
  if (Type.RECORD.equals(inputSchema.getType())) {
    return inputSchema;
  }

  final String unionShapeMessage = "For union type in nested record, it should only have NULL and Record type";

  Preconditions.checkArgument(Type.UNION.equals(inputSchema.getType()),
      "Nested schema is only support with either record or union type of null with record");
  Preconditions.checkArgument(inputSchema.getTypes().size() <= 2, unionShapeMessage);

  for (Schema member : inputSchema.getTypes()) {
    if (!Type.NULL.equals(member.getType())) {
      // The first non-NULL member decides the outcome: it is either the record or an error.
      Preconditions.checkArgument(Type.RECORD.equals(member.getType()), unionShapeMessage);
      return member;
    }
  }

  // Only NULL members (or none at all) were found.
  throw new IllegalArgumentException(inputSchema + " is not supported.");
}
/**
 * Builds the struct {@code TypeInfo} for an Avro record schema, converting each field's schema
 * with {@code generateTypeInfo} and keeping the fields in declaration order.
 *
 * The record is added to {@code seenSchemas} before its fields are converted and is not
 * removed afterwards; meeting a record that is already in the set is reported as recursion.
 *
 * @param schema      the schema to convert; asserted to be of type RECORD
 * @param seenSchemas records encountered so far, or {@code null} to start with a fresh
 *                    identity-based set
 * @return the result of {@code TypeInfoFactory.getStructTypeInfo} for the field names and types
 * @throws AvroSerdeException if the record is already in {@code seenSchemas}, or if converting
 *         a field fails
 */
private static TypeInfo generateRecordTypeInfo(Schema schema, Set<Schema> seenSchemas)
    throws AvroSerdeException {
  assert schema.getType().equals(Schema.Type.RECORD);

  if (seenSchemas != null && seenSchemas.contains(schema)) {
    throw new AvroSerdeException(
        "Recursive schemas are not supported. Recursive schema was " + schema.getFullName());
  }
  if (seenSchemas == null) {
    seenSchemas = Collections.newSetFromMap(new IdentityHashMap<Schema, Boolean>());
  }
  seenSchemas.add(schema);

  final List<Schema.Field> recordFields = schema.getFields();
  final List<String> names = new ArrayList<String>(recordFields.size());
  final List<TypeInfo> fieldTypeInfos = new ArrayList<TypeInfo>(recordFields.size());

  for (Schema.Field recordField : recordFields) {
    names.add(recordField.name());
    fieldTypeInfos.add(generateTypeInfo(recordField.schema(), seenSchemas));
  }

  return TypeInfoFactory.getStructTypeInfo(names, fieldTypeInfos);
}
/*** * Flatten Union Schema * @param schema Union Schema to flatten * @param shouldPopulateLineage If lineage information should be tagged in the field, this is true when we are * un-nesting fields * @param flattenComplexTypes Flatten complex types recursively other than Record and Option * @return Flattened Union Schema */ private Schema flattenUnion(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) { Preconditions.checkNotNull(schema); Preconditions.checkArgument(Schema.Type.UNION.equals(schema.getType())); Schema flattenedSchema; List<Schema> flattenedUnionMembers = new ArrayList<>(); if (null != schema.getTypes() && schema.getTypes().size() > 0) { for (Schema oldUnionMember : schema.getTypes()) { if (flattenComplexTypes) { // It's member might still recursively contain records flattenedUnionMembers.add(flatten(oldUnionMember, shouldPopulateLineage, flattenComplexTypes)); } else { flattenedUnionMembers.add(oldUnionMember); } } } flattenedSchema = Schema.createUnion(flattenedUnionMembers); return flattenedSchema; }
private static void produceFlattenedHelper(Field field, Map<String, Type> flattened) throws SchemaConversionException { Schema actualSchema = determineType(field.schema()); if (Type.RECORD.equals(actualSchema.getType())) { Map<String, Type> map = flatten(actualSchema); for (Entry<String, Type> entry : map.entrySet()) { String key = String.format("%s" + AVRO_NESTED_COLUMN_DELIMITER + "%s", field.name(), entry.getKey()); Type existing = flattened.put(key, entry.getValue()); Preconditions.checkArgument(existing == null, "Duplicate name detected in Avro schema. Field: " + key); } return; } Type existing = flattened.put(field.name(), actualSchema.getType()); if (existing != null) { //No duplicate name allowed when flattening (not considering name space we don't have any assumption between namespace and actual database field name) throw new SchemaConversionException("Duplicate name detected in Avro schema. " + field.name()); } }
/*** * Check if the Avro Schema is of type OPTION * ie. [null, RECORD] or [RECORD, null] * @param schema Avro Schema to check * @return Optional Avro Record if schema is of type OPTION */ private static Optional<Schema> isOfOptionType(Schema schema) { Preconditions.checkNotNull(schema); // If not of type UNION, cant be an OPTION if (!Schema.Type.UNION.equals(schema.getType())) { return Optional.<Schema>absent(); } // If has more than two members, can't be an OPTION List<Schema> types = schema.getTypes(); if (null != types && types.size() == 2) { Schema first = types.get(0); Schema second = types.get(1); // One member should be of type NULL and other of type RECORD if (Schema.Type.NULL.equals(first.getType()) && Schema.Type.RECORD.equals(second.getType())) { return Optional.of(second); } else if (Schema.Type.RECORD.equals(first.getType()) && Schema.Type.NULL.equals(second.getType())) { return Optional.of(first); } } return Optional.<Schema>absent(); }
/*** * Check if the Avro Schema is of type OPTION * ie. [null, TYPE] or [TYPE, null] * @param schema Avro Schema to check * @return Optional Avro Typed data if schema is of type OPTION */ private static Optional<Schema> isOfOptionType(Schema schema) { Preconditions.checkNotNull(schema); // If not of type UNION, cant be an OPTION if (!Schema.Type.UNION.equals(schema.getType())) { return Optional.<Schema>absent(); } // If has more than two members, can't be an OPTION List<Schema> types = schema.getTypes(); if (null != types && types.size() == 2) { Schema first = types.get(0); Schema second = types.get(1); // One member should be of type NULL and other of non NULL type if (Schema.Type.NULL.equals(first.getType()) && !Schema.Type.NULL.equals(second.getType())) { return Optional.of(second); } else if (!Schema.Type.NULL.equals(first.getType()) && Schema.Type.NULL.equals(second.getType())) { return Optional.of(first); } } return Optional.<Schema>absent(); }
/** * Determine if an Avro schema is of type Union[T, NULL]. Avro supports nullable * types via a union of type T and null. This is a very common use case. * As such, we want to silently convert it to just T and allow the value to be null. * * When a Hive union type is used with AVRO, the schema type becomes * Union[NULL, T1, T2, ...]. The NULL in the union should be silently removed * * @return true if type represents Union[T, Null], false otherwise */ public static boolean isNullableType(Schema schema) { if (!schema.getType().equals(Schema.Type.UNION)) { return false; } List<Schema> itemSchemas = schema.getTypes(); if (itemSchemas.size() < 2) { return false; } for (Schema itemSchema : itemSchemas) { if (Schema.Type.NULL.equals(itemSchema.getType())) { return true; } } // [null, null] not allowed, so this check is ok. return false; }