public static Schema getSchemaFromCols(Properties properties, List<String> columnNames, List<TypeInfo> columnTypes, String columnCommentProperty) { List<String> columnComments; if (columnCommentProperty == null || columnCommentProperty.isEmpty()) { columnComments = new ArrayList<String>(); } else { //Comments are separated by "\0" in columnCommentProperty, see method getSchema //in MetaStoreUtils where this string columns.comments is generated columnComments = Arrays.asList(columnCommentProperty.split("\0")); if (LOG.isDebugEnabled()) { LOG.debug("columnComments is " + columnCommentProperty); } } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException("AvroSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes); } final String tableName = properties.getProperty(TABLE_NAME); final String tableComment = properties.getProperty(TABLE_COMMENT); TypeInfoToSchema typeInfoToSchema = new TypeInfoToSchema(); return typeInfoToSchema.convert(columnNames, columnTypes, columnComments, properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_NAMESPACE.getPropName()), properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_NAME.getPropName(), tableName), properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_DOC.getPropName(), tableComment)); }
/**
 * Builds the Avro schema for a Hive type by dispatching on its category, then passes
 * the result through {@code wrapInUnionWithNull}.
 *
 * @param typeInfo Hive type to translate
 * @return the value returned by {@code wrapInUnionWithNull} for the category-specific schema
 */
private Schema createAvroSchema(TypeInfo typeInfo) {
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      return wrapInUnionWithNull(createAvroPrimitive(typeInfo));
    case LIST:
      return wrapInUnionWithNull(createAvroArray(typeInfo));
    case MAP:
      return wrapInUnionWithNull(createAvroMap(typeInfo));
    case STRUCT:
      return wrapInUnionWithNull(createAvroRecord(typeInfo));
    case UNION:
      return wrapInUnionWithNull(createAvroUnion(typeInfo));
    default:
      // No builder matched: a null schema is handed to the wrapper.
      return wrapInUnionWithNull((Schema) null);
  }
}
/**
 * Converts Hive schema to avro schema
 *
 * @param columnNames Names of the hive columns
 * @param columnTypes Hive Column types
 * @param columnComments Comments of the hive columns; columns beyond the end of this
 *                       list get a null comment
 * @param namespace Namespace of Avro schema
 * @param name Avro schema name; "baseRecord" is used when null or empty
 * @param doc Avro schema doc
 * @return Avro Schema
 */
public Schema convert(List<String> columnNames, List<TypeInfo> columnTypes,
    List<String> columnComments, String namespace, String name, String doc) {
  final List<Schema.Field> recordFields = new ArrayList<Schema.Field>();
  int index = 0;
  while (index < columnNames.size()) {
    String columnComment = null;
    if (index < columnComments.size()) {
      columnComment = columnComments.get(index);
    }
    final Schema.Field columnField =
        createAvroField(columnNames.get(index), columnTypes.get(index), columnComment);
    recordFields.addAll(getFields(columnField));
    index++;
  }

  final boolean nameMissing = name == null || name.isEmpty();
  final String recordName = nameMissing ? "baseRecord" : name;

  final Schema recordSchema = Schema.createRecord(recordName, doc, namespace, false);
  recordSchema.setFields(recordFields);
  return recordSchema;
}
/**
 * Builds an Avro union from a Hive union type.
 *
 * Each member type is converted with {@code createAvroSchema}; when a converted member
 * is itself a union, its branches are spliced into the result instead of being nested.
 *
 * @param typeInfo Hive type, cast to {@code UnionTypeInfo}
 * @return union of the member schemas after {@code removeDuplicateNullSchemas}
 */
private Schema createAvroUnion(TypeInfo typeInfo) {
  final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
  final List<Schema> branches = new ArrayList<Schema>();
  for (TypeInfo memberTypeInfo : unionTypeInfo.getAllUnionObjectTypeInfos()) {
    final Schema memberSchema = createAvroSchema(memberTypeInfo);
    if (memberSchema.getType() != Schema.Type.UNION) {
      branches.add(memberSchema);
      continue;
    }
    // Member came back as a union: take its branches rather than the union itself.
    branches.addAll(memberSchema.getTypes());
  }
  return Schema.createUnion(removeDuplicateNullSchemas(branches));
}
/**
 * Creates an Avro field for one Hive column or struct member.
 *
 * @param name field name
 * @param typeInfo Hive type of the field, converted via {@code createAvroSchema}
 * @param comment field doc string; may be null
 * @return field with the converted schema and a null default value argument
 */
private Schema.Field createAvroField(String name, TypeInfo typeInfo, String comment) {
  final Schema fieldSchema = createAvroSchema(typeInfo);
  return new Schema.Field(name, fieldSchema, comment, null);
}
/**
 * Creates a generator that delegates schema conversion to a new {@code TypeInfoToSchema}.
 */
public AvroSchemaGenerator() {
  typeInfoToSchema = new TypeInfoToSchema();
}
/**
 * Builds an Avro schema from string-encoded Hive column metadata.
 *
 * @param columnNamesStr comma-separated column names
 * @param columnTypesStr Hive type string, parsed with
 *                       {@code TypeInfoUtils.getTypeInfosFromTypeString}
 * @param columnCommentsStr comma-separated column comments; null or empty means no
 *                          comments. The string is split on every comma, so a comment
 *                          that itself contains a comma is split into several entries.
 * @param namespace Avro schema namespace
 * @param name Avro schema name
 * @param doc Avro schema doc
 * @return the schema produced by {@code TypeInfoToSchema.convert}
 */
public Schema getSchema(String columnNamesStr, String columnTypesStr, String columnCommentsStr,
    String namespace, String name, String doc) {
  final List<String> columnNames = Arrays.asList(columnNamesStr.split(","));
  final List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesStr);

  final List<String> columnComments;
  // A null comments string is treated like an empty one instead of failing with an NPE.
  if (columnCommentsStr == null || columnCommentsStr.isEmpty()) {
    columnComments = new ArrayList<String>();
  } else {
    columnComments = Arrays.asList(columnCommentsStr.split(","));
  }

  return typeInfoToSchema.convert(columnNames, columnTypes, columnComments, namespace, name, doc);
}
}
/**
 * Translates a Hive union type into a single-level Avro union.
 *
 * @param typeInfo Hive type, cast to {@code UnionTypeInfo}
 * @return union built from the converted member schemas, with union-typed members
 *         replaced by their branches and the list passed through
 *         {@code removeDuplicateNullSchemas}
 */
private Schema createAvroUnion(TypeInfo typeInfo) {
  final List<Schema> collected = new ArrayList<Schema>();
  final List<TypeInfo> memberTypeInfos = ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos();
  for (TypeInfo memberTypeInfo : memberTypeInfos) {
    final Schema converted = createAvroSchema(memberTypeInfo);
    final boolean isUnion = Schema.Type.UNION == converted.getType();
    if (isUnion) {
      // Splice the branches of a union-typed member into the result.
      collected.addAll(converted.getTypes());
    } else {
      collected.add(converted);
    }
  }
  final List<Schema> deduplicated = removeDuplicateNullSchemas(collected);
  return Schema.createUnion(deduplicated);
}
/**
 * Builds an Avro array schema for a Hive list type.
 *
 * @param typeInfo Hive type, cast to {@code ListTypeInfo}
 * @return array schema whose item schema is the converted list element type
 */
private Schema createAvroArray(TypeInfo typeInfo) {
  final TypeInfo elementTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
  return Schema.createArray(createAvroSchema(elementTypeInfo));
}
/**
 * Converts a Hive type into an Avro schema.
 *
 * The category of {@code typeInfo} selects the builder; whatever the builder returns
 * (or null when no case matches) is then given to {@code wrapInUnionWithNull}.
 *
 * @param typeInfo Hive type to convert
 * @return result of {@code wrapInUnionWithNull}
 */
private Schema createAvroSchema(TypeInfo typeInfo) {
  final Schema converted;
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      converted = createAvroPrimitive(typeInfo);
      break;
    case LIST:
      converted = createAvroArray(typeInfo);
      break;
    case MAP:
      converted = createAvroMap(typeInfo);
      break;
    case STRUCT:
      converted = createAvroRecord(typeInfo);
      break;
    case UNION:
      converted = createAvroUnion(typeInfo);
      break;
    default:
      converted = null;
      break;
  }
  return wrapInUnionWithNull(converted);
}
.collect(Collectors.toList()); List<String> comments = fields.stream().map(fs -> fs.getComment()).collect(Collectors.toList()); Schema schema = new TypeInfoToSchema().convert(colNames, typeInfos, comments, null, null, null); return getOrGenerateSchemaFile(schema);
/**
 * Builds an Avro record schema for a Hive struct type.
 *
 * Every struct member becomes a field whose doc is the member type's string form. The
 * record is named {@code "record_" + recordCounter}, and the counter is incremented
 * after the record is created.
 *
 * @param typeInfo Hive type, cast to {@code StructTypeInfo}
 * @return record schema whose doc is {@code typeInfo.toString()}
 * @throws IllegalArgumentException if the struct reports a different number of field
 *                                  names and field types
 */
private Schema createAvroRecord(TypeInfo typeInfo) {
  final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
  final List<String> memberNames = structTypeInfo.getAllStructFieldNames();
  final List<TypeInfo> memberTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();

  final int memberCount = memberNames.size();
  if (memberCount != memberTypeInfos.size()) {
    throw new IllegalArgumentException("Failed to generate avro schema from hive schema. "
        + "name and column type differs. names = " + memberNames
        + ", types = " + memberTypeInfos);
  }

  final List<Schema.Field> recordFields = new ArrayList<Schema.Field>();
  for (int idx = 0; idx < memberCount; idx++) {
    final TypeInfo memberTypeInfo = memberTypeInfos.get(idx);
    final Schema.Field memberField =
        createAvroField(memberNames.get(idx), memberTypeInfo, memberTypeInfo.toString());
    recordFields.addAll(getFields(memberField));
  }

  // The counter is read only after all members (including nested structs) are converted.
  final String recordName = "record_" + recordCounter;
  final Schema recordSchema = Schema.createRecord(recordName, typeInfo.toString(), null, false);
  ++recordCounter;
  recordSchema.setFields(recordFields);
  return recordSchema;
}
/**
 * Builds an Avro union from a Hive union type.
 *
 * Each member type is converted with {@code createAvroSchema}; when a converted member
 * is itself a union, its branches are spliced into the result instead of being nested.
 *
 * @param typeInfo Hive type, cast to {@code UnionTypeInfo}
 * @return union of the member schemas after {@code removeDuplicateNullSchemas}
 */
private Schema createAvroUnion(TypeInfo typeInfo) {
  final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
  final List<Schema> branches = new ArrayList<Schema>();
  for (TypeInfo memberTypeInfo : unionTypeInfo.getAllUnionObjectTypeInfos()) {
    final Schema memberSchema = createAvroSchema(memberTypeInfo);
    if (memberSchema.getType() != Schema.Type.UNION) {
      branches.add(memberSchema);
      continue;
    }
    // Member came back as a union: take its branches rather than the union itself.
    branches.addAll(memberSchema.getTypes());
  }
  return Schema.createUnion(removeDuplicateNullSchemas(branches));
}
/**
 * Builds an Avro map schema for a Hive map type.
 *
 * Only string-keyed maps are accepted. The key type is checked with {@code instanceof}
 * before the cast, so a non-primitive (or null) key type is rejected with the same
 * {@link UnsupportedOperationException} as a non-string primitive key, rather than
 * surfacing as a {@code ClassCastException} or {@code NullPointerException}.
 *
 * @param typeInfo Hive type, cast to {@code MapTypeInfo}
 * @return map schema whose value schema is the converted map value type
 * @throws UnsupportedOperationException if the map key type is not a string
 */
private Schema createAvroMap(TypeInfo typeInfo) {
  final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
  final TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();

  final boolean stringKey = keyTypeInfo instanceof PrimitiveTypeInfo
      && ((PrimitiveTypeInfo) keyTypeInfo).getPrimitiveCategory()
          == PrimitiveObjectInspector.PrimitiveCategory.STRING;
  if (!stringKey) {
    throw new UnsupportedOperationException("Key of Map can only be a String");
  }

  final Schema valueSchema = createAvroSchema(mapTypeInfo.getMapValueTypeInfo());
  return Schema.createMap(valueSchema);
}
/**
 * Builds the Avro schema for a Hive type by dispatching on its category, then passes
 * the result through {@code wrapInUnionWithNull}.
 *
 * @param typeInfo Hive type to translate
 * @return the value returned by {@code wrapInUnionWithNull} for the category-specific schema
 */
private Schema createAvroSchema(TypeInfo typeInfo) {
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      return wrapInUnionWithNull(createAvroPrimitive(typeInfo));
    case LIST:
      return wrapInUnionWithNull(createAvroArray(typeInfo));
    case MAP:
      return wrapInUnionWithNull(createAvroMap(typeInfo));
    case STRUCT:
      return wrapInUnionWithNull(createAvroRecord(typeInfo));
    case UNION:
      return wrapInUnionWithNull(createAvroUnion(typeInfo));
    default:
      // No builder matched: a null schema is handed to the wrapper.
      return wrapInUnionWithNull((Schema) null);
  }
}
public static Schema getSchemaFromCols(Properties properties, List<String> columnNames, List<TypeInfo> columnTypes, String columnCommentProperty) { List<String> columnComments; if (columnCommentProperty == null || columnCommentProperty.isEmpty()) { columnComments = new ArrayList<String>(); } else { //Comments are separated by "\0" in columnCommentProperty, see method getSchema //in MetaStoreUtils where this string columns.comments is generated columnComments = Arrays.asList(columnCommentProperty.split("\0")); LOG.info("columnComments is " + columnCommentProperty); } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException("AvroSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes); } final String tableName = properties.getProperty(TABLE_NAME); final String tableComment = properties.getProperty(TABLE_COMMENT); TypeInfoToSchema typeInfoToSchema = new TypeInfoToSchema(); return typeInfoToSchema.convert(columnNames, columnTypes, columnComments, properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_NAMESPACE.getPropName()), properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_NAME.getPropName(), tableName), properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_DOC.getPropName(), tableComment)); }
/**
 * Converts Hive schema to avro schema
 *
 * @param columnNames Names of the hive columns
 * @param columnTypes Hive Column types
 * @param columnComments Comments of the hive columns; a column with no matching entry
 *                       gets a null comment
 * @param namespace Namespace of Avro schema
 * @param name Avro schema name; replaced by "baseRecord" when null or empty
 * @param doc Avro schema doc
 * @return Avro Schema
 */
public Schema convert(List<String> columnNames, List<TypeInfo> columnTypes,
    List<String> columnComments, String namespace, String name, String doc) {
  final List<Schema.Field> allFields = new ArrayList<Schema.Field>();
  for (int col = 0; col < columnNames.size(); col++) {
    final boolean hasComment = col < columnComments.size();
    final String columnDoc = hasComment ? columnComments.get(col) : null;
    final Schema.Field columnField =
        createAvroField(columnNames.get(col), columnTypes.get(col), columnDoc);
    allFields.addAll(getFields(columnField));
  }

  String schemaName = name;
  if (schemaName == null || schemaName.isEmpty()) {
    schemaName = "baseRecord";
  }

  final Schema result = Schema.createRecord(schemaName, doc, namespace, false);
  result.setFields(allFields);
  return result;
}
/**
 * Translates a Hive union type into a single-level Avro union.
 *
 * @param typeInfo Hive type, cast to {@code UnionTypeInfo}
 * @return union built from the converted member schemas, with union-typed members
 *         replaced by their branches and the list passed through
 *         {@code removeDuplicateNullSchemas}
 */
private Schema createAvroUnion(TypeInfo typeInfo) {
  final List<Schema> collected = new ArrayList<Schema>();
  final List<TypeInfo> memberTypeInfos = ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos();
  for (TypeInfo memberTypeInfo : memberTypeInfos) {
    final Schema converted = createAvroSchema(memberTypeInfo);
    final boolean isUnion = Schema.Type.UNION == converted.getType();
    if (isUnion) {
      // Splice the branches of a union-typed member into the result.
      collected.addAll(converted.getTypes());
    } else {
      collected.add(converted);
    }
  }
  final List<Schema> deduplicated = removeDuplicateNullSchemas(collected);
  return Schema.createUnion(deduplicated);
}
/**
 * Wraps a converted Hive type in an Avro field.
 *
 * @param name field name
 * @param typeInfo Hive type of the field; converted with {@code createAvroSchema}
 * @param comment doc string for the field; may be null
 * @return the new field, constructed with a null default value argument
 */
private Schema.Field createAvroField(String name, TypeInfo typeInfo, String comment) {
  final Schema convertedSchema = createAvroSchema(typeInfo);
  final Schema.Field field = new Schema.Field(name, convertedSchema, comment, null);
  return field;
}
/**
 * Converts a Hive type into an Avro schema.
 *
 * The category of {@code typeInfo} selects the builder; whatever the builder returns
 * (or null when no case matches) is then given to {@code wrapInUnionWithNull}.
 *
 * @param typeInfo Hive type to convert
 * @return result of {@code wrapInUnionWithNull}
 */
private Schema createAvroSchema(TypeInfo typeInfo) {
  final Schema converted;
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      converted = createAvroPrimitive(typeInfo);
      break;
    case LIST:
      converted = createAvroArray(typeInfo);
      break;
    case MAP:
      converted = createAvroMap(typeInfo);
      break;
    case STRUCT:
      converted = createAvroRecord(typeInfo);
      break;
    case UNION:
      converted = createAvroUnion(typeInfo);
      break;
    default:
      converted = null;
      break;
  }
  return wrapInUnionWithNull(converted);
}