for (TrieNode child : node.children.values()) { Schema recordSchema = getActualRecord(inputSchema); Field innerSrcField = recordSchema.getField(child.val); Preconditions.checkNotNull(innerSrcField, child.val + " does not exist under " + recordSchema); new Field(innerSrcField.name(), innerSrcField.schema(), innerSrcField.doc(), innerSrcField.defaultValue())); } else { Schema innerSrcSchema = innerSrcField.schema(); if (Type.UNION.equals(inputSchema.getType())) { Preconditions.checkArgument(inputSchema.getTypes().size() <= 2, "For union type in nested record, it should only have NULL and Record type");
public static TypeInfo getOrcField(Schema fieldSchema) throws IllegalArgumentException { Schema.Type fieldType = fieldSchema.getType(); List<Schema> unionFieldSchemas = fieldSchema.getTypes(); unionFieldSchema -> !Schema.Type.NULL.equals(unionFieldSchema.getType())) .map(NiFiOrcUtils::getOrcField) .collect(Collectors.toList()); return orcFields.get(0); } else { return TypeInfoFactory.getUnionTypeInfo(orcFields); return TypeInfoFactory.getListTypeInfo(getOrcField(fieldSchema.getElementType())); return TypeInfoFactory.getMapTypeInfo( getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type.STRING), getOrcField(fieldSchema.getValueType())); List<TypeInfo> orcFields = new ArrayList<>(avroFields.size()); avroFields.forEach(avroField -> { String fieldName = avroField.name(); orcFieldNames.add(fieldName); orcFields.add(getOrcField(avroField.schema())); }); return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields); throw new IllegalArgumentException("Did not recognize Avro type " + fieldType.getName());
/**
 * Picks the type name to use for a field whose Avro schema is a union.
 *
 * <p>The union members are scanned in declaration order and the string form of the first member
 * type that is not "NULL" (compared case-insensitively) is returned. When the union has three or
 * more members a warning is logged, because only the first non-null member is used.
 *
 * @param field the Avro field whose schema is a union
 * @return the string form of the first non-null member type
 * @throws RuntimeException if no member of the union has a non-null type
 */
private static String extractAvroTypeFromUnion(Schema.Field field) {
  if (field.schema().getTypes().size() > 2) {
    LOG.warn("Avro schema field " + field.name() + " has 3 or more types: using the first non-null type");
  }

  for (Schema member : field.schema().getTypes()) {
    String memberTypeName = member.getType().toString();
    if ("NULL".equalsIgnoreCase(memberTypeName)) {
      // Null branches only mark the field as optional; keep looking.
      continue;
    }
    return memberTypeName;
  }

  // Every member was a null type: there is nothing usable to return.
  String message = "Avro schema field " + field.name()
      + " is a union, but it does not contain a non-null field type.";
  LOG.error(message);
  throw new RuntimeException(message);
}
if (topLevel && !schema.getType().equals(Schema.Type.RECORD)) { throw new IllegalArgumentException( String.format("Schema for table must be of type RECORD. Received type: %s for dataset %s", schema.getType(), datasetName)); switch (schema.getType()) { case RECORD: isFirst = true; columns.append(", \n"); String type = generateAvroToHiveColumnMapping(field.schema(), hiveColumns, false, datasetName); if (hiveColumns.isPresent()) { hiveColumns.get().put(field.name(), type); String flattenSource = field.getProp("flatten_source"); if (StringUtils.isBlank(flattenSource)) { flattenSource = field.name(); isFirst = true; for (Schema unionMember : schema.getTypes()) { if (Schema.Type.NULL.equals(unionMember.getType())) { continue; String.format("DDL query generation failed for \"%s\" of dataset %s", schema, datasetName); log.error(exceptionMessage); throw new AvroRuntimeException(exceptionMessage);
/**
 * Creates an array bound to the given schema.
 *
 * <p>The {@code elements} array is only allocated when {@code capacity} is non-zero.
 *
 * @param capacity number of slots to allocate up front; {@code 0} skips the allocation
 * @param schema the schema of this array; must be non-null and of type {@code ARRAY}
 * @throws AvroRuntimeException if {@code schema} is null or is not an array schema
 */
public Array(int capacity, Schema schema) { if (schema == null || !Type.ARRAY.equals(schema.getType())) throw new AvroRuntimeException("Not an array schema: "+schema); this.schema = schema; if (capacity != 0) elements = new Object[capacity]; } public Array(Schema schema, Collection<T> c) {
/**
 * Creates a record bound to the given schema, with one value slot per schema field.
 *
 * @param schema the schema of this record; must be non-null and of type {@code RECORD}
 * @throws AvroRuntimeException if {@code schema} is null or is not a record schema
 */
public Record(Schema schema) { if (schema == null || !Type.RECORD.equals(schema.getType())) throw new AvroRuntimeException("Not a record schema: "+schema); this.schema = schema; this.values = new Object[schema.getFields().size()]; } public Record(Record other, boolean deepCopy) {
public static Object toObject(JsonNode jsonNode, Schema schema) { if (schema != null && schema.getType().equals(Schema.Type.UNION)) { return toObject(jsonNode, schema.getTypes().get(0)); return jsonNode.asBoolean(); } else if (jsonNode.isInt()) { if (schema == null || schema.getType().equals(Schema.Type.INT)) { return jsonNode.asInt(); } else if (schema.getType().equals(Schema.Type.LONG)) { return jsonNode.asLong(); return jsonNode.asLong(); } else if (jsonNode.isDouble()) { if (schema == null || schema.getType().equals(Schema.Type.DOUBLE)) { return jsonNode.asDouble(); } else if (schema.getType().equals(Schema.Type.FLOAT)) { return (float) jsonNode.asDouble(); if (schema == null || schema.getType().equals(Schema.Type.STRING) || schema.getType().equals(Schema.Type.ENUM)) { return jsonNode.asText(); } else if (schema.getType().equals(Schema.Type.BYTES) || schema.getType().equals(Schema.Type.FIXED)) { try { return jsonNode.getTextValue().getBytes(BYTES_CHARSET); } catch (UnsupportedEncodingException e) { throw new AvroRuntimeException(e); if (schema == null) { s = null;
/**
 * Verifies that a single unsigned INTEGER column of precision 10 yields a schema field that is a
 * two-member union containing a LONG type and a NULL type.
 */
@Test
public void testUnsignedIntShouldBeLong() throws SQLException, IllegalArgumentException, IllegalAccessException {
    // Result-set metadata describing one unsigned INTEGER column named "Col1".
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    when(metadata.getColumnCount()).thenReturn(1);
    when(metadata.getColumnType(1)).thenReturn(Types.INTEGER);
    when(metadata.getPrecision(1)).thenReturn(10);
    when(metadata.isSigned(1)).thenReturn(false);
    when(metadata.getColumnName(1)).thenReturn("Col1");
    when(metadata.getTableName(1)).thenReturn("Table1");

    final ResultSet rs = mock(ResultSet.class);
    when(rs.getMetaData()).thenReturn(metadata);

    Schema schema = JdbcCommon.createSchema(rs);
    Assert.assertNotNull(schema);

    Schema.Field column = schema.getField("Col1");
    Schema columnSchema = column.schema();
    Assert.assertEquals(2, columnSchema.getTypes().size());

    // Record which of the two expected member types show up in the union.
    boolean sawLong = false;
    boolean sawNull = false;
    for (Schema member : columnSchema.getTypes()) {
        switch (member.getType()) {
            case LONG:
                sawLong = true;
                break;
            case NULL:
                sawNull = true;
                break;
            default:
                break;
        }
    }

    assertTrue(sawLong);
    assertTrue(sawNull);
}
/**
 * Builds the union TypeInfo for an Avro union schema.
 *
 * <p>Each member schema of the union is converted, in declaration order, through
 * {@code generateTypeInfo}, and the resulting list is wrapped in a union TypeInfo.
 *
 * @param schema the Avro schema to convert; asserted to be of type UNION
 * @param seenSchemas passed through unchanged to {@code generateTypeInfo} for every member
 * @return the union TypeInfo made of the converted member types
 * @throws AvroSerdeException declared by this method; presumably raised by member conversion
 */
private static TypeInfo generateUnionTypeInfo(Schema schema, Set<Schema> seenSchemas) throws AvroSerdeException {
  assert schema.getType().equals(Schema.Type.UNION);

  final List<Schema> members = schema.getTypes();
  final List<TypeInfo> memberInfos = new ArrayList<TypeInfo>(members.size());
  for (int i = 0; i < members.size(); i++) {
    memberInfos.add(generateTypeInfo(members.get(i), seenSchemas));
  }

  return TypeInfoFactory.getUnionTypeInfo(memberInfos);
}
/**
 * Generates a Hive {@code CREATE EXTERNAL TABLE IF NOT EXISTS ... STORED AS ORC} statement whose
 * columns mirror the fields of an Avro record schema.
 *
 * <p>Each column is rendered as the field name, a space, and the Hive type produced by
 * {@code getHiveTypeFromAvroType} for that field's schema; columns are separated by ", ".
 *
 * @param avroSchema the Avro schema describing the table; must be of type RECORD
 * @param tableName the table name, inserted into the statement as given
 * @return the DDL statement
 * @throws IllegalArgumentException if {@code avroSchema} is not a RECORD schema
 */
public static String generateHiveDDL(Schema avroSchema, String tableName) {
    final Schema.Type schemaType = avroSchema.getType();
    if (!Schema.Type.RECORD.equals(schemaType)) {
        throw new IllegalArgumentException("Avro schema is of type " + schemaType.getName() + ", not RECORD");
    }

    // A null field list produces an empty column section.
    final List<Schema.Field> fields = avroSchema.getFields();
    final String columnList = (fields == null)
            ? ""
            : fields.stream()
                    .map(f -> f.name() + " " + getHiveTypeFromAvroType(f.schema()))
                    .collect(Collectors.joining(", "));

    return "CREATE EXTERNAL TABLE IF NOT EXISTS " + tableName + " (" + columnList + ") STORED AS ORC";
}
Object memberValue = fromByteBuffer(fieldSchema.getElementType(), hColumn.getValue()); Object memberValue = null; if (fieldSchema.getValueType().getType().equals(Type.UNION)){ Field memberField = fieldSchema.getField(memberName); Schema memberSchema = memberField.schema(); Type memberType = memberSchema.getType(); cassandraColumn.setValue(hColumn); if (memberType.equals(Type.UNION)){ HColumn<ByteBuffer, ByteBuffer> hc = getUnionTypeColumn(memberField.name() + CassandraStore.UNION_COL_SUFIX, this.hSuperColumn.getColumns().toArray()); Integer unionIndex = getUnionIndex(memberField.name(),hc); cassandraColumn.setUnionType(unionIndex); LOG.warn("Type: " + type.name() + " not supported for field: " + field.name());
/**
 * Builds the struct TypeInfo for an Avro record schema.
 *
 * <p>The record is registered in the set of schemas already seen before its fields are converted.
 * Meeting a record that is already in that set is reported as an unsupported recursive schema.
 * When no set is supplied, a new identity-based one is created here.
 *
 * @param schema the Avro schema to convert; asserted to be of type RECORD
 * @param seenSchemas schemas already seen, or {@code null} to start a fresh set
 * @return a struct TypeInfo with one entry per record field, in field order
 * @throws AvroSerdeException if {@code schema} is already contained in {@code seenSchemas}
 */
private static TypeInfo generateRecordTypeInfo(Schema schema, Set<Schema> seenSchemas) throws AvroSerdeException {
  assert schema.getType().equals(Schema.Type.RECORD);

  Set<Schema> visited = seenSchemas;
  if (visited == null) {
    visited = Collections.newSetFromMap(new IdentityHashMap<Schema, Boolean>());
  } else if (visited.contains(schema)) {
    throw new AvroSerdeException(
        "Recursive schemas are not supported. Recursive schema was " + schema.getFullName());
  }
  visited.add(schema);

  final List<Schema.Field> recordFields = schema.getFields();
  final int fieldCount = recordFields.size();
  final List<String> names = new ArrayList<String>(fieldCount);
  final List<TypeInfo> infos = new ArrayList<TypeInfo>(fieldCount);
  for (Schema.Field recordField : recordFields) {
    names.add(recordField.name());
    infos.add(generateTypeInfo(recordField.schema(), visited));
  }

  return TypeInfoFactory.getStructTypeInfo(names, infos);
}
StringBuilder builder = new StringBuilder(prefix + "_UNION_"); List<Schema> branches = schema.getTypes(); int branchCounter = branches.size(); switch (branchSchema.getType()) { case RECORD: builder.append(StyleUtils.toUpperUnderScore(branchSchema.getName())); break; case ARRAY: builder.append(branchSchema.getType().toString()); builder.append('_'); builder.append(StyleUtils.toUpperUnderScore(branchSchema.getElementType().getName())); break; default: builder.append(branchSchema.getType().toString()); break;
GenericRecord avroRecord = new GenericData.Record(outputSchema); JsonElementConversionWithAvroSchemaFactory.JsonElementConverter converter; for (Schema.Field field : outputSchema.getFields()) { if (ignoreFields.contains(field.name())) { continue; if (type.equals(Schema.Type.UNION)) { nullable = true; List<Schema> types = field.schema().getTypes(); if (types.size() != 2) { throw new DataConversionException("Unions must be size 2, and contain one null"); if (field.schema().getTypes().get(0).getType().equals(Schema.Type.NULL)) { schema = field.schema().getTypes().get(1); type = schema.getType(); } else if (field.schema().getTypes().get(1).getType().equals(Schema.Type.NULL)) { schema = field.schema().getTypes().get(0); type = schema.getType(); if (type.equals(Schema.Type.RECORD)) { if (nullable && inputRecord.get(field.name()).isJsonNull()) { avroRecord.put(field.name(), null); converter = JsonElementConversionWithAvroSchemaFactory.getConvertor(field.name(), type.getName(), schema, workUnit, nullable, ignoreFields); avroRecord.put(field.name(), converter.convert(inputRecord.get(field.name())));
this.newFields = new ArrayList<>(); List<String> mapFieldNames = new ArrayList<>(); for (Field field : gobblinTrackingEventSchema.getFields()) { String curFieldName = field.name(); if (!field.schema().getType().equals(Schema.Type.MAP)) { if (fieldsRenameMap.containsKey(curFieldName)) { newFields.add( new Schema.Field(fieldsRenameMap.get(curFieldName), field.schema(), field.doc(), field.defaultValue())); } else { newFields.add(new Schema.Field(curFieldName, field.schema(), field.doc(), field.defaultValue())); String newFieldName = this.fieldsRenameMap.containsKey(fieldToFlatten) ? this.fieldsRenameMap.get(fieldToFlatten) : fieldToFlatten; newFields.add(new Field(newFieldName, Schema.create(Schema.Type.STRING), "", null));
/**
 * Builds the struct TypeInfo for an Avro record schema.
 *
 * <p>Field names and the TypeInfo produced by {@code generateTypeInfo} for each field schema are
 * collected in field order and combined into a struct TypeInfo.
 *
 * @param schema the Avro schema to convert; asserted to be of type RECORD
 * @return a struct TypeInfo with one entry per record field
 * @throws HaivvreoException declared by this method; presumably raised by field conversion
 */
private static TypeInfo generateRecordTypeInfo(Schema schema) throws HaivvreoException {
  assert schema.getType().equals(Schema.Type.RECORD);

  final List<Schema.Field> recordFields = schema.getFields();
  final int fieldCount = recordFields.size();
  final List<String> names = new ArrayList<String>(fieldCount);
  final List<TypeInfo> infos = new ArrayList<TypeInfo>(fieldCount);
  for (Schema.Field recordField : recordFields) {
    names.add(recordField.name());
    infos.add(generateTypeInfo(recordField.schema()));
  }

  return TypeInfoFactory.getStructTypeInfo(names, infos);
}
private Schema checkSchema(Schema schema, boolean mustBeRecord) { if (!mustBeRecord) { if (!SUPPORTED_TYPES.contains(schema.getType())) { throw new IllegalArgumentException("Unsupported type: " + schema.getType()); if (schema.getType() != Schema.Type.RECORD) { return schema; for (Schema.Field f : schema.getFields()) { Schema fs = f.schema(); if (isNullableSchema(fs)) { fs = getNonNull(fs); if (!SUPPORTED_TYPES.contains(st)) { throw new IllegalArgumentException(String.format( "Unsupported type '%s' for field '%s'", st.toString(), f.name()));
private static void checkProp(Schema s0) throws Exception { if(s0.getType().equals(Schema.Type.UNION)) return; // unions have no props assertNull(s0.getProp("foo")); Schema s1 = Schema.parse(s0.toString()); s1.addProp("foo", "bar"); assertEquals("bar", s1.getProp("foo")); assertNotEquals(s0, s1); Schema s2 = Schema.parse(s1.toString()); assertEquals("bar", s2.getProp("foo")); assertEquals(s1, s2); assertNotEquals(s0, s2); }
/**
 * Finds the member of an Avro union schema that corresponds to the given data schema.
 *
 * <p>The lookup key is the Avro override's full schema name when an override exists for
 * {@code dataSchema}, otherwise the data schema's union member key. Each union member is named by
 * its full name for ENUM, FIXED and RECORD types, and by its lower-cased type name otherwise.
 *
 * <p>The type name is lower-cased with {@link java.util.Locale#ROOT} so the result does not depend
 * on the JVM's default locale (a Turkish default locale would otherwise turn {@code INT} into
 * {@code ınt} and never match).
 *
 * @param dataSchema the data schema whose union member is being looked up
 * @param avroSchema the Avro union schema to search
 * @return an entry of member name and member schema, or {@code null} (after a message has been
 *     appended) when no member matches
 */
protected Map.Entry<String, Schema> findUnionMember(DataSchema dataSchema, Schema avroSchema) {
  AvroOverride avroOverride = getAvroOverride(dataSchema);
  String key = (avroOverride == null ? dataSchema.getUnionMemberKey() : avroOverride.getAvroSchemaFullName());
  List<Schema> members = avroSchema.getTypes();
  for (Schema member : members) {
    String name;
    switch (member.getType()) {
      case ENUM:
      case FIXED:
      case RECORD:
        name = member.getFullName();
        break;
      default:
        // Locale-independent lower-casing; see the method documentation.
        name = member.getType().toString().toLowerCase(java.util.Locale.ROOT);
    }
    if (name.equals(key)) {
      return new AbstractMap.SimpleEntry<String, Schema>(name, member);
    }
  }
  appendMessage("cannot find %1$s in union %2$s", key, avroSchema);
  return null;
}
/**
 * Builds the map TypeInfo for an Avro map schema. Avro only allows maps with
 * string keys, so only the value schema has to be converted; the key type is
 * always the primitive "string" TypeInfo.
 *
 * @param schema the Avro schema to convert; asserted to be of type MAP
 * @param seenSchemas passed through unchanged to {@code generateTypeInfo} for the value schema
 * @return a map TypeInfo from string to the converted value type
 */
private static TypeInfo generateMapTypeInfo(Schema schema, Set<Schema> seenSchemas) throws AvroSerdeException {
  assert schema.getType().equals(Schema.Type.MAP);

  // Convert the value side first, then pair it with the fixed string key type.
  final TypeInfo valueInfo = generateTypeInfo(schema.getValueType(), seenSchemas);
  final TypeInfo keyInfo = TypeInfoFactory.getPrimitiveTypeInfo("string");
  return TypeInfoFactory.getMapTypeInfo(keyInfo, valueInfo);
}