/**
 * Parses the given string CTL schema along with its dependencies as an
 * {@link org.apache.avro.Schema Avro schema}.
 *
 * <p>Every declared dependency is pre-registered with the parser as an empty
 * placeholder record so that name references to it resolve during parsing.
 *
 * @param avroSchema a string CTL schema to parse
 * @return the parsed CTL schema as an Avro schema
 * @throws Exception if the given CTL schema is invalid and thus cannot be parsed
 */
public static Schema parseStringCtlSchema(String avroSchema) throws Exception {
    Schema.Parser parser = new Schema.Parser();
    JsonNode root = new ObjectMapper().readTree(avroSchema);
    JsonNode dependencies = root.get(DEPENDENCIES);
    if (dependencies != null && dependencies.isArray()) {
        Map<String, Schema> placeholders = new HashMap<>();
        for (JsonNode dependency : dependencies) {
            Fqn fqn = new Fqn(dependency.get(FQN).asText());
            // An empty stand-in record; only its fully-qualified name matters
            // for resolving references while parsing the main schema.
            Schema placeholder = SchemaBuilder
                    .record(fqn.getName()).namespace(fqn.getNamespace())
                    .fields()
                    .endRecord();
            placeholders.put(fqn.getFqnString(), placeholder);
        }
        parser.addTypes(placeholders);
    }
    return parser.parse(avroSchema);
}
/** Verifies that switchNamespace replaces the namespace while keeping the record name and fields. */
@Test
public void testSwitchNamespace() {
    final String originalNamespace = "originalNamespace";
    final String originalName = "originalName";
    final String newNamespace = "newNamespace";

    Schema original = SchemaBuilder.builder(originalNamespace)
        .record(originalName)
        .fields()
        .requiredDouble("double")
        .optionalFloat("float")
        .endRecord();

    Map<String, String> namespaceOverride = Maps.newHashMap();
    namespaceOverride.put(originalNamespace, newNamespace);

    Schema switched = AvroUtils.switchNamespace(original, namespaceOverride);
    Assert.assertEquals(switched.getNamespace(), newNamespace);
    Assert.assertEquals(switched.getName(), originalName);
    // Fields must be untouched by the namespace switch.
    for (Schema.Field field : switched.getFields()) {
        Assert.assertEquals(field, original.getField(field.name()));
    }
}
/** Checks that a fixed-type field retains its byte-array default value. */
@Test
public void testFixedDefaultValueDrop() {
    Schema md5 = SchemaBuilder.builder().fixed("MD5").size(16);
    Schema record = SchemaBuilder.builder().record("test")
        .fields()
        .name("hash").type(md5).withDefault(new byte[16])
        .endRecord();

    Schema.Field hashField = record.getField("hash");
    Assert.assertNotNull(hashField.defaultVal());
    Assert.assertArrayEquals(new byte[16], (byte[]) hashField.defaultVal());
}
/**
 * Writes an Avro file with one required enum column and one optional (nullable) enum column.
 *
 * @param filename   name of the Avro file created inside a fresh temp directory
 * @param nrows      number of rows to generate
 * @param categories exactly two symbol arrays: categories[0] feeds the required enum,
 *                   categories[1] the optional one
 * @return the generated Avro file
 * @throws IOException if writing the file fails
 */
public static File generateEnumTypes(String filename, int nrows, String[][] categories) throws IOException {
    assert categories.length == 2 : "Needs only 2 columns";
    File parentDir = Files.createTempDir();
    File f = new File(parentDir, filename);
    Schema enumSchema1 = SchemaBuilder.enumeration("CEnum1").symbols(categories[0]);
    Schema enumSchema2 = SchemaBuilder.enumeration("CEnum2").symbols(categories[1]);
    Schema schema = SchemaBuilder.builder()
        .record("test_enum_types").fields()
        .name("CEnum").type(enumSchema1).noDefault()
        .name("CUEnum").type().optional().type(enumSchema2)
        .endRecord();
    System.out.println(schema);
    int numOfCategories1 = categories[0].length;
    int numOfCategories2 = categories[1].length;
    // try-with-resources closes the writer even if append() throws (replaces the
    // former try/finally, which also carried a stray empty statement `;;`).
    try (DataFileWriter<GenericRecord> dw =
             new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>())) {
        dw.create(schema, f);
        for (int i = 0; i < nrows; i++) {
            GenericRecord gr = new GenericData.Record(schema);
            gr.put("CEnum", new GenericData.EnumSymbol(enumSchema1, categories[0][i % numOfCategories1]));
            // Every (numOfCategories2 + 1)-th row leaves the optional enum null.
            gr.put("CUEnum", i % (numOfCategories2 + 1) == 0
                ? null
                : new GenericData.EnumSymbol(enumSchema2, categories[1][i % numOfCategories2]));
            dw.append(gr);
        }
        return f;
    }
}
/**
 * Writes an empty Avro container (header only, zero records) to the given stream.
 *
 * @param outStream destination stream for the Avro header
 * @throws IOException if the header cannot be written
 */
public static void createEmptyAvroStream(final OutputStream outStream) throws IOException {
    final Schema schema = SchemaBuilder.record("NiFi_ExecuteSQL_Record")
        .namespace("any.data")
        .fields()
        .endRecord();
    final GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, outStream);
    }
}
@SuppressWarnings("fallthrough") private Schema getGranularityBasedSchema() { FieldAssembler<Schema> assembler = SchemaBuilder.record("GenericRecordTimePartition").namespace("gobblin.writer.partitioner").fields(); // Construct the fields in reverse order if (!Strings.isNullOrEmpty(this.writerPartitionSuffix)) { assembler = assembler.name(SUFFIX).type(Schema.create(Schema.Type.STRING)).noDefault(); } assembler = assembler.name(this.granularity.toString()).type(Schema.create(Schema.Type.STRING)).noDefault(); if (!Strings.isNullOrEmpty(this.writerPartitionPrefix)) { assembler = assembler.name(PREFIX).type(Schema.create(Schema.Type.STRING)).noDefault(); } Schema schema = assembler.endRecord(); Collections.reverse(schema.getFields()); return schema; }
/** Verifies switchName renames a record, preserves its fields, and round-trips to the original. */
@Test
public void testSwitchName() {
    final String originalName = "originalName";
    final String newName = "newName";

    Schema original = SchemaBuilder.record(originalName).fields()
        .requiredDouble("double")
        .optionalFloat("float")
        .endRecord();

    Schema renamed = AvroUtils.switchName(original, newName);
    Assert.assertEquals(renamed.getName(), newName);
    for (Schema.Field field : renamed.getFields()) {
        Assert.assertEquals(field, original.getField(field.name()));
    }

    Assert.assertEquals(newName, AvroUtils.switchName(original, newName).getName());
    // Renaming back must reproduce the original schema exactly.
    Assert.assertEquals(original,
        AvroUtils.switchName(AvroUtils.switchName(original, newName), original.getName()));
}
/**
 * Builds the schema for date-time-format-based partitioning: an optional prefix field,
 * the partitioned path field, then an optional suffix field (all strings).
 *
 * @return the partition record schema
 */
private Schema getDateTimeFormatBasedSchema() {
  FieldAssembler<Schema> fields =
      SchemaBuilder.record("GenericRecordTimePartition").namespace("gobblin.writer.partitioner").fields();
  if (!Strings.isNullOrEmpty(this.writerPartitionPrefix)) {
    fields = fields.name(PREFIX).type(Schema.create(Schema.Type.STRING)).noDefault();
  }
  fields = fields.name(PARTITIONED_PATH).type(Schema.create(Schema.Type.STRING)).noDefault();
  if (!Strings.isNullOrEmpty(this.writerPartitionSuffix)) {
    fields = fields.name(SUFFIX).type(Schema.create(Schema.Type.STRING)).noDefault();
  }
  return fields.endRecord();
}
/** Parsing a schema from one string or from the same string split in two must yield equal schemas. */
@Test
public void testSplitSchemaBuild() {
    Schema built = SchemaBuilder
        .record("HandshakeRequest")
        .namespace("org.apache.avro.ipc").fields()
        .name("clientProtocol").type().optional().stringType()
        .name("meta").type().optional().map().values().bytesType()
        .endRecord();

    String json = built.toString();
    int mid = json.length() / 2;

    Schema fromWholeString = new org.apache.avro.Schema.Parser().parse(built.toString());
    Schema fromStringParts = new org.apache.avro.Schema.Parser()
        .parse(json.substring(0, mid), json.substring(mid));

    assertNotNull(fromWholeString);
    assertNotNull(fromStringParts);
    assertEquals(fromWholeString.toString(), fromStringParts.toString());
}
@Test public void testWriteNullableUUIDReadRequiredString() throws IOException { Schema nullableUuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()) .fields().optionalString("uuid").endRecord(); LogicalTypes.uuid().addToSchema( nullableUuidSchema.getField("uuid").schema().getTypes().get(1)); UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); RecordWithUUID r1 = new RecordWithUUID(); r1.uuid = u1; RecordWithUUID r2 = new RecordWithUUID(); r2.uuid = u2; List<RecordWithStringUUID> expected = Arrays.asList( new RecordWithStringUUID(), new RecordWithStringUUID()); expected.get(0).uuid = u1.toString(); expected.get(1).uuid = u2.toString(); File test = write(REFLECT, nullableUuidSchema, r1, r2); // verify that the field's type overrides the logical type Schema uuidStringSchema = SchemaBuilder .record(RecordWithStringUUID.class.getName()) .fields().requiredString("uuid").endRecord(); Assert.assertEquals("Should read uuid as String without UUID conversion", expected, read(REFLECT.createDatumReader(uuidStringSchema), test)); }
/** Tests field-path extraction through a nullable union wrapping a nested record. */
@Test
public void testUnionWithNull() {
  Schema nestedRecord = SchemaBuilder.record("nested").fields().requiredDouble("double")
      .requiredString("string").endRecord();
  Schema union = SchemaBuilder.unionOf().nullType().and().type(nestedRecord).endUnion();
  Schema schema = SchemaBuilder.record("record").fields().name("union").type(union).noDefault().endRecord();

  Schema doubleSchema = AvroUtils.getFieldSchema(schema, "union.double").get();
  Assert.assertEquals(doubleSchema.getType(), Schema.Type.DOUBLE);

  GenericRecord nested = new GenericData.Record(nestedRecord);
  // Use a double literal: the "double" field is typed DOUBLE, and the previous boxed
  // Integer (10) would fail Avro datum validation/serialization for this schema.
  nested.put("double", 10.0d);
  nested.put("string", "testString");
  GenericRecord record = new GenericData.Record(schema);
  record.put("union", nested);

  String stringValue = AvroUtils.getFieldValue(record, "union.string").get().toString();
  Assert.assertEquals(stringValue, "testString");
}
}
/**
 * Round-trips an Avro record containing a single field through the KSQL serde path and
 * asserts that the deserialized row holds exactly the expected KSQL value.
 */
private void shouldDeserializeTypeCorrectly(final org.apache.avro.Schema avroSchema,
                                            final Object avroValue,
                                            final Schema ksqlSchema,
                                            final Object ksqlValue) {
  final SchemaRegistryClient schemaRegistryClient = new MockSchemaRegistryClient();

  final org.apache.avro.Schema recordSchema = org.apache.avro.SchemaBuilder.record("test_row")
      .fields()
      .name("field0")
      .type(avroSchema)
      .noDefault()
      .endRecord();
  final Schema rowSchema = SchemaBuilder.struct().field("field0", ksqlSchema).build();

  final GenericRecord record = new GenericData.Record(recordSchema);
  record.put("field0", avroValue);

  final GenericRow row = serializeDeserializeAvroRecord(
      rowSchema, "test-topic", schemaRegistryClient, record);

  // Exactly one column, holding the converted value.
  assertThat(row.getColumns().size(), equalTo(1));
  assertThat(row.getColumns().get(0), equalTo(ksqlValue));
}
/** Builds a recursive LongList schema via a by-name self-reference and verifies its shape. */
@Test
public void testRecursiveRecord() {
    Schema schema = SchemaBuilder.record("LongList").fields()
        .name("value").type().longType().noDefault()
        .name("next").type().optional().type("LongList")
        .endRecord();

    Assert.assertEquals("LongList", schema.getName());
    List<Schema.Field> fields = schema.getFields();
    Assert.assertEquals(2, fields.size());

    Assert.assertEquals(
        new Schema.Field("value", Schema.create(Schema.Type.LONG), null, null),
        fields.get(0));

    // "next" is an optional self-reference: a ["null", LongList] union.
    Schema nextSchema = fields.get(1).schema();
    Assert.assertEquals(Schema.Type.UNION, nextSchema.getType());
    Assert.assertEquals(Schema.Type.NULL, nextSchema.getTypes().get(0).getType());

    Schema recordSchema = nextSchema.getTypes().get(1);
    Assert.assertEquals(Schema.Type.RECORD, recordSchema.getType());
    Assert.assertEquals("LongList", recordSchema.getName());
    Assert.assertEquals(NullNode.getInstance(), fields.get(1).defaultValue());
}
@Test public void testWriteNullableUUID() throws IOException { Schema nullableUuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()) .fields().optionalString("uuid").endRecord(); LogicalTypes.uuid().addToSchema( nullableUuidSchema.getField("uuid").schema().getTypes().get(1)); UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); RecordWithUUID r1 = new RecordWithUUID(); r1.uuid = u1; RecordWithUUID r2 = new RecordWithUUID(); r2.uuid = u2; List<RecordWithStringUUID> expected = Arrays.asList( new RecordWithStringUUID(), new RecordWithStringUUID()); expected.get(0).uuid = u1.toString(); expected.get(1).uuid = u2.toString(); File test = write(REFLECT, nullableUuidSchema, r1, r2); // verify that the field's type overrides the logical type Schema nullableUuidStringSchema = SchemaBuilder .record(RecordWithStringUUID.class.getName()) .fields().optionalString("uuid").endRecord(); Assert.assertEquals("Should read uuid as String without UUID conversion", expected, read(ReflectData.get().createDatumReader(nullableUuidStringSchema), test)); }