/**
 * Parses the given string CTL schema along with its dependencies as an
 * {@link org.apache.avro.Schema Avro schema}.
 *
 * @param avroSchema A string CTL schema to parse
 * @return A parsed CTL schema as an Avro schema
 * @throws Exception if the given CTL schema is invalid and thus cannot be parsed
 */
public static Schema parseStringCtlSchema(String avroSchema) throws Exception {
  Schema.Parser parser = new Schema.Parser();
  ObjectMapper mapper = new ObjectMapper();
  JsonNode node = mapper.readTree(avroSchema);
  JsonNode dependenciesNode = node.get(DEPENDENCIES);
  if (dependenciesNode != null && dependenciesNode.isArray()) {
    Map<String, Schema> types = new HashMap<>();
    for (int i = 0; i < dependenciesNode.size(); i++) {
      JsonNode dependencyNode = dependenciesNode.get(i);
      Fqn fqn = new Fqn(dependencyNode.get(FQN).asText());
      // Register an empty placeholder record for each dependency so that
      // references to its full name resolve during parsing.
      Schema fakeSchema = SchemaBuilder
          .record(fqn.getName()).namespace(fqn.getNamespace())
          .fields()
          .endRecord();
      types.put(fqn.getFqnString(), fakeSchema);
    }
    parser.addTypes(types);
  }
  return parser.parse(avroSchema);
}
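A minimal usage sketch, not from the source: it assumes the DEPENDENCIES and FQN constants resolve to "dependencies" and "fqn" keys in the CTL JSON, and uses a hypothetical org.kaaproject.sample.Address dependency type.

// Hypothetical CTL schema; the "dependencies"/"fqn" key names and the Address
// dependency are assumptions for illustration.
String ctl = "{"
    + " \"type\": \"record\", \"name\": \"Profile\", \"namespace\": \"org.kaaproject.sample\","
    + " \"dependencies\": [{\"fqn\": \"org.kaaproject.sample.Address\"}],"
    + " \"fields\": [{\"name\": \"address\", \"type\": \"org.kaaproject.sample.Address\"}]"
    + "}";
Schema parsed = parseStringCtlSchema(ctl);
// The field's type reference resolves against the empty placeholder record
// registered for the dependency above.
System.out.println(parsed.getField("address").schema().getFullName());
// -> org.kaaproject.sample.Address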
@SuppressWarnings("fallthrough") private Schema getGranularityBasedSchema() { FieldAssembler<Schema> assembler = SchemaBuilder.record("GenericRecordTimePartition").namespace("gobblin.writer.partitioner").fields(); // Construct the fields in reverse order if (!Strings.isNullOrEmpty(this.writerPartitionSuffix)) { assembler = assembler.name(SUFFIX).type(Schema.create(Schema.Type.STRING)).noDefault(); } assembler = assembler.name(this.granularity.toString()).type(Schema.create(Schema.Type.STRING)).noDefault(); if (!Strings.isNullOrEmpty(this.writerPartitionPrefix)) { assembler = assembler.name(PREFIX).type(Schema.create(Schema.Type.STRING)).noDefault(); } Schema schema = assembler.endRecord(); Collections.reverse(schema.getFields()); return schema; }
@Test
public void testSplitSchemaBuild() {
  Schema s = SchemaBuilder
      .record("HandshakeRequest")
      .namespace("org.apache.avro.ipc").fields()
      .name("clientProtocol").type().optional().stringType()
      .name("meta").type().optional().map().values().bytesType()
      .endRecord();

  String schemaString = s.toString();
  int mid = schemaString.length() / 2;

  Schema parsedStringSchema = new org.apache.avro.Schema.Parser().parse(s.toString());
  Schema parsedArrayOfStringSchema = new org.apache.avro.Schema.Parser()
      .parse(schemaString.substring(0, mid), schemaString.substring(mid));

  assertNotNull(parsedStringSchema);
  assertNotNull(parsedArrayOfStringSchema);
  assertEquals(parsedStringSchema.toString(), parsedArrayOfStringSchema.toString());
}
@Test
public void testNamespaces() {
  Schema s1 = SchemaBuilder.record("myrecord")
      .namespace("org.example")
      .fields()
      .name("myint").type().intType().noDefault()
      .endRecord();
  // A dotted record name carries its own namespace, which overrides the
  // explicit namespace() call.
  Schema s3 = SchemaBuilder.record("org.example.myrecord")
      .namespace("org.example2")
      .fields()
      .name("myint").type().intType().noDefault()
      .endRecord();
  Assert.assertEquals("org.example", s1.getNamespace());
  Assert.assertEquals("org.example", s3.getNamespace());
}
@Test
public void testSingleSubRecordExtraField() throws IOException {
  final Schema child = SchemaBuilder.record("Child")
      .namespace("org.apache.avro.nested")
      .fields()
      .requiredString("childField").endRecord();

  final Schema parent = SchemaBuilder.record("Parent")
      .namespace("org.apache.avro.nested")
      .fields()
      .requiredString("parentField1")
      .name("child1").type(child).noDefault()
      .requiredString("parentField2").endRecord();

  final String inputAsExpected = "{\n"
      + "  \"parentField1\": \"parentValue1\",\n"
      + "  \"child1\":{\n"
      + "    \"childField\":\"childValue1\",\n"
      // this field should be safely ignored
      + "    \"extraField\":\"extraValue\"\n"
      + "  },\n"
      + "  \"parentField2\":\"parentValue2\"\n"
      + "}";

  final ByteArrayInputStream inputStream = new ByteArrayInputStream(inputAsExpected.getBytes());
  final JsonDecoder decoder = DecoderFactory.get().jsonDecoder(parent, inputStream);
  final DatumReader<Object> reader = new GenericDatumReader<Object>(parent);
  final GenericData.Record decoded = (GenericData.Record) reader.read(null, decoder);

  assertThat(decoded.get("parentField1").toString(), equalTo("parentValue1"));
  assertThat(decoded.get("parentField2").toString(), equalTo("parentValue2"));
  assertThat(((GenericData.Record) decoded.get("child1")).get("childField").toString(),
      equalTo("childValue1"));
}
@Test
public void testRecord() {
  Schema schema = SchemaBuilder
      .record("myrecord").namespace("org.example").aliases("oldrecord").fields()
      .name("f0").aliases("f0alias").type().stringType().noDefault()
      .name("f1").doc("This is f1").type().longType().noDefault()
      .name("f2").type().nullable().booleanType().booleanDefault(true)
      .endRecord();

  Assert.assertEquals("myrecord", schema.getName());
  Assert.assertEquals("org.example", schema.getNamespace());
  Assert.assertEquals("org.example.oldrecord", schema.getAliases().iterator().next());
  Assert.assertFalse(schema.isError());

  List<Schema.Field> fields = schema.getFields();
  Assert.assertEquals(3, fields.size());
  Assert.assertEquals(
      new Schema.Field("f0", Schema.create(Schema.Type.STRING), null, null),
      fields.get(0));
  Assert.assertTrue(fields.get(0).aliases().contains("f0alias"));
  Assert.assertEquals(
      new Schema.Field("f1", Schema.create(Schema.Type.LONG), "This is f1", null),
      fields.get(1));

  List<Schema> types = new ArrayList<>();
  types.add(Schema.create(Schema.Type.BOOLEAN));
  types.add(Schema.create(Schema.Type.NULL));
  Schema optional = Schema.createUnion(types);
  Assert.assertEquals(new Schema.Field("f2", optional, null, true), fields.get(2));
}
@Test
public void testSingleSubRecord() throws IOException {
  final Schema child = SchemaBuilder.record("Child")
      .namespace("org.apache.avro.nested")
      .fields()
      .requiredString("childField").endRecord();

  final Schema parent = SchemaBuilder.record("Parent")
      .namespace("org.apache.avro.nested")
      .fields()
      .requiredString("parentField1")
      .name("child1").type(child).noDefault()
      .requiredString("parentField2").endRecord();

  final String inputAsExpected = "{\n"
      + "  \"parentField1\": \"parentValue1\",\n"
      + "  \"child1\":{\n"
      + "    \"childField\":\"childValue1\"\n"
      + "  },\n"
      + "  \"parentField2\":\"parentValue2\"\n"
      + "}";

  final ByteArrayInputStream inputStream = new ByteArrayInputStream(inputAsExpected.getBytes());
  final JsonDecoder decoder = DecoderFactory.get().jsonDecoder(parent, inputStream);
  final DatumReader<Object> reader = new GenericDatumReader<Object>(parent);
  final GenericData.Record decoded = (GenericData.Record) reader.read(null, decoder);

  assertThat(decoded.get("parentField1").toString(), equalTo("parentValue1"));
  assertThat(decoded.get("parentField2").toString(), equalTo("parentValue2"));
  assertThat(((GenericData.Record) decoded.get("child1")).get("childField").toString(),
      equalTo("childValue1"));
}
@Test
public void testConfluentAvroDeserializerForSchemaEvolution()
    throws IOException, RestClientException, SchemaRegistryException {
  WorkUnitState mockWorkUnitState = getMockWorkUnitState(0L, 10L);
  mockWorkUnitState.setProp("schema.registry.url", TEST_URL);

  Schema schemaV1 = SchemaBuilder.record(TEST_RECORD_NAME)
      .namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault()
      .endRecord();

  Schema schemaV2 = SchemaBuilder.record(TEST_RECORD_NAME)
      .namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault()
      .optionalString(TEST_FIELD_NAME2).endRecord();

  GenericRecord testGenericRecord = new GenericRecordBuilder(schemaV1).set(TEST_FIELD_NAME, "testValue").build();

  SchemaRegistryClient mockSchemaRegistryClient = mock(SchemaRegistryClient.class);
  when(mockSchemaRegistryClient.getByID(any(Integer.class))).thenReturn(schemaV1);

  Serializer<Object> kafkaEncoder = new KafkaAvroSerializer(mockSchemaRegistryClient);
  Deserializer<Object> kafkaDecoder = new KafkaAvroDeserializer(mockSchemaRegistryClient);

  ByteBuffer testGenericRecordByteBuffer =
      ByteBuffer.wrap(kafkaEncoder.serialize(TEST_TOPIC_NAME, testGenericRecord));

  KafkaSchemaRegistry<Integer, Schema> mockKafkaSchemaRegistry = mock(KafkaSchemaRegistry.class);
  when(mockKafkaSchemaRegistry.getLatestSchemaByTopic(TEST_TOPIC_NAME)).thenReturn(schemaV2);

  KafkaDeserializerExtractor kafkaDecoderExtractor = new KafkaDeserializerExtractor(mockWorkUnitState,
      Optional.fromNullable(Deserializers.CONFLUENT_AVRO), kafkaDecoder, mockKafkaSchemaRegistry);
  when(kafkaDecoderExtractor.getSchema()).thenReturn(schemaV2);

  ByteArrayBasedKafkaRecord mockMessageAndOffset = getMockMessageAndOffset(testGenericRecordByteBuffer);

  GenericRecord received = (GenericRecord) kafkaDecoderExtractor.decodeRecord(mockMessageAndOffset);
  Assert.assertEquals(received.toString(), "{\"testField\": \"testValue\", \"testField2\": null}");
}
// The builder chain is truncated in the original; the tail below is a sketch
// pairing the map's key and value schemas (keyAvroSchema and valueAvroSchema
// are assumed to be in scope).
final org.apache.avro.Schema entrySchema = org.apache.avro.SchemaBuilder.record("MapEntry")
    .namespace("io.confluent.connect.avro")
    .fields()
    .name("key").type(keyAvroSchema).noDefault()
    .name("value").type(valueAvroSchema).noDefault()
    .endRecord();
@Test
public void testRecordMissingRequiredFieldError() throws Exception {
  Schema schemaWithoutField = SchemaBuilder
      .record("MyRecord").namespace("ns")
      .fields()
      .name("field1").type().stringType().noDefault()
      .endRecord();
  Schema schemaWithField = SchemaBuilder
      .record("MyRecord").namespace("ns")
      .fields()
      .name("field1").type().stringType().noDefault()
      .name("field2").type().stringType().noDefault()
      .endRecord();

  GenericData.Record record = new GenericRecordBuilder(schemaWithoutField).set("field1", "someValue").build();
  byte[] data = writeRecord(schemaWithoutField, record);

  try {
    readRecord(schemaWithField, data);
    Assert.fail("Expected exception not thrown");
  } catch (AvroTypeException typeException) {
    Assert.assertEquals("Incorrect exception message",
        "Found ns.MyRecord, expecting ns.MyRecord, missing required field field2",
        typeException.getMessage());
  }
}
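The writeRecord and readRecord helpers are not shown above; one plausible reconstruction that produces exactly this resolution failure is a single-record Avro data-file round trip, where the container header supplies the writer schema and the reader schema drives resolution. The bodies below are an assumed sketch, not the original helpers.

// Assumed helper: writes one record as an Avro container, embedding the writer schema.
private static byte[] writeRecord(Schema schema, GenericRecord record) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  try (DataFileWriter<GenericRecord> writer =
           new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
    writer.create(schema, out);  // container header carries the writer schema
    writer.append(record);
  }
  return out.toByteArray();
}

// Assumed helper: reads the record back, resolving against the given reader schema.
private static GenericRecord readRecord(Schema readerSchema, byte[] data) throws IOException {
  DatumReader<GenericRecord> reader = new GenericDatumReader<>(readerSchema);
  try (DataFileStream<GenericRecord> in =
           new DataFileStream<>(new ByteArrayInputStream(data), reader)) {
    return in.next();  // schema resolution fails here for a missing required field
  }
}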
@Test
public void shouldSerializeStruct() {
  final org.apache.avro.Schema avroSchema =
      org.apache.avro.SchemaBuilder.record(KsqlConstants.AVRO_SCHEMA_NAME + "_field0")
          .namespace(KsqlConstants.AVRO_SCHEMA_NAMESPACE)
          .fields()
          .name("field1")
          .type().unionOf().nullType().and().intType().endUnion()
          .nullDefault()
          .name("field2")
          .type().unionOf().nullType().and().stringType().endUnion()
          .nullDefault()
          .endRecord();
  final GenericRecord avroValue = new GenericData.Record(avroSchema);
  avroValue.put("field1", 123);
  avroValue.put("field2", "foobar");

  final Schema ksqlSchema = SchemaBuilder.struct()
      .field("field1", Schema.OPTIONAL_INT32_SCHEMA)
      .field("field2", Schema.OPTIONAL_STRING_SCHEMA)
      .optional()
      .build();
  final Struct value = new Struct(ksqlSchema);
  value.put("field1", 123);
  value.put("field2", "foobar");

  shouldSerializeTypeCorrectly(ksqlSchema, value, avroSchema, avroValue);
}
@Test
public void testConfluentAvroDeserializer() throws IOException, RestClientException {
  WorkUnitState mockWorkUnitState = getMockWorkUnitState(0L, 10L);
  mockWorkUnitState.setProp("schema.registry.url", TEST_URL);

  Schema schema = SchemaBuilder.record(TEST_RECORD_NAME)
      .namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault()
      .endRecord();

  GenericRecord testGenericRecord = new GenericRecordBuilder(schema).set(TEST_FIELD_NAME, "testValue").build();

  SchemaRegistryClient mockSchemaRegistryClient = mock(SchemaRegistryClient.class);
  when(mockSchemaRegistryClient.getByID(any(Integer.class))).thenReturn(schema);

  Serializer<Object> kafkaEncoder = new KafkaAvroSerializer(mockSchemaRegistryClient);
  Deserializer<Object> kafkaDecoder = new KafkaAvroDeserializer(mockSchemaRegistryClient);

  ByteBuffer testGenericRecordByteBuffer =
      ByteBuffer.wrap(kafkaEncoder.serialize(TEST_TOPIC_NAME, testGenericRecord));

  KafkaSchemaRegistry<Integer, Schema> mockKafkaSchemaRegistry = mock(KafkaSchemaRegistry.class);
  KafkaDeserializerExtractor kafkaDecoderExtractor = new KafkaDeserializerExtractor(mockWorkUnitState,
      Optional.fromNullable(Deserializers.CONFLUENT_AVRO), kafkaDecoder, mockKafkaSchemaRegistry);

  ByteArrayBasedKafkaRecord mockMessageAndOffset = getMockMessageAndOffset(testGenericRecordByteBuffer);
  Assert.assertEquals(kafkaDecoderExtractor.decodeRecord(mockMessageAndOffset), testGenericRecord);
}
private static org.apache.avro.Schema buildAvroSchema(
    final String namespace,
    final String name,
    final Schema schema
) {
  final String avroName = avroify(name);
  final FieldAssembler<org.apache.avro.Schema> fieldAssembler = org.apache.avro.SchemaBuilder
      .record(avroName).namespace(namespace)
      .fields();

  for (final Field field : schema.fields()) {
    final String fieldName = avroify(field.name());
    final String fieldNamespace = namespace + "." + avroName;
    fieldAssembler
        .name(fieldName)
        .type(getAvroSchemaForField(fieldNamespace, fieldName, field.schema()))
        .withDefault(null);
  }

  return fieldAssembler.endRecord();
}
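A usage sketch, not from the source: rowSchema is a hypothetical Connect struct and the name/namespace arguments are illustrative; avroify and getAvroSchemaForField are the helpers referenced above and are not reproduced here.

// Hypothetical Connect row schema (field names and namespace are illustrative).
Schema rowSchema = SchemaBuilder.struct()
    .field("ID", Schema.OPTIONAL_INT64_SCHEMA)
    .field("NAME", Schema.OPTIONAL_STRING_SCHEMA)
    .build();

org.apache.avro.Schema avroSchema =
    buildAvroSchema("io.confluent.ksql.avro_schemas", "KsqlDataSourceSchema", rowSchema);
// Each Connect field lands as an Avro field with a null default, along the lines of:
//   {"name":"ID","type":[...],"default":null}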
public DayPartitioner(State state, int numBranches, int branchId) {
  _withColumnNames = state.getPropAsBoolean(GoggleIngestionConfigurationKeys.KEY_INCLUDE_COLUMN_NAMES, false);
  _prefix = state.getProp(GoggleIngestionConfigurationKeys.KEY_PARTITIONER_PREFIX);
  _withPrefix = StringUtils.isNotBlank(_prefix);
  _dateColumn = state.getProp(GoggleIngestionConfigurationKeys.KEY_DATE_COLUMN_NAME, DEFAULT_DATE_COLUMN);
  _dateFormatter = DateTimeFormat.forPattern(
      state.getProp(GoggleIngestionConfigurationKeys.KEY_DATE_FORMAT, DEFAULT_DATE_FORMAT));

  SchemaBuilder.FieldAssembler<Schema> assembler = SchemaBuilder.record(NAME).namespace(NAME_SPACE).fields();
  Schema stringType = Schema.create(Schema.Type.STRING);
  if (_withPrefix) {
    assembler = assembler.name(PARTITION_COLUMN_PREFIX).type(stringType).noDefault();
  }
  _partitionSchema = assembler.name(PARTITION_COLUMN_YEAR).type(stringType).noDefault()
      .name(PARTITION_COLUMN_MONTH).type(stringType).noDefault()
      .name(PARTITION_COLUMN_DAY).type(stringType).noDefault()
      .endRecord();
}
private void doTestRegisterAndGetLatest(Properties properties) throws SchemaRegistryException {
  SchemaRegistryClient schemaRegistryClient = new MockSchemaRegistryClient();
  KafkaSchemaRegistry<Integer, Schema> kafkaSchemaRegistry =
      new ConfluentKafkaSchemaRegistry(properties, schemaRegistryClient);

  Schema schema1 = SchemaBuilder.record(TEST_RECORD_NAME + "1").namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault().endRecord();
  Schema schema2 = SchemaBuilder.record(TEST_RECORD_NAME + "2").namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault().endRecord();

  kafkaSchemaRegistry.register(schema1, TEST_TOPIC_NAME);
  kafkaSchemaRegistry.register(schema2, TEST_TOPIC_NAME);

  Assert.assertNotEquals(schema1, kafkaSchemaRegistry.getLatestSchemaByTopic(TEST_TOPIC_NAME));
  Assert.assertEquals(schema2, kafkaSchemaRegistry.getLatestSchemaByTopic(TEST_TOPIC_NAME));
}
@Test
public void testAvroAliasOnField() {
  Schema expectedSchema = SchemaBuilder.record(ClassWithAliasOnField.class.getSimpleName())
      .namespace("org.apache.avro.reflect.TestReflect").fields()
      .name("primitiveField").aliases("aliasName")
      .type(Schema.create(org.apache.avro.Schema.Type.INT)).noDefault()
      .endRecord();
  check(ClassWithAliasOnField.class, expectedSchema.toString());
}
@Test
public void testRegisterAndGetByKey() throws SchemaRegistryException {
  Properties properties = new Properties();
  properties.setProperty(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_URL, TEST_URL);

  SchemaRegistryClient schemaRegistryClient = new MockSchemaRegistryClient();
  KafkaSchemaRegistry<Integer, Schema> kafkaSchemaRegistry =
      new ConfluentKafkaSchemaRegistry(properties, schemaRegistryClient);

  Schema schema = SchemaBuilder.record(TEST_RECORD_NAME).namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault().endRecord();

  Integer id = kafkaSchemaRegistry.register(schema);
  Assert.assertEquals(schema, kafkaSchemaRegistry.getSchemaByKey(id));
}
private Schema getDateTimeFormatBasedSchema() {
  FieldAssembler<Schema> assembler = SchemaBuilder.record("GenericRecordTimePartition")
      .namespace("gobblin.writer.partitioner").fields();

  if (!Strings.isNullOrEmpty(this.writerPartitionPrefix)) {
    assembler = assembler.name(PREFIX).type(Schema.create(Schema.Type.STRING)).noDefault();
  }
  assembler = assembler.name(PARTITIONED_PATH).type(Schema.create(Schema.Type.STRING)).noDefault();
  if (!Strings.isNullOrEmpty(this.writerPartitionSuffix)) {
    assembler = assembler.name(SUFFIX).type(Schema.create(Schema.Type.STRING)).noDefault();
  }

  return assembler.endRecord();
}
public static void createEmptyAvroStream(final OutputStream outStream) throws IOException {
  final FieldAssembler<Schema> builder =
      SchemaBuilder.record("NiFi_ExecuteSQL_Record").namespace("any.data").fields();
  final Schema schema = builder.endRecord();

  final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
    dataFileWriter.create(schema, outStream);
  }
}
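A usage sketch of the above: the bytes written are a complete Avro container with a header but no records, so a DataFileStream opens it cleanly and reports nothing to read.

ByteArrayOutputStream out = new ByteArrayOutputStream();
createEmptyAvroStream(out);

try (DataFileStream<GenericRecord> in = new DataFileStream<>(
    new ByteArrayInputStream(out.toByteArray()), new GenericDatumReader<>())) {
  System.out.println(in.getSchema());  // the zero-field NiFi_ExecuteSQL_Record schema
  System.out.println(in.hasNext());    // false: header only, no records
}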
@Override
public Object getSchema() {
  return SchemaBuilder.record(TEST_RECORD_NAME)
      .namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault()
      .endRecord();
  }
};