/**
 * Looks up the newest schema registered for {@code payloadSchemaTopic} and
 * re-initializes {@code latestPayloadReader} so subsequent reads use that schema.
 *
 * @return the latest payload {@link Schema} fetched from the registry
 * @throws Exception if the registry lookup fails
 */
protected Schema fetchLatestPayloadSchema() throws Exception {
  Schema fetched = (Schema) registry.getLatestSchemaByTopic(payloadSchemaTopic);
  latestPayloadReader = new GenericDatumReader<>(fetched);
  return fetched;
}
/**
 * Wires up the optional schema registry from the work-unit configuration, resolves
 * the extractor schema, and prepares an Avro datum reader for it. When no schema can
 * be resolved the reader is left absent and the topic is skipped.
 */
public KafkaAvroExtractor(WorkUnitState state) {
  super(state);
  if (state.contains(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_CLASS)) {
    this.schemaRegistry = Optional.of(KafkaSchemaRegistry.<K, Schema> get(state.getProperties()));
  } else {
    this.schemaRegistry = Optional.<KafkaSchemaRegistry<K, Schema>> absent();
  }
  this.schema = getExtractorSchema();
  if (!this.schema.isPresent()) {
    log.error(String.format("Cannot find latest schema for topic %s. This topic will be skipped", this.topicName));
    this.reader = Optional.absent();
  } else {
    this.reader = Optional.of(new GenericDatumReader<Record>(this.schema.get()));
  }
}
/**
 * Resolves the schema referenced by the schema-id field embedded in a record.
 *
 * @param record the input record which carries the schema id
 * @param schemaIdLocation a dot separated location string of the schema id
 * @return the schema the registry associates with that id
 * @throws Exception if the record holds no value at {@code schemaIdLocation}
 */
protected Schema getFieldSchema(GenericRecord record, String schemaIdLocation) throws Exception {
  Optional<Object> idValue = AvroUtils.getFieldValue(record, schemaIdLocation);
  if (idValue.isPresent()) {
    return (Schema) registry.getSchemaByKey(String.valueOf(idValue.get()));
  }
  throw new Exception("Schema id with key " + schemaIdLocation + " not found in the record");
}
// Stub the registry: any topic resolves to the latest payload schema, and any
// schema key resolves to the schema of the input record's "nestedRecord" field.
when(mockRegistry.getLatestSchemaByTopic(any())).thenReturn(latestPayloadSchema);
when(mockRegistry.getSchemaByKey(any())).thenReturn(inputSchema.getField("nestedRecord").schema());
/**
 * Registers two distinct schemas under the same topic and verifies that the
 * registry reports the most recently registered one as the latest.
 */
private void doTestRegisterAndGetLatest(Properties properties) throws SchemaRegistryException {
  SchemaRegistryClient client = new MockSchemaRegistryClient();
  KafkaSchemaRegistry<Integer, Schema> registry = new ConfluentKafkaSchemaRegistry(properties, client);

  Schema first = SchemaBuilder.record(TEST_RECORD_NAME + "1").namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault().endRecord();
  Schema second = SchemaBuilder.record(TEST_RECORD_NAME + "2").namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault().endRecord();

  registry.register(first, TEST_TOPIC_NAME);
  registry.register(second, TEST_TOPIC_NAME);

  // The latest schema must be the second registration, not the first.
  Assert.assertNotEquals(first, registry.getLatestSchemaByTopic(TEST_TOPIC_NAME));
  Assert.assertEquals(second, registry.getLatestSchemaByTopic(TEST_TOPIC_NAME));
}
}
/**
 * Verifies that a schema registered without an explicit topic can be fetched
 * back via the id the registry assigned to it.
 */
@Test
public void testRegisterAndGetByKey() throws SchemaRegistryException {
  Properties props = new Properties();
  props.setProperty(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_URL, TEST_URL);
  SchemaRegistryClient client = new MockSchemaRegistryClient();
  KafkaSchemaRegistry<Integer, Schema> registry = new ConfluentKafkaSchemaRegistry(props, client);

  Schema schema = SchemaBuilder.record(TEST_RECORD_NAME).namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault().endRecord();

  Integer id = registry.register(schema);
  Assert.assertEquals(schema, registry.getSchemaByKey(id));
}
// Stub the registry: any topic resolves to the latest payload schema, and any
// schema key resolves to the schema of the input record's "nestedRecord" field.
when(mockRegistry.getLatestSchemaByTopic(any())).thenReturn(latestPayloadSchema);
when(mockRegistry.getSchemaByKey(any())).thenReturn(inputSchema.getField("nestedRecord").schema());
/**
 * Get the schema (metadata) of the extracted data records.
 *
 * @return the latest schema registered for the Kafka topic being extracted
 *         (the previous javadoc incorrectly said the topic itself was returned)
 * @throws IOException if there is problem getting the schema
 */
@Override
public String getSchema() throws IOException {
  try {
    return this.kafkaSchemaRegistry.getLatestSchemaByTopic(this.topicName);
  } catch (SchemaRegistryException e) {
    // NOTE(review): the registry failure is rethrown unchecked even though the
    // signature declares IOException, so callers catching IOException will not see
    // it. Kept as-is to preserve the existing caller contract — confirm intent.
    throw new RuntimeException(e);
  }
}
}
/**
 * Constructs a {@link KafkaSchemaRegistry} using the value of {@link #KAFKA_DESERIALIZER_TYPE}, if not set it
 * defaults to {@link SimpleKafkaSchemaRegistry}.
 *
 * @param props configuration properties; {@code KAFKA_DESERIALIZER_TYPE} may be absent
 * @return the registry selected by the deserializer type, the configured registry
 *         class, or a {@link SimpleKafkaSchemaRegistry} as the final fallback
 * @throws ReflectiveOperationException if the registry class cannot be instantiated
 */
private static KafkaSchemaRegistry<?, ?> getKafkaSchemaRegistry(Properties props)
    throws ReflectiveOperationException {
  // Bug fix: the previous code called getProperty(...).toUpperCase() directly and
  // threw an NPE when KAFKA_DESERIALIZER_TYPE was unset, contradicting the javadoc's
  // promise to fall through to the defaults below. An empty default avoids that.
  String deserializerName = props.getProperty(KAFKA_DESERIALIZER_TYPE, "");
  Optional<Deserializers> deserializerType =
      Enums.getIfPresent(Deserializers.class, deserializerName.toUpperCase());
  if (deserializerType.isPresent()) {
    return ConstructorUtils.invokeConstructor(deserializerType.get().getSchemaRegistryClass(), props);
  }
  if (props.containsKey(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_CLASS)) {
    return KafkaSchemaRegistry.get(props);
  }
  return new SimpleKafkaSchemaRegistry(props);
}
/**
 * Resolves the concrete payload schema in the registry (via the id embedded in the
 * record), deserializes the payload bytes with it, and emits a single output record.
 * When a field remover is configured, the pruned schema is applied to the output.
 */
@Override
public Iterable<GenericRecord> convertRecord(String outputSchema, GenericRecord inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  try {
    String schemaIdField = workUnit.contains(PAYLOAD_SCHEMA_ID_FIELD)
        ? workUnit.getProp(PAYLOAD_SCHEMA_ID_FIELD) : DEFAULT_PAYLOAD_SCHEMA_ID_FIELD;
    String payloadField = workUnit.contains(PAYLOAD_FIELD)
        ? workUnit.getProp(PAYLOAD_FIELD) : DEFAULT_PAYLOAD_FIELD;

    Schema payloadSchema =
        (Schema) this.registry.getSchemaByKey(String.valueOf(inputRecord.get(schemaIdField)));
    GenericRecord outputRecord = deserializePayload(getPayload(inputRecord, payloadField), payloadSchema);

    if (this.fieldRemover.isPresent()) {
      payloadSchema = this.fieldRemover.get().removeFields(payloadSchema);
    }
    return new SingleRecordIterable<>(AvroUtils.convertRecordSchema(outputRecord, payloadSchema));
  } catch (IOException | SchemaRegistryException | ExecutionException e) {
    throw new DataConversionException(e);
  }
}
/**
 * Fetches the latest schema for {@code topic} from the (required) schema registry.
 * Returns {@code null} when the lookup fails so the caller can skip the topic.
 */
protected Schema getLatestSchemaByTopic(String topic) {
  Preconditions.checkState(this.schemaRegistry.isPresent());
  Schema latest = null;
  try {
    latest = this.schemaRegistry.get().getLatestSchemaByTopic(topic);
  } catch (SchemaRegistryException e) {
    log.error(String.format("Cannot find latest schema for topic %s. This topic will be skipped", topic), e);
  }
  return latest;
}
/**
 * Sets up the streaming extractor: builds the Kafka consumer (registered with the
 * closer for cleanup), pins it to the single topic-partition described by the work
 * unit, wires up the optional schema registry, and reads the fetch timeout.
 */
public KafkaSimpleStreamingExtractor(WorkUnitState state) {
  super(state);
  _consumer = KafkaSimpleStreamingSource.getKafkaConsumer(ConfigUtils.propertiesToConfig(state.getProperties()));
  closer.register(_consumer);
  _partition = new TopicPartition(KafkaSimpleStreamingSource.getTopicNameFromState(state),
      KafkaSimpleStreamingSource.getPartitionIdFromState(state));
  _consumer.assign(Collections.singletonList(_partition));
  if (state.contains(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_CLASS)) {
    this._schemaRegistry = Optional.of(KafkaSchemaRegistry.<String, S>get(state.getProperties()));
  } else {
    this._schemaRegistry = Optional.<KafkaSchemaRegistry<String, S>>absent();
  }
  this.fetchTimeOut = state.getPropAsLong(AbstractBaseKafkaConsumerClient.CONFIG_KAFKA_FETCH_TIMEOUT_VALUE,
      AbstractBaseKafkaConsumerClient.CONFIG_KAFKA_FETCH_TIMEOUT_VALUE_DEFAULT);
}
/**
 * Looks up the schema referenced by the schema-id value stored inside a record.
 *
 * @param record the input record which has the schema id
 * @param schemaIdLocation a dot separated location string of the schema id
 * @return the schema the registry maps that id to
 * @throws Exception when no value exists at {@code schemaIdLocation}
 */
protected Schema getFieldSchema(GenericRecord record, String schemaIdLocation) throws Exception {
  Optional<Object> maybeId = AvroUtils.getFieldValue(record, schemaIdLocation);
  if (!maybeId.isPresent()) {
    throw new Exception("Schema id with key " + schemaIdLocation + " not found in the record");
  }
  String key = String.valueOf(maybeId.get());
  return (Schema) registry.getSchemaByKey(key);
}
@Override public Object getSchema() { try { LOG.info("Getting schema for {}. Gap: {} HighWaterMark: {}", this.topicName, this.lowWatermark.getGap(this.highWatermark)); //If HighWatermark equals LowWatermark that might mean the workunit is an empty workunit if (this.lowWatermark.getGap(this.highWatermark) == 0) { LOG.info("Not getting schema for {} as the gap between high and low watermark is 0", this.topicName); return null; } return this.kafkaSchemaRegistry.getLatestSchemaByTopic(this.topicName); } catch (SchemaRegistryException e) { throw new RuntimeException(e); } }
public KafkaAvroExtractor(WorkUnitState state) { super(state); this.schemaRegistry = state.contains(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_CLASS) ? Optional.of(KafkaSchemaRegistry.<K, Schema> get(state.getProperties())) : Optional.<KafkaSchemaRegistry<K, Schema>> absent(); this.schema = getExtractorSchema(); if (this.schema.isPresent()) { this.reader = Optional.of(new GenericDatumReader<Record>(this.schema.get())); } else { log.error(String.format("Cannot find latest schema for topic %s. This topic will be skipped", this.topicName)); this.reader = Optional.absent(); } }
/**
 * Deserializes the record's payload with the concrete schema fetched from the
 * registry (keyed by the id embedded in the record) and returns it as a
 * single-record iterable, optionally with configured fields removed.
 */
@Override
public Iterable<GenericRecord> convertRecord(String outputSchema, GenericRecord inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  try {
    String schemaIdField = DEFAULT_PAYLOAD_SCHEMA_ID_FIELD;
    if (workUnit.contains(PAYLOAD_SCHEMA_ID_FIELD)) {
      schemaIdField = workUnit.getProp(PAYLOAD_SCHEMA_ID_FIELD);
    }
    String payloadField = DEFAULT_PAYLOAD_FIELD;
    if (workUnit.contains(PAYLOAD_FIELD)) {
      payloadField = workUnit.getProp(PAYLOAD_FIELD);
    }

    String schemaKey = String.valueOf(inputRecord.get(schemaIdField));
    Schema payloadSchema = (Schema) this.registry.getSchemaByKey(schemaKey);
    byte[] payload = getPayload(inputRecord, payloadField);
    GenericRecord outputRecord = deserializePayload(payload, payloadSchema);

    if (this.fieldRemover.isPresent()) {
      payloadSchema = this.fieldRemover.get().removeFields(payloadSchema);
    }
    return new SingleRecordIterable<>(AvroUtils.convertRecordSchema(outputRecord, payloadSchema));
  } catch (IOException | SchemaRegistryException | ExecutionException e) {
    throw new DataConversionException(e);
  }
}
/**
 * Get the schema (metadata) of the extracted data records.
 *
 * @return the schema of Kafka topic being extracted; when no registry is
 *         configured, the topic name itself is returned in its place
 * @throws IOException if there is problem getting the schema
 */
@Override
public S getSchema() throws IOException {
  if (_schemaRegistry.isPresent()) {
    try {
      return _schemaRegistry.get().getLatestSchemaByTopic(this._partition.topic());
    } catch (SchemaRegistryException e) {
      throw new RuntimeException(e);
    }
  }
  // No registry configured: fall back to the topic name as the "schema".
  return ((S) this._partition.topic());
}
/**
 * Constructs a {@link KafkaSchemaRegistry} using the value of {@link #KAFKA_DESERIALIZER_TYPE}, if not set it
 * defaults to {@link SimpleKafkaSchemaRegistry}.
 *
 * @param props configuration properties; {@code KAFKA_DESERIALIZER_TYPE} may be absent
 * @return the registry chosen by deserializer type, by configured registry class,
 *         or a {@link SimpleKafkaSchemaRegistry} as the final fallback
 * @throws ReflectiveOperationException if the registry class cannot be instantiated
 */
private static KafkaSchemaRegistry<?, ?> getKafkaSchemaRegistry(Properties props)
    throws ReflectiveOperationException {
  // Bug fix: calling getProperty(KAFKA_DESERIALIZER_TYPE).toUpperCase() NPE'd when
  // the property was unset, breaking the documented fallback behavior. Defaulting
  // to "" makes Enums.getIfPresent return absent and the fallbacks run as promised.
  String deserializerName = props.getProperty(KAFKA_DESERIALIZER_TYPE, "");
  Optional<Deserializers> deserializerType =
      Enums.getIfPresent(Deserializers.class, deserializerName.toUpperCase());
  if (deserializerType.isPresent()) {
    return ConstructorUtils.invokeConstructor(deserializerType.get().getSchemaRegistryClass(), props);
  }
  if (props.containsKey(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_CLASS)) {
    return KafkaSchemaRegistry.get(props);
  }
  return new SimpleKafkaSchemaRegistry(props);
}
/**
 * When the low and high watermarks are equal (empty work unit), getSchema() must
 * not query the schema registry.
 */
@Test
public void testConfluentShouldNotQuerySchemaRegistryWhenTheGapIsZero()
    throws IOException, RestClientException, SchemaRegistryException {
  WorkUnitState mockWorkUnitState = getMockWorkUnitState(0L, 0L);
  mockWorkUnitState.setProp("schema.registry.url", TEST_URL);
  SchemaRegistryClient mockSchemaRegistryClient = mock(SchemaRegistryClient.class);
  Deserializer<Object> kafkaDecoder = new KafkaAvroDeserializer(mockSchemaRegistryClient);
  KafkaSchemaRegistry<Integer, Schema> mockKafkaSchemaRegistry = mock(KafkaSchemaRegistry.class);
  KafkaDeserializerExtractor kafkaDecoderExtractor = new KafkaDeserializerExtractor(mockWorkUnitState,
      Optional.fromNullable(Deserializers.CONFLUENT_AVRO), kafkaDecoder, mockKafkaSchemaRegistry);

  kafkaDecoderExtractor.getSchema();

  // Bug fix: the verification previously ran BEFORE getSchema(), so it only proved
  // the constructor did not hit the registry. It must run after the call under test
  // to assert that getSchema() itself skipped the registry lookup.
  verify(mockKafkaSchemaRegistry, never()).getLatestSchemaByTopic(any());
}
// Verifies schema evolution: a record serialized with schema V1 is decoded against
// the registry's latest schema V2 (V1 plus an optional field), and the new field
// materializes as null.
@Test
public void testConfluentAvroDeserializerForSchemaEvolution() throws IOException, RestClientException,
    SchemaRegistryException {
  WorkUnitState mockWorkUnitState = getMockWorkUnitState(0L, 10L);
  mockWorkUnitState.setProp("schema.registry.url", TEST_URL);

  // V1: single required string field; V2: same plus an optional string field.
  Schema schemaV1 = SchemaBuilder.record(TEST_RECORD_NAME)
      .namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault()
      .endRecord();
  Schema schemaV2 = SchemaBuilder.record(TEST_RECORD_NAME)
      .namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault()
      .optionalString(TEST_FIELD_NAME2).endRecord();

  GenericRecord testGenericRecord = new GenericRecordBuilder(schemaV1).set(TEST_FIELD_NAME, "testValue").build();

  // The Confluent client resolves every schema id to V1 (the writer's schema).
  SchemaRegistryClient mockSchemaRegistryClient = mock(SchemaRegistryClient.class);
  when(mockSchemaRegistryClient.getByID(any(Integer.class))).thenReturn(schemaV1);

  Serializer<Object> kafkaEncoder = new KafkaAvroSerializer(mockSchemaRegistryClient);
  Deserializer<Object> kafkaDecoder = new KafkaAvroDeserializer(mockSchemaRegistryClient);

  ByteBuffer testGenericRecordByteBuffer =
      ByteBuffer.wrap(kafkaEncoder.serialize(TEST_TOPIC_NAME, testGenericRecord));

  // The Gobblin registry reports V2 as the latest (reader's) schema for the topic.
  KafkaSchemaRegistry<Integer, Schema> mockKafkaSchemaRegistry = mock(KafkaSchemaRegistry.class);
  when(mockKafkaSchemaRegistry.getLatestSchemaByTopic(TEST_TOPIC_NAME)).thenReturn(schemaV2);

  KafkaDeserializerExtractor kafkaDecoderExtractor = new KafkaDeserializerExtractor(mockWorkUnitState,
      Optional.fromNullable(Deserializers.CONFLUENT_AVRO), kafkaDecoder, mockKafkaSchemaRegistry);

  // NOTE(review): when(...) is being called on kafkaDecoderExtractor, which is
  // constructed with `new` and does not appear to be a Mockito mock or spy —
  // Mockito should reject this at runtime (MissingMethodInvocationException).
  // Confirm whether the extractor is meant to be spy()'d, or whether this line is
  // redundant given the registry stub above already returns schemaV2.
  when(kafkaDecoderExtractor.getSchema()).thenReturn(schemaV2);

  ByteArrayBasedKafkaRecord mockMessageAndOffset = getMockMessageAndOffset(testGenericRecordByteBuffer);

  GenericRecord received = (GenericRecord) kafkaDecoderExtractor.decodeRecord(mockMessageAndOffset);
  Assert.assertEquals(received.toString(), "{\"testField\": \"testValue\", \"testField2\": null}");
}