/**
 * Returns a new {@link KsqlJsonSerializer} constructed from {@code kafkaSchema}.
 *
 * <p>{@code avroSchema} and {@code topicName} are accepted to satisfy the overridden signature
 * but are not read by this implementation.
 */
@Override protected Serializer<GenericRow> getSerializer( final Schema avroSchema, final org.apache.kafka.connect.data.Schema kafkaSchema, final String topicName ) { return new KsqlJsonSerializer(kafkaSchema); } }
/**
 * Coerces each element of an array column via {@code enforceFieldType}, using the value schema
 * of the array's schema.
 *
 * @param fieldSchema schema of the array column; {@code valueSchema()} describes its elements
 * @param arrayList the elements to coerce
 * @return a new list containing the coerced elements in iteration order
 */
private List<?> enforceFieldTypeForArray(final Schema fieldSchema, final List<?> arrayList) {
  final List<Object> coerced = new ArrayList<>(arrayList.size());
  arrayList.forEach(
      element -> coerced.add(enforceFieldType(fieldSchema.valueSchema(), element)));
  return coerced;
}
/**
 * Structurally compares two schemas.
 *
 * <p>The schemas match when their types are equal and, for STRUCT, ARRAY and MAP types, their
 * nested schemas match recursively. All other types match on type equality alone.
 *
 * @param schema1 first schema
 * @param schema2 second schema
 * @return {@code true} if the two schemas are considered equivalent
 */
private boolean compareSchemas(final Schema schema1, final Schema schema2) {
  final boolean sameType = schema1.type() == schema2.type();
  if (!sameType) {
    return false;
  }

  final boolean matches;
  switch (schema1.type()) {
    case STRUCT:
      matches = compareStructSchema(schema1, schema2);
      break;
    case ARRAY:
      matches = compareSchemas(schema1.valueSchema(), schema2.valueSchema());
      break;
    case MAP:
      // Values are checked first; keys are only compared when the values already match.
      if (!compareSchemas(schema1.valueSchema(), schema2.valueSchema())) {
        return false;
      }
      matches = compareSchemas(schema1.keySchema(), schema2.keySchema());
      break;
    default:
      matches = true;
      break;
  }
  return matches;
}
/**
 * Serializes a row whose schema contains a struct column and checks the resulting JSON: the
 * number of top-level fields, the {@code ordertime} value and the nested {@code itemid.NAME}.
 */
@Test
public void shouldHandleStruct() throws IOException {
  // Given:
  final KsqlJsonSerializer serializer = new KsqlJsonSerializer(getSchemaWithStruct());
  final GenericRow row = getGenericRow();

  // When:
  final byte[] serialized = serializer.serialize("", row);

  // Then:
  final JsonNode root = new ObjectMapper().readTree(serialized);
  assertThat(root.size(), equalTo(7));
  assertThat(root.get("ordertime").asLong(), equalTo(row.getColumns().get(0)));
  assertThat(root.get("itemid").get("NAME").asText(), equalTo("Item_10"));
}
/**
 * Verifies that null column values are written as JSON {@code null} for the array and map
 * columns of the order schema.
 */
@Test
public void shouldSerializeRowWithNull() {
  // Parameterised list instead of the raw List type, so the GenericRow call is checked.
  final List<Object> columns =
      Arrays.asList(1511897796092L, 1L, "item_1", 10.0, null, null);
  final GenericRow genericRow = new GenericRow(columns);

  // This object is a serializer; the local is named accordingly.
  final KsqlJsonSerializer serializer = new KsqlJsonSerializer(orderSchema);

  final byte[] bytes = serializer.serialize("t1", genericRow);
  final String jsonString = new String(bytes, StandardCharsets.UTF_8);

  assertThat("Incorrect serialization.", jsonString, equalTo(
      "{\"ORDERTIME\":1511897796092,\"ORDERID\":1,\"ITEMID\":\"item_1\",\"ORDERUNITS\":10.0,\"ARRAYCOL\":null,\"MAPCOL\":null}"));
}
/**
 * Builds a {@link Serde} for {@link GenericRow} from a {@link KsqlJsonSerializer} and a
 * {@link KsqlJsonDeserializer}, both created for {@code schema} and configured with the same
 * property map.
 *
 * <p>{@code ksqlConfig} and {@code schemaRegistryClientFactory} are not read here.
 *
 * @param schema schema handed to both the serializer and the deserializer
 * @param isInternal forwarded to the deserializer's constructor
 * @param loggerNamePrefix joined with {@code SerdeUtils.DESERIALIZER_LOGGER_NAME} to name the
 *     deserializer's processing logger
 * @return the combined serde
 */
@Override
public Serde<GenericRow> getGenericRowSerde(
    final Schema schema,
    final KsqlConfig ksqlConfig,
    final boolean isInternal,
    final Supplier<SchemaRegistryClient> schemaRegistryClientFactory,
    final String loggerNamePrefix) {
  final Map<String, Object> props = new HashMap<>();
  props.put("JsonPOJOClass", GenericRow.class);

  final Serializer<GenericRow> serializer = new KsqlJsonSerializer(schema);
  serializer.configure(props, false);

  final Deserializer<GenericRow> deserializer = new KsqlJsonDeserializer(
      schema,
      isInternal,
      ProcessingLoggerFactory.getLogger(
          join(loggerNamePrefix, SerdeUtils.DESERIALIZER_LOGGER_NAME)));
  deserializer.configure(props, false);

  return Serdes.serdeFrom(serializer, deserializer);
}
}
@Test public void testTopicMap() { final KsqlTopic ksqlTopic1 = new KsqlTopic("testTopic", "testTopicKafka", new KsqlJsonTopicSerDe(), false); metaStore.putTopic(ksqlTopic1); final KsqlTopic ksqlTopic2 = metaStore.getTopic("testTopic"); Assert.assertNotNull(ksqlTopic2); // Check non-existent topic final KsqlTopic ksqlTopic3 = metaStore.getTopic("TESTTOPIC_"); Assert.assertNull(ksqlTopic3); }
/**
 * When a column is declared as a string but the incoming JSON holds an object for that field,
 * the deserialized column is expected to be the JSON text of that object.
 */
@Test
public void shouldCreateJsonStringForStructIfDefinedAsVarchar() throws JsonProcessingException {
  // Given:
  final String expectedColumn =
      "{\"CATEGORY\":{\"ID\":2,\"NAME\":\"Food\"},\"ITEMID\":6,\"NAME\":\"Item_6\"}";
  final String payload =
      "{\"itemid\":{\"CATEGORY\":{\"ID\":2,\"NAME\":\"Food\"},\"ITEMID\":6,\"NAME\":\"Item_6\"}}";

  final Schema stringColumnSchema = SchemaBuilder.struct()
      .field("itemid".toUpperCase(), Schema.OPTIONAL_STRING_SCHEMA)
      .build();
  final KsqlJsonDeserializer deserializer =
      new KsqlJsonDeserializer(stringColumnSchema, false, recordLogger);
  final GenericRow expected = new GenericRow(Collections.singletonList(expectedColumn));

  // When:
  final GenericRow actual =
      deserializer.deserialize("", payload.getBytes(StandardCharsets.UTF_8));

  // Then:
  assertThat(actual, equalTo(expected));
}
/**
 * Initialises the fixture fields: {@code orderSchema}, a six-column struct schema (two longs, a
 * string, a double, an optional array of doubles and an optional string-to-double map), and
 * {@code ksqlJsonDeserializer}, a deserializer built for that schema.
 */
@Before
public void before() {
  final org.apache.kafka.connect.data.Schema optionalLong =
      org.apache.kafka.connect.data.Schema.OPTIONAL_INT64_SCHEMA;
  final org.apache.kafka.connect.data.Schema optionalString =
      org.apache.kafka.connect.data.Schema.OPTIONAL_STRING_SCHEMA;
  final org.apache.kafka.connect.data.Schema optionalDouble =
      org.apache.kafka.connect.data.Schema.OPTIONAL_FLOAT64_SCHEMA;

  orderSchema = SchemaBuilder.struct()
      .field("ordertime".toUpperCase(), optionalLong)
      .field("orderid".toUpperCase(), optionalLong)
      .field("itemid".toUpperCase(), optionalString)
      .field("orderunits".toUpperCase(), optionalDouble)
      .field("arraycol".toUpperCase(),
          SchemaBuilder.array(optionalDouble).optional().build())
      .field("mapcol".toUpperCase(),
          SchemaBuilder.map(optionalString, optionalDouble).optional().build())
      .build();

  ksqlJsonDeserializer = new KsqlJsonDeserializer(orderSchema, false, recordLogger);
}
/**
 * Deserializes a JSON object whose scalar and map fields are null and whose array field
 * contains a null element, and expects the nulls to be carried through to the resulting row.
 */
@Test
public void shouldTreatNullAsNull() throws JsonProcessingException {
  // Given:
  final Map<String, Object> input = new HashMap<>();
  input.put("ordertime", null);
  input.put("@orderid", null);
  input.put("itemid", null);
  input.put("orderunits", null);
  input.put("arrayCol", new Double[]{0.0, null});
  input.put("mapCol", null);
  final byte[] payload = objectMapper.writeValueAsBytes(input);

  final GenericRow expected = new GenericRow(
      Arrays.asList(null, null, null, null, new Double[]{0.0, null}, null));

  // When:
  final GenericRow actual = ksqlJsonDeserializer.deserialize("", payload);

  // Then:
  assertThat(actual, equalTo(expected));
}
/**
 * Compares two struct schemas field by field, in declaration order.
 *
 * <p>The structs match when they have the same number of fields and each pair of fields at the
 * same position has equal names (ignoring case) and matching schemas per
 * {@code compareSchemas}.
 *
 * @param schema1 first struct schema
 * @param schema2 second struct schema
 * @return {@code true} if every positional field pair matches
 */
private boolean compareStructSchema(final Schema schema1, final Schema schema2) {
  final int fieldCount = schema1.fields().size();
  if (fieldCount != schema2.fields().size()) {
    return false;
  }

  for (int idx = 0; idx < fieldCount; idx++) {
    final boolean sameName = schema1.fields().get(idx).name()
        .equalsIgnoreCase(schema2.fields().get(idx).name());
    if (!sameName) {
      return false;
    }
    // Nested schemas are only compared once the names are known to agree.
    if (!compareSchemas(schema1.fields().get(idx).schema(),
        schema2.fields().get(idx).schema())) {
      return false;
    }
  }
  return true;
}
/**
 * Deserializes a JSON payload into a {@link GenericRow}.
 *
 * <p>Any exception raised while converting the payload (or while trace-logging the result) is
 * reported to {@code recordLogger} together with the raw bytes, then rethrown wrapped in a
 * {@link SerializationException} that names the topic.
 *
 * @param topic topic the record came from; used for logging and in the error message
 * @param bytes serialized record
 * @return the row produced by {@code getGenericRow}
 * @throws SerializationException if the payload cannot be converted
 */
@Override
public GenericRow deserialize(final String topic, final byte[] bytes) {
  final GenericRow deserialized;
  try {
    deserialized = getGenericRow(bytes);
    if (LOG.isTraceEnabled()) {
      LOG.trace("Deserialized row. topic:{}, row:{}", topic, deserialized);
    }
  } catch (final Exception cause) {
    recordLogger.error(
        ProcessingLogMessageFactory.deserializationErrorMsg(
            cause, Optional.ofNullable(bytes)));
    final String message =
        "KsqlJsonDeserializer failed to deserialize data for topic: " + topic;
    throw new SerializationException(message, cause);
  }
  return deserialized;
}
/**
 * Converts a raw column value to the Java representation required by the field's schema type.
 *
 * <p>Scalars are converted through {@code SerdeUtils} / {@code processString}; arrays, maps and
 * structs are delegated to the matching {@code enforceFieldTypeFor*} helper after an unchecked
 * cast of the raw value.
 *
 * @param fieldSchema schema describing the target type
 * @param columnVal raw value; may be {@code null}
 * @return the converted value, or {@code null} when {@code columnVal} is {@code null}
 * @throws KsqlException if the schema type is not one of the handled types
 */
@SuppressWarnings("unchecked")
private Object enforceFieldType(final Schema fieldSchema, final Object columnVal) {
  if (columnVal == null) {
    return null;
  }

  final Object converted;
  switch (fieldSchema.type()) {
    case BOOLEAN:
      converted = SerdeUtils.toBoolean(columnVal);
      break;
    case INT32:
      converted = SerdeUtils.toInteger(columnVal);
      break;
    case INT64:
      converted = SerdeUtils.toLong(columnVal);
      break;
    case FLOAT64:
      converted = SerdeUtils.toDouble(columnVal);
      break;
    case STRING:
      converted = processString(columnVal);
      break;
    case ARRAY: {
      final List<?> rawList = (List<?>) columnVal;
      converted = enforceFieldTypeForArray(fieldSchema, rawList);
      break;
    }
    case MAP: {
      final Map<String, Object> rawMap = (Map<String, Object>) columnVal;
      converted = enforceFieldTypeForMap(fieldSchema, rawMap);
      break;
    }
    case STRUCT: {
      final Map<String, Object> rawStruct = (Map<String, Object>) columnVal;
      converted = enforceFieldTypeForStruct(fieldSchema, rawStruct);
      break;
    }
    default:
      throw new KsqlException("Type is not supported: " + fieldSchema.type());
  }
  return converted;
}
/**
 * Builds a {@link Struct} for {@code fieldSchema} from a map of raw values.
 *
 * <p>For each schema field, the upper-cased field name is looked up in the map returned by
 * {@code getCaseInsensitiveFieldNameMap(structMap, false)} to find the key to read from
 * {@code structMap}; the value found (possibly {@code null}) is coerced with
 * {@code enforceFieldType} and stored under the schema's field name.
 *
 * @param fieldSchema struct schema to populate
 * @param structMap raw values keyed by the names found in the input
 * @return the populated struct
 */
private Struct enforceFieldTypeForStruct(
    final Schema fieldSchema,
    final Map<String, ?> structMap) {
  final Struct result = new Struct(fieldSchema);
  final Map<String, String> nameLookup = getCaseInsensitiveFieldNameMap(structMap, false);

  fieldSchema.fields().forEach(field -> {
    final String sourceKey = nameLookup.get(field.name().toUpperCase());
    final Object rawValue = structMap.get(sourceKey);
    result.put(field.name(), enforceFieldType(field.schema(), rawValue));
  });

  return result;
}
/**
 * Verifies the exact JSON produced for a fully populated row of the order schema, including
 * the array and map columns.
 */
@Test
public void shouldSerializeRowCorrectly() {
  // Parameterised list instead of the raw List type, so the GenericRow call is checked.
  final List<Object> columns = Arrays.asList(
      1511897796092L,
      1L,
      "item_1",
      10.0,
      Arrays.asList(100.0),
      Collections.singletonMap("key1", 100.0));
  final GenericRow genericRow = new GenericRow(columns);

  // This object is a serializer; the local is named accordingly.
  final KsqlJsonSerializer serializer = new KsqlJsonSerializer(orderSchema);

  final byte[] bytes = serializer.serialize("t1", genericRow);
  final String jsonString = new String(bytes, StandardCharsets.UTF_8);

  assertThat("Incorrect serialization.", jsonString, equalTo(
      "{\"ORDERTIME\":1511897796092,\"ORDERID\":1,\"ITEMID\":\"item_1\",\"ORDERUNITS\":10.0,\"ARRAYCOL\":[100.0],\"MAPCOL\":{\"key1\":100.0}}"));
}
/**
 * Creates a source node with the fixed plan node id {@code "fake"}, backed by a
 * {@link KsqlStream} named {@code name}.
 *
 * <p>The stream is built from the {@code STATEMENT} and {@code SCHEMA} constants, the first
 * field of {@code SCHEMA}, a {@link MetadataTimestampExtractionPolicy}, a String key serde and a
 * JSON {@link KsqlTopic} that receives {@code name} for both of its name arguments.
 * {@code SCHEMA} is also passed as the node's own schema argument.
 * NOTE(review): the first schema field is presumably the stream's key field — confirm against
 * the {@code KsqlStream} constructor.
 *
 * @param name used as the stream name and for both topic name arguments
 */
FakeSourceNode(final String name) { super( new PlanNodeId("fake"), new KsqlStream<>( STATEMENT, name, SCHEMA, SCHEMA.fields().get(0), new MetadataTimestampExtractionPolicy(), new KsqlTopic(name, name, new KsqlJsonTopicSerDe(), false), Serdes.String()), SCHEMA); } }
/**
 * Deserializes a JSON object that omits the array and map fields and expects a six-column row
 * whose first four columns carry the supplied values and whose last two columns are null.
 */
@Test
public void shouldDeserializeEvenWithMissingFields() throws JsonProcessingException {
  // Given: only four of the six schema columns are present in the payload.
  final Map<String, Object> partialOrder = new HashMap<>();
  partialOrder.put("ordertime", 1511897796092L);
  partialOrder.put("@orderid", 1L);
  partialOrder.put("itemid", "Item_1");
  partialOrder.put("orderunits", 10.0);

  // When:
  final GenericRow genericRow =
      ksqlJsonDeserializer.deserialize("", objectMapper.writeValueAsBytes(partialOrder));

  // Then:
  assertThat(genericRow.getColumns().size(), equalTo(6));
  assertThat(genericRow.getColumns().get(0), equalTo(1511897796092L));
  assertThat(genericRow.getColumns().get(1), equalTo(1L));
  assertThat(genericRow.getColumns().get(2), equalTo("Item_1"));
  assertThat(genericRow.getColumns().get(3), equalTo(10.0));
  assertThat(genericRow.getColumns().get(4), is(nullValue()));
  assertThat(genericRow.getColumns().get(5), is(nullValue()));
}
/**
 * Coerces the entries of a map column.
 *
 * <p>Each key is passed through {@code enforceFieldType} with the optional-string schema and
 * converted with {@code toString()}; each value is coerced using the value schema of
 * {@code fieldSchema}.
 *
 * @param fieldSchema schema of the map column; {@code valueSchema()} describes its values
 * @param columnMap raw entries
 * @return a new {@link HashMap} holding the coerced entries
 */
private Map<String, Object> enforceFieldTypeForMap(
    final Schema fieldSchema,
    final Map<String, ?> columnMap) {
  final Map<String, Object> coerced = new HashMap<>();
  columnMap.forEach((rawKey, rawValue) -> {
    final String key = enforceFieldType(Schema.OPTIONAL_STRING_SCHEMA, rawKey).toString();
    final Object value = enforceFieldType(fieldSchema.valueSchema(), rawValue);
    coerced.put(key, value);
  });
  return coerced;
}
/**
 * Converts a serialized JSON record into a {@link GenericRow} with one column per schema field.
 *
 * <p>The bytes are parsed with {@code jsonConverter}, and the resulting value is treated as a
 * map of field name to raw value. For each field of {@code schema}, in schema order, the key to
 * read is resolved through the map returned by {@code getCaseInsensitiveFieldNameMap} and the
 * value found (possibly {@code null}) is coerced with {@code enforceFieldType}.
 *
 * @param rowJsonBytes serialized record
 * @return the row, or {@code null} when the converter yields a {@code null} value
 */
@SuppressWarnings("unchecked")
private GenericRow getGenericRow(final byte[] rowJsonBytes) {
  final SchemaAndValue schemaAndValue = jsonConverter.toConnectData("topic", rowJsonBytes);

  // Parameterised cast instead of the raw Map type; the unchecked warning is already
  // suppressed for this method.
  final Map<String, Object> valueMap = (Map<String, Object>) schemaAndValue.value();
  if (valueMap == null) {
    return null;
  }

  final Map<String, String> caseInsensitiveFieldNameMap =
      getCaseInsensitiveFieldNameMap(valueMap, true);

  // Diamond operator instead of a raw ArrayList.
  final List<Object> columns = new ArrayList<>(schema.fields().size());
  for (final Field field : schema.fields()) {
    final Object columnVal = valueMap.get(caseInsensitiveFieldNameMap.get(field.name()));
    columns.add(enforceFieldType(field.schema(), columnVal));
  }
  return new GenericRow(columns);
}
/**
 * Builds a {@link KsqlStream} named {@code "stream"} over a single-column schema
 * ({@code field0}, optional INT32).
 *
 * <p>The stream is backed by a JSON {@link KsqlTopic} created with the name {@code "internal"}
 * and the given Kafka topic name, and the schema's only field is passed to the stream
 * constructor alongside the schema.
 *
 * @param kafkaTopicName Kafka topic name given to the backing {@link KsqlTopic}
 * @return the constructed data source
 */
private StructuredDataSource buildDataSource(final String kafkaTopicName) {
  final Schema sourceSchema = SchemaBuilder.struct()
      .field("field0", Schema.OPTIONAL_INT32_SCHEMA)
      .build();

  final KsqlTopic backingTopic =
      new KsqlTopic("internal", kafkaTopicName, new KsqlJsonTopicSerDe(), true);

  return new KsqlStream<>(
      "query",
      "stream",
      sourceSchema,
      sourceSchema.fields().get(0),
      new MetadataTimestampExtractionPolicy(),
      backingTopic,
      Serdes.String());
}