/**
 * @param topic topic associated with the data
 * @param data serialized bytes
 * @return deserialized object
 */
public GenericRecord deserialize(String topic, byte[] data)
    throws SerializationException {
  return deserialize(topic, data, null);
}
public void configure(Map<String, ?> configs, boolean isKey) {
  if (null == schemaRegistry) {
    schemaRegistry = LiAvroSerDeHelper.getSchemaRegistry(configs);
  }
  this.isKey = isKey;
}
/**
 * Static method to get an MD5Digest from a binary byte representation.
 * @param md5Bytes raw bytes of the MD5 digest
 * @return a filled out MD5Digest
 */
public static MD5Digest fromBytes(byte[] md5Bytes) {
  Preconditions.checkArgument(md5Bytes.length == MD5_BYTES_LENGTH,
      "md5 bytes must be " + MD5_BYTES_LENGTH + " bytes in length, found " + md5Bytes.length + " bytes.");
  String md5String = Hex.encodeHexString(md5Bytes);
  return new MD5Digest(md5String, md5Bytes);
}
@Test
public void testValidString() throws NoSuchAlgorithmException, UnsupportedEncodingException {
  String message = "3432rdaesdfdsf2443223 234 324324 23423 e23e 23d";
  byte[] md5digest = MessageDigest.getInstance("MD5").digest(message.getBytes("UTF-8"));
  String md5String = DigestUtils.md5Hex(message);
  Assert.assertNotNull(md5digest);

  MD5Digest md5 = MD5Digest.fromBytes(md5digest);
  Assert.assertEquals(md5.asString(), md5String);
  Assert.assertEquals(md5.asBytes(), md5digest);

  MD5Digest otherMd5 = MD5Digest.fromString(md5String);
  Assert.assertEquals(otherMd5.asBytes(), md5.asBytes());
}
throw new SerializationException(String.format("Unknown magic byte for topic: %s ", topic)); MD5Digest schemaId = MD5Digest.fromBytes(data, 1 ); // read start after the first byte (magic byte) Schema schema = _schemaRegistry.getById(schemaId); Decoder decoder = DecoderFactory.get().binaryDecoder(data, 1 + MD5Digest.MD5_BYTES_LENGTH, throw new SerializationException("Error during Deserialization", e);
public byte[] serialize(String topic, GenericRecord data)
    throws SerializationException {
  Schema schema = data.getSchema();
  MD5Digest schemaId = null;
  try {
    schemaId = schemaRegistry.register(topic, schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // MAGIC_BYTE | schemaId-bytes | avro_payload
    out.write(LiAvroSerDeHelper.MAGIC_BYTE);
    out.write(schemaId.asBytes());
    BinaryEncoder encoder = encoderFactory.directBinaryEncoder(out, null);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
    writer.write(data, encoder);
    encoder.flush();
    byte[] bytes = out.toByteArray();
    out.close();
    return bytes;
  } catch (IOException | SchemaRegistryException e) {
    throw new SerializationException(e);
  }
}
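// Minimal sketch (not taken from the source) of how the wire layout written above can be
// inspected on the consuming side. It assumes `serializer` is a configured LiAvroSerializer
// (the Kafka Serializer wrapper shown below) and `record` an existing GenericRecord;
// MD5Digest.MD5_BYTES_LENGTH is the 16-byte size of an MD5 digest.
byte[] serialized = serializer.serialize("my-topic", record);
// Byte 0 is the magic byte, bytes 1..MD5_BYTES_LENGTH hold the schema id,
// and the Avro binary payload starts right after.
assert serialized[0] == LiAvroSerDeHelper.MAGIC_BYTE;
MD5Digest embeddedSchemaId = MD5Digest.fromBytes(serialized, 1);
int avroPayloadOffset = 1 + MD5Digest.MD5_BYTES_LENGTH;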
  @Override
  public byte[] serialize(String topic, GenericRecord data) {
    try {
      return super.serialize(topic, data);
    } catch (org.apache.gobblin.kafka.serialize.SerializationException e) {
      throw new SerializationException(e);
    }
  }
}
@Test
public void testInvalidString() {
  String foobar = "clearly-bad-md5string";
  try {
    MD5Digest.fromString(foobar);
    Assert.fail("Should have thrown an exception");
  } catch (Exception e) {
    log.info("Found expected exception: {}", e.getMessage());
  }
}
/**
 * Get a schema given an id.
 * @param id schema id (MD5 digest of the schema)
 * @return the schema registered under the given id
 * @throws IOException
 * @throws SchemaRegistryException if no schema is registered under the given id
 */
@Override
public Schema getById(MD5Digest id) throws IOException, SchemaRegistryException {
  if (_schemaHashMap.containsKey(id)) {
    return _schemaHashMap.get(id);
  } else {
    throw new SchemaRegistryException("Could not find schema with id : " + id.asString());
  }
}
public SimpleKafkaConsumer(Properties props, KafkaCheckpoint checkpoint) {
  Config config = ConfigFactory.parseProperties(props);
  topic = config.getString("topic");
  String zkConnect = config.getString("zookeeper.connect");
  schemaRegistry = KafkaSchemaRegistryFactory.getSchemaRegistry(props);
  deserializer = new LiAvroDeserializer(schemaRegistry);
  /** TODO: Make Confluent schema registry integration configurable
   * HashMap<String, String> avroSerDeConfig = new HashMap<>();
   * avroSerDeConfig.put("schema.registry.url", "http://localhost:8081");
   * deserializer = new io.confluent.kafka.serializers.KafkaAvroDeserializer();
   * deserializer.configure(avroSerDeConfig, false);
   **/

  Properties consumeProps = new Properties();
  consumeProps.put("zookeeper.connect", zkConnect);
  consumeProps.put("group.id", "gobblin-tool-" + System.nanoTime());
  consumeProps.put("zookeeper.session.timeout.ms", "10000");
  consumeProps.put("zookeeper.sync.time.ms", "10000");
  consumeProps.put("auto.commit.interval.ms", "10000");
  consumeProps.put("auto.offset.reset", "smallest");
  consumeProps.put("auto.commit.enable", "false");
  //consumeProps.put("consumer.timeout.ms", "10000");

  consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(consumeProps));
  Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap =
      consumer.createMessageStreams(ImmutableMap.of(topic, 1));
  List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(this.topic);
  stream = streams.get(0);
  iterator = stream.iterator();
}
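// Hedged sketch (not part of SimpleKafkaConsumer as shown here) of a read loop built on the
// fields initialized above: `iterator`, `deserializer`, and `topic`.
while (iterator.hasNext()) {
  MessageAndMetadata<byte[], byte[]> messageAndMetadata = iterator.next();
  // Each value carries MAGIC_BYTE | schemaId-bytes | avro_payload, so the LiAvroDeserializer
  // can resolve the writer schema from the registry and rebuild the GenericRecord.
  GenericRecord record = deserializer.deserialize(topic, messageAndMetadata.message());
  System.out.println(record);
}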
private MD5Digest generateId(Schema schema) {
  try {
    byte[] schemaBytes = schema.toString().getBytes("UTF-8");
    byte[] md5bytes = MessageDigest.getInstance("MD5").digest(schemaBytes);
    return MD5Digest.fromBytes(md5bytes);
  } catch (UnsupportedEncodingException | NoSuchAlgorithmException e) {
    throw new IllegalStateException("Unexpected error trying to convert schema to bytes", e);
  }
}
@Override
public Converter<S, Schema, byte[], GenericRecord> init(WorkUnitState workUnit) {
  this.schemaRegistry = KafkaSchemaRegistryFactory.getSchemaRegistry(workUnit.getProperties());
  this.deserializer = new LiAvroDeserializerBase(this.schemaRegistry);
  return this;
}
  /**
   * @param topic topic associated with the data
   * @param data serialized bytes
   * @return deserialized object
   */
  @Override
  public GenericRecord deserialize(String topic, byte[] data) {
    try {
      return super.deserialize(topic, data);
    } catch (org.apache.gobblin.kafka.serialize.SerializationException e) {
      throw new SerializationException("Error during Deserialization", e);
    }
  }
}
  @Override
  public byte[] serialize(String topic, GenericRecord data) {
    try {
      return super.serialize(topic, data);
    } catch (org.apache.gobblin.kafka.serialize.SerializationException e) {
      throw new SerializationException(e);
    }
  }
}
      schemaKey = headers[0].getValue().substring(SCHEMA_ID_HEADER_PREFIX.length());
      MD5Digest schemaId = MD5Digest.fromString(schemaKey);
      return schemaId;
    } catch (Throwable t) {
String schemaUrl = this.url + GET_RESOURCE_BY_ID + key.asString();
/**
 * Static method to get an MD5Digest from a binary byte representation.
 * @param md5Bytes byte array containing the MD5 digest
 * @param offset offset in the byte array to start reading from
 * @return a filled out MD5Digest
 */
public static MD5Digest fromBytes(byte[] md5Bytes, int offset) {
  byte[] md5BytesCopy = Arrays.copyOfRange(md5Bytes, offset, offset + MD5_BYTES_LENGTH);
  // TODO: Replace this with a version that encodes without needing a copy.
  String md5String = Hex.encodeHexString(md5BytesCopy);
  return new MD5Digest(md5String, md5BytesCopy);
}
  /**
   * @param topic topic associated with the data
   * @param data serialized bytes
   * @return deserialized object
   */
  @Override
  public GenericRecord deserialize(String topic, byte[] data) {
    try {
      return super.deserialize(topic, data);
    } catch (org.apache.gobblin.kafka.serialize.SerializationException e) {
      throw new SerializationException("Error during Deserialization", e);
    }
  }
}
/**
 * Static method to get an MD5Digest from a human-readable string representation.
 * @param md5String hex-encoded MD5 string
 * @return a filled out MD5Digest
 */
public static MD5Digest fromString(String md5String) {
  byte[] bytes;
  try {
    bytes = Hex.decodeHex(md5String.toCharArray());
    return new MD5Digest(md5String, bytes);
  } catch (DecoderException e) {
    throw new IllegalArgumentException("Unable to convert md5string", e);
  }
}
  @Override
  public Iterable<GenericRecord> convertRecord(Schema outputSchema, byte[] inputRecord, WorkUnitState workUnit)
      throws DataConversionException {
    try {
      String topic = workUnit.getProp(KafkaSource.TOPIC_NAME);
      GenericRecord record = this.deserializer.deserialize(topic, inputRecord, outputSchema);
      return new SingleRecordIterable<>(record);
    } catch (SerializationException e) {
      log.error("Cannot decode one record.", e);
      return new EmptyIterable<GenericRecord>();
    }
  }
}
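// Hedged usage sketch showing how init and convertRecord fit together. `converter` is assumed
// to be an instance of the converter class above, `outputSchema` a reader schema, and
// `rawBytes` one serialized Kafka message value; none of these appear in the excerpt itself.
WorkUnitState state = new WorkUnitState();
state.setProp(KafkaSource.TOPIC_NAME, "my-topic");  // topic read back inside convertRecord
// (schema registry properties consumed by KafkaSchemaRegistryFactory also go into `state`)

converter.init(state);
try {
  for (GenericRecord record : converter.convertRecord(outputSchema, rawBytes, state)) {
    System.out.println(record);
  }
} catch (DataConversionException e) {
  // undecodable records either surface here or come back as an EmptyIterable, per the catch above
}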