/**
 * Returns a writer bound to the latest version of the named schema.
 * @param schemaName Fully qualified schema name
 * @return An instance of {@link HDFSSchemaServiceWriter}
 */
public HDFSSchemaServiceWriter getWriter(@NotEmpty final String schemaName) {
    final Schema latestSchema = getSchema(schemaName);
    return new HDFSSchemaServiceWriter(latestSchema);
}
/**
 * Returns a reader bound to the latest version of the named schema.
 * @param schemaName Fully qualified schema name
 * @return An instance of {@link HDFSSchemaServiceReader}
 */
public HDFSSchemaServiceReader getReader(@NotEmpty final String schemaName) {
    final Schema latestSchema = getSchema(schemaName);
    return new HDFSSchemaServiceReader(latestSchema);
}
/**
 * Extracts the schema name from a schema file name.
 * @param schemaFile schema file name to parse
 * @return the schema name (first component returned by {@code getFileParts})
 */
private String getSchemaNameFromFileName(@NotEmpty final String schemaFile) {
    final String[] fileParts = getFileParts(schemaFile);
    return fileParts[0];
}
@Test public void testInvalidBytes() throws Exception { final HDFSSchemaService ss = getHdfsSchemaService(); final Schema wrongSchema = ss.getSchema("wrongSchema"); final GenericRecord data = new GenericRecordBuilder(wrongSchema).set("foo", "boo").build(); final byte[] bytes = ss.getWriter("wrongSchema", 1).write(data); try { ss.getReader(SCHEMA_NAME, 1).read(bytes); Assert.fail("Didn't throw error trying to read data"); } catch (InvalidDataException e) { // pass } }
/**
 * Verifies that writing a record built against a different schema than the
 * writer's raises {@link InvalidDataException}.
 */
@Test(expected = InvalidDataException.class)
public void testInvalidGR() throws Exception {
    final HDFSSchemaService schemaService = getHdfsSchemaService();
    final Schema mismatchedSchema = schemaService.getSchema("wrongSchema");
    final GenericRecord record =
        new GenericRecordBuilder(mismatchedSchema).set("foo", "boo").build();
    schemaService.getWriter(SCHEMA_NAME, 1).write(record);
}
/**
 * Verifies that requesting a schema that does not exist raises
 * {@link JobRuntimeException}.
 */
@Test(expected = JobRuntimeException.class)
public void testSchemaNotFound() throws Exception {
    final HDFSSchemaService schemaService = getHdfsSchemaService();
    schemaService.getSchema("schemaDNE");
}
// NOTE(review): this fragment appears garbled/truncated — it splices the version-scan loop
// (pick the highest-versioned file matching schemaName) together with part of a
// "schema not found" error path and a return; it is not syntactically complete here.
// Verify against the full enclosing method before editing.
final LocatedFileStatus f = fileIterator.next(); final String schemaFile = f.getPath().getName(); final String schemaFileSchemaName = getSchemaNameFromFileName(schemaFile); if (schemaFileSchemaName.equals(schemaName)) { final int schemaVersion = getSchemaVersionFromFileName(schemaFile); if (schemaVersion > resultSchemaVersion) { resultSchemaFile = f; String.format("Unable to find schema %s in %s", schemaName, conf.getPath())); } else { return getSchemaFromFile(resultSchemaFile);
// Resolve the configured schema via the HDFS-backed schema service, register it with the
// job manager, and also load the dedicated error schema.
// NOTE(review): fragment of a larger setup method — the enclosing definition is outside
// this view, so these statements are documented but left untouched.
final ISchemaService schemaService = new HDFSSchemaService(conf); final String schemaName = conf.getProperty(FileSourceConfiguration.SCHEMA).get(); final Schema schema = schemaService.getSchema(schemaName); jobManager.addSchema(schema); final Schema errorSchema = schemaService.getSchema(ERROR_SCHEMA_NAME);
/**
 * Returns the wrapped schema; for this service that is simply the latest
 * schema registered under the given name.
 * @param schemaName Fully qualified schema name
 * @return the latest Avro schema for {@code schemaName}
 */
@Override
public Schema getWrappedSchema(@NotEmpty final String schemaName) {
    final Schema schema = getSchema(schemaName);
    return schema;
}
/**
 * Returns a reader for a specific version of the named schema.
 * @param schemaName Fully qualified schema name
 * @param schemaVersion schema version to read with
 * @return An instance of {@link HDFSSchemaServiceReader}
 * @throws JobRuntimeException if the schema file cannot be loaded
 */
@Override
public HDFSSchemaServiceReader getReader(@NotEmpty final String schemaName, final int schemaVersion) {
    try {
        final HDFSSchemaServiceConfiguration conf = new HDFSSchemaServiceConfiguration(this.conf);
        final Path schemaPath = new Path(conf.getPath(),
            String.format(AVRO_SCHEMA_FILE_PATTERN, schemaName, schemaVersion));
        // return directly instead of funneling through a mutable local
        return new HDFSSchemaServiceReader(getSchemaFromPath(schemaPath));
    } catch (IOException e) {
        // include the schema coordinates so the failure is actionable from logs
        throw new JobRuntimeException(
            String.format("Unable to load schema %s (version %d)", schemaName, schemaVersion), e);
    }
}
/**
 * Returns a writer for a specific version of the named schema.
 * @param schemaName Fully qualified schema name
 * @param schemaVersion schema version to write with
 * @return An instance of {@link HDFSSchemaServiceWriter}
 * @throws JobRuntimeException if the schema file cannot be loaded
 */
@Override
public HDFSSchemaServiceWriter getWriter(@NotEmpty final String schemaName, final int schemaVersion) {
    try {
        final HDFSSchemaServiceConfiguration conf = new HDFSSchemaServiceConfiguration(this.conf);
        final Path schemaPath = new Path(conf.getPath(),
            String.format(AVRO_SCHEMA_FILE_PATTERN, schemaName, schemaVersion));
        // return directly instead of funneling through a mutable local
        return new HDFSSchemaServiceWriter(getSchemaFromPath(schemaPath));
    } catch (IOException e) {
        // include the schema coordinates so the failure is actionable from logs
        throw new JobRuntimeException(
            String.format("Unable to load schema %s (version %d)", schemaName, schemaVersion), e);
    }
}
/**
 * Builds a schema service pointed at the test schema fixtures.
 * @return a {@link HDFSSchemaService} reading from src/test/resources
 */
private HDFSSchemaService getHdfsSchemaService() {
    final Configuration configuration = new Configuration();
    configuration.setProperty(HDFSSchemaServiceConfiguration.PATH, "src/test/resources/schemas/schemasource");
    return new HDFSSchemaService(configuration);
}
}
/**
 * Loads the Avro schema stored at the location described by the given file status.
 * @param resultSchemaFile file status pointing at a schema file
 * @return the parsed Avro schema
 * @throws IOException if the file cannot be read
 */
private Schema getSchemaFromFile(@NonNull final LocatedFileStatus resultSchemaFile) throws IOException {
    final Path schemaLocation = resultSchemaFile.getPath();
    return getSchemaFromPath(schemaLocation);
}
/**
 * Decodes the raw bytes into a {@link GenericRecord}, applies the configured update
 * functions in order, and wraps the result in an {@link AvroPayload}.
 * @param data serialized Avro record bytes
 * @return a single-element list holding the converted payload
 * @throws Exception if decoding or any update function fails
 */
@Override
public List<ConverterResult<byte[], AvroPayload>> convert(@NonNull final byte[] data) throws Exception {
    GenericRecord genericRecord = this.schemaServiceReader.read(data);
    for (final Function<GenericRecord, GenericRecord> func : this.updateFunctions) {
        genericRecord = func.call(genericRecord);
    }
    // diamond operator fixes the raw-type ConverterResult construction so the element
    // is properly typed as ConverterResult<byte[], AvroPayload>
    return Collections.singletonList(
        new ConverterResult<>(new AvroPayload(genericRecord, this.fieldsToCache)));
}
}
/**
 * Deserializes Avro binary bytes into a {@link GenericRecord} using this reader's schema.
 * @param buffer Avro binary-encoded record bytes
 * @return the decoded record
 * @throws InvalidDataException if the bytes cannot be decoded with the schema
 */
@Override
public GenericRecord read(final byte[] buffer) throws InvalidDataException {
    final DatumReader<GenericRecord> datumReader = new SpecificDatumReader<>(getSchema());
    final ByteArrayInputStream stream = new ByteArrayInputStream(buffer);
    // use the shared DecoderFactory singleton rather than allocating a factory per call;
    // the reset() on a freshly constructed stream was a no-op and has been dropped
    final BinaryDecoder binaryDecoder = DecoderFactory.get().binaryDecoder(stream, null);
    try {
        return datumReader.read(null, binaryDecoder);
    } catch (IOException e) {
        throw new InvalidDataException("Error decoding data", e);
    }
}
}
/**
 * Serializes a {@link GenericRecord} to Avro binary bytes using this writer's schema.
 * @param record record to encode
 * @return the Avro binary encoding of the record
 * @throws InvalidDataException if the record cannot be encoded with the schema
 */
@Override
public byte[] write(@NonNull final GenericRecord record) throws InvalidDataException {
    final SpecificDatumWriter<GenericRecord> datumWriter = new SpecificDatumWriter<>(getSchema());
    final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    // use the shared EncoderFactory singleton rather than allocating a factory per call;
    // the reset() on a freshly constructed stream was a no-op and has been dropped
    final BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
    try {
        datumWriter.write(record, binaryEncoder);
        binaryEncoder.flush();
    } catch (Exception e) {
        // intentionally broad: Avro signals schema/record mismatches with unchecked
        // exceptions (e.g. AvroTypeException, ClassCastException), and callers expect
        // those to surface as InvalidDataException
        throw new InvalidDataException("Error encoding record", e);
    }
    return byteArrayOutputStream.toByteArray();
}
}
/**
 * Generates batches of serialized test messages, one batch per entry in the input list.
 * @param messageCountList number of messages to generate for each batch
 * @return one list of encoded messages per requested count, in input order
 * @throws FileNotFoundException if the test schema file cannot be found
 */
public static List<List<byte[]>> generateMessages(@NonNull final List<Integer> messageCountList)
    throws FileNotFoundException {
    final Schema schema = getSchema("test_schema");
    final TestKafkaSchemaService schemaService = new TestKafkaSchemaService();
    final ISchemaService.ISchemaServiceWriter writer = schemaService.getWriter("test_schema", 1);
    // build the result with map/collect instead of mutating an external list from forEach
    return messageCountList.stream()
        .map(messageCount -> KafkaTestHelper.getTestData(schema, messageCount).stream()
            .map(record -> {
                try {
                    return writer.write(record);
                } catch (InvalidDataException e) {
                    throw new RuntimeException(e);
                }
            })
            .collect(Collectors.toList()))
        .collect(Collectors.toList());
}
/**
 * Round-trips records through both a pinned schema version (v1) and the latest
 * schema version, asserting field values survive write/read.
 */
@Test
public void testSchema() throws Exception {
    final HDFSSchemaService schemaService = getHdfsSchemaService();

    // pinned version 1: only firstName/lastName
    final Schema v1Schema = schemaService.getSchema(SCHEMA_NAME, 1);
    final GenericRecord v1Record =
        new GenericRecordBuilder(v1Schema).set("firstName", "Eric").set("lastName", "Sayle").build();
    final byte[] v1Bytes = schemaService.getWriter(SCHEMA_NAME, 1).write(v1Record);
    final GenericRecord v1Decoded = schemaService.getReader(SCHEMA_NAME, 1).read(v1Bytes);
    Assert.assertEquals(v1Decoded.get("firstName").toString(), "Eric");
    Assert.assertEquals(v1Decoded.get("lastName").toString(), "Sayle");

    // latest version: adds middleName
    final Schema latestSchema = schemaService.getSchema(SCHEMA_NAME);
    final GenericRecord latestRecord = new GenericRecordBuilder(latestSchema)
        .set("firstName", "Eason").set("lastName", "Lu").set("middleName", "Fitzgerald").build();
    final byte[] latestBytes = schemaService.getWriter(SCHEMA_NAME).write(latestRecord);
    final GenericRecord latestDecoded = schemaService.getReader(SCHEMA_NAME).read(latestBytes);
    Assert.assertEquals(latestDecoded.get("firstName").toString(), "Eason");
    Assert.assertEquals(latestDecoded.get("lastName").toString(), "Lu");
    Assert.assertEquals(latestDecoded.get("middleName").toString(), "Fitzgerald");
}
/**
 * Extracts the numeric schema version from a schema file name.
 * @param schemaFile schema file name to parse
 * @return the schema version (second component returned by {@code getFileParts})
 * @throws NumberFormatException if the version component is not a valid integer
 */
private int getSchemaVersionFromFileName(@NotEmpty final String schemaFile) {
    // parseInt returns the primitive directly, avoiding Integer.valueOf's box/unbox round trip
    return Integer.parseInt(getFileParts(schemaFile)[1]);
}
/**
 * Fetches a specific version of the named schema.
 * @param schemaName Fully qualified schema name
 * @param schemaVersion schema version
 * @return Avro schema
 * @throws JobRuntimeException if the schema file cannot be loaded
 */
public Schema getSchema(@NotEmpty final String schemaName, final int schemaVersion) {
    try {
        final HDFSSchemaServiceConfiguration conf = new HDFSSchemaServiceConfiguration(this.conf);
        final Path schemaPath = new Path(conf.getPath(),
            String.format(AVRO_SCHEMA_FILE_PATTERN, schemaName, schemaVersion));
        return getSchemaFromPath(schemaPath);
    } catch (IOException e) {
        // include the schema coordinates so the failure is actionable from logs
        throw new JobRuntimeException(
            String.format("Unable to load schema %s (version %d)", schemaName, schemaVersion), e);
    }
}