  /**
   * Creates a new record writer instance.
   *
   * @param writerSchema The writer schema for the records to write.
   * @param dataModel The data model used to serialize the records.
   * @param compressionCodec The compression codec for the writer file.
   * @param outputStream The target output stream for the records.
   * @param syncInterval The sync interval for the writer file.
   */
  protected RecordWriter<AvroKey<T>, NullWritable> create(
      Schema writerSchema, GenericData dataModel, CodecFactory compressionCodec,
      OutputStream outputStream, int syncInterval) throws IOException {
    return new AvroKeyRecordWriter<>(writerSchema, dataModel, compressionCodec, outputStream, syncInterval);
  }
}
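// Sketch, not part of the original source: because create() is a protected factory hook,
// a subclass of the factory can swap in a different data model (here ReflectData) without
// touching the rest of the output format. The subclass name is hypothetical, and it assumes
// it is declared where the factory above is accessible (e.g. nested alongside it, as with
// Avro's AvroKeyOutputFormat.RecordWriterFactory<T> upstream).
static class ReflectDataRecordWriterFactory<T> extends RecordWriterFactory<T> {
  @Override
  protected RecordWriter<AvroKey<T>, NullWritable> create(
      Schema writerSchema, GenericData dataModel, CodecFactory compressionCodec,
      OutputStream outputStream, int syncInterval) throws IOException {
    // Ignore the supplied data model and always serialize with ReflectData.
    return new AvroKeyRecordWriter<>(writerSchema, ReflectData.get(),
        compressionCodec, outputStream, syncInterval);
  }
}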
@Test
public void testWrite() throws IOException {
  Schema writerSchema = Schema.create(Schema.Type.INT);
  GenericData dataModel = new ReflectData();
  CodecFactory compressionCodec = CodecFactory.nullCodec();
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  TaskAttemptContext context = createMock(TaskAttemptContext.class);

  replay(context);

  // Write an Avro container file with two records: 1 and 2.
  AvroKeyRecordWriter<Integer> recordWriter =
      new AvroKeyRecordWriter<>(writerSchema, dataModel, compressionCodec, outputStream);
  recordWriter.write(new AvroKey<>(1), NullWritable.get());
  recordWriter.write(new AvroKey<>(2), NullWritable.get());
  recordWriter.close(context);

  verify(context);

  // Verify that the file was written as expected.
  InputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
  Schema readerSchema = Schema.create(Schema.Type.INT);
  DatumReader<Integer> datumReader = new SpecificDatumReader<>(readerSchema);
  DataFileStream<Integer> dataFileReader = new DataFileStream<>(inputStream, datumReader);

  assertTrue(dataFileReader.hasNext());               // Record 1.
  assertEquals(1, dataFileReader.next().intValue());
  assertTrue(dataFileReader.hasNext());               // Record 2.
  assertEquals(2, dataFileReader.next().intValue());
  assertFalse(dataFileReader.hasNext());               // No more records.

  dataFileReader.close();
}
AvroKeyRecordWriter<Integer> recordWriter =
    new AvroKeyRecordWriter<>(writerSchema, dataModel, compressionCodec, outputStream);
// Capture the sync position before the first record is written
// (used to seek back to this block later; see the continuation sketch below).
long positionOne = recordWriter.sync();
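// Continuation sketch, not part of the original fragment: assuming the same setup as
// testWrite() above (INT writer schema, ReflectData data model, null codec, a
// ByteArrayOutputStream, and a mocked TaskAttemptContext), the positions returned by
// sync() can later be passed to DataFileReader.seek() to jump straight to a block.
recordWriter.write(new AvroKey<>(1), NullWritable.get());
long positionTwo = recordWriter.sync();   // Position just before record 2.
recordWriter.write(new AvroKey<>(2), NullWritable.get());
recordWriter.close(context);

DatumReader<Integer> datumReader = new SpecificDatumReader<>(writerSchema);
DataFileReader<Integer> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(outputStream.toByteArray()), datumReader);

dataFileReader.seek(positionTwo);         // Skip directly to the second record.
assertTrue(dataFileReader.hasNext());
assertEquals(2, dataFileReader.next().intValue());

dataFileReader.seek(positionOne);         // Rewind to the first record.
assertTrue(dataFileReader.hasNext());
assertEquals(1, dataFileReader.next().intValue());

dataFileReader.close();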
@Override
public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
  LOG.info("getRecordWriter for " + context);

  // Get the writer schema.
  String schemaString = context.getConfiguration().get(CONF_OUTPUT_KEY_SCHEMA);
  Schema writerSchema = schemaString != null
      ? RelaxedSchemaUtils.parseSchema(schemaString, context.getConfiguration())
      : null;
  if (null == writerSchema) {
    throw new IOException("AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema().");
  }

  return new AvroKeyRecordWriter<T>(writerSchema, GenericData.get(),
      getCompressionCodec(context), getAvroFileOutputStream(context));
}
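// Driver-side sketch, not part of the original source: getRecordWriter() above fails unless
// the output key schema is present in the job configuration, which is normally set through
// AvroJob.setOutputKeySchema(). The method name, job name, and INT schema below are
// illustrative assumptions.
private static Job configureAvroKeyOutput(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, "avro-key-output-example");
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  // Stored under the output key schema property that getRecordWriter() reads back
  // (CONF_OUTPUT_KEY_SCHEMA above).
  AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.INT));
  return job;
}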