/** Reports whether more records are available from the underlying Avro reader. */
@Override
public boolean hasNext() {
  return _avroReader.hasNext();
}
/**
 * Expert: Return the next block in the file, as binary-encoded data.
 *
 * @return the raw bytes of the next block
 * @throws NoSuchElementException if no blocks remain
 * @throws IllegalStateException  if some records of the current block have
 *                                already been consumed (not at a block boundary)
 */
public ByteBuffer nextBlock() throws IOException {
  if (!hasNext()) {
    throw new NoSuchElementException();
  }
  if (blockRemaining != blockCount) {
    throw new IllegalStateException("Not at block start.");
  }
  // Mark the whole block as consumed and drop the per-datum decoder so the
  // caller owns the raw buffer from here on.
  blockRemaining = 0;
  datumIn = null;
  return blockBuffer;
}
/**
 * Expert: Return the next block in the file, as binary-encoded data.
 *
 * @throws NoSuchElementException when the file has no further blocks
 * @throws IllegalStateException  when called mid-block rather than at a block start
 */
public ByteBuffer nextBlock() throws IOException {
  if (!hasNext()) {
    throw new NoSuchElementException();
  }
  if (blockRemaining != blockCount) {
    throw new IllegalStateException("Not at block start.");
  }

  // Hand the entire block buffer to the caller; reset decode state so record
  // iteration cannot resume inside this block.
  blockRemaining = 0;
  datumIn = null;
  return blockBuffer;
}
/** Returns the next record from the stream, or {@code null} when the stream is exhausted. */
@Override
protected GenericRecord nextAvroRecord() {
  return dataFileStream.hasNext() ? dataFileStream.next() : null;
}
/**
 * Read the next datum from the file.
 *
 * @param reuse an instance to reuse
 * @throws NoSuchElementException if no more remain in the file
 */
public D next(D reuse) throws IOException {
  if (!hasNext()) {
    throw new NoSuchElementException();
  }
  final D datum = reader.read(reuse, datumIn);
  blockRemaining--;
  if (blockRemaining == 0) {
    // Last datum of the block consumed; let the subclass advance/clean up.
    blockFinished();
  }
  return datum;
}
/**
 * Read the next datum from the file.
 *
 * @param reuse an instance to reuse, may be mutated and returned
 * @throws NoSuchElementException if no more remain in the file
 */
public D next(D reuse) throws IOException {
  if (!hasNext()) {
    throw new NoSuchElementException();
  }
  D result = reader.read(reuse, datumIn);
  // Decrement first, then check: reaching zero means this block is done.
  if (--blockRemaining == 0) {
    blockFinished();
  }
  return result;
}
@Override public Optional<Map<DecoderColumnHandle, FieldValueProvider>> decodeRow(byte[] data, Map<String, String> dataMap) { GenericRecord avroRecord; DataFileStream<GenericRecord> dataFileReader = null; try { // Assumes producer uses DataFileWriter or data comes in this particular format. // TODO: Support other forms for producers dataFileReader = new DataFileStream<>(new ByteArrayInputStream(data), avroRecordReader); if (!dataFileReader.hasNext()) { throw new PrestoException(GENERIC_INTERNAL_ERROR, "No avro record found"); } avroRecord = dataFileReader.next(); if (dataFileReader.hasNext()) { throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unexpected extra record found"); } } catch (Exception e) { throw new PrestoException(GENERIC_INTERNAL_ERROR, "Decoding Avro record failed.", e); } finally { closeQuietly(dataFileReader); } return Optional.of(columnDecoders.entrySet().stream() .collect(toImmutableMap( Map.Entry::getKey, entry -> entry.getValue().decodeField(avroRecord)))); }
/**skips a number of records from the input*/ private long skip(long skip) throws IOException { long skipped = 0; while( 0 < skip && reader.hasNext()) { reader.next(reuse); skip--; skipped++; } if ((0 < skip) && hasNextInput()) { // goto next file nextInput(); skipped = skipped + skip(skip); } return skipped; }
/**
 * Asserts that {@code file} contains exactly the LINES fixture in sorted order,
 * each datum being the UTF-8 bytes of one line.
 */
public static void validateSortedFile(File file) throws Exception {
  DatumReader<ByteBuffer> reader = new GenericDatumReader<>();
  try (InputStream in = new BufferedInputStream(new FileInputStream(file));
       DataFileStream<ByteBuffer> lines = new DataFileStream<>(in, reader)) {
    List<String> expected = new ArrayList<>(Arrays.asList(LINES));
    Collections.sort(expected);
    for (String expectedLine : expected) {
      ByteBuffer buf = lines.next();
      byte[] bytes = new byte[buf.remaining()];
      buf.get(bytes);
      assertEquals(expectedLine, new String(bytes, StandardCharsets.UTF_8).trim());
    }
    // Every expected line consumed — the file must hold nothing further.
    assertFalse(lines.hasNext());
  }
}
// NOTE(review): truncated fragment — the remainder of run() lies outside this chunk.
// Loops over the Avro stream, returning as soon as keepIndexing is cleared
// (presumably by another thread — TODO confirm the flag is volatile/thread-safe).
@Override public void run() { while (true) { while (avroDataStream.hasNext()) { if (keepIndexing == false) { return;
private long getNumberOfRecordsFromStream(InputStream in) throws IOException { final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(); try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) { GenericRecord record = null; long recordsFromStream = 0; while (dataFileReader.hasNext()) { // Reuse record object by passing it to next(). This saves us from // allocating and garbage collecting many objects for files with // many items. record = dataFileReader.next(record); recordsFromStream += 1; } return recordsFromStream; } }
/**
 * Verifies that an unsigned INTEGER column with precision 10 survives the
 * ResultSet-to-Avro conversion with its value intact.
 */
@Test
public void testConvertToAvroStreamForUnsignedIntegerWithPrecision10() throws SQLException, IOException {
  final String mockColumnName = "t_int";

  // Mock a one-column result set: unsigned INTEGER, precision 10.
  final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
  when(metadata.getColumnCount()).thenReturn(1);
  when(metadata.getColumnType(1)).thenReturn(Types.INTEGER);
  when(metadata.isSigned(1)).thenReturn(false);
  when(metadata.getPrecision(1)).thenReturn(10);
  when(metadata.getColumnName(1)).thenReturn(mockColumnName);
  when(metadata.getTableName(1)).thenReturn("table");

  final ResultSet rs = resultSetReturningMetadata(metadata);
  final Long ret = 0L;
  when(rs.getObject(Mockito.anyInt())).thenReturn(ret);

  final InputStream instream = convertResultSetToAvroInputStream(rs);
  final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
    GenericRecord record = null;
    while (dataFileReader.hasNext()) {
      record = dataFileReader.next(record);
      assertEquals(Long.toString(ret), record.get(mockColumnName).toString());
    }
  }
}
/** Verifies that a TINYINT column delivering a short survives conversion to Avro. */
@Test
public void testConvertToAvroStreamForShort() throws SQLException, IOException {
  // One-column result set of type TINYINT.
  final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
  when(metadata.getColumnCount()).thenReturn(1);
  when(metadata.getColumnType(1)).thenReturn(Types.TINYINT);
  when(metadata.getColumnName(1)).thenReturn("t_int");
  when(metadata.getTableName(1)).thenReturn("table");

  final ResultSet rs = resultSetReturningMetadata(metadata);
  final short s = 25;
  when(rs.getObject(Mockito.anyInt())).thenReturn(s);

  final InputStream instream = convertResultSetToAvroInputStream(rs);
  final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
    GenericRecord record = null;
    while (dataFileReader.hasNext()) {
      record = dataFileReader.next(record);
      assertEquals(Short.toString(s), record.get("t_int").toString());
    }
  }
}
/**
 * Regression for NIFI-5612: an unsigned INTEGER column with precision 1 whose
 * driver returns a Long must still convert to Avro without error.
 */
@Test
public void testConvertToAvroStreamForUnsignedIntegerWithPrecision1ReturnedAsLong_NIFI5612() throws SQLException, IOException {
  final String mockColumnName = "t_int";

  // One-column result set: unsigned INTEGER, precision 1.
  final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
  when(metadata.getColumnCount()).thenReturn(1);
  when(metadata.getColumnType(1)).thenReturn(Types.INTEGER);
  when(metadata.isSigned(1)).thenReturn(false);
  when(metadata.getPrecision(1)).thenReturn(1);
  when(metadata.getColumnName(1)).thenReturn(mockColumnName);
  when(metadata.getTableName(1)).thenReturn("table");

  final ResultSet rs = resultSetReturningMetadata(metadata);
  final Long ret = 0L;
  when(rs.getObject(Mockito.anyInt())).thenReturn(ret);

  final InputStream instream = convertResultSetToAvroInputStream(rs);
  final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
    GenericRecord record = null;
    while (dataFileReader.hasNext()) {
      record = dataFileReader.next(record);
      assertEquals(Long.toString(ret), record.get(mockColumnName).toString());
    }
  }
}
// Parameterized check: a numeric column described by testParams (sqlType /
// signed / precision) converts to an Avro value whose string form equals the
// original int. NOTE(review): the final brace on this line closes the
// enclosing class, which starts outside this chunk.
@Test public void testConvertToAvroStreamForNumbers() throws SQLException, IOException { final ResultSetMetaData metadata = mock(ResultSetMetaData.class); when(metadata.getColumnCount()).thenReturn(1); when(metadata.getColumnType(1)).thenReturn(testParams.sqlType); when(metadata.isSigned(1)).thenReturn(testParams.signed); when(metadata.getPrecision(1)).thenReturn(testParams.precision); when(metadata.getColumnName(1)).thenReturn("t_int"); when(metadata.getTableName(1)).thenReturn("table"); final ResultSet rs = resultSetReturningMetadata(metadata); final int ret = 0; when(rs.getObject(Mockito.anyInt())).thenReturn(ret); final InputStream instream = convertResultSetToAvroInputStream(rs); final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(); try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) { GenericRecord record = null; while (dataFileReader.hasNext()) { record = dataFileReader.next(record); assertEquals(Integer.toString(ret), record.get("t_int").toString()); } } } }
/** writes records with the given samplerate * The record at position offset is guaranteed to be taken*/ private long writeRecords(long count, double samplerate) throws IOException { long written = 0; while(written < count && reader.hasNext()) { reuse = reader.next(reuse); sampleCounter = sampleCounter + samplerate; if (sampleCounter >= 1) { writer.append(reuse); written++; sampleCounter--; } } totalCopied = totalCopied + written; if (written < count && hasNextInput()) { // goto next file nextInput(); written = written + writeRecords(count - written, samplerate); } return written; }
@Test public void testConvertToBytes() throws ClassNotFoundException, SQLException, IOException { final Statement st = con.createStatement(); st.executeUpdate("insert into restaurants values (1, 'Irifunes', 'San Mateo')"); st.executeUpdate("insert into restaurants values (2, 'Estradas', 'Daly City')"); st.executeUpdate("insert into restaurants values (3, 'Prime Rib House', 'San Francisco')"); final ResultSet resultSet = st.executeQuery("select R.*, ROW_NUMBER() OVER () as rownr from restaurants R"); final ByteArrayOutputStream outStream = new ByteArrayOutputStream(); JdbcCommon.convertToAvroStream(resultSet, outStream, false); final byte[] serializedBytes = outStream.toByteArray(); assertNotNull(serializedBytes); System.out.println("Avro serialized result size in bytes: " + serializedBytes.length); st.close(); // Deserialize bytes to records final InputStream instream = new ByteArrayInputStream(serializedBytes); final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(); try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) { GenericRecord record = null; while (dataFileReader.hasNext()) { // Reuse record object by passing it to next(). This saves us from // allocating and garbage collecting many objects for files with // many items. record = dataFileReader.next(record); System.out.println(record); } } }
// NOTE(review): truncated fragment — the enclosing method continues outside
// this chunk. Opens an Avro DataFileStream over `instream` and iterates its
// records, reusing a single GenericRecord (initially null) across next() calls.
try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) { GenericRecord record = null; while (dataFileReader.hasNext()) {
@Test public void testWrite() throws IOException { Schema writerSchema = Schema.create(Schema.Type.INT); GenericData dataModel = new ReflectData(); CodecFactory compressionCodec = CodecFactory.nullCodec(); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); TaskAttemptContext context = createMock(TaskAttemptContext.class); replay(context); // Write an avro container file with two records: 1 and 2. AvroKeyRecordWriter<Integer> recordWriter = new AvroKeyRecordWriter<>( writerSchema, dataModel, compressionCodec, outputStream); recordWriter.write(new AvroKey<>(1), NullWritable.get()); recordWriter.write(new AvroKey<>(2), NullWritable.get()); recordWriter.close(context); verify(context); // Verify that the file was written as expected. InputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray()); Schema readerSchema = Schema.create(Schema.Type.INT); DatumReader<Integer> datumReader = new SpecificDatumReader<>(readerSchema); DataFileStream<Integer> dataFileReader = new DataFileStream<>(inputStream, datumReader); assertTrue(dataFileReader.hasNext()); // Record 1. assertEquals(1, dataFileReader.next().intValue()); assertTrue(dataFileReader.hasNext()); // Record 2. assertEquals(2, dataFileReader.next().intValue()); assertFalse(dataFileReader.hasNext()); // No more records. dataFileReader.close(); }
@Test public void testWriteAndRead() throws IOException { Schema schema = Schema.create(Type.STRING); // Write it DataFileWriter<Utf8> w = new DataFileWriter<>(new GenericDatumWriter<>(schema)); w.setCodec(CodecFactory.deflateCodec(6)); ByteArrayOutputStream baos = new ByteArrayOutputStream(); w.create(schema, baos); w.append(new Utf8("hello world")); w.append(new Utf8("hello moon")); w.sync(); w.append(new Utf8("bye bye world")); w.append(new Utf8("bye bye moon")); w.close(); // Read it DataFileStream<Utf8> r = new DataFileStream<>( new ByteArrayInputStream(baos.toByteArray()), new GenericDatumReader<>(schema)); assertEquals("hello world", r.next().toString()); assertEquals("hello moon", r.next().toString()); assertEquals("bye bye world", r.next().toString()); assertEquals("bye bye moon", r.next().toString()); assertFalse(r.hasNext()); } }