/**
 * {@inheritDoc}
 *
 * <p>Advances the underlying reader and then re-points the reusable record wrapper at
 * whatever {@code getCurrentRecord()} returns. The wrapper is refreshed on every call,
 * including the call that reports that no further record is available.
 *
 * @return the value reported by the superclass {@code nextKeyValue()}
 * @throws IOException if the superclass fails while advancing
 * @throws InterruptedException if the superclass is interrupted while advancing
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  final boolean advanced = super.nextKeyValue();

  // Refresh the wrapper unconditionally; the result flag alone tells callers
  // whether a new record was actually produced.
  mCurrentRecord.datum(getCurrentRecord());

  return advanced;
}
/** {@inheritDoc} */ @Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { if (!(inputSplit instanceof FileSplit)) { throw new IllegalArgumentException("Only compatible with FileSplits."); } FileSplit fileSplit = (FileSplit) inputSplit; // Open a seekable input stream to the Avro container file. SeekableInput seekableFileInput = createSeekableInput(context.getConfiguration(), fileSplit.getPath()); // Wrap the seekable input stream in an Avro DataFileReader. Configuration conf = context.getConfiguration(); GenericData dataModel = AvroSerialization.createDataModel(conf); DatumReader<T> datumReader = dataModel.createDatumReader(mReaderSchema); mAvroFileReader = createAvroFileReader(seekableFileInput, datumReader); // Initialize the start and end offsets into the file based on the boundaries of the // input split we're responsible for. We will read the first block that begins // after the input split start boundary. We will read up to but not including the // first block that starts after input split end boundary. // Sync to the closest block/record boundary just after beginning of our input split. mAvroFileReader.sync(fileSplit.getStart()); // Initialize the start position to the beginning of the first block of the input split. mStartPosition = mAvroFileReader.previousSync(); // Initialize the end position to the end of the input split (this isn't necessarily // on a block boundary so using this for reporting progress will be approximate. mEndPosition = fileSplit.getStart() + fileSplit.getLength(); }
/**
 * {@inheritDoc}
 *
 * <p>Advances the underlying reader. When a record is available it is viewed as a
 * key/value pair and the reusable key and value wrappers are pointed at its two halves;
 * otherwise both wrappers are reset to {@code null}.
 *
 * @return the value reported by the superclass {@code nextKeyValue()}
 * @throws IOException if the superclass fails while advancing
 * @throws InterruptedException if the superclass is interrupted while advancing
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (!super.nextKeyValue()) {
    // Nothing was read: clear both wrappers so no earlier pair remains visible.
    mCurrentKey.datum(null);
    mCurrentValue.datum(null);
    return false;
  }

  // Split the generic record into its key and value parts.
  AvroKeyValue<K, V> pair = new AvroKeyValue<>(getCurrentRecord());
  mCurrentKey.datum(pair.getKey());
  mCurrentValue.datum(pair.getValue());
  return true;
}
/** {@inheritDoc} */ @Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { if (!(inputSplit instanceof FileSplit)) { throw new IllegalArgumentException("Only compatible with FileSplits."); } FileSplit fileSplit = (FileSplit) inputSplit; // Open a seekable input stream to the Avro container file. SeekableInput seekableFileInput = createSeekableInput(context.getConfiguration(), fileSplit.getPath()); // Wrap the seekable input stream in an Avro DataFileReader. Configuration conf = context.getConfiguration(); GenericData dataModel = AvroSerialization.createDataModel(conf); DatumReader<T> datumReader = dataModel.createDatumReader(mReaderSchema); mAvroFileReader = createAvroFileReader(seekableFileInput, datumReader); // Initialize the start and end offsets into the file based on the boundaries of the // input split we're responsible for. We will read the first block that begins // after the input split start boundary. We will read up to but not including the // first block that starts after input split end boundary. // Sync to the closest block/record boundary just after beginning of our input split. mAvroFileReader.sync(fileSplit.getStart()); // Initialize the start position to the beginning of the first block of the input split. mStartPosition = mAvroFileReader.previousSync(); // Initialize the end position to the end of the input split (this isn't necessarily // on a block boundary so using this for reporting progress will be approximate. mEndPosition = fileSplit.getStart() + fileSplit.getLength(); }
/**
 * Refills the batch buffer with records from the underlying reader.
 *
 * <p>The buffer is emptied first. Records are then pulled one at a time from the
 * superclass, each converted to a {@code JSONObject} built from the record's
 * {@code toString()} output, until either the superclass reports no more records or the
 * number collected in this call equals {@code batchSize}.
 *
 * @return {@code true} if at least one record was collected into the batch
 * @throws IOException if the superclass fails while advancing
 * @throws InterruptedException if the superclass is interrupted while advancing
 */
@Override
public synchronized boolean nextKeyValue() throws IOException, InterruptedException {
  data.clear();

  // 'collected' is the number of records in the batch once the current one is added.
  for (int collected = 1; super.nextKeyValue(); collected++) {
    T record = getCurrentRecord();
    data.add(new JSONObject(record.toString()));

    // Exact-match check: a batchSize that is never reached (e.g. non-positive)
    // means the loop only ends when the input is exhausted.
    if (collected == batchSize) {
      break;
    }
  }

  return !data.isEmpty();
}
/**
 * {@inheritDoc}
 *
 * <p>Advances the underlying reader and then re-points the reusable record wrapper at
 * whatever {@code getCurrentRecord()} returns. The wrapper is refreshed on every call,
 * including the call that reports that no further record is available.
 *
 * @return the value reported by the superclass {@code nextKeyValue()}
 * @throws IOException if the superclass fails while advancing
 * @throws InterruptedException if the superclass is interrupted while advancing
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  final boolean advanced = super.nextKeyValue();

  // Refresh the wrapper unconditionally; the result flag alone tells callers
  // whether a new record was actually produced.
  mCurrentRecord.datum(getCurrentRecord());

  return advanced;
}
/**
 * {@inheritDoc}
 *
 * <p>Advances the underlying reader. When a record is available it is viewed as a
 * key/value pair and the reusable key and value wrappers are pointed at its two halves;
 * otherwise both wrappers are reset to {@code null}.
 *
 * @return the value reported by the superclass {@code nextKeyValue()}
 * @throws IOException if the superclass fails while advancing
 * @throws InterruptedException if the superclass is interrupted while advancing
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  final boolean advanced = super.nextKeyValue();

  if (!advanced) {
    // Nothing was read: clear both wrappers so no earlier pair remains visible.
    mCurrentKey.datum(null);
    mCurrentValue.datum(null);
    return advanced;
  }

  // Split the generic record into its key and value parts.
  AvroKeyValue<K, V> pair = new AvroKeyValue<K, V>(getCurrentRecord());
  mCurrentKey.datum(pair.getKey());
  mCurrentValue.datum(pair.getValue());
  return advanced;
}