@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
  try {
    // Check and update partition cols if necessary. Ideally, this should be done
    // in CreateValue as the partition is constant per split. But since Hive uses
    // CombineHiveRecordReader and
    // as this does not call CreateValue for each new RecordReader it creates, this check is
    // required in next()
    if (addPartitionCols) {
      if (partitionValues != null) {
        rbCtx.addPartitionColsToBatch(value, partitionValues);
      }
      addPartitionCols = false;
    }
    if (!reader.nextBatch(value)) {
      return false;
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  progress = reader.getProgress();
  return true;
}
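For orientation, here is a minimal sketch of the boolean-returning read loop that wrappers like the one above delegate to. It assumes the org.apache.orc 1.x core API (the Hive snippets use Hive's own Reader/RecordReader wrappers with the same contract), and the class and method names below are illustrative, not taken from the snippets.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

class OrcScanSketch {
  // Drains an ORC file batch by batch and returns the row count.
  static long countRows(Path file, Configuration conf) throws IOException {
    Reader reader = OrcFile.createReader(file, OrcFile.readerOptions(conf));
    RecordReader rows = reader.rows();
    // One batch is allocated from the file schema and refilled on every call.
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    long total = 0;
    while (rows.nextBatch(batch)) { // false means end of file
      total += batch.size;          // rows populated in batch.cols[...]
    }
    rows.close();
    return total;
  }
}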
public DeleteReaderValue(Reader deleteDeltaReader, Reader.Options readerOptions, int bucket,
    ValidTxnList validTxnList) throws IOException {
  this.recordReader = deleteDeltaReader.rowsOptions(readerOptions);
  this.bucketForSplit = bucket;
  this.batch = deleteDeltaReader.getSchema().createRowBatch();
  if (!recordReader.nextBatch(batch)) { // Read the first batch.
    this.batch = null; // Oh! the first batch itself was null. Close the reader.
  }
  this.indexPtrInBatch = 0;
  this.validTxnList = validTxnList;
}
if (indexPtrInBatch >= batch.size) {
  if (recordReader.nextBatch(batch)) {
if (!baseReader.nextBatch(vectorizedRowBatchBase)) {
  return false;
DeleteReaderValue(Reader deleteDeltaReader, Path deleteDeltaFile, Reader.Options readerOptions,
    int bucket, ValidWriteIdList validWriteIdList, boolean isBucketedTable, final JobConf conf,
    OrcRawRecordMerger.KeyInterval keyInterval, OrcSplit orcSplit) throws IOException {
  this.reader = deleteDeltaReader;
  this.deleteDeltaFile = deleteDeltaFile;
  this.recordReader = deleteDeltaReader.rowsOptions(readerOptions, conf);
  this.bucketForSplit = bucket;
  final boolean useDecimal64ColumnVector = HiveConf.getVar(conf, ConfVars
      .HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
  if (useDecimal64ColumnVector) {
    this.batch = deleteDeltaReader.getSchema().createRowBatchV2();
  } else {
    this.batch = deleteDeltaReader.getSchema().createRowBatch();
  }
  if (!recordReader.nextBatch(batch)) { // Read the first batch.
    this.batch = null; // Oh! the first batch itself was null. Close the reader.
  }
  this.indexPtrInBatch = 0;
  this.validWriteIdList = validWriteIdList;
  this.isBucketedTable = isBucketedTable;
  if (batch != null) {
    checkBucketId(); // check 1st batch
  }
  this.keyInterval = keyInterval;
  this.orcSplit = orcSplit;
  this.numEvents = deleteDeltaReader.getNumberOfRows();
  LOG.debug("Num events stats({},x,x)", numEvents);
}
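A minimal sketch, assuming the ORC 1.5+ TypeDescription API, of what the decimal_64 branch above chooses between; the schema string is only an example and not taken from the snippet.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.TypeDescription;

class BatchFlavorsSketch {
  static void show() {
    TypeDescription schema = TypeDescription.fromString("struct<d:decimal(10,2)>");
    // createRowBatch(): decimal columns are read into DecimalColumnVector.
    VectorizedRowBatch standard = schema.createRowBatch();
    // createRowBatchV2(): decimals with precision <= 18 use Decimal64ColumnVector,
    // the layout the "decimal_64" setting checked above selects.
    VectorizedRowBatch v2 = schema.createRowBatchV2();
    System.out.println(standard.cols[0].getClass().getSimpleName()); // DecimalColumnVector
    System.out.println(v2.cols[0].getClass().getSimpleName());       // Decimal64ColumnVector
  }
}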
if (indexPtrInBatch >= batch.size) {
  if (recordReader.nextBatch(batch)) {
    checkBucketId();
    indexPtrInBatch = 0; // After reading the batch, reset the pointer to beginning.
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
LongColumnVector future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
    ((BytesColumnVector) batch.cols[2]).toString(r)); // trailing argument of a per-row assertion; the rest of the statement is missing from this excerpt
assertEquals(false, rows.nextBatch(batch));
rows.close();
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
assertEquals("row " + r, r * 10001, lcv.vector[r]); // per-row check; r is the index of a surrounding loop not shown in this excerpt
assertEquals(false, rows.nextBatch(batch));
rows.close();
@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
  if (!reader.hasNext()) {
    return false;
  }
  try {
    // Check and update partition cols if necessary. Ideally, this should be done
    // in CreateValue as the partition is constant per split. But since Hive uses
    // CombineHiveRecordReader and
    // as this does not call CreateValue for each new RecordReader it creates, this check is
    // required in next()
    if (addPartitionCols) {
      rbCtx.addPartitionColsToBatch(value);
      addPartitionCols = false;
    }
    reader.nextBatch(value);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  progress = reader.getProgress();
  return true;
}
protected int populateData() {
  try {
    final int numRowsPerBatch = (int) this.numRowsPerBatch;
    int outputIdx = 0;
    // Consume the left over records from previous iteration
    if (offset > 0 && offset < hiveBatch.size) {
      int toRead = Math.min(hiveBatch.size - offset, numRowsPerBatch - outputIdx);
      copy(offset, toRead, outputIdx);
      outputIdx += toRead;
      offset += toRead;
    }
    while (outputIdx < numRowsPerBatch && hiveOrcReader.hasNext()) {
      offset = 0;
      hiveOrcReader.nextBatch(hiveBatch);
      int toRead = Math.min(hiveBatch.size, numRowsPerBatch - outputIdx);
      copy(offset, toRead, outputIdx);
      outputIdx += toRead;
      offset = toRead;
    }
    return outputIdx;
  } catch (Throwable t) {
    throw createExceptionWithContext("Failed to read data from ORC file", t);
  }
}
long rowCount = thisStripe.getNumberOfRows();
while (rows != rowCount) {
  batch = perStripe.nextBatch(batch); // read orc file stripes in vectorizedRowBatch
  long currentBatchRow = batch.count();
  int nrows = (int) currentBatchRow;
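For comparison with the boolean contract used in most snippets above, a minimal sketch of the legacy loop the fragment above excerpts, assuming the older Hive reader interface (org.apache.hadoop.hive.ql.io.orc.RecordReader) in which nextBatch(previous) returns the refilled batch and hasNext() gates the loop; the class and method names below are illustrative.

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

class LegacyOrcScanSketch {
  // Drains a reader using the old batch-returning contract excerpted above.
  static long countRows(RecordReader rows) throws IOException {
    long total = 0;
    VectorizedRowBatch batch = null;
    while (rows.hasNext()) {
      batch = rows.nextBatch(batch); // reuses the previous batch when non-null
      total += batch.size;           // rows populated in this batch
    }
    rows.close();
    return total;
  }
}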