void next(OrcStruct next) throws IOException {
  if (recordReader.hasNext()) {
    nextRecord = (OrcStruct) recordReader.next(next);
    // set the key
    key.setValues(OrcRecordUpdater.getOriginalTransaction(nextRecord),
        OrcRecordUpdater.getBucket(nextRecord),
        OrcRecordUpdater.getRowId(nextRecord),
        OrcRecordUpdater.getCurrentTransaction(nextRecord),
        statementId);

    // if this record is larger than maxKey, we need to stop
    if (maxKey != null && key.compareRow(maxKey) > 0) {
      LOG.debug("key " + key + " > maxkey " + maxKey);
      nextRecord = null;
      recordReader.close();
    }
  } else {
    nextRecord = null;
    recordReader.close();
  }
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (reader.hasNext()) {
    reader.next(value);
    progress = reader.getProgress();
    return true;
  } else {
    return false;
  }
}
@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
  try {
    // Check and update partition cols if necessary. Ideally, this should be done
    // in CreateValue as the partition is constant per split. But since Hive uses
    // CombineHiveRecordReader and as this does not call CreateValue for each new
    // RecordReader it creates, this check is required in next()
    if (addPartitionCols) {
      if (partitionValues != null) {
        rbCtx.addPartitionColsToBatch(value, partitionValues);
      }
      addPartitionCols = false;
    }
    if (!reader.nextBatch(value)) {
      return false;
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  progress = reader.getProgress();
  return true;
}
private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException {
  org.apache.hadoop.fs.FileSystem fs =
      org.apache.hadoop.fs.FileSystem.getLocal(new Configuration());
  Reader reader = OrcFile.createReader(orcFile,
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector();

  System.out.format("Found Bucket File : %s \n", orcFile.getName());
  ArrayList<SampleRec> result = new ArrayList<SampleRec>();
  while (rows.hasNext()) {
    Object row = rows.next(null);
    SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5];
    result.add(rec);
  }
  return result;
}
@Override
void next(OrcStruct next) throws IOException {
  if (recordReader.hasNext()) {
    long nextRowId = recordReader.getRowNumber();
    if (next == null) {
      nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS);
      // ... the other ACID metadata fields are populated here as well
      nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID,
          new LongWritable(nextRowId));
      nextRecord.setFieldValue(OrcRecordUpdater.ROW,
          recordReader.next(null));
    } else {
      nextRecord = next;
      ((LongWritable) next.getFieldValue(OrcRecordUpdater.ROW_ID))
          .set(nextRowId);
      nextRecord.setFieldValue(OrcRecordUpdater.ROW,
          recordReader.next(OrcRecordUpdater.getRow(next)));
    }
  } else {
    nextRecord = null;
    recordReader.close();
  }
}
assertEquals(reader.getContentLength(), currentOffset);
RecordReader rows = reader.rows();
assertEquals(0, rows.getRowNumber());
assertEquals(0.0, rows.getProgress(), 0.000001);
assertEquals(true, rows.hasNext());
row = (OrcStruct) rows.next(null);
assertEquals(1, rows.getRowNumber());
inspector = reader.getObjectInspector();
assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
    inspector.getTypeName());
assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547456")),
    row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(2, rows.getRowNumber());
assertEquals(new TimestampWritableV2(Timestamp.valueOf("2000-03-20 12:00:00.123456789")),
    row.getFieldValue(0));
assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")),
    row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(null, row.getFieldValue(0));
assertEquals(null, row.getFieldValue(1));
assertEquals(null, row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(null, row.getFieldValue(0));
union = (OrcUnion) row.getFieldValue(1);
assertEquals(null, union.getObject());
assertEquals(null, row.getFieldValue(2));
private Reader createMockOriginalReader() throws IOException {
  Reader reader = Mockito.mock(Reader.class, settings);
  RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
  OrcStruct row1 = createOriginalRow("first");
  OrcStruct row2 = createOriginalRow("second");
  OrcStruct row3 = createOriginalRow("third");
  OrcStruct row4 = createOriginalRow("fourth");
  OrcStruct row5 = createOriginalRow("fifth");

  Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class),
      Mockito.any(HiveConf.class))).thenReturn(recordReader);
  Mockito.when(recordReader.hasNext())
      .thenReturn(true, true, true, true, true, false);
  Mockito.when(recordReader.getRowNumber()).thenReturn(0L, 1L, 2L, 3L, 4L);
  Mockito.when(recordReader.next(null)).thenReturn(row1);
  Mockito.when(recordReader.next(row1)).thenReturn(row2);
  Mockito.when(recordReader.next(row2)).thenReturn(row3);
  Mockito.when(recordReader.next(row3)).thenReturn(row4);
  Mockito.when(recordReader.next(row4)).thenReturn(row5);
  return reader;
}
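/*
 * Illustration only (not part of the original sources): a minimal, self-contained
 * sketch of the Mockito consecutive-stubbing behaviour the mock above relies on.
 * thenReturn(a, b, c) yields a on the first call, b on the second, and c on every
 * call after that, which is how hasNext() above can return true five times and then
 * false. The Iterator-based demo class below is hypothetical and only shows the
 * stubbing pattern.
 */
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Iterator;

public class ConsecutiveStubDemo {
  public static void main(String[] args) {
    @SuppressWarnings("unchecked")
    Iterator<String> it = mock(Iterator.class);
    when(it.hasNext()).thenReturn(true, true, false);   // two elements, then exhausted
    when(it.next()).thenReturn("first", "second");

    while (it.hasNext()) {
      System.out.println(it.next());                    // prints "first" then "second"
    }
  }
}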
setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
Mockito.when(recordReader.hasNext())
    .thenReturn(true, true, true, true, true, false);
Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
Mockito.when(recordReader.next(null)).thenReturn(row1, row4);
Mockito.when(recordReader.next(row1)).thenReturn(row2);
Mockito.when(recordReader.next(row2)).thenReturn(row3);
Mockito.when(recordReader.next(row3)).thenReturn(row5);

assertEquals(1.0, merger.getProgress(), 0.01);
merger.close();
Mockito.verify(rr).close();
Mockito.verify(rr).getProgress();
    .getMapKeyObjectInspector();
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);
assertEquals(true, rows.hasNext());
rows.seekToRow(7499);
row = rows.next(null);
assertEquals(true, bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
assertEquals(false, rows.hasNext());
rows.close();
@Override
public void close() throws IOException {
  reader.close();
}
rows.seekToRow(i - (COUNT - 1));
rows.seekToRow(i);
row = (OrcStruct) rows.next(row);
BigRow expected = createRandomRow(intValues, doubleValues, stringValues,
    byteValues, words, i);
compareList(expected.list, (List<OrcStruct>) row.getFieldValue(10));
rows.close();

Iterator<StripeInformation> stripeIterator = reader.getStripes().iterator();
rows = reader.rowsOptions(new Reader.Options()
    .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
    .include(columns));
rows.seekToRow(lastRowOfStripe2);
for (int i = 0; i < 2; ++i) {
  row = (OrcStruct) rows.next(row);
  BigRow expected = createRandomRow(intValues, doubleValues, stringValues,
      byteValues, words, (int) (lastRowOfStripe2 + i));
  assertEquals(expected.string1, row.getFieldValue(8));
}
rows.close();
private static OrcFileInfo rewrite(RecordReader reader, Writer writer, BitSet rowsToDelete,
    int inputRowCount) throws IOException {
  Object object = null;
  int row = 0;
  long rowCount = 0;
  long uncompressedSize = 0;

  row = rowsToDelete.nextClearBit(row);
  if (row < inputRowCount) {
    reader.seekToRow(row);
  }

  while (row < inputRowCount) {
    if (Thread.currentThread().isInterrupted()) {
      throw new InterruptedIOException();
    }
    // seekToRow() is extremely expensive
    if (reader.getRowNumber() < row) {
      reader.next(object);
      continue;
    }
    object = reader.next(object);
    writer.addRow(object);
    rowCount++;
    uncompressedSize += uncompressedSize(object);
    row = rowsToDelete.nextClearBit(row + 1);
  }
  return new OrcFileInfo(rowCount, uncompressedSize);
}
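/*
 * Illustration only (not part of the original sources): a minimal sketch of the
 * BitSet bookkeeping that drives the rewrite loop above. Rows whose bits are set
 * in rowsToDelete are dropped; nextClearBit(fromIndex) returns the first surviving
 * row at or after fromIndex, so the loop hops from one kept row to the next while
 * plain next() calls advance the reader past the deleted ones.
 */
import java.util.BitSet;

public class NextClearBitDemo {
  public static void main(String[] args) {
    int inputRowCount = 8;
    BitSet rowsToDelete = new BitSet(inputRowCount);
    rowsToDelete.set(1);        // delete row 1
    rowsToDelete.set(3, 6);     // delete rows 3, 4 and 5

    int row = rowsToDelete.nextClearBit(0);
    while (row < inputRowCount) {
      System.out.println("keep row " + row);            // prints 0, 2, 6, 7
      row = rowsToDelete.nextClearBit(row + 1);
    }
  }
}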
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
LongColumnVector future1 =
    ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
    ((BytesColumnVector) batch.cols[2]).toString(r));
assertEquals(false, rows.nextBatch(batch));
rows.close();

lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
assertEquals("row " + r, r * 10001, lcv.vector[r]);
assertEquals(false, rows.nextBatch(batch));
rows.close();
@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
  if (!reader.hasNext()) {
    return false;
  }
  try {
    // Check and update partition cols if necessary. Ideally, this should be done
    // in CreateValue as the partition is constant per split. But since Hive uses
    // CombineHiveRecordReader and as this does not call CreateValue for each new
    // RecordReader it creates, this check is required in next()
    if (addPartitionCols) {
      rbCtx.addPartitionColsToBatch(value);
      addPartitionCols = false;
    }
    reader.nextBatch(value);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  progress = reader.getProgress();
  return true;
}
Object row = rows.next(null);
protected int populateData() {
  try {
    final int numRowsPerBatch = (int) this.numRowsPerBatch;
    int outputIdx = 0;
    // Consume the left over records from previous iteration
    if (offset > 0 && offset < hiveBatch.size) {
      int toRead = Math.min(hiveBatch.size - offset, numRowsPerBatch - outputIdx);
      copy(offset, toRead, outputIdx);
      outputIdx += toRead;
      offset += toRead;
    }
    while (outputIdx < numRowsPerBatch && hiveOrcReader.hasNext()) {
      offset = 0;
      hiveOrcReader.nextBatch(hiveBatch);
      int toRead = Math.min(hiveBatch.size, numRowsPerBatch - outputIdx);
      copy(offset, toRead, outputIdx);
      outputIdx += toRead;
      offset = toRead;
    }
    return outputIdx;
  } catch (Throwable t) {
    throw createExceptionWithContext("Failed to read data from ORC file", t);
  }
}
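/*
 * Illustration only, with hypothetical sizes (not part of the original sources):
 * a tiny simulation of the leftover handling in populateData() above. With source
 * batches of 1024 rows and 1000 rows requested per call, the first call leaves 24
 * rows unconsumed (offset == 1000); the second call copies those 24 rows first and
 * then pulls the next source batch for the remaining 976.
 */
public class LeftoverBatchDemo {
  static final int SOURCE_BATCH = 1024;  // rows per batch produced by the reader
  static final int OUTPUT_BATCH = 1000;  // rows requested per populate() call
  static int offset = 0;                 // rows of the current source batch already consumed
  static boolean haveBatch = false;      // whether a source batch has been loaded yet

  static int populate() {
    int outputIdx = 0;
    // consume the leftover rows from the previous call, if any
    if (haveBatch && offset > 0 && offset < SOURCE_BATCH) {
      int toRead = Math.min(SOURCE_BATCH - offset, OUTPUT_BATCH - outputIdx);
      outputIdx += toRead;
      offset += toRead;
    }
    // pull fresh source batches until the output batch is full
    // (this toy reader never runs out of rows)
    while (outputIdx < OUTPUT_BATCH) {
      haveBatch = true;
      offset = 0;
      int toRead = Math.min(SOURCE_BATCH, OUTPUT_BATCH - outputIdx);
      outputIdx += toRead;
      offset = toRead;
    }
    return outputIdx;
  }

  public static void main(String[] args) {
    System.out.println(populate() + " rows, offset=" + offset);  // 1000 rows, offset=1000
    System.out.println(populate() + " rows, offset=" + offset);  // 1000 rows, offset=976
  }
}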
@Test
public void emptyFile() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(1000)
          .compress(CompressionKind.NONE)
          .bufferSize(100));
  writer.close();
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(false, reader.rows().hasNext());
  assertEquals(CompressionKind.NONE, reader.getCompression());
  assertEquals(0, reader.getNumberOfRows());
  assertEquals(0, reader.getCompressionSize());
  assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
  assertEquals(3, reader.getContentLength());
  assertEquals(false, reader.getStripes().iterator().hasNext());
}
@Override
public float getProgress() throws IOException {
  // this is not likely to do the right thing for Compaction of "original" files
  // when there are copyN files
  return baseReader == null ? 1 : baseReader.getProgress();
}
public DeleteReaderValue(Reader deleteDeltaReader, Reader.Options readerOptions, int bucket,
    ValidTxnList validTxnList) throws IOException {
  this.recordReader = deleteDeltaReader.rowsOptions(readerOptions);
  this.bucketForSplit = bucket;
  this.batch = deleteDeltaReader.getSchema().createRowBatch();
  if (!recordReader.nextBatch(batch)) { // Read the first batch.
    this.batch = null; // Oh! the first batch itself was null. Close the reader.
  }
  this.indexPtrInBatch = 0;
  this.validTxnList = validTxnList;
}