@Override
public void write(KeyValue keyValue) throws IOException {
  rowIndex = batch.size++;
  VectorColumnFiller.fillRow(rowIndex, converters, schema, batch,
      gson.fromJson(new String(keyValue.getValue()), JsonObject.class));
  if (batch.size == batch.getMaxSize()) {
    writer.addRowBatch(batch);
    batch.reset();
  }
}
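For context, here is a minimal self-contained sketch of the same fill-until-full pattern against the core ORC writer API; the file path and single-column schema are illustrative assumptions, not taken from the snippet above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class OrcWriteSketch {
  public static void main(String[] args) throws Exception {
    TypeDescription schema = TypeDescription.fromString("struct<x:bigint>");
    try (Writer writer = OrcFile.createWriter(new Path("/tmp/demo.orc"),
        OrcFile.writerOptions(new Configuration()).setSchema(schema))) {
      VectorizedRowBatch batch = schema.createRowBatch();
      LongColumnVector x = (LongColumnVector) batch.cols[0];
      for (long i = 0; i < 100_000L; i++) {
        x.vector[batch.size++] = i;
        if (batch.size == batch.getMaxSize()) { // flush a full batch
          writer.addRowBatch(batch);
          batch.reset();
        }
      }
      if (batch.size != 0) { // flush the trailing partial batch
        writer.addRowBatch(batch);
      }
    }
  }
}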
/**
 * Make a new (scratch) batch, which is exactly "like" the batch provided,
 * except that it's empty.
 * @param batch the batch to imitate
 * @return the new batch
 * @throws HiveException
 */
public static VectorizedRowBatch makeLike(VectorizedRowBatch batch) throws HiveException {
  VectorizedRowBatch newBatch = new VectorizedRowBatch(batch.numCols);
  for (int i = 0; i < batch.numCols; i++) {
    if (batch.cols[i] != null) {
      newBatch.cols[i] = makeLikeColumnVector(batch.cols[i]);
      newBatch.cols[i].init();
    }
  }
  newBatch.projectedColumns = Arrays.copyOf(batch.projectedColumns, batch.projectedColumns.length);
  newBatch.projectionSize = batch.projectionSize;
  newBatch.reset();
  return newBatch;
}
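A hedged usage sketch, assuming the method above is the one Hive exposes on VectorizedBatchUtil: it yields an empty scratch batch that mirrors the source batch's column types and projection, which is useful when buffering or spilling rows.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class ScratchBatchSketch {
  // Returns an empty batch shaped like 'source': same column vector types,
  // same projection, but size 0 and freshly reset vectors.
  static VectorizedRowBatch scratchFor(VectorizedRowBatch source) throws HiveException {
    return VectorizedBatchUtil.makeLike(source);
  }
}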
private void copyFromBase(VectorizedRowBatch value) {
  assert !isOriginal;
  if (isFlatPayload) {
    int payloadCol = includeAcidColumns ? OrcRecordUpdater.ROW : 0;
    // Ignore the struct column and just copy all the following data columns.
    System.arraycopy(vectorizedRowBatchBase.cols, payloadCol + 1, value.cols, 0,
        vectorizedRowBatchBase.cols.length - payloadCol - 1);
  } else {
    StructColumnVector payloadStruct =
        (StructColumnVector) vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW];
    // Transfer columnVector objects from base batch to outgoing batch.
    System.arraycopy(payloadStruct.fields, 0, value.cols, 0, value.getDataColumnCount());
  }
  if (rowIdProjected) {
    recordIdColumnVector.fields[0] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ORIGINAL_WRITEID];
    recordIdColumnVector.fields[1] = vectorizedRowBatchBase.cols[OrcRecordUpdater.BUCKET];
    recordIdColumnVector.fields[2] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW_ID];
  }
}
public Object deserialize(Writable writable) {
  final ArrowWrapperWritable arrowWrapperWritable = (ArrowWrapperWritable) writable;
  final VectorSchemaRoot vectorSchemaRoot = arrowWrapperWritable.getVectorSchemaRoot();
  final List<FieldVector> fieldVectors = vectorSchemaRoot.getFieldVectors();
  final int fieldCount = fieldVectors.size();
  final int rowCount = vectorSchemaRoot.getRowCount();
  vectorizedRowBatch.ensureSize(rowCount);

  if (rows == null || rows.length < rowCount) {
    rows = new Object[rowCount][];
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
      rows[rowIndex] = new Object[fieldCount];
    }
  }

  for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
    final FieldVector fieldVector = fieldVectors.get(fieldIndex);
    final int projectedCol = vectorizedRowBatch.projectedColumns[fieldIndex];
    final ColumnVector columnVector = vectorizedRowBatch.cols[projectedCol];
    final TypeInfo typeInfo = serDe.rowTypeInfo.getAllStructFieldTypeInfos().get(fieldIndex);
    read(fieldVector, columnVector, typeInfo);
  }
  for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
    vectorExtractRow.extractRow(vectorizedRowBatch, rowIndex, rows[rowIndex]);
  }
  vectorizedRowBatch.reset();
  return rows;
}
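A hedged sketch of how a caller might consume the Object[][] this deserializer returns; the serDe and writable setup, and the separately tracked rowCount, are assumptions for illustration.

// Assumes 'serDe' is an initialized Arrow-based SerDe like the one above,
// 'writable' wraps a populated VectorSchemaRoot, and 'rowCount' is the batch's
// row count (the returned array may be longer and hold stale earlier rows).
Object[][] rows = (Object[][]) serDe.deserialize(writable);
for (int r = 0; r < rowCount; r++) {
  Object[] row = rows[r];
  // each element of 'row' is the extracted Hive value for one column
}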
void flushInternalBatch() throws IOException {
  if (internalBatch.size != 0) {
    super.addRowBatch(internalBatch);
    internalBatch.reset();
  }
}
        Math.min(vectorizedTestingReducerBatchSize, batch.getMaxSize()) : batch.getMaxSize());
    Preconditions.checkState(maxSize > 0);
    int rowIdx = 0;
    batch.reset();
  } catch (Exception e) {
    String rowString = null;
    try {
      rowString = batch.toString();
    } catch (Exception e2) {
      rowString = "[Error getting row data with exception "
/**
 * Reduce the batch size for a vectorized row batch.
 */
public static void setBatchSize(VectorizedRowBatch batch, int size) {
  assert (size <= batch.getMaxSize());
  batch.size = size;
}
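A small illustrative sketch of using this helper to shrink a batch's logical size for a test; the single-column schema is an assumption, and 1024 is VectorizedRowBatch's default maximum size.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.TypeDescription;

public class BatchSizeSketch {
  public static void main(String[] args) {
    TypeDescription schema = TypeDescription.fromString("struct<x:bigint>");
    VectorizedRowBatch batch = schema.createRowBatch(); // default max size is 1024
    setBatchSize(batch, 3); // downstream operators now see only 3 rows
  }

  public static void setBatchSize(VectorizedRowBatch batch, int size) {
    assert (size <= batch.getMaxSize());
    batch.size = size;
  }
}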
private VectorizedRowBatch newBufferedBatch(VectorizedRowBatch batch) throws HiveException {
  final int bufferedColumnCount = bufferedColumnMap.length;
  VectorizedRowBatch newBatch = new VectorizedRowBatch(bufferedColumnCount);
  for (int i = 0; i < bufferedColumnCount; i++) {
    newBatch.cols[i] = VectorizedBatchUtil.makeLikeColumnVector(batch.cols[bufferedColumnMap[i]]);
    newBatch.cols[i].init();
  }
  return newBatch;
}
@Override
public boolean nextBatch(VectorizedRowBatch batch) throws IOException {
  try {
    if (rowInStripe >= rowCountInStripe) {
      currentStripe += 1;
      if (currentStripe >= stripes.size()) {
        batch.size = 0;
        return false;
      }
      readStripe();
    }
    int batchSize = computeBatchSize(batch.getMaxSize());
    rowInStripe += batchSize;
    reader.setVectorColumnCount(batch.getDataColumnCount());
    reader.nextBatch(batch, batchSize);
    batch.selectedInUse = false;
    batch.size = batchSize;
    advanceToNextRow(reader, rowInStripe + rowBaseInStripe, true);
    return batch.size != 0;
  } catch (IOException e) {
    // Rethrow exception with file name in log message
    throw new IOException("Error reading file: " + path, e);
  }
}
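For contrast, here is a minimal read loop against the public ORC reader API, which an implementation like the one above ultimately serves; the file path is an assumption.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class OrcReadSketch {
  public static void main(String[] args) throws Exception {
    Reader reader = OrcFile.createReader(new Path("/tmp/demo.orc"),
        OrcFile.readerOptions(new Configuration()));
    try (RecordReader rows = reader.rows()) {
      VectorizedRowBatch batch = reader.getSchema().createRowBatch();
      while (rows.nextBatch(batch)) { // returns false once no rows remain
        for (int r = 0; r < batch.size; r++) {
          // process row r of the current batch
        }
      }
    }
  }
}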
  this.destinationBatch = new VectorizedRowBatch(sourceIncludes.size());
  int inclBatchIx = 0;
  for (Integer columnId : sourceIncludes) {
    destinationBatch.cols[inclBatchIx++] = sourceBatch.cols[columnId];
  }
  destinationBatch.setPartitionInfo(sourceIncludes.size(), 0);
} else {
  this.destinationBatch = sourceBatch;
VectorizedRowBatch inputVrb = new VectorizedRowBatch(
    acidColCount + 1 + vrb.getDataColumnCount());
/**
 * Fills an ORC batch into an array of Row.
 *
 * @param rows The array of rows to fill.
 * @param schema The schema of the ORC data.
 * @param batch The ORC data.
 * @param selectedFields The list of selected ORC fields.
 * @return The number of rows that were filled.
 */
static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) {
  int rowsToRead = Math.min((int) batch.count(), rows.length);
  List<TypeDescription> fieldTypes = schema.getChildren();
  // read each selected field
  for (int fieldIdx = 0; fieldIdx < selectedFields.length; fieldIdx++) {
    int orcIdx = selectedFields[fieldIdx];
    readField(rows, fieldIdx, fieldTypes.get(orcIdx), batch.cols[orcIdx], rowsToRead);
  }
  return rowsToRead;
}
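A hedged sketch of driving fillRows from a read loop; 'recordReader', 'batch', and the selected-field indices are illustrative assumptions, and Row is Flink's org.apache.flink.types.Row as in the method above.

// Assumes an ORC RecordReader 'recordReader' and a 'batch' created from 'schema'.
Row[] rows = new Row[batch.getMaxSize()];
for (int i = 0; i < rows.length; i++) {
  rows[i] = new Row(selectedFields.length);
}
while (recordReader.nextBatch(batch)) {
  int filled = fillRows(rows, schema, batch, selectedFields);
  for (int i = 0; i < filled; i++) {
    // emit rows[i] downstream
  }
}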
vectorizedRowBatch.setPartitionInfo(1, 0); // set data column count as 1
long previousPayload = Long.MIN_VALUE;
while (vectorizedReader.next(null, vectorizedRowBatch)) {
@Override
public boolean next(NullWritable nullWritable, VectorizedRowBatch vectorizedRowBatch)
    throws IOException {
  vectorizedRowBatch.reset();
  try {
    return readNextBatch(vectorizedRowBatch, recordsCursor) > 0;
  } catch (SerDeException e) {
    throw new IOException("Serde exception", e);
  }
}
  final int maxSize = batch.getMaxSize();
  Preconditions.checkState(maxSize > 0);
  int rowIdx = 0;
  reducer.process(batch, tag);
  batch.reset();
} catch (Exception e) {
  String rowString = null;
  try {
    rowString = batch.toString();
  } catch (Exception e2) {
    rowString = "[Error getting row data with exception "
public FakeDataReader(int size, int numCols) {
  this.size = size;
  this.numCols = numCols;
  vrg = new VectorizedRowBatch(numCols, len);
  for (int i = 0; i < numCols; i++) {
    try {
      Thread.sleep(2);
    } catch (InterruptedException ignore) {
    }
    vrg.cols[i] = getLongVector(len);
  }
}
final int totalColumnCount = nonScratchColumnCount + scratchColumnTypeNames.length;
VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount);
result.setPartitionInfo(dataColumnCount, partitionColumnCount);
result.reset();
return result;
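A brief sketch of what setPartitionInfo implies for column layout (the counts here are made up for illustration): data columns occupy the first slots, partition columns come next, and any remaining slots are scratch columns for expression results.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class PartitionInfoSketch {
  public static void main(String[] args) {
    // 3 data columns + 1 partition column + 1 scratch column = 5 total
    VectorizedRowBatch vrb = new VectorizedRowBatch(5);
    vrb.setPartitionInfo(3, 1);
    assert vrb.getDataColumnCount() == 3;
    assert vrb.getPartitionColumnCount() == 1;
  }
}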