@Override
public void write(KeyValue keyValue) throws IOException {
  rowIndex = batch.size++;
  VectorColumnFiller.fillRow(rowIndex, converters, schema, batch,
      gson.fromJson(new String(keyValue.getValue()), JsonObject.class));
  if (batch.size == batch.getMaxSize()) {
    writer.addRowBatch(batch);
    batch.reset();
  }
}
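For context, here is a minimal self-contained sketch of the same fill-until-full pattern against the core ORC writer API; the file path and single-column schema are illustrative assumptions, not taken from the snippet above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class OrcWriteSketch {
  public static void main(String[] args) throws Exception {
    TypeDescription schema = TypeDescription.fromString("struct<x:bigint>");
    try (Writer writer = OrcFile.createWriter(new Path("/tmp/demo.orc"),
        OrcFile.writerOptions(new Configuration()).setSchema(schema))) {
      VectorizedRowBatch batch = schema.createRowBatch();
      LongColumnVector x = (LongColumnVector) batch.cols[0];
      for (long i = 0; i < 100_000L; i++) {
        x.vector[batch.size++] = i;
        if (batch.size == batch.getMaxSize()) { // flush a full batch
          writer.addRowBatch(batch);
          batch.reset();
        }
      }
      if (batch.size != 0) { // flush the trailing partial batch
        writer.addRowBatch(batch);
      }
    }
  }
}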
/**
 * Make a new (scratch) batch, which is exactly "like" the batch provided,
 * except that it's empty.
 * @param batch the batch to imitate
 * @return the new batch
 * @throws HiveException
 */
public static VectorizedRowBatch makeLike(VectorizedRowBatch batch) throws HiveException {
  VectorizedRowBatch newBatch = new VectorizedRowBatch(batch.numCols);
  for (int i = 0; i < batch.numCols; i++) {
    if (batch.cols[i] != null) {
      newBatch.cols[i] = makeLikeColumnVector(batch.cols[i]);
      newBatch.cols[i].init();
    }
  }
  newBatch.projectedColumns = Arrays.copyOf(batch.projectedColumns, batch.projectedColumns.length);
  newBatch.projectionSize = batch.projectionSize;
  newBatch.reset();
  return newBatch;
}
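A hedged usage sketch, assuming the method above is the one Hive exposes on VectorizedBatchUtil: it yields an empty scratch batch that mirrors the source batch's column types and projection, which is useful when buffering or spilling rows.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class ScratchBatchSketch {
  // Returns an empty batch shaped like 'source': same column vector types,
  // same projection, but size 0 and freshly reset vectors.
  static VectorizedRowBatch scratchFor(VectorizedRowBatch source) throws HiveException {
    return VectorizedBatchUtil.makeLike(source);
  }
}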
private void copyFromBase(VectorizedRowBatch value) {
  assert !isOriginal;
  if (isFlatPayload) {
    int payloadCol = includeAcidColumns ? OrcRecordUpdater.ROW : 0;
    // Ignore the struct column and just copy all the following data columns.
    System.arraycopy(vectorizedRowBatchBase.cols, payloadCol + 1, value.cols, 0,
        vectorizedRowBatchBase.cols.length - payloadCol - 1);
  } else {
    StructColumnVector payloadStruct =
        (StructColumnVector) vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW];
    // Transfer columnVector objects from base batch to outgoing batch.
    System.arraycopy(payloadStruct.fields, 0, value.cols, 0, value.getDataColumnCount());
  }
  if (rowIdProjected) {
    recordIdColumnVector.fields[0] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ORIGINAL_WRITEID];
    recordIdColumnVector.fields[1] = vectorizedRowBatchBase.cols[OrcRecordUpdater.BUCKET];
    recordIdColumnVector.fields[2] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW_ID];
  }
}
public Object deserialize(Writable writable) {
  final ArrowWrapperWritable arrowWrapperWritable = (ArrowWrapperWritable) writable;
  final VectorSchemaRoot vectorSchemaRoot = arrowWrapperWritable.getVectorSchemaRoot();
  final List<FieldVector> fieldVectors = vectorSchemaRoot.getFieldVectors();
  final int fieldCount = fieldVectors.size();
  final int rowCount = vectorSchemaRoot.getRowCount();
  vectorizedRowBatch.ensureSize(rowCount);

  if (rows == null || rows.length < rowCount) {
    rows = new Object[rowCount][];
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
      rows[rowIndex] = new Object[fieldCount];
    }
  }

  for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
    final FieldVector fieldVector = fieldVectors.get(fieldIndex);
    final int projectedCol = vectorizedRowBatch.projectedColumns[fieldIndex];
    final ColumnVector columnVector = vectorizedRowBatch.cols[projectedCol];
    final TypeInfo typeInfo = serDe.rowTypeInfo.getAllStructFieldTypeInfos().get(fieldIndex);
    read(fieldVector, columnVector, typeInfo);
  }
  for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
    vectorExtractRow.extractRow(vectorizedRowBatch, rowIndex, rows[rowIndex]);
  }
  vectorizedRowBatch.reset();
  return rows;
}
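A hedged sketch of how a caller might consume the Object[][] this deserializer returns; the serDe and writable setup, and the separately tracked rowCount, are assumptions for illustration.

// Assumes 'serDe' is an initialized Arrow-based SerDe like the one above,
// 'writable' wraps a populated VectorSchemaRoot, and 'rowCount' is the batch's
// row count (the returned array may be longer and hold stale earlier rows).
Object[][] rows = (Object[][]) serDe.deserialize(writable);
for (int r = 0; r < rowCount; r++) {
  Object[] row = rows[r];
  // each element of 'row' is the extracted Hive value for one column
}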
void flushInternalBatch() throws IOException {
  if (internalBatch.size != 0) {
    super.addRowBatch(internalBatch);
    internalBatch.reset();
  }
}
        Math.min(vectorizedTestingReducerBatchSize, batch.getMaxSize()) : batch.getMaxSize());
    Preconditions.checkState(maxSize > 0);
    int rowIdx = 0;
    batch.reset();
  } catch (Exception e) {
    String rowString = null;
    try {
      rowString = batch.toString();
    } catch (Exception e2) {
      rowString = "[Error getting row data with exception "
/**
 * Reduce the batch size for a vectorized row batch.
 */
public static void setBatchSize(VectorizedRowBatch batch, int size) {
  assert (size <= batch.getMaxSize());
  batch.size = size;
}
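A small illustrative sketch of using this helper to shrink a batch's logical size for a test; the single-column schema is an assumption, and 1024 is VectorizedRowBatch's default maximum size.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.TypeDescription;

public class BatchSizeSketch {
  public static void main(String[] args) {
    TypeDescription schema = TypeDescription.fromString("struct<x:bigint>");
    VectorizedRowBatch batch = schema.createRowBatch(); // default max size is 1024
    setBatchSize(batch, 3); // downstream operators now see only 3 rows
  }

  public static void setBatchSize(VectorizedRowBatch batch, int size) {
    assert (size <= batch.getMaxSize());
    batch.size = size;
  }
}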
private VectorizedRowBatch newBufferedBatch(VectorizedRowBatch batch) throws HiveException {
  final int bufferedColumnCount = bufferedColumnMap.length;
  VectorizedRowBatch newBatch = new VectorizedRowBatch(bufferedColumnCount);
  for (int i = 0; i < bufferedColumnCount; i++) {
    newBatch.cols[i] = VectorizedBatchUtil.makeLikeColumnVector(batch.cols[bufferedColumnMap[i]]);
    newBatch.cols[i].init();
  }
  return newBatch;
}
@Override
public boolean nextBatch(VectorizedRowBatch batch) throws IOException {
  try {
    if (rowInStripe >= rowCountInStripe) {
      currentStripe += 1;
      if (currentStripe >= stripes.size()) {
        batch.size = 0;
        return false;
      }
      readStripe();
    }
    int batchSize = computeBatchSize(batch.getMaxSize());
    rowInStripe += batchSize;
    reader.setVectorColumnCount(batch.getDataColumnCount());
    reader.nextBatch(batch, batchSize);
    batch.selectedInUse = false;
    batch.size = batchSize;
    advanceToNextRow(reader, rowInStripe + rowBaseInStripe, true);
    return batch.size != 0;
  } catch (IOException e) {
    // Rethrow exception with file name in log message
    throw new IOException("Error reading file: " + path, e);
  }
}
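For contrast, here is a minimal read loop against the public ORC reader API, which an implementation like the one above ultimately serves; the file path is an assumption.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class OrcReadSketch {
  public static void main(String[] args) throws Exception {
    Reader reader = OrcFile.createReader(new Path("/tmp/demo.orc"),
        OrcFile.readerOptions(new Configuration()));
    try (RecordReader rows = reader.rows()) {
      VectorizedRowBatch batch = reader.getSchema().createRowBatch();
      while (rows.nextBatch(batch)) { // returns false once no rows remain
        for (int r = 0; r < batch.size; r++) {
          // process row r of the current batch
        }
      }
    }
  }
}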
  this.destinationBatch = new VectorizedRowBatch(sourceIncludes.size());
  int inclBatchIx = 0;
  for (Integer columnId : sourceIncludes) {
    destinationBatch.cols[inclBatchIx++] = sourceBatch.cols[columnId];
  }
  destinationBatch.setPartitionInfo(sourceIncludes.size(), 0);
} else {
  this.destinationBatch = sourceBatch;
VectorizedRowBatch inputVrb = new VectorizedRowBatch(
    acidColCount + 1 + vrb.getDataColumnCount());
/**
 * Fills an ORC batch into an array of Row.
 *
 * @param rows The array of rows to fill.
 * @param schema The schema of the ORC data.
 * @param batch The ORC data.
 * @param selectedFields The list of selected ORC fields.
 * @return The number of rows that were filled.
 */
static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) {
  int rowsToRead = Math.min((int) batch.count(), rows.length);
  List<TypeDescription> fieldTypes = schema.getChildren();
  // read each selected field
  for (int fieldIdx = 0; fieldIdx < selectedFields.length; fieldIdx++) {
    int orcIdx = selectedFields[fieldIdx];
    readField(rows, fieldIdx, fieldTypes.get(orcIdx), batch.cols[orcIdx], rowsToRead);
  }
  return rowsToRead;
}
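A hedged sketch of driving fillRows from a read loop; 'recordReader', 'batch', and the selected-field indices are illustrative assumptions, and Row is Flink's org.apache.flink.types.Row as in the method above.

// Assumes an ORC RecordReader 'recordReader' and a 'batch' created from 'schema'.
Row[] rows = new Row[batch.getMaxSize()];
for (int i = 0; i < rows.length; i++) {
  rows[i] = new Row(selectedFields.length);
}
while (recordReader.nextBatch(batch)) {
  int filled = fillRows(rows, schema, batch, selectedFields);
  for (int i = 0; i < filled; i++) {
    // emit rows[i] downstream
  }
}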
vectorizedRowBatch.setPartitionInfo(1, 0); // set data column count as 1
long previousPayload = Long.MIN_VALUE;
while (vectorizedReader.next(null, vectorizedRowBatch)) {
@Override
public boolean next(NullWritable nullWritable, VectorizedRowBatch vectorizedRowBatch)
    throws IOException {
  vectorizedRowBatch.reset();
  try {
    return readNextBatch(vectorizedRowBatch, recordsCursor) > 0;
  } catch (SerDeException e) {
    throw new IOException("Serde exception", e);
  }
}
  final int maxSize = batch.getMaxSize();
  Preconditions.checkState(maxSize > 0);
  int rowIdx = 0;
  reducer.process(batch, tag);
  batch.reset();
} catch (Exception e) {
  String rowString = null;
  try {
    rowString = batch.toString();
  } catch (Exception e2) {
    rowString = "[Error getting row data with exception "
public FakeDataReader(int size, int numCols) {
  this.size = size;
  this.numCols = numCols;
  vrg = new VectorizedRowBatch(numCols, len);
  for (int i = 0; i < numCols; i++) {
    try {
      Thread.sleep(2);
    } catch (InterruptedException ignore) {
    }
    vrg.cols[i] = getLongVector(len);
  }
}
final int totalColumnCount = nonScratchColumnCount + scratchColumnTypeNames.length;
VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount);
result.setPartitionInfo(dataColumnCount, partitionColumnCount);
result.reset();
return result;
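A brief sketch of what setPartitionInfo implies for column layout (the counts here are made up for illustration): data columns occupy the first slots, partition columns come next, and any remaining slots are scratch columns for expression results.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class PartitionInfoSketch {
  public static void main(String[] args) {
    // 3 data columns + 1 partition column + 1 scratch column = 5 total
    VectorizedRowBatch vrb = new VectorizedRowBatch(5);
    vrb.setPartitionInfo(3, 1);
    assert vrb.getDataColumnCount() == 3;
    assert vrb.getPartitionColumnCount() == 1;
  }
}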