@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
  try {
    // Check and update partition cols if necessary. Ideally, this should be done
    // in CreateValue as the partition is constant per split. But since Hive uses
    // CombineHiveRecordReader and
    // as this does not call CreateValue for each new RecordReader it creates, this check is
    // required in next()
    if (addPartitionCols) {
      if (partitionValues != null) {
        rbCtx.addPartitionColsToBatch(value, partitionValues);
      }
      addPartitionCols = false;
    }
    if (!reader.nextBatch(value)) {
      return false;
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  progress = reader.getProgress();
  return true;
}
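For orientation, here is a minimal sketch of the boolean-returning read loop that wrappers like the one above delegate to. It assumes the org.apache.orc 1.x core API (the Hive snippets use Hive's own Reader/RecordReader wrappers with the same contract), and the class and method names below are illustrative, not taken from the snippets.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

class OrcScanSketch {
  // Drains an ORC file batch by batch and returns the row count.
  static long countRows(Path file, Configuration conf) throws IOException {
    Reader reader = OrcFile.createReader(file, OrcFile.readerOptions(conf));
    RecordReader rows = reader.rows();
    // One batch is allocated from the file schema and refilled on every call.
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    long total = 0;
    while (rows.nextBatch(batch)) { // false means end of file
      total += batch.size;          // rows populated in batch.cols[...]
    }
    rows.close();
    return total;
  }
}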
public DeleteReaderValue(Reader deleteDeltaReader, Reader.Options readerOptions, int bucket,
    ValidTxnList validTxnList) throws IOException {
  this.recordReader = deleteDeltaReader.rowsOptions(readerOptions);
  this.bucketForSplit = bucket;
  this.batch = deleteDeltaReader.getSchema().createRowBatch();
  if (!recordReader.nextBatch(batch)) { // Read the first batch.
    this.batch = null; // Oh! the first batch itself was null. Close the reader.
  }
  this.indexPtrInBatch = 0;
  this.validTxnList = validTxnList;
}
if (indexPtrInBatch >= batch.size) {
  if (recordReader.nextBatch(batch)) {
if (!baseReader.nextBatch(vectorizedRowBatchBase)) {
  return false;
DeleteReaderValue(Reader deleteDeltaReader, Path deleteDeltaFile, Reader.Options readerOptions,
    int bucket, ValidWriteIdList validWriteIdList, boolean isBucketedTable, final JobConf conf,
    OrcRawRecordMerger.KeyInterval keyInterval, OrcSplit orcSplit) throws IOException {
  this.reader = deleteDeltaReader;
  this.deleteDeltaFile = deleteDeltaFile;
  this.recordReader = deleteDeltaReader.rowsOptions(readerOptions, conf);
  this.bucketForSplit = bucket;
  final boolean useDecimal64ColumnVector = HiveConf.getVar(conf, ConfVars
      .HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
  if (useDecimal64ColumnVector) {
    this.batch = deleteDeltaReader.getSchema().createRowBatchV2();
  } else {
    this.batch = deleteDeltaReader.getSchema().createRowBatch();
  }
  if (!recordReader.nextBatch(batch)) { // Read the first batch.
    this.batch = null; // Oh! the first batch itself was null. Close the reader.
  }
  this.indexPtrInBatch = 0;
  this.validWriteIdList = validWriteIdList;
  this.isBucketedTable = isBucketedTable;
  if (batch != null) {
    checkBucketId(); // check 1st batch
  }
  this.keyInterval = keyInterval;
  this.orcSplit = orcSplit;
  this.numEvents = deleteDeltaReader.getNumberOfRows();
  LOG.debug("Num events stats({},x,x)", numEvents);
}
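A minimal sketch, assuming the ORC 1.5+ TypeDescription API, of what the decimal_64 branch above chooses between; the schema string is only an example and not taken from the snippet.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.TypeDescription;

class BatchFlavorsSketch {
  static void show() {
    TypeDescription schema = TypeDescription.fromString("struct<d:decimal(10,2)>");
    // createRowBatch(): decimal columns are read into DecimalColumnVector.
    VectorizedRowBatch standard = schema.createRowBatch();
    // createRowBatchV2(): decimals with precision <= 18 use Decimal64ColumnVector,
    // the layout the "decimal_64" setting checked above selects.
    VectorizedRowBatch v2 = schema.createRowBatchV2();
    System.out.println(standard.cols[0].getClass().getSimpleName()); // DecimalColumnVector
    System.out.println(v2.cols[0].getClass().getSimpleName());       // Decimal64ColumnVector
  }
}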
if (indexPtrInBatch >= batch.size) {
  if (recordReader.nextBatch(batch)) {
    checkBucketId();
    indexPtrInBatch = 0; // After reading the batch, reset the pointer to beginning.
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
LongColumnVector future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
    ((BytesColumnVector) batch.cols[2]).toString(r)); // trailing argument of a per-row assertion; the rest of the statement is missing from this excerpt
assertEquals(false, rows.nextBatch(batch));
rows.close();
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
assertEquals("row " + r, r * 10001, lcv.vector[r]); // per-row check; r is the index of a surrounding loop not shown in this excerpt
assertEquals(false, rows.nextBatch(batch));
rows.close();
@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
  if (!reader.hasNext()) {
    return false;
  }
  try {
    // Check and update partition cols if necessary. Ideally, this should be done
    // in CreateValue as the partition is constant per split. But since Hive uses
    // CombineHiveRecordReader and
    // as this does not call CreateValue for each new RecordReader it creates, this check is
    // required in next()
    if (addPartitionCols) {
      rbCtx.addPartitionColsToBatch(value);
      addPartitionCols = false;
    }
    reader.nextBatch(value);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  progress = reader.getProgress();
  return true;
}
protected int populateData() {
  try {
    final int numRowsPerBatch = (int) this.numRowsPerBatch;
    int outputIdx = 0;
    // Consume the left over records from previous iteration
    if (offset > 0 && offset < hiveBatch.size) {
      int toRead = Math.min(hiveBatch.size - offset, numRowsPerBatch - outputIdx);
      copy(offset, toRead, outputIdx);
      outputIdx += toRead;
      offset += toRead;
    }
    while (outputIdx < numRowsPerBatch && hiveOrcReader.hasNext()) {
      offset = 0;
      hiveOrcReader.nextBatch(hiveBatch);
      int toRead = Math.min(hiveBatch.size, numRowsPerBatch - outputIdx);
      copy(offset, toRead, outputIdx);
      outputIdx += toRead;
      offset = toRead;
    }
    return outputIdx;
  } catch (Throwable t) {
    throw createExceptionWithContext("Failed to read data from ORC file", t);
  }
}
long rowCount = thisStripe.getNumberOfRows();
while (rows != rowCount) {
  batch = perStripe.nextBatch(batch); // read orc file stripes in vectorizedRowBatch
  long currentBatchRow = batch.count();
  int nrows = (int) currentBatchRow;
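For comparison with the boolean contract used in most snippets above, a minimal sketch of the legacy loop the fragment above excerpts, assuming the older Hive reader interface (org.apache.hadoop.hive.ql.io.orc.RecordReader) in which nextBatch(previous) returns the refilled batch and hasNext() gates the loop; the class and method names below are illustrative.

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

class LegacyOrcScanSketch {
  // Drains a reader using the old batch-returning contract excerpted above.
  static long countRows(RecordReader rows) throws IOException {
    long total = 0;
    VectorizedRowBatch batch = null;
    while (rows.hasNext()) {
      batch = rows.nextBatch(batch); // reuses the previous batch when non-null
      total += batch.size;           // rows populated in this batch
    }
    rows.close();
    return total;
  }
}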