void next(OrcStruct next) throws IOException {
  if (recordReader.hasNext()) {
    nextRecord = (OrcStruct) recordReader.next(next);
    // set the key
    key.setValues(OrcRecordUpdater.getOriginalTransaction(nextRecord),
        OrcRecordUpdater.getBucket(nextRecord),
        OrcRecordUpdater.getRowId(nextRecord),
        OrcRecordUpdater.getCurrentTransaction(nextRecord),
        statementId);

    // if this record is larger than maxKey, we need to stop
    if (maxKey != null && key.compareRow(maxKey) > 0) {
      LOG.debug("key " + key + " > maxkey " + maxKey);
      nextRecord = null;
      recordReader.close();
    }
  } else {
    nextRecord = null;
    recordReader.close();
  }
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (reader.hasNext()) {
    reader.next(value);
    progress = reader.getProgress();
    return true;
  } else {
    return false;
  }
}
@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
  try {
    // Check and update partition cols if necessary. Ideally, this should be done
    // in CreateValue as the partition is constant per split. But since Hive uses
    // CombineHiveRecordReader and as this does not call CreateValue for each new
    // RecordReader it creates, this check is required in next()
    if (addPartitionCols) {
      if (partitionValues != null) {
        rbCtx.addPartitionColsToBatch(value, partitionValues);
      }
      addPartitionCols = false;
    }
    if (!reader.nextBatch(value)) {
      return false;
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  progress = reader.getProgress();
  return true;
}
private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException {
  org.apache.hadoop.fs.FileSystem fs =
      org.apache.hadoop.fs.FileSystem.getLocal(new Configuration());
  Reader reader = OrcFile.createReader(orcFile,
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector();

  System.out.format("Found Bucket File : %s \n", orcFile.getName());
  ArrayList<SampleRec> result = new ArrayList<SampleRec>();
  while (rows.hasNext()) {
    Object row = rows.next(null);
    SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5];
    result.add(rec);
  }
  return result;
}
@Override
void next(OrcStruct next) throws IOException {
  if (recordReader.hasNext()) {
    long nextRowId = recordReader.getRowNumber();
    if (next == null) {
      nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS);
      // ... the other ACID metadata fields are populated here as well
      nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID,
          new LongWritable(nextRowId));
      nextRecord.setFieldValue(OrcRecordUpdater.ROW,
          recordReader.next(null));
    } else {
      nextRecord = next;
      ((LongWritable) next.getFieldValue(OrcRecordUpdater.ROW_ID))
          .set(nextRowId);
      nextRecord.setFieldValue(OrcRecordUpdater.ROW,
          recordReader.next(OrcRecordUpdater.getRow(next)));
    }
  } else {
    nextRecord = null;
    recordReader.close();
  }
}
assertEquals(reader.getContentLength(), currentOffset);
RecordReader rows = reader.rows();
assertEquals(0, rows.getRowNumber());
assertEquals(0.0, rows.getProgress(), 0.000001);
assertEquals(true, rows.hasNext());
row = (OrcStruct) rows.next(null);
assertEquals(1, rows.getRowNumber());
inspector = reader.getObjectInspector();
assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
    inspector.getTypeName());
assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547456")),
    row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(2, rows.getRowNumber());
assertEquals(new TimestampWritableV2(Timestamp.valueOf("2000-03-20 12:00:00.123456789")),
    row.getFieldValue(0));
assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")),
    row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(null, row.getFieldValue(0));
assertEquals(null, row.getFieldValue(1));
assertEquals(null, row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(null, row.getFieldValue(0));
union = (OrcUnion) row.getFieldValue(1);
assertEquals(null, union.getObject());
assertEquals(null, row.getFieldValue(2));
private Reader createMockOriginalReader() throws IOException {
  Reader reader = Mockito.mock(Reader.class, settings);
  RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
  OrcStruct row1 = createOriginalRow("first");
  OrcStruct row2 = createOriginalRow("second");
  OrcStruct row3 = createOriginalRow("third");
  OrcStruct row4 = createOriginalRow("fourth");
  OrcStruct row5 = createOriginalRow("fifth");

  Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class),
      Mockito.any(HiveConf.class))).thenReturn(recordReader);
  Mockito.when(recordReader.hasNext())
      .thenReturn(true, true, true, true, true, false);
  Mockito.when(recordReader.getRowNumber()).thenReturn(0L, 1L, 2L, 3L, 4L);
  Mockito.when(recordReader.next(null)).thenReturn(row1);
  Mockito.when(recordReader.next(row1)).thenReturn(row2);
  Mockito.when(recordReader.next(row2)).thenReturn(row3);
  Mockito.when(recordReader.next(row3)).thenReturn(row4);
  Mockito.when(recordReader.next(row4)).thenReturn(row5);
  return reader;
}
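/*
 * Illustration only (not part of the original sources): a minimal, self-contained
 * sketch of the Mockito consecutive-stubbing behaviour the mock above relies on.
 * thenReturn(a, b, c) yields a on the first call, b on the second, and c on every
 * call after that, which is how hasNext() above can return true five times and then
 * false. The Iterator-based demo class below is hypothetical and only shows the
 * stubbing pattern.
 */
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Iterator;

public class ConsecutiveStubDemo {
  public static void main(String[] args) {
    @SuppressWarnings("unchecked")
    Iterator<String> it = mock(Iterator.class);
    when(it.hasNext()).thenReturn(true, true, false);   // two elements, then exhausted
    when(it.next()).thenReturn("first", "second");

    while (it.hasNext()) {
      System.out.println(it.next());                    // prints "first" then "second"
    }
  }
}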
setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
Mockito.when(recordReader.hasNext())
    .thenReturn(true, true, true, true, true, false);
Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
Mockito.when(recordReader.next(null)).thenReturn(row1, row4);
Mockito.when(recordReader.next(row1)).thenReturn(row2);
Mockito.when(recordReader.next(row2)).thenReturn(row3);
Mockito.when(recordReader.next(row3)).thenReturn(row5);

assertEquals(1.0, merger.getProgress(), 0.01);
merger.close();
Mockito.verify(rr).close();
Mockito.verify(rr).getProgress();
    .getMapKeyObjectInspector();
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);
assertEquals(true, rows.hasNext());
rows.seekToRow(7499);
row = rows.next(null);
assertEquals(true, bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
assertEquals(false, rows.hasNext());
rows.close();
@Override
public void close() throws IOException {
  reader.close();
}
rows.seekToRow(i - (COUNT - 1));
rows.seekToRow(i);
row = (OrcStruct) rows.next(row);
BigRow expected = createRandomRow(intValues, doubleValues, stringValues,
    byteValues, words, i);
compareList(expected.list, (List<OrcStruct>) row.getFieldValue(10));
rows.close();

Iterator<StripeInformation> stripeIterator = reader.getStripes().iterator();
rows = reader.rowsOptions(new Reader.Options()
    .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
    .include(columns));
rows.seekToRow(lastRowOfStripe2);
for (int i = 0; i < 2; ++i) {
  row = (OrcStruct) rows.next(row);
  BigRow expected = createRandomRow(intValues, doubleValues, stringValues,
      byteValues, words, (int) (lastRowOfStripe2 + i));
  assertEquals(expected.string1, row.getFieldValue(8));
}
rows.close();
private static OrcFileInfo rewrite(RecordReader reader, Writer writer, BitSet rowsToDelete,
    int inputRowCount) throws IOException {
  Object object = null;
  int row = 0;
  long rowCount = 0;
  long uncompressedSize = 0;

  row = rowsToDelete.nextClearBit(row);
  if (row < inputRowCount) {
    reader.seekToRow(row);
  }

  while (row < inputRowCount) {
    if (Thread.currentThread().isInterrupted()) {
      throw new InterruptedIOException();
    }
    // seekToRow() is extremely expensive
    if (reader.getRowNumber() < row) {
      reader.next(object);
      continue;
    }
    object = reader.next(object);
    writer.addRow(object);
    rowCount++;
    uncompressedSize += uncompressedSize(object);
    row = rowsToDelete.nextClearBit(row + 1);
  }
  return new OrcFileInfo(rowCount, uncompressedSize);
}
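/*
 * Illustration only (not part of the original sources): a minimal sketch of the
 * BitSet bookkeeping that drives the rewrite loop above. Rows whose bits are set
 * in rowsToDelete are dropped; nextClearBit(fromIndex) returns the first surviving
 * row at or after fromIndex, so the loop hops from one kept row to the next while
 * plain next() calls advance the reader past the deleted ones.
 */
import java.util.BitSet;

public class NextClearBitDemo {
  public static void main(String[] args) {
    int inputRowCount = 8;
    BitSet rowsToDelete = new BitSet(inputRowCount);
    rowsToDelete.set(1);        // delete row 1
    rowsToDelete.set(3, 6);     // delete rows 3, 4 and 5

    int row = rowsToDelete.nextClearBit(0);
    while (row < inputRowCount) {
      System.out.println("keep row " + row);            // prints 0, 2, 6, 7
      row = rowsToDelete.nextClearBit(row + 1);
    }
  }
}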
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
LongColumnVector future1 =
    ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
    ((BytesColumnVector) batch.cols[2]).toString(r));
assertEquals(false, rows.nextBatch(batch));
rows.close();

lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
assertEquals("row " + r, r * 10001, lcv.vector[r]);
assertEquals(false, rows.nextBatch(batch));
rows.close();
@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
  if (!reader.hasNext()) {
    return false;
  }
  try {
    // Check and update partition cols if necessary. Ideally, this should be done
    // in CreateValue as the partition is constant per split. But since Hive uses
    // CombineHiveRecordReader and as this does not call CreateValue for each new
    // RecordReader it creates, this check is required in next()
    if (addPartitionCols) {
      rbCtx.addPartitionColsToBatch(value);
      addPartitionCols = false;
    }
    reader.nextBatch(value);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  progress = reader.getProgress();
  return true;
}
Object row = rows.next(null);
protected int populateData() {
  try {
    final int numRowsPerBatch = (int) this.numRowsPerBatch;
    int outputIdx = 0;
    // Consume the left over records from previous iteration
    if (offset > 0 && offset < hiveBatch.size) {
      int toRead = Math.min(hiveBatch.size - offset, numRowsPerBatch - outputIdx);
      copy(offset, toRead, outputIdx);
      outputIdx += toRead;
      offset += toRead;
    }
    while (outputIdx < numRowsPerBatch && hiveOrcReader.hasNext()) {
      offset = 0;
      hiveOrcReader.nextBatch(hiveBatch);
      int toRead = Math.min(hiveBatch.size, numRowsPerBatch - outputIdx);
      copy(offset, toRead, outputIdx);
      outputIdx += toRead;
      offset = toRead;
    }
    return outputIdx;
  } catch (Throwable t) {
    throw createExceptionWithContext("Failed to read data from ORC file", t);
  }
}
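/*
 * Illustration only, with hypothetical sizes (not part of the original sources):
 * a tiny simulation of the leftover handling in populateData() above. With source
 * batches of 1024 rows and 1000 rows requested per call, the first call leaves 24
 * rows unconsumed (offset == 1000); the second call copies those 24 rows first and
 * then pulls the next source batch for the remaining 976.
 */
public class LeftoverBatchDemo {
  static final int SOURCE_BATCH = 1024;  // rows per batch produced by the reader
  static final int OUTPUT_BATCH = 1000;  // rows requested per populate() call
  static int offset = 0;                 // rows of the current source batch already consumed
  static boolean haveBatch = false;      // whether a source batch has been loaded yet

  static int populate() {
    int outputIdx = 0;
    // consume the leftover rows from the previous call, if any
    if (haveBatch && offset > 0 && offset < SOURCE_BATCH) {
      int toRead = Math.min(SOURCE_BATCH - offset, OUTPUT_BATCH - outputIdx);
      outputIdx += toRead;
      offset += toRead;
    }
    // pull fresh source batches until the output batch is full
    // (this toy reader never runs out of rows)
    while (outputIdx < OUTPUT_BATCH) {
      haveBatch = true;
      offset = 0;
      int toRead = Math.min(SOURCE_BATCH, OUTPUT_BATCH - outputIdx);
      outputIdx += toRead;
      offset = toRead;
    }
    return outputIdx;
  }

  public static void main(String[] args) {
    System.out.println(populate() + " rows, offset=" + offset);  // 1000 rows, offset=1000
    System.out.println(populate() + " rows, offset=" + offset);  // 1000 rows, offset=976
  }
}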
@Test
public void emptyFile() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(1000)
          .compress(CompressionKind.NONE)
          .bufferSize(100));
  writer.close();
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(false, reader.rows().hasNext());
  assertEquals(CompressionKind.NONE, reader.getCompression());
  assertEquals(0, reader.getNumberOfRows());
  assertEquals(0, reader.getCompressionSize());
  assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
  assertEquals(3, reader.getContentLength());
  assertEquals(false, reader.getStripes().iterator().hasNext());
}
@Override
public float getProgress() throws IOException {
  // this is not likely to do the right thing for Compaction of "original" files
  // when there are copyN files
  return baseReader == null ? 1 : baseReader.getProgress();
}
public DeleteReaderValue(Reader deleteDeltaReader, Reader.Options readerOptions, int bucket,
    ValidTxnList validTxnList) throws IOException {
  this.recordReader = deleteDeltaReader.rowsOptions(readerOptions);
  this.bucketForSplit = bucket;
  this.batch = deleteDeltaReader.getSchema().createRowBatch();
  if (!recordReader.nextBatch(batch)) { // Read the first batch.
    this.batch = null; // Oh! the first batch itself was null. Close the reader.
  }
  this.indexPtrInBatch = 0;
  this.validTxnList = validTxnList;
}