/**
 * Exposes every cached batch as a {@link RecordBatches} collection, with each
 * holder spanning the batch's full record range {@code [0, recordCount)}.
 *
 * @return all cached batches wrapped in full-range holders
 */
public RecordBatches toRecordBatches() {
  return new RecordBatches(batches.stream()
      .map(batch -> RecordBatchHolder.newRecordBatchHolder(batch, 0, batch.getRecordCount()))
      .collect(Collectors.toList()));
}
}
/**
 * Helper method that creates an empty batch from schema in Arrow footer.
 *
 * <p>Reads the footer offset stored at the tail of the file (immediately before the
 * trailing magic string), parses the delimited footer message at that offset, and
 * builds a zero-record {@link VectorContainer} holding one vector per footer field.
 *
 * @return holder wrapping an empty (zero-record) batch carrying the file's schema
 * @throws IOException if the file cannot be read or vector allocation fails
 */
private RecordBatchHolder getEmptyBatch() throws IOException {
  final FileStatus fileStatus = dfs.getFileStatus(path);
  final long len = fileStatus.getLen();
  // Footer offset is a long stored just before the trailing magic string at EOF.
  inputStream.seek(len - (MAGIC_STRING_LENGTH + FOOTER_OFFSET_SIZE));
  final long footerOffset = inputStream.readLong();

  // Read the footer
  inputStream.seek(footerOffset);
  ArrowFileFormat.ArrowFileFooter footer = ArrowFileFormat.ArrowFileFooter.parseDelimitedFrom(inputStream);

  // Rebuild the batch schema from the serialized field list in the footer.
  BatchSchema footerSchema = BatchSchema.newBuilder().addSerializedFields(footer.getFieldList()).build();

  final VectorContainer vectorContainer = new VectorContainer();
  // Allocate one vector per field; RollbackCloseable closes the container (and any
  // partially-allocated vectors) if allocation fails, so allocator memory is not leaked.
  try (RollbackCloseable rollback = new RollbackCloseable()) {
    rollback.add(vectorContainer);
    for(Field field : footerSchema) {
      vectorContainer.add(TypeHelper.getNewVector(field, allocator));
    }
    rollback.commit();
  } catch (Exception e) {
    throw new IOException(e);
  }
  vectorContainer.setRecordCount(0);
  vectorContainer.buildSchema();

  // Holder over range [0, 0): an empty batch that still exposes the schema.
  return newRecordBatchHolder(new RecordBatchData(vectorContainer, allocator), 0, 0);
}
final int batchEnd = (int) Math.min(currentBatchCount, batchStart + remaining); final RecordBatchHolder batchHolder = newRecordBatchHolder( new RecordBatchData(vectorContainer, allocator), batchStart,
/**
 * Verifies that truncating job data to 10 rows serializes round-trip correctly:
 * three 5-row batches are loaded, but only the first two fall inside the limit.
 */
@Test
public void testDataTrunc() throws Exception {
  final Pair<? extends ValueVector, ResultVerifier> vc1 = testVarCharVector(0, 0);
  final Pair<? extends ValueVector, ResultVerifier> vc2 = testVarCharVector(5, 5);
  final Pair<? extends ValueVector, ResultVerifier> vc3 = testVarCharVector(10, 10);
  final Pair<? extends ValueVector, ResultVerifier> dt1 = testDateMilliVector(0, 0);
  final Pair<? extends ValueVector, ResultVerifier> dt2 = testDateMilliVector(5, 5);
  final Pair<? extends ValueVector, ResultVerifier> dt3 = testDateMilliVector(10, 10);

  final RecordBatchData firstBatch = createRecordBatch(vc1.getKey(), dt1.getKey());
  final RecordBatchData secondBatch = createRecordBatch(vc2.getKey(), dt2.getKey());
  final RecordBatchData thirdBatch = createRecordBatch(vc3.getKey(), dt3.getKey());

  // Stub the loader to hand back all three batches, 5 records each.
  final JobLoader loader = mock(JobLoader.class);
  when(loader.load(anyInt(), anyInt())).thenReturn(
      new RecordBatches(asList(
          newRecordBatchHolder(firstBatch, 0, 5),
          newRecordBatchHolder(secondBatch, 0, 5),
          newRecordBatchHolder(thirdBatch, 0, 5)
      ))
  );

  try (JobData input = new JobDataWrapper(new JobDataImpl(loader, TEST_JOB_ID))) {
    final JobDataFragment truncated = input.truncate(10);

    // Round-trip through JSON and compare against the in-memory fragment.
    final String json = OBJECT_MAPPER.writeValueAsString(truncated);
    final DataPOJO output = OBJECT_MAPPER.readValue(json, DataPOJO.class);

    assertEquals(truncated.getColumns().toString(), output.getColumns().toString());
    assertEquals(truncated.getReturnedRowCount(), output.getReturnedRowCount());

    // Third batch lies beyond the 10-row truncation, so only the first two are verified.
    vc1.getValue().verify(output);
    vc2.getValue().verify(output);
    dt1.getValue().verify(output);
    dt2.getValue().verify(output);
  }
}
/**
 * Verifies that selecting rows [5, 15) from job data serializes round-trip
 * correctly: the range skips the first 5-row batch and covers the next two.
 */
@Test
public void testDataRange() throws Exception {
  final Pair<? extends ValueVector, ResultVerifier> vc1 = testVarCharVector(0, 0);
  final Pair<? extends ValueVector, ResultVerifier> vc2 = testVarCharVector(0, 5);
  final Pair<? extends ValueVector, ResultVerifier> vc3 = testVarCharVector(5, 10);
  final Pair<? extends ValueVector, ResultVerifier> dt1 = testDateMilliVector(0, 0);
  final Pair<? extends ValueVector, ResultVerifier> dt2 = testDateMilliVector(0, 5);
  final Pair<? extends ValueVector, ResultVerifier> dt3 = testDateMilliVector(5, 10);

  final RecordBatchData firstBatch = createRecordBatch(vc1.getKey(), dt1.getKey());
  final RecordBatchData secondBatch = createRecordBatch(vc2.getKey(), dt2.getKey());
  final RecordBatchData thirdBatch = createRecordBatch(vc3.getKey(), dt3.getKey());

  // Stub the loader to hand back all three batches, 5 records each.
  final JobLoader loader = mock(JobLoader.class);
  when(loader.load(anyInt(), anyInt())).thenReturn(
      new RecordBatches(asList(
          newRecordBatchHolder(firstBatch, 0, 5),
          newRecordBatchHolder(secondBatch, 0, 5),
          newRecordBatchHolder(thirdBatch, 0, 5)
      ))
  );

  try (JobData input = new JobDataWrapper(new JobDataImpl(loader, TEST_JOB_ID))) {
    final JobDataFragment ranged = input.range(5, 10);

    // Round-trip through JSON and compare against the in-memory fragment.
    final String json = OBJECT_MAPPER.writeValueAsString(ranged);
    final DataPOJO output = OBJECT_MAPPER.readValue(json, DataPOJO.class);

    assertEquals(ranged.getColumns().toString(), output.getColumns().toString());
    assertEquals(ranged.getReturnedRowCount(), output.getReturnedRowCount());

    // First batch precedes the requested range, so only the last two are verified.
    vc2.getValue().verify(output);
    vc3.getValue().verify(output);
    dt2.getValue().verify(output);
    dt3.getValue().verify(output);
  }
}
/**
 * Builds a {@code JobDataFragment} from the given vectors: a single batch whose
 * holder spans every record, wrapped at offset 0 under the test job id.
 *
 * @param vv value vectors forming the batch's columns
 * @return fragment exposing the full batch
 */
private static com.dremio.dac.model.job.JobDataFragment createDataObject(ValueVector... vv) {
  final RecordBatchData batch = createRecordBatch(vv);
  final RecordBatchHolder holder = newRecordBatchHolder(batch, 0, batch.getRecordCount());
  return new JobDataFragmentWrapper(0,
      new JobDataFragmentImpl(new RecordBatches(asList(holder)), 0, TEST_JOB_ID));
}
RecordBatchData data2 = createRecordBatch(varChar2.getKey(), date2.getKey()); RecordBatchData data3 = createRecordBatch(varChar3.getKey(), date3.getKey()); recordBatches.add(newRecordBatchHolder(data1, 0, data1.getRecordCount())); recordBatches.add(newRecordBatchHolder(data2, 0, data2.getRecordCount())); recordBatches.add(newRecordBatchHolder(data3, 0, data3.getRecordCount())); when(jobLoader.load(anyInt(), anyInt())).thenReturn( new RecordBatches(asList( newRecordBatchHolder(data1, 2, 5), newRecordBatchHolder(data2, 1, 3), newRecordBatchHolder(data3, 0, 4) )) );