/**
 * Creates an ORC file, closes the writer without adding any row, and then
 * checks what a reader reports for that file: no rows, no stripes, no
 * metadata keys, compression {@code NONE}, compression size 0 and a
 * content length of 3.
 */
@Test
public void emptyFile() throws Exception {
  ObjectInspector rowInspector;
  // The reflection inspector is obtained while holding the TestOrcFile class lock.
  synchronized (TestOrcFile.class) {
    rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  OrcFile.WriterOptions writerOpts = OrcFile.writerOptions(conf)
      .inspector(rowInspector)
      .stripeSize(1000)
      .compress(CompressionKind.NONE)
      .bufferSize(100);
  Writer emptyWriter = OrcFile.createWriter(testFilePath, writerOpts);
  // No addRow() calls: the file is closed while still empty.
  emptyWriter.close();

  Reader fileReader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));

  // Each value is fetched right before it is checked so that the order of
  // reader calls stays the same as the order of the assertions.
  boolean hasRows = fileReader.rows().hasNext();
  assertEquals(false, hasRows);

  CompressionKind actualCompression = fileReader.getCompression();
  assertEquals(CompressionKind.NONE, actualCompression);

  long rowCount = fileReader.getNumberOfRows();
  assertEquals(0, rowCount);

  long compressionSize = fileReader.getCompressionSize();
  assertEquals(0, compressionSize);

  boolean hasMetadataKeys = fileReader.getMetadataKeys().iterator().hasNext();
  assertEquals(false, hasMetadataKeys);

  long contentLength = fileReader.getContentLength();
  assertEquals(3, contentLength);

  boolean hasStripes = fileReader.getStripes().iterator().hasNext();
  assertEquals(false, hasStripes);
}
OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(1000) .compress(CompressionKind.SNAPPY) .bufferSize(100));
OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(5000) .compress(CompressionKind.SNAPPY) .bufferSize(1000)
OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(1000) .compress(CompressionKind.NONE) .bufferSize(100));
if (!options.isWritingBase()) { opts.bufferSize(OrcRecordUpdater.DELTA_BUFFER_SIZE) .stripeSize(OrcRecordUpdater.DELTA_STRIPE_SIZE) .blockPadding(false) .compress(CompressionKind.NONE)
.memory(getMemoryManager(conf)) .inspector(inspector) .stripeSize(stripeSize) .bufferSize(bufferSize) .compress(compress);
.compress(CompressionKind.NONE) .callback(indexBuilder) .stripeSize(128));
OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(100000) .bufferSize(10000) .blockPadding(false));
OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000) .version(OrcFile.Version.V_0_11)); List<Timestamp> tslist = Lists.newArrayList();
.inspector(inspector) .fileSystem(fs) .stripeSize(stripeSize) .compress(compress) .bufferSize(bufferSize)
.inspector(inspector) .compress(CompressionKind.NONE) .stripeSize(50000) .bufferSize(100) .rowIndexStride(0)
if (!options.isWritingBase()) { opts.bufferSize(OrcRecordUpdater.DELTA_BUFFER_SIZE) .stripeSize(OrcRecordUpdater.DELTA_STRIPE_SIZE) .blockPadding(false); if(!MetastoreConf.getBoolVar(options.getConfiguration(),
.inspector(inspector) .compress(CompressionKind.NONE) .stripeSize(50000) .bufferSize(100) .rowIndexStride(0)
OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 1000; row++) {
.inspector(inspector) .fileSystem(fs) .stripeSize(stripeSize) .compress(compress) .bufferSize(bufferSize)
OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
@Test public void testOrcSerDeStatsMap() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 1000; row++) { Map<String, Double> test = new HashMap<String, Double>(); for (int i = 0; i < 10; i++) { test.put("hi" + i, 2.0); } writer.addRow(new MapStruct(test)); } writer.close(); // stats from writer assertEquals(1000, writer.getNumberOfRows()); assertEquals(950000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(1000, reader.getNumberOfRows()); assertEquals(950000, reader.getRawDataSize()); assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1"))); }
@Test public void testOrcSerDeStatsList() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (ListStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 5000; row++) { List<String> test = new ArrayList<String>(); for (int i = 0; i < 1000; i++) { test.add("hi"); } writer.addRow(new ListStruct(test)); } writer.close(); assertEquals(5000, writer.getNumberOfRows()); assertEquals(430000000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(5000, reader.getNumberOfRows()); assertEquals(430000000, reader.getRawDataSize()); assertEquals(430000000, reader.getRawDataSizeOfColumns(Lists.newArrayList("list1"))); }
/**
 * Writes an ORC file at {@code tmpPath} containing {@code numberOfRows}
 * {@code StringIntIntIntRow} records, using ZLIB compression, a buffer size
 * of 5000 and a row index stride of 1000.
 *
 * <p>Each row carries the binary string of its index followed by three ints
 * drawn from a {@link Random} seeded with 157, so the generated content is
 * the same on every call with the same arguments.
 *
 * @param stripSize    stripe size passed to the writer options
 * @param numberOfRows number of rows to add before closing the writer
 * @throws IOException propagated from the writer calls
 */
private void createOrcFile(int stripSize, int numberOfRows) throws IOException {
  ObjectInspector rowInspector;
  // The reflection inspector is obtained while holding the test class lock.
  synchronized (TestOrcFileStripeMergeRecordReader.class) {
    rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        StringIntIntIntRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  Writer orcWriter = OrcFile.createWriter(tmpPath,
      OrcFile.writerOptions(conf)
          .inspector(rowInspector)
          .stripeSize(stripSize)
          .compress(CompressionKind.ZLIB)
          .bufferSize(5000)
          .rowIndexStride(1000));

  Random generator = new Random(157);
  int rowIndex = 0;
  while (rowIndex < numberOfRows) {
    // Locals are filled in the same left-to-right order the constructor
    // arguments were evaluated in, so the random sequence is consumed identically.
    String binaryIndex = Integer.toBinaryString(rowIndex);
    int first = generator.nextInt();
    int second = generator.nextInt();
    int third = generator.nextInt();
    orcWriter.addRow(new StringIntIntIntRow(binaryIndex, first, second, third));
    rowIndex++;
  }
  orcWriter.close();
}
static WriterOptions createOrcWriterOptions(ObjectInspector sourceOi, Configuration conf, CacheWriter cacheWriter, int allocSize) throws IOException { return OrcFile.writerOptions(conf).stripeSize(Long.MAX_VALUE).blockSize(Long.MAX_VALUE) .rowIndexStride(Integer.MAX_VALUE) // For now, do not limit this - one RG per split .blockPadding(false).compress(CompressionKind.NONE).version(Version.CURRENT) .encodingStrategy(EncodingStrategy.SPEED).bloomFilterColumns(null).inspector(sourceOi) .physicalWriter(cacheWriter).memory(MEMORY_MANAGER).bufferSize(allocSize); }