@Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { if (writer == null) { // a row with no columns ObjectInspector inspector = ObjectInspectorFactory. getStandardStructObjectInspector(new ArrayList<String>(), new ArrayList<ObjectInspector>()); options.inspector(inspector); writer = OrcFile.createWriter(path, options); } writer.close(); } }
@Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { if (writer == null) { // a row with no columns ObjectInspector inspector = ObjectInspectorFactory. getStandardStructObjectInspector(new ArrayList<String>(), new ArrayList<ObjectInspector>()); options.inspector(inspector); writer = OrcFile.createWriter(path, options); } writer.close(); } }
/**
 * Creates an ORC file for the {@code BigRow} layout, closes it without adding any
 * row, and checks what a reader reports for it: no rows, no stripes, no metadata
 * keys, {@code NONE} compression, a compression size of 0 and a content length of 3.
 */
@Test
public void emptyFile() throws Exception {
  final ObjectInspector rowInspector;
  synchronized (TestOrcFile.class) {
    rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  // Open the writer and close it straight away: nothing is ever added.
  final Writer emptyWriter = OrcFile.createWriter(
      testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(rowInspector)
          .stripeSize(1000)
          .compress(CompressionKind.NONE)
          .bufferSize(100));
  emptyWriter.close();

  final Reader orcReader = OrcFile.createReader(
      testFilePath, OrcFile.readerOptions(conf).filesystem(fs));

  assertEquals(false, orcReader.rows().hasNext());
  assertEquals(CompressionKind.NONE, orcReader.getCompression());
  assertEquals(0, orcReader.getNumberOfRows());
  assertEquals(0, orcReader.getCompressionSize());
  assertEquals(false, orcReader.getMetadataKeys().iterator().hasNext());
  assertEquals(3, orcReader.getContentLength());
  assertEquals(false, orcReader.getStripes().iterator().hasNext());
}
opts.inspector(options.getInspector()) .callback(watcher); final Writer writer = OrcFile.createWriter(filename, opts);
Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .compress(CompressionKind.NONE) .stripeSize(50000)
.inspector(inspector) .stripeSize(10000) .bufferSize(10000));
OrcFile.WriterOptions opts = OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.ZLIB); Writer writer = OrcFile.createWriter(new Path(testFilePath, "-0"), opts); writer.close(); LlapDaemonInfo.initialize("test", new Configuration()); LlapProxy.setDaemon(true); opts = OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.ZLIB); writer = OrcFile.createWriter(new Path(testFilePath, "-1"), opts); writer.close(); opts = OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.ZLIB); writer = OrcFile.createWriter(new Path(testFilePath, "-2"), opts); writer.close();
OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
int rowIndexStride) throws IOException { return createWriter(path, writerOptions(conf) .inspector(inspector) .fileSystem(fs) .stripeSize(stripeSize)
@Test public void testOrcSerDeStatsList() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (ListStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 5000; row++) { List<String> test = new ArrayList<String>(); for (int i = 0; i < 1000; i++) { test.add("hi"); } writer.addRow(new ListStruct(test)); } writer.close(); assertEquals(5000, writer.getNumberOfRows()); assertEquals(430000000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(5000, reader.getNumberOfRows()); assertEquals(430000000, reader.getRawDataSize()); assertEquals(430000000, reader.getRawDataSizeOfColumns(Lists.newArrayList("list1"))); }
@Test public void testOrcSerDeStatsMap() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 1000; row++) { Map<String, Double> test = new HashMap<String, Double>(); for (int i = 0; i < 10; i++) { test.put("hi" + i, 2.0); } writer.addRow(new MapStruct(test)); } writer.close(); // stats from writer assertEquals(1000, writer.getNumberOfRows()); assertEquals(950000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(1000, reader.getNumberOfRows()); assertEquals(950000, reader.getRawDataSize()); assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1"))); }
/**
 * Round-trips 1080832 pseudo-random {@code long} values (fixed seed 1234) through a
 * ZLIB-compressed ORC file and checks that the reader returns exactly the values
 * that were written, in the same order.
 */
@Test
public void testBitPack64Large() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  int size = 1080832;
  long[] inp = new long[size];
  Random rand = new Random(1234);
  for (int i = 0; i < size; i++) {
    inp[i] = rand.nextLong();
  }
  List<Long> input = Lists.newArrayList(Longs.asList(inp));

  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.ZLIB));
  for (Long l : input) {
    writer.addRow(l);
  }
  writer.close();

  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  int idx = 0;
  try {
    while (rows.hasNext()) {
      Object row = rows.next(null);
      Assert.assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
    }
  } finally {
    // Release the record reader even when a comparison above fails.
    rows.close();
  }
  // The loop alone passes if the reader yields fewer rows than were written
  // (including none at all), so the number of rows read is checked explicitly.
  Assert.assertEquals(size, idx);
}
/**
 * Writes an ORC file of {@code StringIntIntIntRow} rows to {@code tmpPath}.
 *
 * <p>The file is written with ZLIB compression, a buffer size of 5000 and a row
 * index stride of 1000. Each row carries the binary string of its row index and
 * three ints drawn from a {@link Random} seeded with 157.
 *
 * @param stripSize the stripe size passed to the writer options
 * @param numberOfRows how many rows to write
 * @throws IOException propagated from creating, filling or closing the writer
 */
private void createOrcFile(int stripSize, int numberOfRows) throws IOException {
  final ObjectInspector rowInspector;
  synchronized (TestOrcFileStripeMergeRecordReader.class) {
    rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        StringIntIntIntRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  final Writer orcWriter = OrcFile.createWriter(
      tmpPath,
      OrcFile.writerOptions(conf)
          .inspector(rowInspector)
          .stripeSize(stripSize)
          .compress(CompressionKind.ZLIB)
          .bufferSize(5000)
          .rowIndexStride(1000));

  final Random generator = new Random(157);
  int rowIndex = 0;
  while (rowIndex < numberOfRows) {
    // The three ints are drawn in field order so the generated sequence is unchanged.
    final String label = Integer.toBinaryString(rowIndex);
    final int first = generator.nextInt();
    final int second = generator.nextInt();
    final int third = generator.nextInt();
    orcWriter.addRow(new StringIntIntIntRow(label, first, second, third));
    rowIndex++;
  }
  orcWriter.close();
}
/**
 * Adds one row to the ORC writer, creating the writer first if none exists yet.
 *
 * @param nullWritable the record key; not read by this method
 * @param row supplies both the row object to add and the inspector used when the
 *     writer has to be created
 * @throws IOException propagated from creating the writer or adding the row
 */
@Override
public void write(NullWritable nullWritable, OrcSerdeRow row) throws IOException {
  if (writer != null) {
    writer.addRow(row.getRow());
    return;
  }

  // No writer yet: take the inspector from this row before creating one.
  options.inspector(row.getInspector());
  writer = OrcFile.createWriter(path, options);
  writer.addRow(row.getRow());
}
/**
 * Adds one row to the ORC writer, creating the writer first if none exists yet.
 *
 * @param row the row to write; it is cast to {@code OrcSerdeRow}, which supplies both
 *     the row object to add and the inspector used when the writer has to be created
 * @throws IOException propagated from creating the writer or adding the row
 */
@Override
public void write(Writable row) throws IOException {
  final OrcSerdeRow orcRow = (OrcSerdeRow) row;
  if (writer != null) {
    writer.addRow(orcRow.getRow());
    return;
  }

  // No writer yet: take the inspector from this row before creating one.
  options.inspector(orcRow.getInspector());
  writer = OrcFile.createWriter(path, options);
  writer.addRow(orcRow.getRow());
}
/**
 * Adds one row to the ORC writer, creating the writer first if none exists yet.
 *
 * @param key the record key; not read by this method
 * @param row supplies both the row object to add and the inspector used when the
 *     writer has to be created
 * @throws IOException propagated from creating the writer or adding the row
 * @throws InterruptedException declared by the overridden signature
 */
@Override
public void write(NullWritable key, OrcSerdeRow row)
    throws IOException, InterruptedException {
  if (writer != null) {
    writer.addRow(row.getRow());
    return;
  }

  // No writer yet: take the inspector from this row before creating one.
  options.inspector(row.getInspector());
  writer = OrcFile.createWriter(path, options);
  writer.addRow(row.getRow());
}
/**
 * Adds one row to the ORC writer, creating the writer first if none exists yet.
 *
 * @param row the row to write; it is cast to {@code OrcSerdeRow}, which supplies both
 *     the row object to add and the inspector used when the writer has to be created
 * @throws IOException propagated from creating the writer or adding the row
 */
@Override
public void write(Writable row) throws IOException {
  final OrcSerdeRow orcRow = (OrcSerdeRow) row;
  if (writer != null) {
    writer.addRow(orcRow.getRow());
    return;
  }

  // No writer yet: take the inspector from this row before creating one.
  options.inspector(orcRow.getInspector());
  writer = OrcFile.createWriter(path, options);
  writer.addRow(orcRow.getRow());
}
static WriterOptions createOrcWriterOptions(ObjectInspector sourceOi, Configuration conf, CacheWriter cacheWriter, int allocSize) throws IOException { return OrcFile.writerOptions(conf).stripeSize(Long.MAX_VALUE).blockSize(Long.MAX_VALUE) .rowIndexStride(Integer.MAX_VALUE) // For now, do not limit this - one RG per split .blockPadding(false).compress(CompressionKind.NONE).version(Version.CURRENT) .encodingStrategy(EncodingStrategy.SPEED).bloomFilterColumns(null).inspector(sourceOi) .physicalWriter(cacheWriter).memory(MEMORY_MANAGER).bufferSize(allocSize); }
/**
 * Adds one row to the ORC writer, creating the writer first if none exists yet.
 *
 * @param nullWritable the record key; not read by this method
 * @param row supplies both the row object to add and the inspector used when the
 *     writer has to be created
 * @throws IOException propagated from creating the writer or adding the row
 */
@Override
public void write(NullWritable nullWritable, OrcSerdeRow row) throws IOException {
  if (writer != null) {
    writer.addRow(row.getRow());
    return;
  }

  // No writer yet: take the inspector from this row before creating one.
  options.inspector(row.getInspector());
  writer = OrcFile.createWriter(path, options);
  writer.addRow(row.getRow());
}
/**
 * Adds one row to the ORC writer, creating the writer first if none exists yet.
 *
 * @param key the record key; not read by this method
 * @param row supplies both the row object to add and the inspector used when the
 *     writer has to be created
 * @throws IOException propagated from creating the writer or adding the row
 * @throws InterruptedException declared by the overridden signature
 */
@Override
public void write(NullWritable key, OrcSerdeRow row)
    throws IOException, InterruptedException {
  if (writer != null) {
    writer.addRow(row.getRow());
    return;
  }

  // No writer yet: take the inspector from this row before creating one.
  options.inspector(row.getInspector());
  writer = OrcFile.createWriter(path, options);
  writer.addRow(row.getRow());
}