protected Writer createOrcWriter(CacheWriter cacheWriter, Configuration conf, Path path, StructObjectInspector oi) throws IOException { // TODO: this is currently broken. We need to set memory manager to a bogus implementation // to avoid problems with memory manager actually tracking the usage. return OrcFile.createWriter(path, createOrcWriterOptions(oi, conf, cacheWriter, allocSize)); } }
/**
 * Lazily creates the ORC writer for {@code path}; does nothing if a writer already exists.
 *
 * <p>On first creation the ACID version is set on the new data file writer and a version
 * file is written to the parent directory of {@code path}.
 *
 * @throws IOException if creating the writer or writing the version information fails
 */
private void initWriter() throws IOException {
  if (writer != null) {
    // Already initialised by an earlier call.
    return;
  }
  writer = OrcFile.createWriter(path, writerOptions);
  AcidUtils.OrcAcidVersion.setAcidVersionInDataFile(writer);
  AcidUtils.OrcAcidVersion.writeVersionFile(path.getParent(), fs);
}
@Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { if (writer == null) { // a row with no columns ObjectInspector inspector = ObjectInspectorFactory. getStandardStructObjectInspector(new ArrayList<String>(), new ArrayList<ObjectInspector>()); options.inspector(inspector); writer = OrcFile.createWriter(path, options); } writer.close(); } }
@Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { if (writer == null) { // a row with no columns ObjectInspector inspector = ObjectInspectorFactory. getStandardStructObjectInspector(new ArrayList<String>(), new ArrayList<ObjectInspector>()); options.inspector(inspector); writer = OrcFile.createWriter(path, options); } writer.close(); } }
@Override public void flush() throws IOException { // We only support flushes on files with multiple transactions, because // flushes create significant overhead in HDFS. Record updaters with a // single transaction should be closed rather than flushed. if (flushLengths == null) { throw new IllegalStateException("Attempting to flush a RecordUpdater on " + path + " with a single transaction."); } if (writer == null) { writer = OrcFile.createWriter(path, writerOptions); } long len = writer.writeIntermediateFooter(); flushLengths.writeLong(len); OrcInputFormat.SHIMS.hflush(flushLengths); }
/**
 * Appends one row to the ORC file.
 *
 * <p>The writer is created on the first call, using the inspector carried by that row.
 *
 * @param key ignored
 * @param row the row wrapper supplying both the inspector and the row object
 * @throws IOException if creating the writer or adding the row fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void write(NullWritable key, OrcSerdeRow row)
    throws IOException, InterruptedException {
  if (writer == null) {
    // First row: the inspector is only known now, so finish the options here.
    options.inspector(row.getInspector());
    writer = OrcFile.createWriter(path, options);
  }
  final Object payload = row.getRow();
  writer.addRow(payload);
}
/**
 * Adds the given row to the ORC writer, creating the writer lazily.
 *
 * @param nullWritable ignored
 * @param row          wrapper providing the object inspector and the row to add
 * @throws IOException if the writer cannot be created or the row cannot be added
 */
@Override
public void write(NullWritable nullWritable, OrcSerdeRow row) throws IOException {
  final boolean firstRow = (writer == null);
  if (firstRow) {
    // Use the first row's inspector when building the writer.
    options.inspector(row.getInspector());
    writer = OrcFile.createWriter(path, options);
  }
  writer.addRow(row.getRow());
}
/**
 * Writes one row, which must be an {@code OrcSerdeRow}.
 *
 * <p>The argument is cast before anything else happens; on the first call the writer is
 * created with the inspector taken from that row.
 *
 * @param row the row to write; cast to {@code OrcSerdeRow}
 * @throws IOException if creating the writer or adding the row fails
 */
@Override
public void write(Writable row) throws IOException {
  final OrcSerdeRow orcRow = (OrcSerdeRow) row;
  if (null == writer) {
    // Lazily build the writer from the first row's inspector.
    options.inspector(orcRow.getInspector());
    writer = OrcFile.createWriter(path, options);
  }
  writer.addRow(orcRow.getRow());
}
/**
 * Writes a single row through the ORC writer.
 *
 * <p>If the writer has not been created yet, the row's inspector is installed on the
 * options and the writer is created for {@code path} before the row is added.
 *
 * @param key ignored
 * @param row carries the inspector and the row object to add
 * @throws IOException if writer creation or the row append fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void write(NullWritable key, OrcSerdeRow row)
    throws IOException, InterruptedException {
  if (null == writer) {
    // Writer creation is deferred until the first row supplies an inspector.
    options.inspector(row.getInspector());
    writer = OrcFile.createWriter(path, options);
  }
  writer.addRow(row.getRow());
}
/**
 * Appends {@code row} to the output, creating the ORC writer on first use.
 *
 * @param nullWritable ignored
 * @param row          source of the object inspector (first call only) and of the row data
 * @throws IOException if the writer cannot be created or the row cannot be added
 */
@Override
public void write(NullWritable nullWritable, OrcSerdeRow row) throws IOException {
  if (writer == null) {
    // The inspector comes from the first row seen.
    options.inspector(row.getInspector());
    writer = OrcFile.createWriter(path, options);
  }
  final Object rowData = row.getRow();
  writer.addRow(rowData);
}
/**
 * Adds a row to the ORC file.
 *
 * <p>The incoming {@code Writable} is cast to {@code OrcSerdeRow} up front. The writer is
 * created on demand using that row's inspector.
 *
 * @param row the row to add; cast to {@code OrcSerdeRow}
 * @throws IOException if the writer cannot be created or the row cannot be added
 */
@Override
public void write(Writable row) throws IOException {
  final OrcSerdeRow typedRow = (OrcSerdeRow) row;
  final boolean needsWriter = (writer == null);
  if (needsWriter) {
    options.inspector(typedRow.getInspector());
    writer = OrcFile.createWriter(path, options);
  }
  writer.addRow(typedRow.getRow());
}
/**
 * Creates an ORC file at {@code file} that carries the given user metadata entries and
 * has no rows added to it.
 *
 * <p>Each metadata value is stored as the UTF-8 bytes of the map value.
 *
 * @param file     destination file; converted to a Hadoop {@code Path} via its URI
 * @param metadata user metadata key/value pairs to add to the writer
 * @throws IOException if creating, writing to, or closing the writer fails
 */
private static void createFileWithOnlyUserMetadata(File file, Map<String, String> metadata)
        throws IOException
{
    Configuration conf = new Configuration();
    OrcFile.WriterOptions writerOptions = new OrcWriterOptions(conf)
            .memory(new NullMemoryManager(conf))
            .inspector(createSettableStructObjectInspector("test", BIGINT))
            .compress(SNAPPY);
    Writer writer = OrcFile.createWriter(new Path(file.toURI()), writerOptions);
    try {
        for (Map.Entry<String, String> entry : metadata.entrySet()) {
            writer.addUserMetadata(entry.getKey(), ByteBuffer.wrap(entry.getValue().getBytes(UTF_8)));
        }
    }
    finally {
        // Close even when adding metadata fails so the writer is not leaked.
        writer.close();
    }
}
/**
 * Creates an ORC file at {@code filePath}, fills it via {@code writeData}, and closes it.
 *
 * <p>The writer is created uncompressed ({@code CompressionKind.NONE}) with the fixed
 * numeric settings 100000, 10000 and 10000 passed positionally to
 * {@code OrcFile.createWriter}.
 *
 * @param inspector object inspector describing the rows to be written
 * @param filePath  destination of the ORC file
 * @throws IOException if creating, writing to, or closing the writer fails
 */
private void writeFile(ObjectInspector inspector, Path filePath) throws IOException {
  Writer writer = OrcFile.createWriter(
      fs, filePath, conf, inspector, 100000, CompressionKind.NONE, 10000, 10000);
  try {
    writeData(writer);
  } finally {
    // Close even if writeData fails so the writer is not leaked.
    writer.close();
  }
}
int bufferSize, int rowIndexStride) throws IOException { return createWriter(path, writerOptions(conf) .inspector(inspector) .fileSystem(fs)
/**
 * Writes an ORC file at {@code tmpPath} containing {@code numberOfRows} rows of
 * {@code StringIntIntIntRow}.
 *
 * <p>Row {@code i} holds the binary string of {@code i} followed by three ints drawn from
 * a {@code Random} seeded with 157, so the content is reproducible. The file is written
 * with ZLIB compression, a buffer size of 5000 and a row index stride of 1000.
 *
 * @param stripSize    stripe size passed to the writer options
 * @param numberOfRows number of rows to add
 * @throws IOException if creating, writing to, or closing the writer fails
 */
private void createOrcFile(int stripSize, int numberOfRows) throws IOException {
  ObjectInspector inspector;
  synchronized (TestOrcFileStripeMergeRecordReader.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (StringIntIntIntRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  Writer writer = OrcFile.createWriter(tmpPath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(stripSize)
          .compress(CompressionKind.ZLIB)
          .bufferSize(5000)
          .rowIndexStride(1000));
  try {
    Random rand = new Random(157);
    for (int i = 0; i < numberOfRows; i++) {
      writer.addRow(new StringIntIntIntRow(
          Integer.toBinaryString(i),
          rand.nextInt(),
          rand.nextInt(),
          rand.nextInt()
      ));
    }
  } finally {
    // Close even if a row fails to write so the writer is not leaked.
    writer.close();
  }
}
/**
 * Writes an ORC file with no rows and checks what the reader reports for it:
 * no rows, no compression, zero row count, zero compression size, no metadata keys,
 * a content length of 3, and no stripes.
 */
@Test
public void emptyFile() throws Exception {
  ObjectInspector bigRowInspector;
  synchronized (TestOrcFile.class) {
    bigRowInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  // Create the file and close it immediately without adding any row.
  OrcFile.WriterOptions writeOpts = OrcFile.writerOptions(conf)
      .inspector(bigRowInspector)
      .stripeSize(1000)
      .compress(CompressionKind.NONE)
      .bufferSize(100);
  Writer emptyWriter = OrcFile.createWriter(testFilePath, writeOpts);
  emptyWriter.close();

  Reader emptyReader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));

  assertEquals(false, emptyReader.rows().hasNext());
  assertEquals(CompressionKind.NONE, emptyReader.getCompression());
  assertEquals(0, emptyReader.getNumberOfRows());
  assertEquals(0, emptyReader.getCompressionSize());
  assertEquals(false, emptyReader.getMetadataKeys().iterator().hasNext());
  assertEquals(3, emptyReader.getContentLength());
  assertEquals(false, emptyReader.getStripes().iterator().hasNext());
}
@Test public void testOrcSerDeStatsMap() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 1000; row++) { Map<String, Double> test = new HashMap<String, Double>(); for (int i = 0; i < 10; i++) { test.put("hi" + i, 2.0); } writer.addRow(new MapStruct(test)); } writer.close(); // stats from writer assertEquals(1000, writer.getNumberOfRows()); assertEquals(950000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(1000, reader.getNumberOfRows()); assertEquals(950000, reader.getRawDataSize()); assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1"))); }
@Test public void testOrcSerDeStatsList() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (ListStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 5000; row++) { List<String> test = new ArrayList<String>(); for (int i = 0; i < 1000; i++) { test.add("hi"); } writer.addRow(new ListStruct(test)); } writer.close(); assertEquals(5000, writer.getNumberOfRows()); assertEquals(430000000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(5000, reader.getNumberOfRows()); assertEquals(430000000, reader.getRawDataSize()); assertEquals(430000000, reader.getRawDataSizeOfColumns(Lists.newArrayList("list1"))); }
/**
 * Round-trips 1080832 pseudo-random {@code long} values (seed 1234) through a
 * ZLIB-compressed ORC file and checks that every value is read back in order.
 *
 * <p>The number of rows read is also asserted, so a reader that returns fewer rows
 * than were written fails the test instead of passing vacuously.
 */
@Test
public void testBitPack64Large() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  int size = 1080832;
  long[] inp = new long[size];
  Random rand = new Random(1234);
  for (int i = 0; i < size; i++) {
    inp[i] = rand.nextLong();
  }
  List<Long> input = Lists.newArrayList(Longs.asList(inp));

  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.ZLIB));
  for (Long l : input) {
    writer.addRow(l);
  }
  writer.close();

  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  int idx = 0;
  try {
    while (rows.hasNext()) {
      Object row = rows.next(null);
      Assert.assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
    }
  } finally {
    // Release the record reader even when an assertion fails.
    rows.close();
  }
  // Every written row must have been read back.
  Assert.assertEquals(size, idx);
}
// Create the ORC writer for testFilePath using the given inspector, a stripe size of 100000 and a buffer size of 10000.
Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));