/**
 * Deserializes {@code value} with {@code sourceSerDe} and appends the resulting object to
 * {@code orcWriter}.
 *
 * @param value the serialized row to convert and write
 * @throws IOException if deserialization fails (the {@link SerDeException} is attached as the
 *         cause) or if the writer rejects the row
 */
@Override
public void writeOneRow(Writable value) throws IOException {
  Object deserialized;
  try {
    deserialized = sourceSerDe.deserialize(value);
  } catch (SerDeException serDeFailure) {
    // Surface SerDe problems through the IOException channel declared by this method.
    throw new IOException(serDeFailure);
  }
  orcWriter.addRow(deserialized);
}
/**
 * Registers the key of an ORC event struct with {@code watcher} and then appends the
 * unmodified writable to {@code writer}.
 *
 * @param w the event to write; it is cast to {@link OrcStruct}
 * @throws IOException if the underlying writer fails
 */
@Override
public void write(Writable w) throws IOException {
  final OrcStruct event = (OrcStruct) w;

  // Read the four key components in the order the watcher expects them.
  final int operationCode =
      ((IntWritable) event.getFieldValue(OrcRecordUpdater.OPERATION)).get();
  final long originalWriteId =
      ((LongWritable) event.getFieldValue(OrcRecordUpdater.ORIGINAL_WRITEID)).get();
  final int bucketId =
      ((IntWritable) event.getFieldValue(OrcRecordUpdater.BUCKET)).get();
  final long rowIdValue =
      ((LongWritable) event.getFieldValue(OrcRecordUpdater.ROW_ID)).get();

  watcher.addKey(operationCode, originalWriteId, bucketId, rowIdValue);
  writer.addRow(w);
}
/**
 * Registers the key of an ORC event struct with {@code watcher} and then appends the
 * unmodified writable to {@code writer}.
 *
 * @param w the event to write; it is cast to {@link OrcStruct}
 * @throws IOException if the underlying writer fails
 */
@Override
public void write(Writable w) throws IOException {
  final OrcStruct event = (OrcStruct) w;

  // Read the four key components in the order the watcher expects them.
  final int operationCode =
      ((IntWritable) event.getFieldValue(OrcRecordUpdater.OPERATION)).get();
  final long originalTxn =
      ((LongWritable) event.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION)).get();
  final int bucketId =
      ((IntWritable) event.getFieldValue(OrcRecordUpdater.BUCKET)).get();
  final long rowIdValue =
      ((LongWritable) event.getFieldValue(OrcRecordUpdater.ROW_ID)).get();

  watcher.addKey(operationCode, originalTxn, bucketId, rowIdValue);
  writer.addRow(w);
}
/**
 * Appends one row to the ORC file, creating the writer on demand.
 *
 * <p>While {@code writer} is still {@code null}, the inspector carried by the incoming row is
 * installed on {@code options} and the writer is created for {@code path}.
 *
 * @param row the row to write; it is cast to {@link OrcSerdeRow}
 * @throws IOException if the writer cannot be created or the row cannot be added
 */
@Override
public void write(Writable row) throws IOException {
  final OrcSerdeRow orcRow = (OrcSerdeRow) row;

  final boolean needsWriter = (writer == null);
  if (needsWriter) {
    // The inspector is only known once a row is available.
    options.inspector(orcRow.getInspector());
    writer = OrcFile.createWriter(path, options);
  }

  writer.addRow(orcRow.getRow());
}
/**
 * Appends one row to the ORC file, creating the writer on demand.
 *
 * <p>While {@code writer} is still {@code null}, the inspector carried by the incoming row is
 * installed on {@code options} and the writer is created for {@code path}.
 *
 * @param key unused by this method
 * @param row the row whose payload is written
 * @throws IOException if the writer cannot be created or the row cannot be added
 * @throws InterruptedException declared by the overridden signature; not thrown directly here
 */
@Override
public void write(NullWritable key, OrcSerdeRow row)
    throws IOException, InterruptedException {
  final boolean needsWriter = (writer == null);
  if (needsWriter) {
    // The inspector is only known once a row is available.
    options.inspector(row.getInspector());
    writer = OrcFile.createWriter(path, options);
  }

  writer.addRow(row.getRow());
}
/**
 * Appends one row to the ORC file, creating the writer on demand.
 *
 * <p>While {@code writer} is still {@code null}, the inspector carried by the incoming row is
 * installed on {@code options} and the writer is created for {@code path}.
 *
 * @param nullWritable unused by this method
 * @param row the row whose payload is written
 * @throws IOException if the writer cannot be created or the row cannot be added
 */
@Override
public void write(NullWritable nullWritable, OrcSerdeRow row) throws IOException {
  final boolean needsWriter = (writer == null);
  if (needsWriter) {
    // The inspector is only known once a row is available.
    options.inspector(row.getInspector());
    writer = OrcFile.createWriter(path, options);
  }

  writer.addRow(row.getRow());
}
/**
 * Converts every field of {@code record} into its ORC representation and appends the
 * resulting struct to {@code orcWriter}.
 *
 * <p>Nothing is written when {@code recordFields} is {@code null}.
 *
 * @param record the record to convert and write
 * @throws IOException if a field conversion raises {@link ArrayIndexOutOfBoundsException}
 *         (attached as the cause) or if the writer fails
 */
@Override
public void write(final Record record) throws IOException {
  if (recordFields != null) {
    for (int i = 0; i < numRecordFields; i++) {
      final RecordField field = recordFields.get(i);
      final DataType fieldType = field.getDataType();
      final String fieldName = field.getFieldName();
      final Object o = record.getValue(field);
      try {
        workingRow[i] = NiFiOrcUtils.convertToORCObject(
            NiFiOrcUtils.getOrcField(fieldType, hiveFieldNames), o, hiveFieldNames);
      } catch (ArrayIndexOutOfBoundsException aioobe) {
        // Concatenate the value directly instead of calling o.toString(): a null field value
        // would otherwise raise a NullPointerException here and hide the real failure.
        // NOTE(review): the "type" label is followed by the field name, not its data type;
        // the wording is kept as-is so existing log matching is not disturbed.
        final String errorMsg = "Index out of bounds for column " + i
            + ", type " + fieldName + ", and object " + o;
        throw new IOException(errorMsg, aioobe);
      }
    }
    orcWriter.addRow(NiFiOrcUtils.createOrcStruct(orcSchema, workingRow));
  }
}
/**
 * Appends one row to the ORC file, creating the writer on demand.
 *
 * <p>While {@code writer} is still {@code null}, the inspector carried by the incoming row is
 * installed on {@code options} and the writer is created for {@code path}.
 *
 * @param key unused by this method
 * @param row the row whose payload is written
 * @throws IOException if the writer cannot be created or the row cannot be added
 * @throws InterruptedException declared by the overridden signature; not thrown directly here
 */
@Override
public void write(NullWritable key, OrcSerdeRow row)
    throws IOException, InterruptedException {
  final boolean needsWriter = (writer == null);
  if (needsWriter) {
    // The inspector is only known once a row is available.
    options.inspector(row.getInspector());
    writer = OrcFile.createWriter(path, options);
  }

  writer.addRow(row.getRow());
}
/**
 * Appends one row to the ORC file, creating the writer on demand.
 *
 * <p>While {@code writer} is still {@code null}, the inspector carried by the incoming row is
 * installed on {@code options} and the writer is created for {@code path}.
 *
 * @param nullWritable unused by this method
 * @param row the row whose payload is written
 * @throws IOException if the writer cannot be created or the row cannot be added
 */
@Override
public void write(NullWritable nullWritable, OrcSerdeRow row) throws IOException {
  final boolean needsWriter = (writer == null);
  if (needsWriter) {
    // The inspector is only known once a row is available.
    options.inspector(row.getInspector());
    writer = OrcFile.createWriter(path, options);
  }

  writer.addRow(row.getRow());
}
/**
 * Appends one row to the ORC file, creating the writer on demand.
 *
 * <p>While {@code writer} is still {@code null}, the inspector carried by the incoming row is
 * installed on {@code options} and the writer is created for {@code path}.
 *
 * @param row the row to write; it is cast to {@link OrcSerdeRow}
 * @throws IOException if the writer cannot be created or the row cannot be added
 */
@Override
public void write(Writable row) throws IOException {
  final OrcSerdeRow orcRow = (OrcSerdeRow) row;

  final boolean needsWriter = (writer == null);
  if (needsWriter) {
    // The inspector is only known once a row is available.
    options.inspector(orcRow.getInspector());
    writer = OrcFile.createWriter(path, options);
  }

  writer.addRow(orcRow.getRow());
}
/**
 * Writes 25000 rows to {@code writer}. Rows 0, 5000, 10000, 15000 and 20000 carry distinct
 * values; every other row is the same "zebra" filler row.
 *
 * @param writer destination for the generated rows
 * @throws IOException if the writer fails
 */
private void writeData(Writer writer) throws IOException {
  for (int rowIndex = 0; rowIndex < 25000; rowIndex++) {
    AllTypesRow generated;
    switch (rowIndex) {
      case 0:
        generated = new AllTypesRow(2L, "foo", 0.8, HiveDecimal.create("1.2"),
            new Timestamp(0));
        break;
      case 5000:
        generated = new AllTypesRow(13L, "bar", 80.0, HiveDecimal.create("2.2"),
            new Timestamp(5000));
        break;
      case 10000:
        generated = new AllTypesRow(29L, "cat", 8.0, HiveDecimal.create("3.3"),
            new Timestamp(10000));
        break;
      case 15000:
        generated = new AllTypesRow(70L, "dog", 1.8, HiveDecimal.create("4.4"),
            new Timestamp(15000));
        break;
      case 20000:
        generated = new AllTypesRow(5L, "eat", 0.8, HiveDecimal.create("5.5"),
            new Timestamp(20000));
        break;
      default:
        // Filler row used for every index that is not one of the marker positions above.
        generated = new AllTypesRow(100L, "zebra", 8.0, HiveDecimal.create("0.0"),
            new Timestamp(250000));
        break;
    }
    writer.addRow(generated);
  }
}
}
private void addSimpleEvent(int operation, long currentTransaction, long rowId, Object row) throws IOException { this.operation.set(operation); this.currentTransaction.set(currentTransaction); // If this is an insert, originalTransaction should be set to this transaction. If not, // it will be reset by the following if anyway. long originalTransaction = currentTransaction; if (operation == DELETE_OPERATION || operation == UPDATE_OPERATION) { Object rowIdValue = rowInspector.getStructFieldData(row, recIdField); originalTransaction = origTxnInspector.get( recIdInspector.getStructFieldData(rowIdValue, originalTxnField)); rowId = rowIdInspector.get(recIdInspector.getStructFieldData(rowIdValue, rowIdField)); } else if(operation == INSERT_OPERATION) { rowId += rowIdOffset; } this.rowId.set(rowId); this.originalTransaction.set(originalTransaction); item.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation)); item.setFieldValue(OrcRecordUpdater.ROW, (operation == DELETE_OPERATION ? null : row)); indexBuilder.addKey(operation, originalTransaction, bucket.get(), rowId); if (writer == null) { writer = OrcFile.createWriter(path, writerOptions); } writer.addRow(item); }
/**
 * Creates a ZLIB-compressed ORC file at {@code tmpPath} containing {@code numberOfRows} rows
 * of {@code StringIntIntIntRow}. Each row holds the binary string of its index followed by
 * three pseudo-random ints drawn from a generator seeded with 157.
 *
 * @param stripSize stripe size passed to the writer options
 * @param numberOfRows number of rows to write
 * @throws IOException if the file cannot be created, written or closed
 */
private void createOrcFile(int stripSize, int numberOfRows) throws IOException {
  ObjectInspector reflectionInspector;
  synchronized (TestOrcFileStripeMergeRecordReader.class) {
    reflectionInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        StringIntIntIntRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  Writer orcWriter = OrcFile.createWriter(tmpPath,
      OrcFile.writerOptions(conf)
          .inspector(reflectionInspector)
          .stripeSize(stripSize)
          .compress(CompressionKind.ZLIB)
          .bufferSize(5000)
          .rowIndexStride(1000));

  Random rng = new Random(157);
  int rowIndex = 0;
  while (rowIndex < numberOfRows) {
    // Draw the values in constructor-argument order so the random sequence is unchanged.
    String binaryIndex = Integer.toBinaryString(rowIndex);
    int first = rng.nextInt();
    int second = rng.nextInt();
    int third = rng.nextInt();
    orcWriter.addRow(new StringIntIntIntRow(binaryIndex, first, second, third));
    rowIndex++;
  }
  orcWriter.close();
}
for (int i = 0; i < 21000; ++i) { if ((i % 7) != 0) { writer.addRow(new MyRecord(((i % 3) == 0), (byte)(i % 5), i, (long) 200, (short) (300 + i), (double) (400 + i), words[r1.nextInt(words.length)], Timestamp.valueOf(LocalDateTime.now().toString()), Date.valueOf(dates[i % 3]), HiveDecimal.create(decimalStrings[i % decimalStrings.length]))); } else { writer.addRow(new MyRecord(null, null, i, (long) 200, null, null, null, null, null, null));
/** * The INSERT event always uses {@link #bucket} that this {@link RecordUpdater} was created with * thus even for unbucketed tables, the N in bucket_N file name matches writerId/bucketId even for * late split */ private void addSimpleEvent(int operation, long currentWriteId, long rowId, Object row) throws IOException { this.operation.set(operation); this.currentWriteId.set(currentWriteId); Integer currentBucket = null; // If this is an insert, originalWriteId should be set to this transaction. If not, // it will be reset by the following if anyway. long originalWriteId = currentWriteId; if (operation == DELETE_OPERATION || operation == UPDATE_OPERATION) { Object rowIdValue = rowInspector.getStructFieldData(row, recIdField); originalWriteId = origWriteIdInspector.get( recIdInspector.getStructFieldData(rowIdValue, originalWriteIdField)); rowId = rowIdInspector.get(recIdInspector.getStructFieldData(rowIdValue, rowIdField)); currentBucket = setBucket(bucketInspector.get( recIdInspector.getStructFieldData(rowIdValue, bucketField)), operation); } this.rowId.set(rowId); this.originalWriteId.set(originalWriteId); item.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation)); item.setFieldValue(OrcRecordUpdater.ROW, (operation == DELETE_OPERATION ? null : row)); indexBuilder.addKey(operation, originalWriteId, bucket.get(), rowId); initWriter(); writer.addRow(item); restoreBucket(currentBucket, operation); }
@Test public void testOrcSerDeStatsList() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (ListStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 5000; row++) { List<String> test = new ArrayList<String>(); for (int i = 0; i < 1000; i++) { test.add("hi"); } writer.addRow(new ListStruct(test)); } writer.close(); assertEquals(5000, writer.getNumberOfRows()); assertEquals(430000000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(5000, reader.getNumberOfRows()); assertEquals(430000000, reader.getRawDataSize()); assertEquals(430000000, reader.getRawDataSizeOfColumns(Lists.newArrayList("list1"))); }
@Test public void testOrcSerDeStatsMap() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 1000; row++) { Map<String, Double> test = new HashMap<String, Double>(); for (int i = 0; i < 10; i++) { test.put("hi" + i, 2.0); } writer.addRow(new MapStruct(test)); } writer.close(); // stats from writer assertEquals(1000, writer.getNumberOfRows()); assertEquals(950000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(1000, reader.getNumberOfRows()); assertEquals(950000, reader.getRawDataSize()); assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1"))); }
/**
 * Round-trips 1080832 pseudo-random longs (seed 1234) through a ZLIB-compressed ORC file and
 * verifies that every value is read back unchanged and that no rows are missing.
 */
@Test
public void testBitPack64Large() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  int size = 1080832;
  long[] inp = new long[size];
  Random rand = new Random(1234);
  for (int i = 0; i < size; i++) {
    inp[i] = rand.nextLong();
  }
  List<Long> input = Lists.newArrayList(Longs.asList(inp));

  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.ZLIB));
  for (Long l : input) {
    writer.addRow(l);
  }
  writer.close();

  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  int idx = 0;
  while (rows.hasNext()) {
    Object row = rows.next(null);
    Assert.assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
  }
  // Without this check the loop above passes vacuously when the reader yields no rows, or
  // fewer rows than were written.
  Assert.assertEquals(size, idx);
}