@Override public void addElement(T datum) throws IOException { parquetWriter.write(datum); }
@Override
public void write(final Record record) throws IOException {
    final GenericRecord genericRecord = AvroTypeUtil.createAvroRecord(record, avroSchema);
    parquetWriter.write(genericRecord);
}
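// Hedged sketch (not taken from the snippets above): the write(Record)/write(GenericRecord)
// implementations shown here assume a parquetWriter built elsewhere, typically with
// parquet-avro's AvroParquetWriter builder. The output path below is a placeholder and
// "avroSchema" is assumed to be the same Avro Schema used for conversion.
ParquetWriter<GenericRecord> parquetWriter = AvroParquetWriter
    .<GenericRecord>builder(new Path("/tmp/example.parquet"))   // hypothetical output path
    .withSchema(avroSchema)
    .withCompressionCodec(CompressionCodecName.SNAPPY)
    .build();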
@Override
public void write(KeyValue keyValue) throws IOException {
    GenericRecord record = schemaRegistryClient.decodeMessage(topic, keyValue.getValue());
    LOG.trace("Writing record {}", record);
    if (record != null) {
        writer.write(record);
    }
}
public Path writeDirect(String name, MessageType type, DirectWriter writer) throws IOException {
    File temp = tempDir.newFile(name + ".parquet");
    temp.deleteOnExit();
    temp.delete();
    Path path = new Path(temp.getPath());
    ParquetWriter<Void> parquetWriter =
        new ParquetWriter<Void>(path, new DirectWriteSupport(type, writer, new HashMap<String, String>()));
    parquetWriter.write(null);
    parquetWriter.close();
    return path;
}
while (dataFileReader.hasNext()) {
    record = dataFileReader.next();
    writer.write(record);
    recordCount++;
}
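// Hedged sketch of the setup the loop above assumes: an Avro DataFileReader supplying
// GenericRecords that are copied one by one into the Parquet writer. The input file name
// and variable declarations are placeholders for illustration only.
DataFileReader<GenericRecord> dataFileReader =
    new DataFileReader<>(new File("/tmp/input.avro"), new GenericDatumReader<GenericRecord>());
GenericRecord record = null;
long recordCount = 0;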
protected static void writeRepeateListData(ParquetWriter<Group> writer, int elementNum, boolean isNull)
        throws IOException {
    SimpleGroupFactory f = new SimpleGroupFactory(schema);
    int listMaxSize = 4;
    for (int i = 0; i < elementNum; i++) {
        Group group = f.newGroup();
        if (!isNull) {
            for (int j = 0; j < listMaxSize; j++) {
                group.append("list_int32_field_for_repeat_test", j);
            }
        }
        writer.write(group);
    }
    writer.close();
}
protected static void writeRepeateMapData(ParquetWriter<Group> writer, int elementNum, boolean isNull)
        throws IOException {
    SimpleGroupFactory f = new SimpleGroupFactory(schema);
    int mapMaxSize = 4;
    for (int i = 0; i < elementNum; i++) {
        Group group = f.newGroup();
        if (!isNull) {
            for (int j = 0; j < mapMaxSize; j++) {
                group.addGroup("map_int32_for_repeat_test").append("key", j).append("value", j);
            }
        }
        writer.write(group);
    }
    writer.close();
}
protected static void writeListData(ParquetWriter<Group> writer, boolean isDictionaryEncoding, int elementNum)
        throws IOException {
    SimpleGroupFactory f = new SimpleGroupFactory(schema);
    int listMaxSize = 4;
    int listElementIndex = 0;
    for (int i = 0; i < elementNum; i++) {
        boolean isNull = isNull(i);
        Group group = f.newGroup();
        int listSize = i % listMaxSize + 1;
        if (!isNull) {
            for (int j = 0; j < listSize; j++) {
                group.append("list_int32_field", getIntValue(isDictionaryEncoding, listElementIndex));
                group.append("list_int64_field", getLongValue(isDictionaryEncoding, listElementIndex));
                group.append("list_double_field", getDoubleValue(isDictionaryEncoding, listElementIndex));
                group.append("list_float_field", getFloatValue(isDictionaryEncoding, listElementIndex));
                group.append("list_boolean_field", getBooleanValue(listElementIndex));
                group.append("list_binary_field", getBinaryValue(isDictionaryEncoding, listElementIndex));
                HiveDecimal hd = getDecimal(isDictionaryEncoding, listElementIndex).setScale(2);
                HiveDecimalWritable hdw = new HiveDecimalWritable(hd);
                group.append("list_decimal_field", Binary.fromConstantByteArray(hdw.getInternalStorage()));
                listElementIndex++;
            }
        }
        for (int j = 0; j < listMaxSize; j++) {
            group.append("list_binary_field_for_repeat_test", getBinaryValue(isDictionaryEncoding, i));
        }
        writer.write(group);
    }
    writer.close();
}
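// Hedged sketch: the ParquetWriter<Group> passed into the helpers above must be created
// against the same MessageType ("schema"). One way to obtain such a writer is parquet-hadoop's
// ExampleParquetWriter builder (an assumption; the tests may construct theirs differently).
// The output path and element count are placeholders.
ParquetWriter<Group> groupWriter = ExampleParquetWriter
    .builder(new Path("/tmp/groups.parquet"))   // hypothetical output path
    .withType(schema)                           // MessageType shared with the helpers above
    .build();
writeListData(groupWriter, /* isDictionaryEncoding */ true, /* elementNum */ 1024);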
writer.write(group);
@Override
public void accept(GenericRecord t) {
    try {
        writer.write(t);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
@Override
public void writeAvro(String key, IndexedRecord object) throws IOException {
    super.write(object);
    writeSupport.add(key);
}
@Override
public void add(D datum) {
    try {
        numRecords += 1L;
        writer.write(datum);
    } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to write record %s", datum);
    }
}
@Override
public void process(EntityContainer entityContainer) {
    try {
        if (this.entityType == entityContainer.getEntity().getType()) {
            final T entity = (T) entityContainer.getEntity();
            if (filters.stream().noneMatch(filter -> filter.test(entity))) {
                writer.write(entity);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Unable to write entity", e);
    }
}
@Override
public void write(SinkRecord record) throws IOException {
    Object value = avroData.fromConnectData(record.valueSchema(), record.value());
    writer.write((GenericRecord) value);
}
@Override
public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException {
    String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(),
        recordIndex.getAndIncrement());
    HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, record.getRecordKey(),
        record.getPartitionPath(), file.getName());
    HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, commitTime, seqId);
    super.write(avroRecord);
    writeSupport.add(record.getRecordKey());
}
private void writeParquetFile(String filePath, List<String> rowKeys) throws Exception {
    // Write out a parquet file
    Schema schema = HoodieAvroUtils.getRecordKeySchema();
    BloomFilter filter = new BloomFilter(1000, 0.0001);
    HoodieAvroWriteSupport writeSupport =
        new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
    ParquetWriter writer = new ParquetWriter(new Path(filePath), writeSupport, CompressionCodecName.GZIP,
        120 * 1024 * 1024, ParquetWriter.DEFAULT_PAGE_SIZE);
    for (String rowKey : rowKeys) {
        GenericRecord rec = new GenericData.Record(schema);
        rec.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, rowKey);
        writer.write(rec);
        filter.add(rowKey);
    }
    writer.close();
}
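// Hedged usage sketch for the helper above: write a small file of record keys, with the
// BloomFilter populated alongside each write so the write support can persist it in the
// file footer on close. The path and keys below are placeholders for illustration only.
List<String> rowKeys = Arrays.asList("key-001", "key-002", "key-003");
writeParquetFile("/tmp/bloom-test.parquet", rowKeys);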