@Override
public void finish() throws IOException {
  parquetWriter.close();
}
@Override
public void close() throws IOException {
  writer.close();
}
@Override
public void close() throws IOException {
  parquetWriter.close();
}
public Path writeDirect(String name, MessageType type, DirectWriter writer) throws IOException {
  File temp = tempDir.newFile(name + ".parquet");
  temp.deleteOnExit();
  // Delete the empty file created by newFile(): ParquetWriter refuses to overwrite an existing file.
  temp.delete();

  Path path = new Path(temp.getPath());

  ParquetWriter<Void> parquetWriter = new ParquetWriter<Void>(
      path, new DirectWriteSupport(type, writer, new HashMap<String, String>()));
  parquetWriter.write(null);
  parquetWriter.close();

  return path;
}
writer.close();
protected static void writeRepeateListData(ParquetWriter<Group> writer, int elementNum, boolean isNull)
    throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int listMaxSize = 4;
  for (int i = 0; i < elementNum; i++) {
    Group group = f.newGroup();
    if (!isNull) {
      for (int j = 0; j < listMaxSize; j++) {
        group.append("list_int32_field_for_repeat_test", j);
      }
    }
    writer.write(group);
  }
  writer.close();
}
protected static void writeRepeateMapData(ParquetWriter<Group> writer, int elementNum, boolean isNull)
    throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int mapMaxSize = 4;
  for (int i = 0; i < elementNum; i++) {
    Group group = f.newGroup();
    if (!isNull) {
      for (int j = 0; j < mapMaxSize; j++) {
        group.addGroup("map_int32_for_repeat_test").append("key", j).append("value", j);
      }
    }
    writer.write(group);
  }
  writer.close();
}
protected static void writeListData(ParquetWriter<Group> writer, boolean isDictionaryEncoding, int elementNum)
    throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int listMaxSize = 4;
  int listElementIndex = 0;
  for (int i = 0; i < elementNum; i++) {
    boolean isNull = isNull(i);
    Group group = f.newGroup();
    int listSize = i % listMaxSize + 1;
    if (!isNull) {
      for (int j = 0; j < listSize; j++) {
        group.append("list_int32_field", getIntValue(isDictionaryEncoding, listElementIndex));
        group.append("list_int64_field", getLongValue(isDictionaryEncoding, listElementIndex));
        group.append("list_double_field", getDoubleValue(isDictionaryEncoding, listElementIndex));
        group.append("list_float_field", getFloatValue(isDictionaryEncoding, listElementIndex));
        group.append("list_boolean_field", getBooleanValue(listElementIndex));
        group.append("list_binary_field", getBinaryValue(isDictionaryEncoding, listElementIndex));
        HiveDecimal hd = getDecimal(isDictionaryEncoding, listElementIndex).setScale(2);
        HiveDecimalWritable hdw = new HiveDecimalWritable(hd);
        group.append("list_decimal_field", Binary.fromConstantByteArray(hdw.getInternalStorage()));
        listElementIndex++;
      }
    }
    for (int j = 0; j < listMaxSize; j++) {
      group.append("list_binary_field_for_repeat_test", getBinaryValue(isDictionaryEncoding, i));
    }
    writer.write(group);
  }
  writer.close();
}
@Override
public void close() throws IOException {
  if (writer != null) {
    writer.close();
    this.writer = null;
  }
}
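The null guard above makes repeated close() calls safe by hand. Because ParquetWriter implements java.io.Closeable, the same guarantee usually comes for free from try-with-resources. A minimal sketch under that assumption; writeAll and its arguments are hypothetical, not taken from any of the snippets here:

import java.io.IOException;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetWriter;

// Sketch: try-with-resources closes the writer exactly once, even if
// write() throws midway; no explicit null check or close() call needed.
static void writeAll(ParquetWriter<Group> writer, Iterable<Group> groups) throws IOException {
  try (ParquetWriter<Group> w = writer) { // takes ownership of the writer
    for (Group g : groups) {
      w.write(g);
    }
  } // w.close() runs automatically here
}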
@Override
public void close() throws IOException {
  writer.close();
}
writer.close();
@Override
public void close() throws IOException {
  parquetWriter.close();
}
@Override
public void close() throws IOException {
  writer.close();
}
@Override
public void complete() {
  try {
    this.writer.close();
  } catch (IOException e) {
    throw new RuntimeException("Unable to close writers", e);
  }
}
writer.close();
public void writeElements(final Iterator<? extends Element> elements) throws OperationException {
  try {
    // Write elements
    _writeElements(elements);
  } catch (final IOException | OperationException e) {
    throw new OperationException("Exception writing elements to temporary directory: " + tempFilesDir, e);
  } finally {
    // Close the writers
    for (final Map<Integer, ParquetWriter<Element>> splitToWriter : groupSplitToWriter.values()) {
      for (final ParquetWriter<Element> writer : splitToWriter.values()) {
        try {
          writer.close();
        } catch (final IOException ignored) {
          // ignored
        }
      }
    }
  }
}
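The finally block above swallows IOException from close(), which can mask a failed footer write (ParquetWriter flushes buffered pages and writes the file footer during close()). A sketch of an alternative that still closes every writer but surfaces the first failure, attaching later ones as suppressed exceptions; closeAll is a hypothetical helper, not part of the surrounding class:

import java.io.IOException;
import java.util.Collection;
import org.apache.parquet.hadoop.ParquetWriter;

// Hypothetical helper: close all writers, then rethrow the first
// IOException with any later failures attached via addSuppressed.
static <T> void closeAll(Collection<ParquetWriter<T>> writers) throws IOException {
  IOException first = null;
  for (ParquetWriter<T> writer : writers) {
    try {
      writer.close();
    } catch (IOException e) {
      if (first == null) {
        first = e;
      } else {
        first.addSuppressed(e);
      }
    }
  }
  if (first != null) {
    throw first;
  }
}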
private void writeParquetFile(String filePath, List<String> rowKeys) throws Exception {
  // Write out a parquet file
  Schema schema = HoodieAvroUtils.getRecordKeySchema();
  BloomFilter filter = new BloomFilter(1000, 0.0001);
  HoodieAvroWriteSupport writeSupport =
      new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
  ParquetWriter writer = new ParquetWriter(new Path(filePath), writeSupport, CompressionCodecName.GZIP,
      120 * 1024 * 1024, ParquetWriter.DEFAULT_PAGE_SIZE);
  for (String rowKey : rowKeys) {
    GenericRecord rec = new GenericData.Record(schema);
    rec.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, rowKey);
    writer.write(rec);
    filter.add(rowKey);
  }
  writer.close();
}
@SuppressWarnings({"unchecked", "deprecation"}) private static void generateParquetData(Path filePath, boolean isParquetSchemaSimple) throws IOException, URISyntaxException, InterruptedException { Schema schema = (isParquetSchemaSimple ? SchemaTestUtil.getSimpleSchema() : SchemaTestUtil.getEvolvedSchema()); org.apache.parquet.schema.MessageType parquetSchema = new AvroSchemaConverter().convert(schema); BloomFilter filter = new BloomFilter(1000, 0.0001); HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(parquetSchema, schema, filter); ParquetWriter writer = new ParquetWriter(filePath, writeSupport, CompressionCodecName.GZIP, 120 * 1024 * 1024, ParquetWriter.DEFAULT_PAGE_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, ParquetWriter.DEFAULT_WRITER_VERSION, fileSystem.getConf()); List<IndexedRecord> testRecords = (isParquetSchemaSimple ? SchemaTestUtil .generateTestRecords(0, 100) : SchemaTestUtil.generateEvolvedTestRecords(100, 100)); testRecords.forEach(s -> { try { writer.write(s); } catch (IOException e) { fail("IOException while writing test records as parquet" + e.toString()); } }); writer.close(); }
private void writeParquetFile(Path data, Configuration conf) throws IOException {
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 key; "
          + "required int32 column1_i; "
          + "required double column2_d; "
          + "required binary column3_s; "
          + "required boolean column4_b; "
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(data, new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_1_0, conf);
  writer.write(f.newGroup().append("key", 1).append("column1_i", 3).append("column2_d", 2.3)
      .append("column3_s", "some string").append("column4_b", true));
  writer.write(f.newGroup().append("key", 2).append("column1_i", 5).append("column2_d", 4.5)
      .append("column3_s", "some more").append("column4_b", false));
  writer.write(f.newGroup().append("key", 3).append("column1_i", 7).append("column2_d", 5.6)
      .append("column3_s", "some more and more").append("column4_b", true));
  writer.write(f.newGroup().append("key", 4).append("column1_i", 9).append("column2_d", 10.9)
      .append("column3_s", "some more and alst").append("column4_b", false));
  writer.close();
}
private void writeParquetFile(Path data, Configuration conf) throws IOException {
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 key; "
          + "required int32 column1_i_s; "
          + "required binary column2_d; "
          + "required binary column3_s; "
          + "required boolean column4_b; "
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(data, new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_1_0, conf);
  writer.write(f.newGroup().append("key", 1).append("column1_i_s", 292).append("column2_d", "no type")
      .append("column3_s", "some string").append("column4_b", true));
  writer.write(f.newGroup().append("key", 2).append("column1_i_s", 23).append("column2_d", "no type")
      .append("column3_s", "some more").append("column4_b", false));
  writer.write(f.newGroup().append("key", 3).append("column1_i_s", 32).append("column2_d", "no type")
      .append("column3_s", "some more and more").append("column4_b", true));
  writer.write(f.newGroup().append("key", 4).append("column1_i_s", 22).append("column2_d", "no type")
      .append("column3_s", "some more and alst").append("column4_b", false));
  writer.close();
}
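The ten-argument ParquetWriter constructor used in the last two snippets is deprecated in current Parquet releases in favor of the builder API. A sketch of the equivalent configuration, assuming a Parquet version that ships ExampleParquetWriter (newer releases also prefer an OutputFile over a raw Path); openWriter is a hypothetical helper:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;

// Sketch: the same writer settings as above, expressed via the builder.
// withType() registers the schema for GroupWriteSupport, replacing the
// explicit GroupWriteSupport.setSchema(schema, conf) call.
static ParquetWriter<Group> openWriter(Path data, MessageType schema, Configuration conf)
    throws IOException {
  return ExampleParquetWriter.builder(data)
      .withType(schema)
      .withConf(conf)
      .withCompressionCodec(CompressionCodecName.UNCOMPRESSED)
      .withRowGroupSize(1024)      // block size
      .withPageSize(1024)
      .withDictionaryPageSize(512)
      .withDictionaryEncoding(true)
      .withValidation(false)
      .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0)
      .build();
}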