public HoodieParquetWriter(String commitTime, Path file, HoodieParquetConfig parquetConfig,
    Schema schema) throws IOException {
  super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
      ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(),
      parquetConfig.getCompressionCodecName(), parquetConfig.getBlockSize(),
      parquetConfig.getPageSize(), parquetConfig.getPageSize(),
      ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
      ParquetWriter.DEFAULT_WRITER_VERSION,
      registerFileSystem(file, parquetConfig.getHadoopConf()));
  this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf());
  this.fs = (HoodieWrapperFileSystem) this.file
      .getFileSystem(registerFileSystem(file, parquetConfig.getHadoopConf()));
  // We cannot accurately measure the snappy-compressed output file size, so we pad the
  // target size by a conservative 10% (the configured compression ratio).
  // TODO - compute this compression ratio dynamically by looking at the bytes written to the
  // stream and the actual file size reported by HDFS
  this.maxFileSize = parquetConfig.getMaxFileSize()
      + Math.round(parquetConfig.getMaxFileSize() * parquetConfig.getCompressionRatio());
  this.writeSupport = parquetConfig.getWriteSupport();
  this.commitTime = commitTime;
  this.schema = schema;
}
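// Worked example (added, not from the source): with a 120 MB target and the 10% compression
// ratio mentioned in the comment above, the writer allows roughly 132 MB of bytes on the
// stream before it considers the file full. Values are illustrative assumptions, not
// authoritative Hudi defaults.
public static void main(String[] args) {
  long targetFileSize = 120L * 1024 * 1024; // configured parquet max file size
  double compressionRatio = 0.1;            // the "conservative 10%" padding (assumed value)
  // Mirrors the constructor's arithmetic: pad the target by the expected compression slack
  long maxFileSize = targetFileSize + Math.round(targetFileSize * compressionRatio);
  System.out.println(maxFileSize);          // 138412032 bytes, i.e. 132 MB
}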
private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R>
    newParquetStorageWriter(String commitTime, Path path, HoodieWriteConfig config,
    Schema schema, HoodieTable hoodieTable) throws IOException {
  BloomFilter filter = new BloomFilter(config.getBloomFilterNumEntries(),
      config.getBloomFilterFPP());
  HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(
      new AvroSchemaConverter().convert(schema), schema, filter);
  HoodieParquetConfig parquetConfig = new HoodieParquetConfig(writeSupport,
      CompressionCodecName.GZIP, config.getParquetBlockSize(), config.getParquetPageSize(),
      config.getParquetMaxFileSize(), hoodieTable.getHadoopConf(),
      config.getParquetCompressionRatio());
  return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema);
}
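// A minimal usage sketch (added, not from the source): obtain a writer via the factory above
// and stream records through it. writeAvro(String, IndexedRecord) and close() are assumed
// from the HoodieStorageWriter interface of this era; keyOf(...) is a hypothetical key
// extractor standing in for whatever the caller uses to derive record keys.
private static void writeAll(String commitTime, Path path, HoodieWriteConfig config,
    Schema schema, HoodieTable hoodieTable, List<IndexedRecord> records) throws IOException {
  HoodieStorageWriter<IndexedRecord> writer =
      newParquetStorageWriter(commitTime, path, config, schema, hoodieTable);
  try {
    for (IndexedRecord record : records) {
      writer.writeAvro(keyOf(record), record); // hypothetical per-record key extraction
    }
  } finally {
    writer.close(); // writes the parquet footer and releases the wrapped filesystem stream
  }
}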
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(
    new AvroSchemaConverter().convert(schema), schema, filter);
String commitTime = FSUtils.getCommitTime(filename);
HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
    ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
    HoodieTestUtils.getDefaultHadoopConf(),
    0.1); // final compression-ratio argument was truncated in the source; 0.1 is an assumed value
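// Hypothetical continuation (added, not from the source): construct the writer with the test
// config above and write a single record. recordKey, avroRecord, and schema are assumed to be
// defined by the enclosing test; writeAvro(String, IndexedRecord) is assumed from the writer API.
HoodieParquetWriter<HoodieRecordPayload, IndexedRecord> writer =
    new HoodieParquetWriter<>(commitTime, new Path(filename), config, schema);
writer.writeAvro(recordKey, avroRecord);
writer.close();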