/**
 * Creates a {@link ParquetWriter} of {@link Group} records that writes to the given staging file.
 *
 * <p>Page size, dictionary page size, dictionary encoding and validation are read from the
 * destination's properties, each falling back to its default constant when the property is not
 * set. The staging file is resolved against the file system URI taken from
 * {@code WRITER_FILE_SYSTEM_URI} (defaulting to {@code LOCAL_FS_URI}). The schema held by this
 * builder is registered on a freshly created {@link Configuration} that is handed to the writer.
 *
 * @param blockSize block size passed straight through to the {@link ParquetWriter} constructor
 * @param stagingFile staging file location, resolved against the configured file system URI
 * @return a new writer targeting the resolved staging file
 * @throws IOException declared by the {@link ParquetWriter} constructor
 */
public ParquetWriter<Group> getWriter(int blockSize, Path stagingFile) throws IOException {
  State props = this.destination.getProperties();

  // Tuning knobs, each with a fallback default.
  int dataPageSize = props.getPropAsInt(getProperty(WRITER_PARQUET_PAGE_SIZE), DEFAULT_PAGE_SIZE);
  // NOTE(review): the dictionary page size falls back to DEFAULT_BLOCK_SIZE rather than a
  // page-size constant -- confirm this is intentional.
  int dictionaryPageSize =
      props.getPropAsInt(getProperty(WRITER_PARQUET_DICTIONARY_PAGE_SIZE), DEFAULT_BLOCK_SIZE);
  boolean dictionaryEnabled =
      props.getPropAsBoolean(getProperty(WRITER_PARQUET_DICTIONARY), DEFAULT_IS_DICTIONARY_ENABLED);
  boolean validationEnabled =
      props.getPropAsBoolean(getProperty(WRITER_PARQUET_VALIDATE), DEFAULT_IS_VALIDATING_ENABLED);

  // Resolve the staging file against the configured file system root.
  Path targetFile = new Path(props.getProp(WRITER_FILE_SYSTEM_URI, LOCAL_FS_URI), stagingFile);

  CompressionCodecName compression = getCodecFromConfig();

  // The write support picks the schema up from the configuration handed to the writer.
  GroupWriteSupport writeSupport = new GroupWriteSupport();
  Configuration hadoopConf = new Configuration();
  GroupWriteSupport.setSchema(this.schema, hadoopConf);

  ParquetProperties.WriterVersion version = getWriterVersion();

  return new ParquetWriter<>(
      targetFile,
      writeSupport,
      compression,
      blockSize,
      dataPageSize,
      dictionaryPageSize,
      dictionaryEnabled,
      validationEnabled,
      version,
      hadoopConf);
}
/**
 * Stores the schema of the data being written in the configuration of the given job.
 *
 * <p>Delegates to {@code GroupWriteSupport.setSchema}, using the configuration obtained from
 * {@code ContextUtil.getConfiguration(job)}.
 *
 * @param job the job whose configuration receives the schema
 * @param schema the schema of the data
 */
public static void setSchema(Job job, MessageType schema) {
  GroupWriteSupport.setSchema(
      schema,
      ContextUtil.getConfiguration(job));
}
/**
 * Creates a {@link ParquetWriter} of {@link Group} records that writes to the given staging file.
 *
 * <p>Page size, dictionary page size, dictionary encoding and validation are read from the
 * destination's properties, each falling back to its default constant when the property is not
 * set. The staging file is resolved against the file system URI taken from
 * {@code WRITER_FILE_SYSTEM_URI} (defaulting to {@code LOCAL_FS_URI}). The schema held by this
 * builder is registered on a freshly created {@link Configuration} that is handed to the writer.
 *
 * @param blockSize block size passed straight through to the {@link ParquetWriter} constructor
 * @param stagingFile staging file location, resolved against the configured file system URI
 * @return a new writer targeting the resolved staging file
 * @throws IOException declared by the {@link ParquetWriter} constructor
 */
public ParquetWriter<Group> getWriter(int blockSize, Path stagingFile) throws IOException {
  State props = this.destination.getProperties();

  // Tuning knobs, each with a fallback default.
  int dataPageSize = props.getPropAsInt(getProperty(WRITER_PARQUET_PAGE_SIZE), DEFAULT_PAGE_SIZE);
  // NOTE(review): the dictionary page size falls back to DEFAULT_BLOCK_SIZE rather than a
  // page-size constant -- confirm this is intentional.
  int dictionaryPageSize =
      props.getPropAsInt(getProperty(WRITER_PARQUET_DICTIONARY_PAGE_SIZE), DEFAULT_BLOCK_SIZE);
  boolean dictionaryEnabled =
      props.getPropAsBoolean(getProperty(WRITER_PARQUET_DICTIONARY), DEFAULT_IS_DICTIONARY_ENABLED);
  boolean validationEnabled =
      props.getPropAsBoolean(getProperty(WRITER_PARQUET_VALIDATE), DEFAULT_IS_VALIDATING_ENABLED);

  // Resolve the staging file against the configured file system root.
  Path targetFile = new Path(props.getProp(WRITER_FILE_SYSTEM_URI, LOCAL_FS_URI), stagingFile);

  CompressionCodecName compression = getCodecFromConfig();

  // The write support picks the schema up from the configuration handed to the writer.
  GroupWriteSupport writeSupport = new GroupWriteSupport();
  Configuration hadoopConf = new Configuration();
  GroupWriteSupport.setSchema(this.schema, hadoopConf);

  ParquetProperties.WriterVersion version = getWriterVersion();

  return new ParquetWriter<>(
      targetFile,
      writeSupport,
      compression,
      blockSize,
      dataPageSize,
      dictionaryPageSize,
      dictionaryEnabled,
      validationEnabled,
      version,
      hadoopConf);
}