/**
 * Writes one {@link Group} record through the delegate writer, then bumps the
 * record counter. The counter is only incremented after a successful write,
 * so it reflects records actually handed to the writer.
 */
@Override
public void write(Group record) throws IOException {
  this.writer.write(record);
  this.count.incrementAndGet();
}
/**
 * Forwards the value to the underlying record writer and refreshes the cached
 * byte length from the file writer's current position.
 */
@Override
public void write(Writable value) throws IOException {
  this.recordWriter.write(value);
  this.length = this.fileWriter.getPos();
}
/**
 * Closes the wrapped writer first; the finally block guarantees the
 * superclass close still runs even if the wrapped writer's close throws.
 */
@Override
public void close() throws IOException {
  try {
    this.writer.close();
  } finally {
    super.close();
  }
}
}
/**
 * Caches a footer alongside the file's modification time so cache staleness
 * can be checked later. The footer is copied into a fresh {@code Footer}
 * instance so later mutation of the caller's object cannot affect this entry.
 */
public FootersCacheValue(FileStatusWrapper status, Footer footer) {
  this.footer = new Footer(footer.getFile(), footer.getParquetMetadata());
  this.modificationTime = status.getModificationTime();
}
/**
 * Builds a {@link ParquetWriter} of {@link Group} records for the given staging
 * file, reading page/dictionary/validation settings from the destination state.
 *
 * @param blockSize   row-group (block) size in bytes for the writer
 * @param stagingFile staging file path, resolved against the configured
 *                    file-system root URI (defaults to the local FS URI)
 * @return a configured Parquet writer for the staging file
 * @throws IOException if the writer cannot be created
 */
public ParquetWriter<Group> getWriter(int blockSize, Path stagingFile) throws IOException {
  State state = this.destination.getProperties();
  int pageSize = state.getPropAsInt(getProperty(WRITER_PARQUET_PAGE_SIZE), DEFAULT_PAGE_SIZE);
  // BUGFIX: the dictionary page size previously defaulted to DEFAULT_BLOCK_SIZE.
  // A dictionary page the size of a whole row group is a misconfiguration;
  // Parquet's own default for dictionary pages is the page-size default.
  int dictPageSize = state.getPropAsInt(getProperty(WRITER_PARQUET_DICTIONARY_PAGE_SIZE), DEFAULT_PAGE_SIZE);
  boolean enableDictionary =
      state.getPropAsBoolean(getProperty(WRITER_PARQUET_DICTIONARY), DEFAULT_IS_DICTIONARY_ENABLED);
  boolean validate =
      state.getPropAsBoolean(getProperty(WRITER_PARQUET_VALIDATE), DEFAULT_IS_VALIDATING_ENABLED);
  String rootURI = state.getProp(WRITER_FILE_SYSTEM_URI, LOCAL_FS_URI);
  Path absoluteStagingFile = new Path(rootURI, stagingFile);
  CompressionCodecName codec = getCodecFromConfig();
  GroupWriteSupport support = new GroupWriteSupport();
  Configuration conf = new Configuration();
  GroupWriteSupport.setSchema(this.schema, conf);
  ParquetProperties.WriterVersion writerVersion = getWriterVersion();
  return new ParquetWriter<>(absoluteStagingFile, support, codec, blockSize, pageSize, dictPageSize,
      enableDictionary, validate, writerVersion, conf);
}
/**
 * Reads every record out of the given Parquet output file.
 *
 * @param outputFile the Parquet file to read
 * @return all records in the file, in read order
 * @throws IOException if opening or reading the file fails
 */
private List<Group> readParquetFiles(File outputFile) throws IOException {
  List<Group> records = new ArrayList<>();
  ParquetReader<Group> reader = null;
  try {
    reader = new ParquetReader<>(new Path(outputFile.toString()), new SimpleReadSupport());
    Group record = reader.read();
    while (record != null) {
      records.add(record);
      record = reader.read();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        // Best-effort close: a failure here must not mask records already read.
        System.out.println(ex.getMessage());
      }
    }
  }
  return records;
}
/** Closes the underlying reader if one was opened; a no-op when it is null. */
public void close() throws IOException {
  if (reader == null) {
    return;
  }
  reader.close();
}
/**
 * Creates a test output format wrapping a {@link ParquetOutputFormat} backed by
 * a test write support.
 *
 * @param schema           optional message type schema; must not be null
 * @param singleLevelArray whether the write support uses single-level array encoding
 */
public TestMapredParquetOutputFormat(Optional<MessageType> schema, boolean singleLevelArray) {
  super(new ParquetOutputFormat<>(new TestDataWritableWriteSupport(singleLevelArray)));
  this.schema = requireNonNull(schema, "schema is null");
}
/**
 * Deserializes this wrapper by creating a fresh split and delegating the
 * actual field reading to it.
 */
@Override
public void readFields(DataInput in) throws IOException {
  realSplit = new ParquetInputSplit();
  realSplit.readFields(in);
}
/**
 * Read the footers of all the files under that path (recursively)
 * using summary files if possible
 * @param configuration the configuration to access the FS
 * @param pathStatus the status of the root dir to scan
 * @param skipRowGroups whether to skip row-group level metadata when reading the footers
 * @return all the footers
 * @throws IOException if listing the files or reading the footers fails
 */
public static List<Footer> readFooters(Configuration configuration, FileStatus pathStatus, boolean skipRowGroups) throws IOException {
  List<FileStatus> files = listFiles(configuration, pathStatus);
  return readAllFootersInParallelUsingSummaryFiles(configuration, files, skipRowGroups);
}
/**
 * {@inheritDoc}
 *
 * Closes the internal writer and then deregisters it from the memory manager.
 * The deregistration is in a finally block so a failing close cannot leave a
 * stale writer registered (previously, a close() exception skipped
 * removeWriter and leaked the registration).
 */
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  try {
    internalWriter.close();
  } finally {
    if (memoryManager != null) {
      memoryManager.removeWriter(internalWriter);
    }
  }
}
/**
 * Merges the footers of the job's input files into a single global metadata view.
 *
 * @param jobContext the current job context
 * @return the merged metadata from the footers
 * @throws IOException if the footers cannot be read
 */
public GlobalMetaData getGlobalMetaData(JobContext jobContext) throws IOException {
  List<Footer> footers = getFooters(jobContext);
  return ParquetFileWriter.getGlobalMetaData(footers);
}
/**
 * Lazily creates (and then reuses) the output committer for this format,
 * rooted at the context's output path.
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
  if (committer == null) {
    committer = new ParquetOutputCommitter(getOutputPath(context), context);
  }
  return committer;
}
/**
 * Pushes the configured filter predicate down into the job configuration,
 * doing nothing when no predicate was configured.
 */
private void setPredicatePushdown(JobConf jobConf) {
  if (this.config.filterPredicate == null) {
    return;
  }
  ParquetInputFormat.setFilterPredicate(jobConf, this.config.filterPredicate);
}

@Override
/**
 * Returns a decompressor for the given codec, creating and caching one on
 * first use so subsequent lookups for the same codec reuse the instance.
 * NOTE(review): the cache is a plain check-then-put; assumed to be used from
 * a single thread — confirm against callers before sharing across threads.
 */
public BytesDecompressor getDecompressor(CompressionCodecName codecName) {
  BytesDecompressor cached = decompressors.get(codecName);
  if (cached != null) {
    return cached;
  }
  BytesDecompressor created = new BytesDecompressor(getCodec(codecName));
  decompressors.put(codecName, created);
  return created;
}
/** Serializes this wrapper by delegating directly to the wrapped split. */
@Override
public void write(DataOutput out) throws IOException {
  realSplit.write(out);
}
}
/**
 * {@inheritDoc}
 *
 * Delegates progress reporting to the internal reader.
 */
@Override
public float getProgress() throws IOException, InterruptedException {
  return internalReader.getProgress();
}
/** Closes the wrapped reader when present; safe to call if it was never opened. */
@Override
public void close() throws IOException {
  if (reader == null) {
    return;
  }
  reader.close();
}
/**
 * Will merge the metadata of all the footers together.
 *
 * @param footers the footers of the files to merge
 * @return the global meta data for all the footers
 *         (the forwarded {@code true} flag's semantics are defined by the
 *         two-argument overload — presumably strict merging; confirm there)
 */
static GlobalMetaData getGlobalMetaData(List<Footer> footers) {
  return getGlobalMetaData(footers, true);
}
/** Closes the active writer if one exists; a no-op when no writer is open. */
@Override
public void close() throws IOException {
  if (currentWriter == null) {
    return;
  }
  currentWriter.close();
}