// Returns a newly constructed DwrfMetadataWriter on every call. The trailing "};" closes an enclosing declaration that begins outside this excerpt.
@Override public MetadataWriter createMetadataWriter() { return new DwrfMetadataWriter(); } };
/**
 * Converts a column encoding into its DWRF protobuf form, copying the encoding kind
 * and the dictionary size.
 *
 * @param columnEncodings the encoding to convert; must not carry additional sequence encodings
 * @return the protobuf message built from the encoding kind and dictionary size
 * @throws IllegalArgumentException if additional sequence encodings are present
 */
private static DwrfProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    // The template overload defers formatting (and columnEncodings.toString()) to the
    // failure path, instead of concatenating the message on every successful call.
    checkArgument(
            !columnEncodings.getAdditionalSequenceEncodings().isPresent(),
            "DWRF writer doesn't support writing columns with non-zero sequence IDs: %s",
            columnEncodings);

    return DwrfProto.ColumnEncoding.newBuilder()
            .setKind(toColumnEncoding(columnEncodings.getColumnEncodingKind()))
            .setDictionarySize(columnEncodings.getDictionarySize())
            .build();
}
/**
 * Builds the protobuf row index entry for one row group: every position is widened
 * to {@code long} and appended in order, then the converted column statistics are attached.
 *
 * @param rowGroupIndex the row group index to convert
 * @return the equivalent protobuf entry
 */
private static RowIndexEntry toRowGroupIndex(RowGroupIndex rowGroupIndex)
{
    RowIndexEntry.Builder entry = RowIndexEntry.newBuilder();
    for (Integer position : rowGroupIndex.getPositions()) {
        entry.addPositions(position.longValue());
    }
    entry.setStatistics(toColumnStatistics(rowGroupIndex.getColumnStatistics()));
    return entry.build();
}
/**
 * Serializes the postscript with the footer length, the writer version constant,
 * the converted compression kind and the compression block size.
 *
 * @param output destination for the serialized message
 * @param footerLength value stored as the footer length
 * @param metadataLength not used by this implementation
 * @param compression compression kind, converted through {@code toCompression}
 * @param compressionBlockSize value stored as the compression block size
 * @return whatever {@code writeProtobufObject} returns (presumably the bytes written; confirm)
 * @throws IOException declared by the interface; propagated from {@code writeProtobufObject}
 */
@Override
public int writePostscript(SliceOutput output, int footerLength, int metadataLength, CompressionKind compression, int compressionBlockSize)
        throws IOException
{
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder();
    postScript.setFooterLength(footerLength);
    postScript.setWriterVersion(DWRF_WRITER_VERSION);
    postScript.setCompression(toCompression(compression));
    postScript.setCompressionBlockSize(compressionBlockSize);
    return writeProtobufObject(output, postScript.build());
}
/**
 * Serializes the given row group indexes as a single protobuf row index,
 * with one entry per row group in list order.
 *
 * @param output destination for the serialized message
 * @param rowGroupIndexes row group indexes to convert and write
 * @return whatever {@code writeProtobufObject} returns (presumably the bytes written; confirm)
 * @throws IOException propagated from {@code writeProtobufObject}
 */
@Override
public int writeRowIndexes(SliceOutput output, List<RowGroupIndex> rowGroupIndexes)
        throws IOException
{
    DwrfProto.RowIndex.Builder rowIndex = DwrfProto.RowIndex.newBuilder();
    for (RowGroupIndex rowGroupIndex : rowGroupIndexes) {
        rowIndex.addEntry(toRowGroupIndex(rowGroupIndex));
    }
    return writeProtobufObject(output, rowIndex.build());
}
/**
 * Converts a stream descriptor into its protobuf form, copying the column,
 * the converted stream kind, the length and the use-vints flag.
 *
 * @param stream the stream descriptor to convert
 * @return the equivalent protobuf stream message
 */
private static DwrfProto.Stream toStream(Stream stream)
{
    DwrfProto.Stream.Builder result = DwrfProto.Stream.newBuilder();
    result.setColumn(stream.getColumn());
    result.setKind(toStreamKind(stream.getStreamKind()));
    result.setLength(stream.getLength());
    result.setUseVInts(stream.isUseVInts());
    return result.build();
}
/**
 * Converts an ORC type into its protobuf form: the converted type kind,
 * the field type indexes as subtypes, and the field names.
 *
 * @param type the ORC type to convert
 * @return the equivalent protobuf type message
 */
private static Type toType(OrcType type)
{
    return Type.newBuilder()
            .setKind(toTypeKind(type.getOrcTypeKind()))
            .addAllSubtypes(type.getFieldTypeIndexes())
            .addAllFieldNames(type.getFieldNames())
            .build();
}
/**
 * Serializes the postscript with the footer length, the writer version constant,
 * the converted compression kind and the compression block size.
 *
 * @param output destination for the serialized message
 * @param footerLength value stored as the footer length
 * @param metadataLength not used by this implementation
 * @param compression compression kind, converted through {@code toCompression}
 * @param compressionBlockSize value stored as the compression block size
 * @return whatever {@code writeProtobufObject} returns (presumably the bytes written; confirm)
 * @throws IOException declared by the interface; propagated from {@code writeProtobufObject}
 */
@Override
public int writePostscript(SliceOutput output, int footerLength, int metadataLength, CompressionKind compression, int compressionBlockSize)
        throws IOException
{
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder();
    postScript.setFooterLength(footerLength);
    postScript.setWriterVersion(DWRF_WRITER_VERSION);
    postScript.setCompression(toCompression(compression));
    postScript.setCompressionBlockSize(compressionBlockSize);
    return writeProtobufObject(output, postScript.build());
}
/**
 * Serializes a stripe footer: all streams are converted and appended first,
 * followed by all column encodings, each in their original order.
 *
 * @param output destination for the serialized message
 * @param footer stripe footer supplying the streams and column encodings
 * @return whatever {@code writeProtobufObject} returns (presumably the bytes written; confirm)
 * @throws IOException propagated from {@code writeProtobufObject}
 */
@Override
public int writeStripeFooter(SliceOutput output, StripeFooter footer)
        throws IOException
{
    DwrfProto.StripeFooter.Builder stripeFooter = DwrfProto.StripeFooter.newBuilder();
    for (Stream stream : footer.getStreams()) {
        stripeFooter.addStreams(toStream(stream));
    }
    for (ColumnEncoding columnEncoding : footer.getColumnEncodings()) {
        stripeFooter.addColumns(toColumnEncoding(columnEncoding));
    }
    return writeProtobufObject(output, stripeFooter.build());
}
/**
 * Converts a stream descriptor into its protobuf form, copying the column,
 * the converted stream kind, the length and the use-vints flag.
 *
 * @param stream the stream descriptor to convert
 * @return the equivalent protobuf stream message
 */
private static DwrfProto.Stream toStream(Stream stream)
{
    DwrfProto.Stream.Builder result = DwrfProto.Stream.newBuilder();
    result.setColumn(stream.getColumn());
    result.setKind(toStreamKind(stream.getStreamKind()));
    result.setLength(stream.getLength());
    result.setUseVInts(stream.isUseVInts());
    return result.build();
}
/**
 * Converts an ORC type into its protobuf form: the converted type kind,
 * the field type indexes as subtypes, and the field names.
 *
 * @param type the ORC type to convert
 * @return the equivalent protobuf type message
 */
private static Type toType(OrcType type)
{
    return Type.newBuilder()
            .setKind(toTypeKind(type.getOrcTypeKind()))
            .addAllSubtypes(type.getFieldTypeIndexes())
            .addAllFieldNames(type.getFieldNames())
            .build();
}
/**
 * Serializes the file footer: row count, row index stride, stripes, types,
 * file-level statistics, and metadata. User metadata entries are appended first,
 * followed by the entries of {@code STATIC_METADATA}.
 *
 * @param output destination for the serialized message
 * @param footer footer supplying the values to convert
 * @return whatever {@code writeProtobufObject} returns (presumably the bytes written; confirm)
 * @throws IOException propagated from {@code writeProtobufObject}
 */
@Override
public int writeFooter(SliceOutput output, Footer footer)
        throws IOException
{
    DwrfProto.Footer.Builder footerBuilder = DwrfProto.Footer.newBuilder();
    footerBuilder.setNumberOfRows(footer.getNumberOfRows());
    footerBuilder.setRowIndexStride(footer.getRowsInRowGroup());

    footerBuilder.addAllStripes(footer.getStripes().stream()
            .map(DwrfMetadataWriter::toStripeInformation)
            .collect(toImmutableList()));
    footerBuilder.addAllTypes(footer.getTypes().stream()
            .map(DwrfMetadataWriter::toType)
            .collect(toImmutableList()));
    footerBuilder.addAllStatistics(footer.getFileStats().stream()
            .map(DwrfMetadataWriter::toColumnStatistics)
            .collect(toImmutableList()));

    // User-supplied entries go in before the static ones, preserving the original order.
    footerBuilder.addAllMetadata(footer.getUserMetadata().entrySet().stream()
            .map(DwrfMetadataWriter::toUserMetadata)
            .collect(toImmutableList()));
    footerBuilder.addAllMetadata(STATIC_METADATA.entrySet().stream()
            .map(DwrfMetadataWriter::toUserMetadata)
            .collect(toImmutableList()));

    return writeProtobufObject(output, footerBuilder.build());
}
// Returns a newly constructed DwrfMetadataWriter on every call. The trailing "};" closes an enclosing declaration that begins outside this excerpt.
@Override public MetadataWriter createMetadataWriter() { return new DwrfMetadataWriter(); } };
/**
 * Builds the protobuf row index entry for one row group: every position is widened
 * to {@code long} and appended in order, then the converted column statistics are attached.
 *
 * @param rowGroupIndex the row group index to convert
 * @return the equivalent protobuf entry
 */
private static RowIndexEntry toRowGroupIndex(RowGroupIndex rowGroupIndex)
{
    RowIndexEntry.Builder entry = RowIndexEntry.newBuilder();
    for (Integer position : rowGroupIndex.getPositions()) {
        entry.addPositions(position.longValue());
    }
    entry.setStatistics(toColumnStatistics(rowGroupIndex.getColumnStatistics()));
    return entry.build();
}
/**
 * Converts a column encoding into its DWRF protobuf form, copying the encoding kind
 * and the dictionary size.
 *
 * @param columnEncodings the encoding to convert; must not carry additional sequence encodings
 * @return the protobuf message built from the encoding kind and dictionary size
 * @throws IllegalArgumentException if additional sequence encodings are present
 */
private static DwrfProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    // The template overload defers formatting (and columnEncodings.toString()) to the
    // failure path, instead of concatenating the message on every successful call.
    checkArgument(
            !columnEncodings.getAdditionalSequenceEncodings().isPresent(),
            "DWRF writer doesn't support writing columns with non-zero sequence IDs: %s",
            columnEncodings);

    return DwrfProto.ColumnEncoding.newBuilder()
            .setKind(toColumnEncoding(columnEncodings.getColumnEncodingKind()))
            .setDictionarySize(columnEncodings.getDictionarySize())
            .build();
}
/**
 * Serializes the given row group indexes as a single protobuf row index,
 * with one entry per row group in list order.
 *
 * @param output destination for the serialized message
 * @param rowGroupIndexes row group indexes to convert and write
 * @return whatever {@code writeProtobufObject} returns (presumably the bytes written; confirm)
 * @throws IOException propagated from {@code writeProtobufObject}
 */
@Override
public int writeRowIndexes(SliceOutput output, List<RowGroupIndex> rowGroupIndexes)
        throws IOException
{
    DwrfProto.RowIndex.Builder rowIndex = DwrfProto.RowIndex.newBuilder();
    for (RowGroupIndex rowGroupIndex : rowGroupIndexes) {
        rowIndex.addEntry(toRowGroupIndex(rowGroupIndex));
    }
    return writeProtobufObject(output, rowIndex.build());
}
/**
 * Serializes a stripe footer: all streams are converted and appended first,
 * followed by all column encodings, each in their original order.
 *
 * @param output destination for the serialized message
 * @param footer stripe footer supplying the streams and column encodings
 * @return whatever {@code writeProtobufObject} returns (presumably the bytes written; confirm)
 * @throws IOException propagated from {@code writeProtobufObject}
 */
@Override
public int writeStripeFooter(SliceOutput output, StripeFooter footer)
        throws IOException
{
    DwrfProto.StripeFooter.Builder stripeFooter = DwrfProto.StripeFooter.newBuilder();
    for (Stream stream : footer.getStreams()) {
        stripeFooter.addStreams(toStream(stream));
    }
    for (ColumnEncoding columnEncoding : footer.getColumnEncodings()) {
        stripeFooter.addColumns(toColumnEncoding(columnEncoding));
    }
    return writeProtobufObject(output, stripeFooter.build());
}
/**
 * Serializes the file footer: row count, row index stride, stripes, types,
 * file-level statistics, and metadata. User metadata entries are appended first,
 * followed by the entries of {@code STATIC_METADATA}.
 *
 * @param output destination for the serialized message
 * @param footer footer supplying the values to convert
 * @return whatever {@code writeProtobufObject} returns (presumably the bytes written; confirm)
 * @throws IOException propagated from {@code writeProtobufObject}
 */
@Override
public int writeFooter(SliceOutput output, Footer footer)
        throws IOException
{
    DwrfProto.Footer.Builder footerBuilder = DwrfProto.Footer.newBuilder();
    footerBuilder.setNumberOfRows(footer.getNumberOfRows());
    footerBuilder.setRowIndexStride(footer.getRowsInRowGroup());

    footerBuilder.addAllStripes(footer.getStripes().stream()
            .map(DwrfMetadataWriter::toStripeInformation)
            .collect(toImmutableList()));
    footerBuilder.addAllTypes(footer.getTypes().stream()
            .map(DwrfMetadataWriter::toType)
            .collect(toImmutableList()));
    footerBuilder.addAllStatistics(footer.getFileStats().stream()
            .map(DwrfMetadataWriter::toColumnStatistics)
            .collect(toImmutableList()));

    // User-supplied entries go in before the static ones, preserving the original order.
    footerBuilder.addAllMetadata(footer.getUserMetadata().entrySet().stream()
            .map(DwrfMetadataWriter::toUserMetadata)
            .collect(toImmutableList()));
    footerBuilder.addAllMetadata(STATIC_METADATA.entrySet().stream()
            .map(DwrfMetadataWriter::toUserMetadata)
            .collect(toImmutableList()));

    return writeProtobufObject(output, footerBuilder.build());
}