private void readPageV2(DataPageV2 page) {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn = newRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn = newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels());
  try {
    LOG.debug("page data size " + page.getData().size() + " bytes and " + pageValueCount + " records");
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    return new RLEIntIterator(
        new RunLengthBitPackingHybridDecoder(
            BytesUtils.getWidthFromMaxInt(maxLevel),
            new ByteArrayInputStream(bytes.toByteArray())));
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read levels in page for col " + descriptor, e);
  }
}
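For context, a minimal sketch of the same level decoding done by hand, using the RLE/bit-packing hybrid decoder that the iterator above wraps. The helper name decodeLevels and the levelBytes/valueCount parameters are hypothetical stand-ins for values a real reader would already hold:

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.parquet.bytes.BytesUtils;
import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder;

// Hypothetical helper: decode valueCount levels from the raw level bytes of a page.
static int[] decodeLevels(byte[] levelBytes, int maxLevel, int valueCount) throws IOException {
  int[] levels = new int[valueCount];
  if (maxLevel == 0) {
    return levels; // no levels are stored when the max level is 0; everything is 0
  }
  RunLengthBitPackingHybridDecoder decoder = new RunLengthBitPackingHybridDecoder(
      BytesUtils.getWidthFromMaxInt(maxLevel),
      new ByteArrayInputStream(levelBytes));
  for (int i = 0; i < valueCount; i++) {
    levels[i] = decoder.readInt();
  }
  return levels;
}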
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  int uncompressedSize = (int) dictionaryBytes.size();
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
  this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize,
      dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
}
private static DictionaryPage reusableCopy(DictionaryPage dict) {
  if (dict == null) {
    return null;
  }
  try {
    return new DictionaryPage(
        BytesInput.from(dict.getBytes().toByteArray()),
        dict.getDictionarySize(),
        dict.getEncoding());
  } catch (IOException e) {
    throw new ParquetDecodingException("Cannot read dictionary", e);
  }
}
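The two snippets above use the two DictionaryPage constructors: the three-argument form holds uncompressed bytes, while the four-argument form records the original uncompressed size next to the compressed bytes. A hedged sketch of both, where dictBytes, numValues, and compressor are stand-ins for values a real writer would supply:

import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.page.DictionaryPage;

// Uncompressed page: the size is implied by the bytes themselves.
DictionaryPage uncompressed =
    new DictionaryPage(BytesInput.from(dictBytes), numValues, Encoding.PLAIN);

// Compressed page: the original uncompressed size must be carried explicitly,
// as writeDictionaryPage above does before compressing.
DictionaryPage compressed = new DictionaryPage(
    compressor.compress(BytesInput.from(dictBytes)),
    dictBytes.length, // uncompressedSize
    numValues,
    Encoding.PLAIN);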
/**
 * writes a dictionary page
 * @param dictionaryPage the dictionary page
 * @throws IOException if there is an error while writing
 */
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  state = state.write();
  LOG.debug("{}: write dictionary page: {} values", out.getPos(), dictionaryPage.getDictionarySize());
  currentChunkDictionaryPageOffset = out.getPos();
  int uncompressedSize = dictionaryPage.getUncompressedSize();
  int compressedPageSize = (int) dictionaryPage.getBytes().size(); // TODO: fix casts
  metadataConverter.writeDictionaryPageHeader(
      uncompressedSize,
      compressedPageSize,
      dictionaryPage.getDictionarySize(),
      dictionaryPage.getEncoding(),
      out);
  long headerSize = out.getPos() - currentChunkDictionaryPageOffset;
  this.uncompressedLength += uncompressedSize + headerSize;
  this.compressedLength += compressedPageSize + headerSize;
  LOG.debug("{}: write dictionary page content {}", out.getPos(), compressedPageSize);
  dictionaryPage.getBytes().writeAllTo(out);
  encodingStatsBuilder.addDictEncoding(dictionaryPage.getEncoding());
  currentEncodings.add(dictionaryPage.getEncoding());
}
/**
 * Creates a reader for definition and repetition levels, returning an optimized one if
 * the levels are not needed.
 */
protected static IntIterator createRLEIterator(
    int maxLevel, BytesInput bytes, ColumnDescriptor descriptor) throws IOException {
  try {
    if (maxLevel == 0) return new NullIntIterator();
    return new RLEIntIterator(
        new RunLengthBitPackingHybridDecoder(
            BytesUtils.getWidthFromMaxInt(maxLevel),
            bytes.toInputStream()));
  } catch (IOException e) {
    throw new IOException("could not read levels in page for col " + descriptor, e);
  }
}
@Override
public BytesInput getBytes() {
  int maxDicId = getDictionarySize() - 1;
  LOG.debug("max dic id {}", maxDicId);
  int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);
  int initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);
  RunLengthBitPackingHybridEncoder encoder =
      new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator);
  encoders.add(encoder);
  IntIterator iterator = encodedValues.iterator();
  try {
    while (iterator.hasNext()) {
      encoder.writeInt(iterator.next());
    }
    // encodes the bit width
    byte[] bytesHeader = new byte[] { (byte) bitWidth };
    BytesInput rleEncodedBytes = encoder.toBytes();
    LOG.debug("rle encoded bytes {}", rleEncodedBytes.size());
    BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
    // remember size of dictionary when we last wrote a page
    lastUsedDictionarySize = getDictionarySize();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    return bytes;
  } catch (IOException e) {
    throw new ParquetEncodingException("could not encode the values", e);
  }
}
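The page layout produced above is a single header byte holding the bit width, followed by the RLE/bit-packed dictionary ids. A sketch of the matching read path under that assumption; the helper name decodeDictionaryIds is hypothetical:

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder;

// Hypothetical helper: invert the [bit width byte][RLE ids] layout written by getBytes().
static int[] decodeDictionaryIds(byte[] pageBytes, int valueCount) throws IOException {
  ByteArrayInputStream in = new ByteArrayInputStream(pageBytes);
  int bitWidth = in.read(); // the single header byte written as bytesHeader above
  RunLengthBitPackingHybridDecoder decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in);
  int[] ids = new int[valueCount];
  for (int i = 0; i < valueCount; i++) {
    ids[i] = decoder.readInt();
  }
  return ids;
}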
@SuppressWarnings("unused")
@Override
public void writeAllTo(OutputStream out) throws IOException {
  for (BytesInput input : inputs) {
    LOG.debug("write {} bytes to out", input.size());
    if (input instanceof SequenceBytesIn) LOG.debug("{");
    input.writeAllTo(out);
    if (input instanceof SequenceBytesIn) LOG.debug("}");
  }
}
/**
 * @param dictionaryPage a dictionary page of encoded double values
 * @throws IOException if there is an exception while decoding the dictionary page
 */
public PlainDoubleDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  ByteBufferInputStream in = dictionaryPage.getBytes().toInputStream();
  doubleDictionaryContent = new double[dictionaryPage.getDictionarySize()];
  DoublePlainValuesReader doubleReader = new DoublePlainValuesReader();
  doubleReader.initFromPage(dictionaryPage.getDictionarySize(), in);
  for (int i = 0; i < doubleDictionaryContent.length; i++) {
    doubleDictionaryContent[i] = doubleReader.readDouble();
  }
}
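Once built, a dictionary is consulted by id during value decoding. A brief hedged usage sketch, assuming `page` is a dictionary page of PLAIN-encoded doubles (in the real library this class is a nested type of PlainValuesDictionary):

// `page` is assumed to be a DictionaryPage of PLAIN-encoded doubles.
PlainDoubleDictionary dict = new PlainDoubleDictionary(page);
double first = dict.decodeToDouble(0); // look up the value stored behind dictionary id 0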
public static Dictionary readDictionary(FSDataInputStream in, ColumnDescriptor column,
    PageHeaderWithOffset pageHeader, BytesDecompressor decompressor) throws IOException {
  in.seek(pageHeader.getOffset());
  final byte[] data = new byte[pageHeader.getPageHeader().getCompressed_page_size()];
  int read = in.read(data);
  if (read != data.length) {
    throw new IOException(format("Failed to read dictionary page, read %d bytes, expected %d", read, data.length));
  }
  final DictionaryPage dictionaryPage = new DictionaryPage(
      decompressor.decompress(BytesInput.from(data), pageHeader.getPageHeader().getUncompressed_page_size()),
      pageHeader.getPageHeader().getDictionary_page_header().getNum_values(),
      CONVERTER.getEncoding(pageHeader.getPageHeader().getDictionary_page_header().getEncoding()));
  return dictionaryPage.getEncoding().initDictionary(column, dictionaryPage);
}
/**
 * copies the input into a new byte array
 * @param bytesInput a BytesInput
 * @return a copy of the BytesInput
 * @throws IOException if there is an exception when reading bytes from the BytesInput
 */
public static BytesInput copy(BytesInput bytesInput) throws IOException {
  return from(bytesInput.toByteArray());
}
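copy matters because some BytesInput instances are lazy views over streams or reusable buffers that are only safe to consume once; copying materializes them into a stable byte array. A small sketch using only calls that appear in the snippets here:

import org.apache.parquet.bytes.BytesInput;

BytesInput a = BytesInput.from(new byte[] { 1, 2 });
BytesInput b = BytesInput.from(new byte[] { 3 });
BytesInput joined = BytesInput.concat(a, b); // a lazy view, not yet materialized
BytesInput stable = BytesInput.copy(joined); // materialized into a fresh byte[]
assert stable.size() == 3;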
/**
 * @return the bytes representing the packed values
 * @throws IOException if there is an exception while creating the BytesInput
 */
public BytesInput toBytes() throws IOException {
  int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
  LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
  if (inputSize > 0) {
    // zero out the unused tail of the current input slab before packing it
    for (int i = inputSize; i < input.length; i++) {
      input[i] = 0;
    }
    pack();
  }
  return concat(concat(slabs), BytesInput.from(packed, 0, packedByteLength));
}
final ByteBuffer dictionaryBytes = dictionaryPage.getBytes().toByteBuffer();
binaryDictionaryContent = new Binary[dictionaryPage.getDictionarySize()];
int len = readIntLittleEndian(dictionaryBytes, offset);
/**
 * Compress a given buffer of bytes
 * @param bytes the uncompressed input
 * @return the Snappy-compressed bytes
 * @throws IOException if compression fails
 */
@Override
public BytesInput compress(BytesInput bytes) throws IOException {
  int maxOutputSize = Snappy.maxCompressedLength((int) bytes.size());
  ByteBuffer bufferIn = bytes.toByteBuffer();
  outgoing = ensure(outgoing, maxOutputSize);
  final int size;
  if (bufferIn.isDirect()) {
    size = Snappy.compress(bufferIn, outgoing);
  } else {
    // Snappy library requires buffers be direct
    this.incoming = ensure(this.incoming, (int) bytes.size());
    this.incoming.put(bufferIn);
    this.incoming.flip();
    size = Snappy.compress(this.incoming, outgoing);
  }
  outgoing.limit(size);
  return BytesInput.from(outgoing);
}
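The decompress side mirrors this, with the same direct-buffer requirement. A hedged sketch using the xerial snappy-java calls, where `compressed` is assumed to be a direct buffer positioned at the compressed bytes:

import java.io.IOException;
import java.nio.ByteBuffer;

import org.xerial.snappy.Snappy;

static ByteBuffer snappyDecompress(ByteBuffer compressed) throws IOException {
  // Snappy records the uncompressed length inside the compressed stream.
  ByteBuffer out = ByteBuffer.allocateDirect(Snappy.uncompressedLength(compressed));
  Snappy.uncompress(compressed, out);
  return out;
}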
if (dictionaryPage == null) {
  dictionaryPage = new DictionaryPage(
      decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()),
      pageHeader.uncompressed_page_size,
      parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding));
}

buffer.flip();
return new DataPageV1(
    decompressor.decompress(BytesInput.from(buffer), pageHeader.getUncompressed_page_size()),
    pageHeader.data_page_header.num_values,
    pageHeader.uncompressed_page_size,

BytesInput decompressedPageData = decompressor.decompress(
    BytesInput.from(buffer), pageHeader.uncompressed_page_size);
ByteBuffer byteBuffer = decompressedPageData.toByteBuffer();
int limit = byteBuffer.limit();
// slice the decompressed V2 page into its [rep levels][def levels][data] sections
byteBuffer.limit(dataHeaderV2.getRepetition_levels_byte_length());
BytesInput repetitionLevels = BytesInput.from(byteBuffer.slice());
byteBuffer.position(dataHeaderV2.getRepetition_levels_byte_length());
byteBuffer.limit(dataHeaderV2.getRepetition_levels_byte_length() + dataHeaderV2.getDefinition_levels_byte_length());
BytesInput definitionLevels = BytesInput.from(byteBuffer.slice());
byteBuffer.position(dataHeaderV2.getRepetition_levels_byte_length() + dataHeaderV2.getDefinition_levels_byte_length());
byteBuffer.limit(limit);
BytesInput data = BytesInput.from(byteBuffer.slice());
/**
 * getBytes will trigger flushing block buffer, DO NOT write after getBytes() is called without calling reset()
 *
 * @return a BytesInput that contains the encoded page data
 */
@Override
public BytesInput getBytes() {
  // The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount
  if (deltaValuesToFlush != 0) {
    flushBlockBuffer();
  }
  return BytesInput.concat(
      config.toBytesInput(),
      BytesInput.fromUnsignedVarInt(totalValueCount),
      BytesInput.fromZigZagVarLong(firstValue),
      BytesInput.from(baos));
}
/**
 * getBytes will trigger flushing block buffer, DO NOT write after getBytes() is called without calling reset()
 *
 * @return a BytesInput that contains the encoded page data
 */
@Override
public BytesInput getBytes() {
  // The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount
  if (deltaValuesToFlush != 0) {
    flushBlockBuffer();
  }
  return BytesInput.concat(
      config.toBytesInput(),
      BytesInput.fromUnsignedVarInt(totalValueCount),
      BytesInput.fromZigZagVarInt(firstValue),
      BytesInput.from(baos));
}
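The two getBytes methods above are the long and int flavors of the same delta-binary-packing writer; the only difference is whether the first value is written as a zig-zag var long or a zig-zag var int. Both share the contract in the javadoc: getBytes() flushes the pending block, so the writer must be reset before writing again. A minimal lifecycle sketch, where `writer` stands in for either variant (the long flavor would use writeLong):

writer.writeInteger(3);
writer.writeInteger(5);
BytesInput page = writer.getBytes();   // flushes the pending block; do not write again yet
byte[] pageBytes = page.toByteArray(); // materialize before reset, in case the bytes are a view
writer.reset();                        // now the writer may encode the next page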
private DictionaryPage readDictionaryPageHelper(PageHeader pageHeader) throws IOException {
  ByteBuffer data = uncompressPage(pageHeader, false);
  return new DictionaryPage(
      BytesInput.from(data, 0, pageHeader.uncompressed_page_size),
      pageHeader.getDictionary_page_header().getNum_values(),
      parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding));
}
@Override
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  final BytesInput decompressed;
  if (codec != null) {
    decompressor.reset();
    InputStream is = codec.createInputStream(bytes.toInputStream(), decompressor);
    decompressed = BytesInput.from(is, uncompressedSize);
  } else {
    decompressed = bytes;
  }
  return decompressed;
}
@Override
public void decompress(ByteBuffer input, int compressedSize, ByteBuffer output, int uncompressedSize)
    throws IOException {
  ByteBuffer decompressed = decompress(BytesInput.from(input), uncompressedSize).toByteBuffer();
  output.put(decompressed);
}