private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    return new RLEIntIterator(
        new RunLengthBitPackingHybridDecoder(
            BytesUtils.getWidthFromMaxInt(maxLevel),
            new ByteArrayInputStream(bytes.toByteArray())));
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read levels in page for col " + descriptor, e);
  }
}
/**
 * Initializes the internal state for decoding ints of `bitWidth`.
 */
private void init(int bitWidth) {
  Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
  this.bitWidth = bitWidth;
  this.bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth);
  this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
}
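paddedByteCountFromBits rounds a bit width up to whole bytes, which is what lets the decoder above size its byte-level buffers. A minimal sketch of the equivalent arithmetic (a hypothetical standalone helper, not the Parquet source itself):

// Rounds a bit count up to the number of whole bytes needed to hold it.
// Assumed to match BytesUtils.paddedByteCountFromBits.
static int paddedByteCount(int bitLength) {
  return (bitLength + 7) / 8; // e.g. 3 bits -> 1 byte, 9 bits -> 2 bytes
}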
      - ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length;
  stream.seek(footerLengthIndex);
  int footerLength = BytesUtils.readIntLittleEndian(stream);
  stream.seek(footerLengthIndex - footerLength);
  if (LOG.isInfoEnabled()) {
/**
 * Creates a reader for definition and repetition levels, returning an optimized one if
 * the levels are not needed.
 */
protected static IntIterator createRLEIterator(
    int maxLevel, BytesInput bytes, ColumnDescriptor descriptor) throws IOException {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    return new RLEIntIterator(
        new RunLengthBitPackingHybridDecoder(
            BytesUtils.getWidthFromMaxInt(maxLevel),
            bytes.toInputStream()));
  } catch (IOException e) {
    throw new IOException("could not read levels in page for col " + descriptor, e);
  }
}
@Override
public BytesInput getBytes() {
  int maxDicId = getDictionarySize() - 1;
  LOG.debug("max dic id {}", maxDicId);
  int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);
  int initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);
  RunLengthBitPackingHybridEncoder encoder =
      new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator);
  encoders.add(encoder);
  IntIterator iterator = encodedValues.iterator();
  try {
    while (iterator.hasNext()) {
      encoder.writeInt(iterator.next());
    }
    // encodes the bit width
    byte[] bytesHeader = new byte[] { (byte) bitWidth };
    BytesInput rleEncodedBytes = encoder.toBytes();
    LOG.debug("rle encoded bytes {}", rleEncodedBytes.size());
    BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
    // remember size of dictionary when we last wrote a page
    lastUsedDictionarySize = getDictionarySize();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    return bytes;
  } catch (IOException e) {
    throw new ParquetEncodingException("could not encode the values", e);
  }
}
/**
 * @param bound the maximum value stored by this column
 */
public BitPackingValuesReader(int bound) {
  this.bitsPerValue = getWidthFromMaxInt(bound);
}
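getWidthFromMaxInt returns the minimum number of bits needed to hold any value up to the bound. A minimal sketch of the equivalent computation (hypothetical helper; assumed to behave like the BytesUtils method):

// Minimum bit width needed to represent values in [0, maxInt].
static int widthFromMax(int maxInt) {
  return 32 - Integer.numberOfLeadingZeros(maxInt); // e.g. 7 -> 3 bits, 8 -> 4 bits
}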
public static int readIntLittleEndianPaddedOnBitWidth(InputStream in, int bitWidth) throws IOException {
  int bytesWidth = paddedByteCountFromBits(bitWidth);
  switch (bytesWidth) {
    case 0:
      return 0;
    case 1:
      return BytesUtils.readIntLittleEndianOnOneByte(in);
    case 2:
      return BytesUtils.readIntLittleEndianOnTwoBytes(in);
    case 3:
      return BytesUtils.readIntLittleEndianOnThreeBytes(in);
    case 4:
      return BytesUtils.readIntLittleEndian(in);
    default:
      throw new IOException(
          String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
  }
}
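The per-width readers assemble the int least-significant byte first. A minimal sketch of the two-byte case (a hypothetical standalone method, not the Parquet source itself):

// Reads a little-endian int stored on two bytes: low byte first, then high byte.
static int readOnTwoBytes(java.io.InputStream in) throws java.io.IOException {
  int ch1 = in.read(); // least significant byte
  int ch2 = in.read(); // most significant byte
  if ((ch1 | ch2) < 0) {
    throw new java.io.EOFException();
  }
  return (ch2 << 8) | ch1;
}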
/**
 * @return the bytes representing the packed values
 * @throws IOException if there is an exception while creating the BytesInput
 */
public BytesInput toBytes() throws IOException {
  int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
  LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
  if (inputSize > 0) {
    for (int i = inputSize; i < input.length; i++) {
      input[i] = 0;
    }
    pack();
  }
  return concat(concat(slabs), BytesInput.from(packed, 0, packedByteLength));
}
    throws IOException {
  int bytesWidth = paddedByteCountFromBits(bitWidth);
  switch (bytesWidth) {
    case 0:
      break;
    case 1:
      writeIntLittleEndianOnOneByte(out, v);
      break;
    case 2:
      writeIntLittleEndianOnTwoBytes(out, v);
      break;
    case 3:
      writeIntLittleEndianOnThreeBytes(out, v);
      break;
    case 4:
      writeIntLittleEndian(out, v);
      break;
    default:
final ByteBuffer dictionaryBytes = dictionaryPage.getBytes().toByteBuffer();
binaryDictionaryContent = new Binary[dictionaryPage.getDictionarySize()];
int len = readIntLittleEndian(dictionaryBytes, offset);
@Override
public void initFromPage(int valueCount, byte[] page, int offset) throws IOException {
  checkArgument(page.length > offset, "Attempt to read offset not in the page");
  ByteArrayInputStream in = new ByteArrayInputStream(page, offset, page.length - offset);
  int bitWidth = BytesUtils.readIntLittleEndianOnOneByte(in);
  decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in);
}
@Override
public void writeAllTo(OutputStream out) throws IOException {
  BytesUtils.writeUnsignedVarInt(intValue, out);
}
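writeUnsignedVarInt emits seven payload bits per byte, setting the high bit while more bytes follow. A minimal sketch of that encoding loop (an assumption about the exact implementation, shown for illustration):

// Writes `value` as an unsigned LEB128-style varint.
static void writeUnsignedVarInt(int value, java.io.OutputStream out) throws java.io.IOException {
  while ((value & 0xFFFFFF80) != 0) {
    out.write((value & 0x7F) | 0x80); // payload bits plus continuation flag
    value >>>= 7;
  }
  out.write(value & 0x7F); // final byte, continuation flag clear
}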
private void writeRleRun() throws IOException {
  // we may have been working on a bit-packed-run
  // so close that run if it exists before writing this
  // rle-run
  endPreviousBitPackedRun();

  // write the rle-header (lsb of 0 signifies a rle run)
  BytesUtils.writeUnsignedVarInt(repeatCount << 1, baos);
  // write the repeated-value
  BytesUtils.writeIntLittleEndianPaddedOnBitWidth(baos, previousValue, bitWidth);

  // reset the repeat count
  repeatCount = 0;

  // throw away all the buffered values, they were just repeats and they've been written
  numBufferedValues = 0;
}
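As a worked example of the layout this produces (values chosen for illustration): a run of 10 copies of the value 5 at bitWidth 3 becomes the varint header 10 << 1 = 20, followed by the value padded to one byte.

// Hypothetical illustration of an RLE run, built with the same BytesUtils calls:
ByteArrayOutputStream baos = new ByteArrayOutputStream();
BytesUtils.writeUnsignedVarInt(10 << 1, baos);               // header 20; lsb 0 marks an RLE run
BytesUtils.writeIntLittleEndianPaddedOnBitWidth(baos, 5, 3); // value 5 stored on 1 padded byte
// baos now holds the two bytes [0x14, 0x05]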
/**
 * Eagerly loads all the data into memory.
 */
@Override
public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IOException {
  this.in = stream;
  this.config = DeltaBinaryPackingConfig.readConfig(in);
  this.totalValueCount = BytesUtils.readUnsignedVarInt(in);
  allocateValuesBuffer();
  bitWidths = new int[config.miniBlockNumInABlock];

  // read first value from header
  valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarLong(in);

  while (valuesBuffered < totalValueCount) {
    // valuesBuffered could be more than totalValueCount, since we flush on a mini block basis
    loadNewBlockToBuffer();
  }
}
private void readNext() throws IOException {
  Preconditions.checkArgument(in.available() > 0, "Reading past RLE/BitPacking stream.");
  final int header = BytesUtils.readUnsignedVarInt(in);
  mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED;
  switch (mode) {
    case RLE:
      currentCount = header >>> 1;
      LOG.debug("reading {} values RLE", currentCount);
      currentValue = BytesUtils.readIntLittleEndianPaddedOnBitWidth(in, bitWidth);
      break;
    case PACKED:
      int numGroups = header >>> 1;
      currentCount = numGroups * 8;
      LOG.debug("reading {} values BIT PACKED", currentCount);
      currentBuffer = new int[currentCount]; // TODO: reuse a buffer
      byte[] bytes = new byte[numGroups * bitWidth];
      // At the end of the file RLE data though, there might not be that many bytes left.
      int bytesToRead = (int) Math.ceil(currentCount * bitWidth / 8.0);
      bytesToRead = Math.min(bytesToRead, in.available());
      new DataInputStream(in).readFully(bytes, 0, bytesToRead);
      for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) {
        packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex);
      }
      break;
    default:
      throw new ParquetDecodingException("not a valid mode " + mode);
  }
}
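The header varint therefore does double duty: its low bit selects the mode and the remaining bits carry the run length or group count. A small illustration with hypothetical header values:

// Decoded the same way readNext() does:
int header = 20;                 // lsb 0 => RLE: 20 >>> 1 = 10 repeated values follow
assert (header & 1) == 0 && (header >>> 1) == 10;

header = 5;                      // lsb 1 => bit-packed: 2 groups of 8 = 16 values follow
assert (header & 1) == 1 && (header >>> 1) * 8 == 16;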
/**
 * Uses a trick mentioned in https://developers.google.com/protocol-buffers/docs/encoding to read zigzag-encoded data.
 * @param in an input stream
 * @return the value of a zig-zag varint read from the current position in the stream
 * @throws IOException if there is an exception while reading
 */
public static int readZigZagVarInt(InputStream in) throws IOException {
  int raw = readUnsignedVarInt(in);
  int temp = (((raw << 31) >> 31) ^ raw) >> 1;
  return temp ^ (raw & (1 << 31));
}
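ZigZag interleaves negative and positive numbers so small magnitudes encode to small varints. A few worked decodings, using the common one-liner that is (by assumption) equivalent to the bit-twiddling above:

// ZigZag decoding maps raw 0, 1, 2, 3, 4, ... back to 0, -1, 1, -2, 2, ...
int[] raws    = { 0,  1, 2,  3, 4 };
int[] decoded = { 0, -1, 1, -2, 2 };
for (int i = 0; i < raws.length; i++) {
  assert ((raws[i] >>> 1) ^ -(raws[i] & 1)) == decoded[i];
}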
@Override
public void writeAllTo(OutputStream out) throws IOException {
  BytesUtils.writeIntLittleEndian(out, intValue);
}
protected void writeBitWidthForMiniBlock(int i) {
  try {
    BytesUtils.writeIntLittleEndianOnOneByte(baos, bitWidths[i]);
  } catch (IOException e) {
    throw new ParquetEncodingException("cannot write bitwidth for miniblock", e);
  }
}