- ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length; stream.seek(footerLengthIndex); int footerLength = BytesUtils.readIntLittleEndian(stream); stream.seek(footerLengthIndex - footerLength); if (LOG.isInfoEnabled()) {
/**
 * Reads a little-endian int stored on the number of bytes reported by
 * {@code paddedByteCountFromBits(bitWidth)}.
 *
 * @param in the stream to read from
 * @param bitWidth the bit width used to derive how many bytes are consumed
 * @return the decoded value, or 0 when the derived byte count is 0 (nothing is read)
 * @throws IOException if the underlying read fails, or if the derived byte count
 *     is anything other than 0, 1, 2, 3 or 4
 */
public static int readIntLittleEndianPaddedOnBitWidth(InputStream in, int bitWidth) throws IOException {
  final int byteCount = paddedByteCountFromBits(bitWidth);

  if (byteCount == 0) {
    // Zero-width values occupy no bytes at all.
    return 0;
  }
  if (byteCount == 1) {
    return BytesUtils.readIntLittleEndianOnOneByte(in);
  }
  if (byteCount == 2) {
    return BytesUtils.readIntLittleEndianOnTwoBytes(in);
  }
  if (byteCount == 3) {
    return BytesUtils.readIntLittleEndianOnThreeBytes(in);
  }
  if (byteCount == 4) {
    return BytesUtils.readIntLittleEndian(in);
  }

  // Any other byte count cannot be represented in an int.
  throw new IOException(
      String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
}
/**
 * Prepares this reader for a new page: reads the little-endian int length prefix
 * from {@code stream} and builds a decoder over a slice of that many bytes.
 *
 * @param valueCountL not used by this implementation
 * @param stream the page data, positioned at the length prefix
 * @throws IOException if reading the prefix or slicing the stream fails
 */
@Override
public void initFromPage(int valueCountL, ByteBufferInputStream stream) throws IOException {
  final int encodedSize = BytesUtils.readIntLittleEndian(stream);
  decoder = new RunLengthBitPackingHybridDecoder(bitWidth, stream.sliceStream(encodedSize));
}
/**
 * Reads a little-endian int stored on the number of bytes reported by
 * {@code paddedByteCountFromBits(bitWidth)}.
 *
 * @param in the stream to read from
 * @param bitWidth the bit width used to derive how many bytes are consumed
 * @return the decoded value, or 0 when the derived byte count is 0 (nothing is read)
 * @throws IOException if the underlying read fails, or if the derived byte count
 *     is anything other than 0, 1, 2, 3 or 4
 */
public static int readIntLittleEndianPaddedOnBitWidth(InputStream in, int bitWidth) throws IOException {
  final int byteCount = paddedByteCountFromBits(bitWidth);

  if (byteCount == 0) {
    // Zero-width values occupy no bytes at all.
    return 0;
  }
  if (byteCount == 1) {
    return BytesUtils.readIntLittleEndianOnOneByte(in);
  }
  if (byteCount == 2) {
    return BytesUtils.readIntLittleEndianOnTwoBytes(in);
  }
  if (byteCount == 3) {
    return BytesUtils.readIntLittleEndianOnThreeBytes(in);
  }
  if (byteCount == 4) {
    return BytesUtils.readIntLittleEndian(in);
  }

  // Any other byte count cannot be represented in an int.
  throw new IOException(
      String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
}
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException { long fileLen = file.getLength(); String filePath = file.toString(); LOG.debug("File length {}", fileLen); int FOOTER_LENGTH_SIZE = 4; if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) { // MAGIC + data + footer + footerIndex + MAGIC throw new RuntimeException(filePath + " is not a Parquet file (too small length: " + fileLen + ")"); } long footerLengthIndex = fileLen - FOOTER_LENGTH_SIZE - MAGIC.length; LOG.debug("reading footer index at {}", footerLengthIndex); f.seek(footerLengthIndex); int footerLength = readIntLittleEndian(f); byte[] magic = new byte[MAGIC.length]; f.readFully(magic); if (!Arrays.equals(MAGIC, magic)) { throw new RuntimeException(filePath + " is not a Parquet file. expected magic number at tail " + Arrays.toString(MAGIC) + " but found " + Arrays.toString(magic)); } long footerIndex = footerLengthIndex - footerLength; LOG.debug("read footer length: {}, footer index: {}", footerLength, footerIndex); if (footerIndex < MAGIC.length || footerIndex >= footerLengthIndex) { throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerIndex); } f.seek(footerIndex); return converter.readParquetMetadata(f, options.getMetadataFilter()); }
// Decode an int with BytesUtils.readIntLittleEndian, starting FOOTER_METADATA_SIZE bytes
// before the end of footerBytes.
final int size = BytesUtils.readIntLittleEndian(footerBytes, footerBytes.length - FOOTER_METADATA_SIZE);
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException { long fileLen = file.getLength(); String filePath = file.toString(); LOG.debug("File length {}", fileLen); int FOOTER_LENGTH_SIZE = 4; if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) { // MAGIC + data + footer + footerIndex + MAGIC throw new RuntimeException(filePath + " is not a Parquet file (too small length: " + fileLen + ")"); } long footerLengthIndex = fileLen - FOOTER_LENGTH_SIZE - MAGIC.length; LOG.debug("reading footer index at {}", footerLengthIndex); f.seek(footerLengthIndex); int footerLength = readIntLittleEndian(f); byte[] magic = new byte[MAGIC.length]; f.readFully(magic); if (!Arrays.equals(MAGIC, magic)) { throw new RuntimeException(filePath + " is not a Parquet file. expected magic number at tail " + Arrays.toString(MAGIC) + " but found " + Arrays.toString(magic)); } long footerIndex = footerLengthIndex - footerLength; LOG.debug("read footer length: {}, footer index: {}", footerLength, footerIndex); if (footerIndex < MAGIC.length || footerIndex >= footerLengthIndex) { throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerIndex); } f.seek(footerIndex); return converter.readParquetMetadata(f, options.getMetadataFilter()); }
@Override public void initFromPage(int valueCountL, ByteBufferInputStream stream) throws IOException { int length = BytesUtils.readIntLittleEndian(stream); this.decoder = new RunLengthBitPackingHybridDecoder( bitWidth, stream.sliceStream(length)); // 4 is for the length which is stored as 4 bytes little endian updateNextOffset(length + 4); }
// Decode an int with BytesUtils.readIntLittleEndian, starting FOOTER_METADATA_SIZE bytes
// before the end of footerBytes.
final int size = BytesUtils.readIntLittleEndian(footerBytes, footerBytes.length - FOOTER_METADATA_SIZE);
// Read the footer length from f as a little-endian int, then fill a MAGIC.length-sized
// buffer with the bytes that immediately follow it. The two reads are order-dependent.
int footerLength = readIntLittleEndian(f); byte[] magic = new byte[MAGIC.length]; f.readFully(magic);
/**
 * Skips one value: reads its little-endian int length prefix from {@code in},
 * then skips that many bytes.
 *
 * @throws ParquetDecodingException wrapping any {@link IOException} or
 *     {@link RuntimeException} raised while reading or skipping
 */
@Override
public void skip() {
  try {
    final int valueSize = BytesUtils.readIntLittleEndian(in);
    in.skipFully(valueSize);
  } catch (IOException | RuntimeException e) {
    // Both failure kinds are reported identically, with the current stream position.
    throw new ParquetDecodingException("could not skip bytes at offset " + in.position(), e);
  }
}
/**
 * Skips one value: reads its little-endian int length prefix from {@code in},
 * then skips that many bytes.
 *
 * @throws ParquetDecodingException wrapping any {@link IOException} or
 *     {@link RuntimeException} raised while reading or skipping
 */
@Override
public void skip() {
  try {
    final int valueSize = BytesUtils.readIntLittleEndian(in);
    in.skipFully(valueSize);
  } catch (IOException | RuntimeException e) {
    // Both failure kinds are reported identically, with the current stream position.
    throw new ParquetDecodingException("could not skip bytes at offset " + in.position(), e);
  }
}
/**
 * Reads one value: a little-endian int length prefix from {@code in}, followed by
 * a slice of that many bytes wrapped as a {@link Binary}.
 *
 * @return the value backed by the sliced buffer
 * @throws ParquetDecodingException wrapping any {@link IOException} or
 *     {@link RuntimeException} raised while reading or slicing
 */
@Override
public Binary readBytes() {
  try {
    final int valueSize = BytesUtils.readIntLittleEndian(in);
    return Binary.fromConstantByteBuffer(in.slice(valueSize));
  } catch (IOException | RuntimeException e) {
    // Both failure kinds are reported identically, with the current stream position.
    throw new ParquetDecodingException("could not read bytes at offset " + in.position(), e);
  }
}
/**
 * Reads one value: a little-endian int length prefix from {@code in}, followed by
 * a slice of that many bytes wrapped as a {@link Binary}.
 *
 * @return the value backed by the sliced buffer
 * @throws ParquetDecodingException wrapping any {@link IOException} or
 *     {@link RuntimeException} raised while reading or slicing
 */
@Override
public Binary readBytes() {
  try {
    final int valueSize = BytesUtils.readIntLittleEndian(in);
    return Binary.fromConstantByteBuffer(in.slice(valueSize));
  } catch (IOException | RuntimeException e) {
    // Both failure kinds are reported identically, with the current stream position.
    throw new ParquetDecodingException("could not read bytes at offset " + in.position(), e);
  }
}
/**
 * Builds the dictionary entries from the bytes of a dictionary page.
 *
 * <p>When {@code length} is {@code null}, each entry is read as a 4-byte little-endian
 * length followed by that many bytes. Otherwise every entry is exactly {@code length}
 * bytes and entries are laid out back to back.
 *
 * @param dictionaryPage the page supplying the encoding, the entry count and the raw bytes
 * @param length the fixed entry size in bytes, or {@code null} for length-prefixed entries
 * @throws IOException if the page bytes cannot be obtained
 */
public BinaryDictionary(DictionaryPage dictionaryPage, Integer length) throws IOException {
  super(dictionaryPage.getEncoding());
  final byte[] pageBytes = dictionaryPage.getSlice().getBytes();
  content = new Binary[dictionaryPage.getDictionarySize()];

  int cursor = 0;
  if (length != null) {
    // Fixed-width entries: no per-entry length prefix.
    checkArgument(length > 0, "Invalid byte array length: %s", length);
    for (int entry = 0; entry < content.length; entry++) {
      content[entry] = Binary.fromByteArray(pageBytes, cursor, length);
      cursor += length;
    }
  } else {
    // Variable-width entries: each is preceded by its size as a 4-byte int.
    for (int entry = 0; entry < content.length; entry++) {
      final int entrySize = readIntLittleEndian(pageBytes, cursor);
      cursor += 4;
      content[entry] = Binary.fromByteArray(pageBytes, cursor, entrySize);
      cursor += entrySize;
    }
  }
}
/**
 * Builds the dictionary entries from the bytes of a dictionary page.
 *
 * <p>When {@code length} is {@code null}, each entry is read as a 4-byte little-endian
 * length followed by that many bytes. Otherwise every entry is exactly {@code length}
 * bytes and entries are laid out back to back.
 *
 * @param dictionaryPage the page supplying the encoding, the entry count and the raw bytes
 * @param length the fixed entry size in bytes, or {@code null} for length-prefixed entries
 * @throws IOException if the page bytes cannot be obtained
 */
public BinaryDictionary(DictionaryPage dictionaryPage, Integer length) throws IOException {
  super(dictionaryPage.getEncoding());
  final byte[] pageBytes = dictionaryPage.getSlice().getBytes();
  content = new Binary[dictionaryPage.getDictionarySize()];

  int cursor = 0;
  if (length != null) {
    // Fixed-width entries: no per-entry length prefix.
    checkArgument(length > 0, "Invalid byte array length: %s", length);
    for (int entry = 0; entry < content.length; entry++) {
      content[entry] = Binary.fromByteArray(pageBytes, cursor, length);
      cursor += length;
    }
  } else {
    // Variable-width entries: each is preceded by its size as a 4-byte int.
    for (int entry = 0; entry < content.length; entry++) {
      final int entrySize = readIntLittleEndian(pageBytes, cursor);
      cursor += 4;
      content[entry] = Binary.fromByteArray(pageBytes, cursor, entrySize);
      cursor += entrySize;
    }
  }
}
// Decode an int from dictionaryBytes at the current offset via readIntLittleEndian.
int len = readIntLittleEndian(dictionaryBytes, offset);
// Decode an int from dictionaryBytes at the current offset via readIntLittleEndian.
int len = readIntLittleEndian(dictionaryBytes, offset);