/** * convert parquet binary decimal to BigDecimal, lifted from * https://github.com/apache/parquet-mr/blob/master/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java#L38 */ private static BigDecimal convertBinaryToDecimal(Binary value, int precision, int scale) { // based on parquet-mr pig conversion which is based on spark conversion... yo dawg? if (precision <= 18) { ByteBuffer buffer = value.toByteBuffer(); byte[] bytes = buffer.array(); int start = buffer.arrayOffset() + buffer.position(); int end = buffer.arrayOffset() + buffer.limit(); long unscaled = 0L; int i = start; while (i < end) { unscaled = (unscaled << 8 | bytes[i] & 0xff); i++; } int bits = 8 * (end - start); long unscaledNew = (unscaled << (64 - bits)) >> (64 - bits); if (unscaledNew <= -Math.pow(10, 18) || unscaledNew >= Math.pow(10, 18)) { return new BigDecimal(unscaledNew); } else { return BigDecimal.valueOf(unscaledNew / Math.pow(10, scale)); } } else { return new BigDecimal(new BigInteger(value.getBytes()), scale); } }
/** * convert deprecated parquet int96 nanosecond timestamp to a long, based on * https://github.com/prestodb/presto/blob/master/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetTimestampUtils.java#L56 */ private static long convertInt96BinaryToTimestamp(Binary value) { // based on prestodb parquet int96 timestamp conversion byte[] bytes = value.getBytes(); // little endian encoding - need to invert byte order long timeOfDayNanos = Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); int julianDay = Ints.fromBytes(bytes[11], bytes[10], bytes[9], bytes[8]); long ts = ((julianDay - JULIAN_EPOCH_OFFSET_DAYS) * MILLIS_IN_DAY) + (timeOfDayNanos / NANOS_PER_MILLISECOND); return ts; }
case BINARY: Binary bin = g.getBinary(fieldIndex, index); byte[] bytes = bin.getBytes(); if (binaryAsString) { return StringUtils.fromUtf8(bytes);
/**
 * Looks up the dictionary entry for {@code id} and returns it as a byte array.
 *
 * <p>Delegates to {@code dictionary.decodeToBinary(id)} and returns the result of calling
 * {@code getBytes()} on the decoded value.
 * NOTE(review): whether the returned array is a fresh copy or shared with the dictionary
 * depends on the decoded value's {@code getBytes()} implementation - confirm before mutating it.
 *
 * @param id dictionary id to decode
 * @return the bytes of the decoded dictionary entry
 */
@Override public byte[] decodeToBinary(int id) { return dictionary.decodeToBinary(id).getBytes(); } }
/**
 * Looks up the dictionary entry for {@code id} and returns it as a byte array.
 *
 * <p>Delegates to {@code dictionary.decodeToBinary(id)} and returns the result of calling
 * {@code getBytes()} on the decoded value.
 * NOTE(review): whether the returned array is a fresh copy or shared with the dictionary
 * depends on the decoded value's {@code getBytes()} implementation - confirm before mutating it.
 *
 * @param id dictionary id to decode
 * @return the bytes of the decoded dictionary entry
 */
@Override public byte[] decodeToBinary(int id) { return dictionary.decodeToBinary(id).getBytes(); } }
/**
 * Returns the binary value stored at {@code rowId} as a byte array.
 *
 * <p>With a dictionary present, the row's dictionary id is decoded and the decoded
 * value's bytes are returned. Otherwise the row's slice of the backing byte array is
 * copied into a new array of exactly the slice length.
 */
public final byte[] getBinary(int rowId) {
  if (dictionary != null) {
    final Binary decoded = dictionary.decodeToBinary(dictionaryIds.getDictId(rowId));
    return decoded.getBytes();
  }

  final ColumnVector.Array slice = getByteArray(rowId);
  final byte[] copy = new byte[slice.length];
  System.arraycopy(slice.byteArray, slice.byteArrayOffset, copy, 0, copy.length);
  return copy;
}
/**
 * Returns the value stored at {@code rowId} as a {@code UTF8String}.
 *
 * <p>With a dictionary present, the row's dictionary id is decoded and the string is
 * built from the decoded bytes. Otherwise the string is built from the row's slice
 * (array, offset, length) of the backing byte array.
 */
public final UTF8String getUTF8String(int rowId) {
  if (dictionary != null) {
    final Binary decoded = dictionary.decodeToBinary(dictionaryIds.getDictId(rowId));
    return UTF8String.fromBytes(decoded.getBytes());
  }

  final ColumnVector.Array slice = getByteArray(rowId);
  return UTF8String.fromBytes(slice.byteArray, slice.byteArrayOffset, slice.length);
}
/**
 * Returns the maximum value, exposed as the JSON property {@code "max"}.
 *
 * <p>When the primitive type is {@code BINARY} and a maximum is present, the binary's
 * bytes are decoded into a {@link String}; for every other case {@code max} is returned
 * unchanged (including {@code null}).
 *
 * @return the maximum as a String for non-null BINARY values, otherwise {@code max} as-is
 */
@JsonProperty(value = "max")
public Object getMax() {
  if (primitiveType == PrimitiveType.PrimitiveTypeName.BINARY && max != null) {
    // Decode with an explicit charset so the output does not vary with the JVM's
    // platform default encoding.
    // NOTE(review): assumes BINARY maxima hold UTF-8 text - confirm against the writer.
    return new String(((Binary) max).getBytes(), java.nio.charset.StandardCharsets.UTF_8);
  }
  return max;
}
/**
 * Returns a Binary to use in place of this one when the value must be retained.
 *
 * <p>If {@code isBackingBytesReused} is not set, this instance is returned as-is.
 * Otherwise a new Binary is created via {@code Binary.fromConstantByteArray} from the
 * result of {@code getBytes()}.
 *
 * @return this instance, or a new constant Binary built from this instance's bytes
 */
public Binary copy() {
  if (!isBackingBytesReused) {
    return this;
  }
  return Binary.fromConstantByteArray(getBytes());
}
for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putByteArray(rowId + i, data.readBinary(arrayLen).getBytes()); } else { column.putNull(rowId + i);
/**
 * Reads the next binary value from the column and wraps it as a {@code UTF8String}.
 *
 * <p>When the value's buffer exposes a backing array, the string is created directly
 * over that array using the buffer's offset, position and remaining length; otherwise
 * it is created from the bytes returned by {@code getBytes()}.
 *
 * @param ignored unused
 * @return the next column value as a UTF8String
 */
@Override
public UTF8String read(UTF8String ignored) {
  final Binary next = column.nextBinary();
  final ByteBuffer view = next.toByteBuffer();

  if (!view.hasArray()) {
    return UTF8String.fromBytes(next.getBytes());
  }

  final byte[] backing = view.array();
  final int offset = view.arrayOffset() + view.position();
  return UTF8String.fromBytes(backing, offset, view.remaining());
}
}
/**
 * Writes the given binary decimal value through {@code writer}.
 *
 * @param value binary whose bytes are passed to {@code writeBigEndianBytesToDecimal}
 */
@Override
public void addBinary(Binary value) {
  /* The bytes are handed over unchanged, as big-endian input. Per the original note,
   * the decimal vector's buffer holds them in LE format, and the byte swap happens
   * while writing into the vector. */
  writer.writeBigEndianBytesToDecimal(value.getBytes());
}
}
/**
 * Adds the given binary value to {@code parent} as an Inet4 datum.
 *
 * @param value binary whose bytes are passed to {@code DatumFactory.createInet4}
 */
@Override
public final void addBinary(Binary value) {
  final byte[] addressBytes = value.getBytes();
  parent.add(DatumFactory.createInet4(addressBytes));
}
}
for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putByteArray(rowId + i, data.readBinary(arrayLen).getBytes()); } else { column.putNull(rowId + i);
for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putByteArray(rowId + i, data.readBinary(arrayLen).getBytes()); } else { column.putNull(rowId + i);
@Override public void writeBytes(Binary v) { int i = 0; byte[] vb = v.getBytes(); int length = previous.length < vb.length ? previous.length : vb.length; // find the number of matching prefix bytes between this value and the previous one for(i = 0; (i < length) && (previous[i] == vb[i]); i++); prefixLengthWriter.writeInteger(i); suffixWriter.writeBytes(v.slice(i, vb.length - i)); previous = vb; } }
/**
 * Writes one three-int value into {@code valueVec} for the given index.
 *
 * <p>When {@code usingDictionary} is set, the next value is read from
 * {@code pageReader.dictionaryValueReader}, three ints are decoded from its bytes at
 * offsets 0, 4 and 8 via {@code ParquetReaderUtility.getIntFromLEBytes}, and they are
 * stored with {@code setSafe} at position {@code index * 12}.
 *
 * <p>After that branch (whether or not it ran), three ints are read from {@code bytebuf}
 * at {@code start}, {@code start + 4} and {@code start + 8} and stored with {@code set}
 * at position {@code index}.
 *
 * <p>NOTE(review): the dictionary branch neither returns nor has an {@code else}, so the
 * {@code bytebuf} write always executes afterwards - confirm this fall-through is intended.
 * NOTE(review): the dictionary branch addresses {@code index * 12} while the direct path
 * addresses {@code index} - confirm which unit the mutator expects.
 *
 * @param start byte offset into {@code bytebuf} of the value to read
 * @param index position passed to the value vector's mutator
 */
@Override void addNext(int start, int index) { if (usingDictionary) { byte[] input = pageReader.dictionaryValueReader.readBytes().getBytes(); valueVec.getMutator().setSafe(index * 12, 1, ParquetReaderUtility.getIntFromLEBytes(input, 0), ParquetReaderUtility.getIntFromLEBytes(input, 4), ParquetReaderUtility.getIntFromLEBytes(input, 8)); } valueVec.getMutator().set(index, 1, bytebuf.getInt(start), bytebuf.getInt(start + 4), bytebuf.getInt(start + 8)); } }