Refine search
/** * convert parquet binary decimal to BigDecimal, lifted from * https://github.com/apache/parquet-mr/blob/master/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java#L38 */ private static BigDecimal convertBinaryToDecimal(Binary value, int precision, int scale) { // based on parquet-mr pig conversion which is based on spark conversion... yo dawg? if (precision <= 18) { ByteBuffer buffer = value.toByteBuffer(); byte[] bytes = buffer.array(); int start = buffer.arrayOffset() + buffer.position(); int end = buffer.arrayOffset() + buffer.limit(); long unscaled = 0L; int i = start; while (i < end) { unscaled = (unscaled << 8 | bytes[i] & 0xff); i++; } int bits = 8 * (end - start); long unscaledNew = (unscaled << (64 - bits)) >> (64 - bits); if (unscaledNew <= -Math.pow(10, 18) || unscaledNew >= Math.pow(10, 18)) { return new BigDecimal(unscaledNew); } else { return BigDecimal.valueOf(unscaledNew / Math.pow(10, scale)); } } else { return new BigDecimal(new BigInteger(value.getBytes()), scale); } }
/**
 * Emits a single message holding a "votes" group whose "key_value" field contains two
 * key/value groups: ("lettuce", 34) and ("cabbage", 18).
 *
 * @param rc the consumer that receives the record events
 */
@Override
public void write(RecordConsumer rc) {
  // Parallel arrays: keys[i] is written with values[i], in this order.
  final String[] keys = {"lettuce", "cabbage"};
  final int[] values = {34, 18};

  rc.startMessage();
  rc.startField("votes", 0);
  rc.startGroup();
  rc.startField("key_value", 0);

  for (int entry = 0; entry < keys.length; entry++) {
    rc.startGroup();

    rc.startField("key", 0);
    rc.addBinary(Binary.fromString(keys[entry]));
    rc.endField("key", 0);

    rc.startField("value", 1);
    rc.addInteger(values[entry]);
    rc.endField("value", 1);

    rc.endGroup();
  }

  rc.endField("key_value", 0);
  rc.endGroup();
  rc.endField("votes", 0);
  rc.endMessage();
}
});
/**
 * Wraps the next {@code len} bytes of {@code buffer} in a constant-backed {@code Binary}
 * and advances {@code offset} past them.
 *
 * @param len number of bytes to expose
 * @return a {@code Binary} over {@code buffer}, starting at
 *         {@code offset - Platform.BYTE_ARRAY_OFFSET}
 */
@Override
public final Binary readBinary(int len) {
  // The Binary is created before the cursor moves, so it starts at the current position.
  final Binary window =
      Binary.fromConstantByteArray(buffer, offset - Platform.BYTE_ARRAY_OFFSET, len);
  offset += len;
  return window;
}
}
/**
 * Decodes a 12-byte binary value into a {@code NanoTime}.
 *
 * <p>The bytes are read in little-endian order: first an 8-byte time-of-day in nanoseconds,
 * then a 4-byte Julian day.
 *
 * @param bytes the encoded value; its length must be exactly 12, otherwise the argument
 *              check rejects it with the message "Must be 12 bytes"
 * @return a {@code NanoTime} built from the decoded Julian day and time-of-day nanoseconds
 */
public static NanoTime fromBinary(Binary bytes) {
  Preconditions.checkArgument(bytes.length() == 12, "Must be 12 bytes");

  final ByteBuffer littleEndian = bytes.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
  // Read order matters: the long comes first in the encoding, then the int.
  final long nanosOfDay = littleEndian.getLong();
  final int day = littleEndian.getInt();
  return new NanoTime(day, nanosOfDay);
}
/**
 * Reads the next value into {@code holder} and writes it to the consumer as one binary field.
 *
 * <p>The months, days and milliseconds of the holder are stored as ints at byte positions
 * 0, 4 and 8 of {@code output} (presumably little-endian, per the helper's name), and that
 * array is then handed to the consumer.
 *
 * @throws IOException declared by the overridden method
 */
@Override
public void writeField() throws IOException {
  consumer.startField(fieldName, fieldId);
  reader.read(holder);

  IntervalUtility.intToLEByteArray(holder.months, output, 0);
  IntervalUtility.intToLEByteArray(holder.days, output, 4);
  IntervalUtility.intToLEByteArray(holder.milliseconds, output, 8);

  final Binary encoded = Binary.fromByteArray(output);
  consumer.addBinary(encoded);
  consumer.endField(fieldName, fieldId);
}
}
/**
 * Extracts the string held by {@code value} through the inspector and writes it to the
 * record consumer as a binary value.
 *
 * @param value the object the inspector knows how to read
 */
@Override
public void write(Object value) {
  final String text = inspector.getPrimitiveJavaObject(value);
  final Binary encoded = Binary.fromString(text);
  recordConsumer.addBinary(encoded);
}
}
/** * convert deprecated parquet int96 nanosecond timestamp to a long, based on * https://github.com/prestodb/presto/blob/master/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetTimestampUtils.java#L56 */ private static long convertInt96BinaryToTimestamp(Binary value) { // based on prestodb parquet int96 timestamp conversion byte[] bytes = value.getBytes(); // little endian encoding - need to invert byte order long timeOfDayNanos = Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); int julianDay = Ints.fromBytes(bytes[11], bytes[10], bytes[9], bytes[8]); long ts = ((julianDay - JULIAN_EPOCH_OFFSET_DAYS) * MILLIS_IN_DAY) + (timeOfDayNanos / NANOS_PER_MILLISECOND); return ts; }
/**
 * Extracts the byte array held by {@code value} through the inspector and writes it to the
 * record consumer as a binary value.
 *
 * @param value the object the inspector knows how to read
 */
@Override
public void write(Object value) {
  final byte[] payload = inspector.getPrimitiveJavaObject(value);
  final Binary wrapped = Binary.fromByteArray(payload);
  recordConsumer.addBinary(wrapped);
}
}
/**
 * Decodes the given binary value as a UTF-8 string.
 *
 * @param binary the value to decode
 * @return the result of {@code binary.toStringUsingUTF8()}
 */
@Override
public String convert(Binary binary) {
  final String decoded = binary.toStringUsingUTF8();
  return decoded;
}
}
/**
 * Returns a {@code Binary} that does not share a reused backing array.
 *
 * @return a new constant {@code Binary} built from {@code getBytes()} when the backing bytes
 *         are flagged as reused; otherwise this instance itself
 */
public Binary copy() {
  return isBackingBytesReused ? Binary.fromConstantByteArray(getBytes()) : this;
}
/**
 * Returns a view of {@code length} bytes beginning {@code start} bytes into this value.
 *
 * <p>The view is created over the same backing array and keeps this instance's
 * reused/constant flavour.
 *
 * @param start  position of the first byte, relative to this value's own offset
 * @param length number of bytes in the slice
 * @return a {@code Binary} over {@code value} starting at {@code offset + start}
 */
@Override
public Binary slice(int start, int length) {
  if (!isBackingBytesReused) {
    return Binary.fromConstantByteArray(value, offset + start, length);
  }
  return Binary.fromReusedByteArray(value, offset + start, length);
}
/**
 * Adds a binary value, as a string when the original type is "UTF8" and as raw bytes
 * otherwise.
 *
 * @param value the binary value to add
 */
@Override
public void addBinary(final Binary value) {
  final boolean isUtf8 = "UTF8".equals(originalType);
  if (!isUtf8) {
    addObject(value.getBytes());
    return;
  }
  addObject(value.toStringUsingUTF8());
}
@Override public FilterPredicate buildPredict(Operator op, Object constant, String columnName) throws Exception{ switch (op) { case LESS_THAN: return lt(binaryColumn(columnName), Binary.fromString((String) constant)); case IS_NULL: case EQUALS: case NULL_SAFE_EQUALS: return eq(binaryColumn(columnName), (constant == null) ? null : Binary.fromString((String) constant)); case LESS_THAN_EQUALS: return ltEq(binaryColumn(columnName), Binary.fromString((String) constant)); default: // should never be executed throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); } } }
/**
 * Writes the binary-string field at {@code ordinal} of {@code row} to the record consumer.
 *
 * <p>The bytes are wrapped with {@code Binary.fromReusedByteArray}, i.e. without being
 * declared constant.
 *
 * @param row     the row to read from
 * @param ordinal position of the field within the row
 */
@Override
public void write(BaseRow row, int ordinal) {
  final byte[] rawBytes = row.getBinaryString(ordinal).getBytes();
  recordConsumer.addBinary(Binary.fromReusedByteArray(rawBytes));
}
}
/**
 * Decodes a binary value into a {@code Timestamp}.
 *
 * <p>The bytes are read in little-endian order: an 8-byte time-of-day in nanoseconds followed
 * by a 4-byte Julian day. The resulting {@code NanoTime} is converted by
 * {@code NanoTimeUtils.getTimestamp}, honouring {@code skipTimestampConversion}.
 *
 * @param binary the encoded value
 * @return the timestamp produced by {@code NanoTimeUtils.getTimestamp}
 */
private Timestamp convert(Binary binary) {
  final ByteBuffer littleEndian = binary.toByteBuffer();
  littleEndian.order(ByteOrder.LITTLE_ENDIAN);

  // Read order matters: the long precedes the int in the encoding.
  final long nanosOfDay = littleEndian.getLong();
  final int day = littleEndian.getInt();

  return NanoTimeUtils.getTimestamp(new NanoTime(day, nanosOfDay), skipTimestampConversion);
}
/**
 * Writes a character sequence to the column as a binary value.
 *
 * <p>A {@code Utf8} instance is written from its own byte array (first
 * {@code getByteLength()} bytes, flagged as reused); any other sequence is converted via
 * {@code toString()}.
 *
 * @param repetitionLevel the repetition level to write with the value
 * @param value           the text to write
 */
@Override
public void write(int repetitionLevel, CharSequence value) {
  if (!(value instanceof Utf8)) {
    column.writeBinary(repetitionLevel, Binary.fromString(value.toString()));
    return;
  }

  final Utf8 text = (Utf8) value;
  final Binary reused = Binary.fromReusedByteArray(text.getBytes(), 0, text.getByteLength());
  column.writeBinary(repetitionLevel, reused);
}
}
@Deprecated /** * @deprecated Use @link{fromReusedByteArray} or @link{fromConstantByteArray} instead */ public static Binary fromByteArray(final byte[] value) { return fromReusedByteArray(value); // Assume producer intends to reuse byte[] }
/**
 * Converts an int96-encoded value, supplied as a byte array, into a {@code Date}.
 *
 * <p>The bytes are read in little-endian order: an 8-byte time-of-day in nanoseconds followed
 * by a 4-byte Julian day. The time of day is turned into a day fraction by dividing by
 * {@code NANOS_PER_DAY}.
 *
 * @param val the value to convert
 * @return the converted date, or {@code null} when {@code val} is not a {@code byte[]}
 */
private Date getDateFromInt96(Object val) {
  if (!(val instanceof byte[])) {
    return null;
  }

  final ByteBuffer littleEndian = Binary.fromByteArray((byte[]) val).toByteBuffer();
  littleEndian.order(ByteOrder.LITTLE_ENDIAN);

  // Read order matters: the long precedes the int in the encoding.
  final long nanosOfDay = littleEndian.getLong();
  final int day = littleEndian.getInt();

  final double dayFraction = (double) nanosOfDay / NANOS_PER_DAY;
  return new JDateTime(new JulianDateStamp(day, dayFraction)).convertToDate();
}
}
/**
 * Renders this value for debugging as
 * {@code Binary{<length> reused|constant bytes, [b0, b1, ...]}}.
 *
 * @return the length, the reused/constant flag and the bytes returned by
 *         {@code getBytesUnsafe()}
 */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder("Binary{");
  text.append(length());
  text.append(isBackingBytesReused ? " reused" : " constant");
  text.append(" bytes, ");
  text.append(Arrays.toString(getBytesUnsafe()));
  text.append("}");
  return text.toString();
}