Object instance = clazz.newInstance();
if (instance instanceof Compressor) {
  if (!((Compressor) instance).getName().equals(compressorClassName)) {
    throw new RuntimeException(String.format("For a compressor not natively supported by"
        + " carbondata, getName() should return the full class name. Expected '%s',"
        + " found '%s'", compressorClassName, ((Compressor) instance).getName()));
  }
}
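The check above enforces CarbonData's contract for pluggable compressors: a compressor that is not natively supported must report its fully qualified class name from getName(), so the factory can re-instantiate it by name when the files are read back. A minimal sketch of a conforming implementation, using a hypothetical custom class (declared abstract so only the naming contract needs to be shown):

import org.apache.carbondata.core.datastore.compression.Compressor;

// Hypothetical custom compressor; the remaining Compressor methods
// (compressByte, unCompressByte, rawCompress, ...) are left to a real
// implementation.
public abstract class MyCustomCompressor implements Compressor {
  @Override
  public String getName() {
    // must be the fully qualified class name so CompressorFactory can
    // recreate the compressor reflectively when the data is read back
    return getClass().getName();
  }
}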
DataType dataType = columnPageEncoderMeta.getStoreDataType();
if (dataType == DataTypes.BOOLEAN) {
  return compressor.compressByte(getBooleanPage());
} else if (dataType == DataTypes.BYTE) {
  return compressor.compressByte(getBytePage());
} else if (dataType == DataTypes.SHORT) {
  return compressor.compressShort(getShortPage());
} else if (dataType == DataTypes.SHORT_INT) {
  // SHORT_INT pages are stored byte-packed, so they use the byte[] entry point
  return compressor.compressByte(getShortIntPage());
} else if (dataType == DataTypes.INT) {
  return compressor.compressInt(getIntPage());
} else if (dataType == DataTypes.LONG) {
  return compressor.compressLong(getLongPage());
} else if (dataType == DataTypes.FLOAT) {
  return compressor.compressFloat(getFloatPage());
} else if (dataType == DataTypes.DOUBLE) {
  return compressor.compressDouble(getDoublePage());
} else if (DataTypes.isDecimal(dataType)) {
  return compressor.compressByte(getDecimalPage());
} else if (dataType == DataTypes.BYTE_ARRAY
    && columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
  return compressor.compressByte(getComplexChildrenLVFlattenedBytePage());
} else if (dataType == DataTypes.BYTE_ARRAY
    && (columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_STRUCT
        || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_LONG_VALUE
        || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_VALUE)) {
  return compressor.compressByte(getComplexParentFlattenedBytePage());
} else if (dataType == DataTypes.BYTE_ARRAY) {
  // general variable-length case: length-value flattened bytes
  return compressor.compressByte(getLVFlattenedBytePage());
} else {
  throw new UnsupportedOperationException("unsupported data type for compress: " + dataType);
}
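Each fixed-width page type maps to a type-specific entry point on Compressor, while byte-packed pages (SHORT_INT, decimal, complex children) all go through compressByte. A small sketch of calling these entry points directly, assuming the built-in snappy compressor:

import org.apache.carbondata.core.datastore.compression.Compressor;
import org.apache.carbondata.core.datastore.compression.CompressorFactory;

public class TypedCompressSketch {
  public static void main(String[] args) {
    Compressor compressor = CompressorFactory.getInstance().getCompressor("snappy");
    // fixed-width pages use the entry point matching their element type
    byte[] fromShorts = compressor.compressShort(new short[] {1, 2, 3});
    byte[] fromDoubles = compressor.compressDouble(new double[] {1.0, 2.0});
    // byte-packed pages go through the byte[] entry point, as in the dispatch above
    byte[] fromPacked = compressor.compressByte(new byte[] {1, 2, 3});
  }
}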
@Override
public void decodeAndFillVector(byte[] input, int offset, int length,
    ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded, int pageSize,
    ReusableDataBuffer reusableDataBuffer) throws MemoryException, IOException {
  Compressor compressor =
      CompressorFactory.getInstance().getCompressor(meta.getCompressorName());
  byte[] unCompressData;
  if (null != reusableDataBuffer && compressor.supportReusableBuffer()) {
    int uncompressedLength = compressor.unCompressedLength(input, offset, length);
    unCompressData = reusableDataBuffer.getDataBuffer(uncompressedLength);
    compressor.rawUncompress(input, offset, length, unCompressData);
  } else {
    unCompressData = compressor.unCompressByte(input, offset, length);
  }
  converter.decodeAndFillVector(unCompressData, vectorInfo, nullBits, meta.getStoreDataType(),
      pageSize);
}
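This reusable-buffer pattern recurs throughout the decode paths: when the caller supplies a ReusableDataBuffer and the codec can report the exact uncompressed size up front, decompression happens into a recycled array instead of allocating a fresh byte[] per page. A sketch of the decision in isolation (input, offset, length, compressor and reusableDataBuffer are assumed to come from the surrounding reader):

byte[] unCompressData;
if (reusableDataBuffer != null && compressor.supportReusableBuffer()) {
  // the exact uncompressed size is known before decompressing, so a pooled
  // buffer of at least that size can be reused across pages
  int uncompressedLength = compressor.unCompressedLength(input, offset, length);
  unCompressData = reusableDataBuffer.getDataBuffer(uncompressedLength);
  compressor.rawUncompress(input, offset, length, unCompressData);
} else {
  // fallback: the codec allocates and returns a fresh array per page
  unCompressData = compressor.unCompressByte(input, offset, length);
}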
@Override
public byte[] compress(Compressor compressor) throws MemoryException, IOException {
  if (UnsafeMemoryManager.isOffHeap() && compressor.supportUnsafe()) {
    // use raw compression and copy to byte[]
    int inputSize = totalLength;
    long compressedMaxSize = compressor.maxCompressedLength(inputSize);
    MemoryBlock compressed =
        UnsafeMemoryManager.allocateMemoryWithRetry(taskId, compressedMaxSize);
    long outSize = compressor.rawCompress(baseOffset, inputSize, compressed.getBaseOffset());
    // the result must fit in an int-indexed Java array
    assert outSize < Integer.MAX_VALUE;
    byte[] output = new byte[(int) outSize];
    CarbonUnsafe.getUnsafe()
        .copyMemory(compressed.getBaseObject(), compressed.getBaseOffset(), output,
            CarbonUnsafe.BYTE_ARRAY_OFFSET, outSize);
    UnsafeMemoryManager.INSTANCE.freeMemory(taskId, compressed);
    return output;
  } else {
    return super.compress(compressor);
  }
}
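The destination block above is sized with maxCompressedLength(), the codec's worst-case output size for a given input size, so rawCompress can never overrun it. A tiny sketch of that sizing contract, assuming the snappy compressor:

import org.apache.carbondata.core.datastore.compression.Compressor;
import org.apache.carbondata.core.datastore.compression.CompressorFactory;

public class WorstCaseSizing {
  public static void main(String[] args) {
    Compressor compressor = CompressorFactory.getInstance().getCompressor("snappy");
    int inputSize = 64 * 1024;
    // worst case exceeds the input: codecs add framing overhead and may
    // expand incompressible data
    long worstCase = compressor.maxCompressedLength(inputSize);
    System.out.println(worstCase >= inputSize); // expected: true
  }
}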
/**
 * Convert schema to binary
 */
public static byte[] convertSchemaToBinary(List<ColumnSchema> columnSchemas) throws IOException {
  ByteArrayOutputStream stream = new ByteArrayOutputStream();
  DataOutput dataOutput = new DataOutputStream(stream);
  dataOutput.writeShort(columnSchemas.size());
  for (ColumnSchema columnSchema : columnSchemas) {
    if (columnSchema.getColumnReferenceId() == null) {
      columnSchema.setColumnReferenceId(columnSchema.getColumnUniqueId());
    }
    columnSchema.write(dataOutput);
  }
  byte[] byteArray = stream.toByteArray();
  // Compress to reduce the size of schema
  return CompressorFactory.NativeSupportedCompressor.SNAPPY.getCompressor().compressByte(
      byteArray);
}
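Because the codec here is fixed to SNAPPY rather than taken from table properties, readers can decompress the schema without consulting any metadata. A hedged sketch of the inverse operation (the method name and exact shape are assumptions for illustration; ColumnSchema's readFields is the counterpart to the write call above):

public static List<ColumnSchema> convertBinaryToSchema(byte[] binary) throws IOException {
  // decompress with the same fixed Snappy codec used on the write side
  byte[] uncompressed = CompressorFactory.NativeSupportedCompressor.SNAPPY
      .getCompressor().unCompressByte(binary);
  DataInput dataInput = new DataInputStream(new ByteArrayInputStream(uncompressed));
  int numberOfColumns = dataInput.readShort();
  List<ColumnSchema> columnSchemas = new ArrayList<>(numberOfColumns);
  for (int i = 0; i < numberOfColumns; i++) {
    ColumnSchema schema = new ColumnSchema();
    schema.readFields(dataInput);
    columnSchemas.add(schema);
  }
  return columnSchemas;
}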
List<ByteBuffer> encoderMetas = localDictionaryChunk.getDictionary_meta().getEncoder_meta();
ColumnPageDecoder decoder = DefaultEncodingFactory.getInstance().createDecoder(
    encodings, encoderMetas, compressor.getName());
ColumnPage decode = decoder.decode(localDictionaryChunk.getDictionary_data(), 0,
    localDictionaryChunk.getDictionary_data().length);
BitSet usedDictionary = BitSet.valueOf(compressor.unCompressByte(
    localDictionaryChunk.getDictionary_values()));
int length = usedDictionary.length();
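The bitset marks which local-dictionary surrogate keys were actually used in the page. A sketch of walking it, assuming the decoded page exposes values by row id:

// iterate only the dictionary entries the page actually used
for (int key = usedDictionary.nextSetBit(0); key >= 0;
    key = usedDictionary.nextSetBit(key + 1)) {
  byte[] dictionaryValue = decode.getBytes(key); // assumed accessor on ColumnPage
}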
int uncompressedSize = compressor.unCompressedLength(rawData.array(), copySourcePoint,
    dimensionColumnChunk.data_page_length);
if (null != reusableDataBuffer) {
  dataPage = reusableDataBuffer.getDataBuffer(uncompressedSize);
} else {
  dataPage = new byte[uncompressedSize];
}
compressor.rawUncompress(rawData.array(), copySourcePoint,
    dimensionColumnChunk.data_page_length, dataPage);
copySourcePoint += dimensionColumnChunk.data_page_length;
public void readBlockletData(BlockletHeader header) throws IOException {
  ensureCapacity(header.getBlocklet_length());
  offset = 0;
  int len = readIntFromStream();
  byte[] b = new byte[len];
  if (!readBytesFromStream(b, 0, len)) {
    throw new EOFException("Failed to read blocklet data");
  }
  compressor.rawUncompress(b, buffer);
}
@Override
void encodeIndexStorage(ColumnPage inputPage) {
  BlockIndexerStorage<byte[][]> indexStorage =
      new BlockIndexerStorageForShort(inputPage.getByteArrayPage(), false, false, false);
  byte[] flattened = ByteUtil.flatten(indexStorage.getDataPage());
  Compressor compressor = CompressorFactory.getInstance().getCompressor(
      inputPage.getColumnCompressorName());
  byte[] compressed = compressor.compressByte(flattened);
  super.indexStorage = indexStorage;
  super.compressedDataPage = compressed;
}
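ByteUtil.flatten behaves like plain concatenation of the row arrays, so the compressor sees one contiguous byte[]; the row boundaries are carried separately by the index storage. A minimal illustration:

// {1, 2} + {3} + {4, 5, 6} -> {1, 2, 3, 4, 5, 6}
byte[][] rows = { {1, 2}, {3}, {4, 5, 6} };
byte[] flat = ByteUtil.flatten(rows);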
ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
int uncompressedSize = compressor.unCompressedLength(rawData.array(),
    (int) dimensionRawColumnChunk.getOffSet(), dimensionRawColumnChunk.getLength());
if (null != reusableDataBuffer) {
  dataPage = reusableDataBuffer.getDataBuffer(uncompressedSize);
} else {
  dataPage = new byte[uncompressedSize];
}
compressor.rawUncompress(rawData.array(), (int) dimensionRawColumnChunk.getOffSet(),
    dimensionRawColumnChunk.getLength(), dataPage);
    CarbonCommonConstants.COMPRESSOR);
if (null == columnCompressor) {
  columnCompressor = CompressorFactory.getInstance().getCompressor().getName();
}
void appendBlocklet(DataOutputStream outputStream) throws IOException {
  outputStream.write(CarbonStreamOutputFormat.CARBON_SYNC_MARKER);

  BlockletInfo blockletInfo = new BlockletInfo();
  blockletInfo.setNum_rows(getRowIndex() + 1);
  BlockletHeader blockletHeader = new BlockletHeader();
  blockletHeader.setBlocklet_length(getCount());
  blockletHeader.setMutation(MutationType.INSERT);
  blockletHeader.setBlocklet_info(blockletInfo);
  // add blocklet level min/max
  blockletMinMaxIndex = generateBlockletMinMax();
  if (blockletInfo.getNum_rows() > 1) {
    BlockletIndex blockletIndex = new BlockletIndex();
    blockletIndex.setMin_max_index(CarbonMetadataUtil.convertMinMaxIndex(blockletMinMaxIndex));
    blockletHeader.setBlocklet_index(blockletIndex);
  }

  byte[] headerBytes = CarbonUtil.getByteArray(blockletHeader);
  outputStream.writeInt(headerBytes.length);
  outputStream.write(headerBytes);

  byte[] compressed = compressor.compressByte(getBytes(), getCount());
  outputStream.writeInt(compressed.length);
  outputStream.write(compressed);
}
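Together with readBlockletData above, this defines the streaming record framing: a sync marker, a length-prefixed thrift BlockletHeader, then a length-prefixed compressed row block. A simplified sketch of the matching read sequence, assuming a DataInputStream positioned just past the sync marker (the actual reader decompresses into a preallocated buffer via rawUncompress):

int headerLength = inputStream.readInt();
byte[] headerBytes = new byte[headerLength];
inputStream.readFully(headerBytes); // thrift-serialized BlockletHeader
int dataLength = inputStream.readInt();
byte[] compressedRows = new byte[dataLength];
inputStream.readFully(compressedRows);
byte[] rows = compressor.unCompressByte(compressedRows);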
@Override
public void decodeAndFillVector(byte[] input, int offset, int length,
    ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded, int pageSize,
    ReusableDataBuffer reusableDataBuffer) throws MemoryException, IOException {
  Compressor compressor =
      CompressorFactory.getInstance().getCompressor(meta.getCompressorName());
  byte[] unCompressData;
  if (null != reusableDataBuffer && compressor.supportReusableBuffer()) {
    int uncompressedLength = compressor.unCompressedLength(input, offset, length);
    unCompressData = reusableDataBuffer.getDataBuffer(uncompressedLength);
    compressor.rawUncompress(input, offset, length, unCompressData);
  } else {
    unCompressData = compressor.unCompressByte(input, offset, length);
  }
  if (DataTypes.isDecimal(meta.getSchemaDataType())) {
    // decimal pages need a converter matched to the column's precision/scale
    TableSpec.ColumnSpec columnSpec = meta.getColumnSpec();
    DecimalConverterFactory.DecimalConverter decimalConverter =
        DecimalConverterFactory.INSTANCE
            .getDecimalConverter(columnSpec.getPrecision(), columnSpec.getScale());
    vectorInfo.decimalConverter = decimalConverter;
  }
  converter.decodeAndFillVector(unCompressData, vectorInfo, nullBits, meta.getStoreDataType(),
      pageSize);
}
    CarbonCommonConstants.COMPRESSOR);
if (null == compressorName) {
  compressorName = CompressorFactory.getInstance().getCompressor().getName();
}
    CompressorFactory.getInstance().getCompressor(columnCompressor).compressByte(
        usedDictionaryValues.toByteArray()));
    CarbonCommonConstants.COMPRESSOR);
if (columnCompressor == null) {
  columnCompressor = CompressorFactory.getInstance().getCompressor().getName();
}
private void fillNullBitSet(ColumnPage inputPage, DataChunk2 dataChunk) {
  PresenceMeta presenceMeta = new PresenceMeta();
  presenceMeta.setPresent_bit_streamIsSet(true);
  Compressor compressor = CompressorFactory.getInstance().getCompressor(
      inputPage.getColumnCompressorName());
  presenceMeta.setPresent_bit_stream(
      compressor.compressByte(inputPage.getNullBits().toByteArray()));
  dataChunk.setPresence(presenceMeta);
}
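On the read side the same stream is decompressed straight back into a BitSet. A sketch of the round trip, assuming the snappy compressor:

// the null bitmap survives a compress/uncompress round trip intact
Compressor compressor = CompressorFactory.getInstance().getCompressor("snappy");
BitSet nullBits = new BitSet();
nullBits.set(3);
nullBits.set(42);
byte[] stream = compressor.compressByte(nullBits.toByteArray());
BitSet restored = BitSet.valueOf(compressor.unCompressByte(stream));
// restored.equals(nullBits) -> true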
int uncompressedLength;
byte[] unCompressData;
if (null != reusableDataBuffer && compressor.supportReusableBuffer()) {
  uncompressedLength = compressor.unCompressedLength(input, offset, length);
  unCompressData = reusableDataBuffer.getDataBuffer(uncompressedLength);
  compressor.rawUncompress(input, offset, length, unCompressData);
} else {
  unCompressData = compressor.unCompressByte(input, offset, length);
  uncompressedLength = unCompressData.length;
}