/**
 * Describes this page by its converter and the data type reported by {@code columnPage}.
 *
 * @return text of the form {@code [converter: <converter>, data type: <data type>]}
 */
@Override
public String toString() {
  // The closing bracket balances the opening one so the description reads cleanly
  // when it is appended to error messages.
  return String.format("[converter: %s, data type: %s]", converter, columnPage.getDataType());
}
private static ColumnPageEncoder createCodecForDimension(ColumnPage inputPage) { TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec(); if (columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { if (inputPage.getDataType() == DataTypes.BYTE_ARRAY || inputPage.getDataType() == DataTypes.STRING) { // use legacy encoder return null; } else if ((inputPage.getDataType() == DataTypes.BYTE) || (inputPage.getDataType() == DataTypes.SHORT) || (inputPage.getDataType() == DataTypes.INT) || ( inputPage.getDataType() == DataTypes.LONG)) { return selectCodecByAlgorithmForIntegral(inputPage.getStatistics(), true, columnSpec) .createEncoder(null); } else if ((inputPage.getDataType() == DataTypes.FLOAT) || (inputPage.getDataType() == DataTypes.DOUBLE)) { return selectCodecByAlgorithmForFloating(inputPage.getStatistics(), true, columnSpec) .createEncoder(null); } } // use legacy encoder return null; }
/**
 * Builds the encoder metadata for a page from its column spec, data type, statistics and
 * column compressor name.
 *
 * @param inputPage page whose properties are copied into the metadata
 * @return a new {@code ColumnPageEncoderMeta} describing {@code inputPage}
 */
@Override
protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
  ColumnPageEncoderMeta encoderMeta = new ColumnPageEncoderMeta(
      inputPage.getColumnSpec(),
      inputPage.getDataType(),
      inputPage.getStatistics(),
      inputPage.getColumnCompressorName());
  return encoderMeta;
}
};
/**
 * Feeds every value of the input page to {@code putValue} and returns the collected result.
 *
 * <p>The page's data type is validated first and remembered in {@code this.dataType}.
 * Only BYTE, SHORT, INT and LONG pages are handled.
 *
 * @param input page whose values are encoded
 * @return the bytes produced by {@code collectResult()}
 * @throws UnsupportedOperationException if the page's data type is not one of the four
 *         handled integral types
 */
@Override
protected byte[] encodeData(ColumnPage input) throws MemoryException, IOException {
  validateDataType(input.getDataType());
  this.dataType = input.getDataType();
  if (dataType == DataTypes.BYTE) {
    for (byte value : input.getBytePage()) {
      putValue(value);
    }
  } else if (dataType == DataTypes.SHORT) {
    for (short value : input.getShortPage()) {
      putValue(value);
    }
  } else if (dataType == DataTypes.INT) {
    for (int value : input.getIntPage()) {
      putValue(value);
    }
  } else if (dataType == DataTypes.LONG) {
    for (long value : input.getLongPage()) {
      putValue(value);
    }
  } else {
    throw new UnsupportedOperationException(input.getDataType() +
        " does not support RLE encoding");
  }
  return collectResult();
}
/**
 * Creates the encoder for a dimension column based on its column type.
 *
 * <p>GLOBAL_DICTIONARY, DIRECT_DICTIONARY and PLAIN_VALUE columns use a
 * {@code DirectCompressCodec} built from the page's data type; COMPLEX columns use a
 * {@code ComplexDimensionIndexCodec}.
 *
 * @param columnSpec spec of the dimension column
 * @param inputPage page to be encoded
 * @return the encoder for the column
 * @throws RuntimeException if the column type is none of the handled ones
 */
private ColumnPageEncoder createEncoderForDimension(TableSpec.DimensionSpec columnSpec,
    ColumnPage inputPage) {
  final ColumnPageEncoder encoder;
  switch (columnSpec.getColumnType()) {
    case GLOBAL_DICTIONARY:
    case DIRECT_DICTIONARY:
    case PLAIN_VALUE:
      encoder = new DirectCompressCodec(inputPage.getDataType()).createEncoder(null);
      break;
    case COMPLEX:
      encoder = new ComplexDimensionIndexCodec(false, false).createEncoder(null);
      break;
    default:
      throw new RuntimeException("unsupported dimension type: " + columnSpec.getColumnType());
  }
  return encoder;
}
/**
 * Builds the RLE encoder metadata for a page from its column spec, data type, page size,
 * statistics and column compressor name.
 *
 * @param inputPage page whose properties are copied into the metadata
 * @return a new {@code RLEEncoderMeta} describing {@code inputPage}
 */
@Override
protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
  RLEEncoderMeta rleMeta = new RLEEncoderMeta(
      inputPage.getColumnSpec(),
      inputPage.getDataType(),
      inputPage.getPageSize(),
      inputPage.getStatistics(),
      inputPage.getColumnCompressorName());
  return rleMeta;
}
/**
 * Builds the min/max index entry for a page from its statistics.
 *
 * <p>When the encoders are "encoded with meta" and the column is a PLAIN_VALUE column, the
 * min and max are serialized with the no-dictionary conversion in {@code DataTypeUtil};
 * otherwise {@code CarbonUtil.getValueAsBytes} is used. The min/max presence flag comes from
 * the statistics' {@code writeMinMax()}.
 *
 * @param inputPage page whose statistics supply min and max
 * @param encoders encodings applied to the page
 * @return index holding one max value, one min value and the presence flag
 */
private BlockletMinMaxIndex buildMinMaxIndex(ColumnPage inputPage, List<Encoding> encoders) {
  BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();
  boolean plainValueWithMeta = CarbonUtil.isEncodedWithMeta(encoders)
      && inputPage.getColumnSpec().getColumnType() == ColumnType.PLAIN_VALUE;

  ByteBuffer maxBuffer;
  ByteBuffer minBuffer;
  if (!plainValueWithMeta) {
    byte[] maxBytes =
        CarbonUtil.getValueAsBytes(inputPage.getDataType(), inputPage.getStatistics().getMax());
    maxBuffer = ByteBuffer.wrap(maxBytes);
    byte[] minBytes =
        CarbonUtil.getValueAsBytes(inputPage.getDataType(), inputPage.getStatistics().getMin());
    minBuffer = ByteBuffer.wrap(minBytes);
  } else {
    maxBuffer = ByteBuffer.wrap(
        DataTypeUtil.getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(
            inputPage.getStatistics().getMax(), inputPage.getDataType()));
    minBuffer = ByteBuffer.wrap(
        DataTypeUtil.getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(
            inputPage.getStatistics().getMin(), inputPage.getDataType()));
  }

  minMaxIndex.addToMax_values(maxBuffer);
  minMaxIndex.addToMin_values(minBuffer);
  minMaxIndex.addToMin_max_presence(inputPage.getStatistics().writeMinMax());
  return minMaxIndex;
}
LOGGER.debug( "Encoder result ---> Source data type: " + noDictDimensionPages[noDictIndex] .getDataType().getName() + " Destination data type: " + targetDataType .getName() + " for the column: " + noDictDimensionPages[noDictIndex] .getColumnSpec().getFieldName() + " having encoding type: "
/**
 * Creates the encoder for a measure or no-dictionary primitive page, chosen by the data
 * type recorded in the page statistics.
 *
 * <ul>
 *   <li>BOOLEAN and BYTE_ARRAY: {@code DirectCompressCodec} on the page's own data type</li>
 *   <li>BYTE, SHORT, INT, LONG, TIMESTAMP: codec selected for integral values</li>
 *   <li>decimal: delegated to {@code createEncoderForDecimalDataTypeMeasure}</li>
 *   <li>FLOAT, DOUBLE: codec selected for floating values</li>
 * </ul>
 *
 * @param columnPage page to be encoded
 * @param columnSpec spec of the column the page belongs to
 * @return the encoder for the page
 * @throws RuntimeException if the statistics' data type is none of the above
 */
private ColumnPageEncoder createEncoderForMeasureOrNoDictionaryPrimitive(ColumnPage columnPage,
    TableSpec.ColumnSpec columnSpec) {
  SimpleStatsResult stats = columnPage.getStatistics();
  DataType statsType = stats.getDataType();

  if (statsType == DataTypes.BOOLEAN) {
    return new DirectCompressCodec(columnPage.getDataType()).createEncoder(null);
  }
  if (statsType == DataTypes.BYTE
      || statsType == DataTypes.SHORT
      || statsType == DataTypes.INT
      || statsType == DataTypes.LONG
      || statsType == DataTypes.TIMESTAMP) {
    return selectCodecByAlgorithmForIntegral(stats, false, columnSpec).createEncoder(null);
  }
  if (DataTypes.isDecimal(statsType)) {
    return createEncoderForDecimalDataTypeMeasure(columnPage, columnSpec);
  }
  if (statsType == DataTypes.FLOAT || statsType == DataTypes.DOUBLE) {
    return selectCodecByAlgorithmForFloating(stats, false, columnSpec).createEncoder(null);
  }
  if (statsType == DataTypes.BYTE_ARRAY) {
    return new DirectCompressCodec(columnPage.getDataType()).createEncoder(null);
  }
  throw new RuntimeException("unsupported data type: " + stats.getDataType());
}
/**
 * Appends a list of values to the column page at the given depth.
 *
 * <p>Writing starts at the row position stored in {@code currentRowIdList[depth]}, which is
 * advanced by one per value and stored back afterwards. For BYTE_ARRAY pages the raw bytes
 * are stored as-is. For other pages a null or empty value is recorded as null (page, stats
 * collector and null bit set), and any other value is converted according to the column's
 * schema data type before being stored.
 *
 * @param depth index of the target page in {@code columnPages}; asserted to be at most
 *              {@code complexColumnIndex}
 * @param dataList values to append, in row order
 */
public void putComplexData(int depth, List<byte[]> dataList) {
  assert (depth <= this.complexColumnIndex);
  int rowPosition = currentRowIdList[depth];
  for (byte[] value : dataList) {
    if (columnPages[depth].getDataType() == DataTypes.BYTE_ARRAY) {
      columnPages[depth].putData(rowPosition, value);
    } else if (value == null || value.length == 0) {
      columnPages[depth].putNull(rowPosition);
      columnPages[depth].statsCollector.updateNull(rowPosition);
      columnPages[depth].nullBitSet.set(rowPosition);
    } else {
      columnPages[depth].putData(rowPosition,
          DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(value,
              columnPages[depth].getColumnSpec().getSchemaDataType(), false));
    }
    rowPosition++;
  }
  currentRowIdList[depth] = rowPosition;
}
/**
 * Creates the encoder for a decimal measure page based on the page's decimal converter type.
 *
 * <p>DECIMAL_INT and DECIMAL_LONG pages use the codec selected for decimals from the page
 * statistics; every other converter type uses a {@code DirectCompressCodec} on the page's
 * data type.
 *
 * @param columnPage page to be encoded; cast to {@code DecimalColumnPage}
 * @param columnSpec spec of the column the page belongs to
 * @return the encoder for the page
 */
private ColumnPageEncoder createEncoderForDecimalDataTypeMeasure(ColumnPage columnPage,
    TableSpec.ColumnSpec columnSpec) {
  DecimalConverterFactory.DecimalConverterType converterType =
      ((DecimalColumnPage) columnPage).getDecimalConverter().getDecimalConverterType();
  final ColumnPageEncoder encoder;
  switch (converterType) {
    case DECIMAL_INT:
    case DECIMAL_LONG:
      encoder = selectCodecByAlgorithmForDecimal(columnPage.getStatistics(), converterType,
          columnSpec).createEncoder(null);
      break;
    default:
      encoder = new DirectCompressCodec(columnPage.getDataType()).createEncoder(null);
      break;
  }
  return encoder;
}
/**
 * Reads the value at {@code rowId} as a long.
 *
 * <p>BOOLEAN/BYTE, SHORT, SHORT_INT and INT pages are read in their stored width and passed
 * through {@code converter.decodeLong}; LONG pages are returned directly from the page.
 *
 * @param rowId row to read
 * @return the long value for the row
 * @throws RuntimeException if the page's data type is none of the handled ones
 */
@Override
public long getLong(int rowId) {
  DataType pageType = columnPage.getDataType();
  if (pageType == DataTypes.BOOLEAN || pageType == DataTypes.BYTE) {
    return converter.decodeLong(columnPage.getByte(rowId));
  }
  if (pageType == DataTypes.SHORT) {
    return converter.decodeLong(columnPage.getShort(rowId));
  }
  if (pageType == DataTypes.SHORT_INT) {
    return converter.decodeLong(columnPage.getShortInt(rowId));
  }
  if (pageType == DataTypes.INT) {
    return converter.decodeLong(columnPage.getInt(rowId));
  }
  if (pageType == DataTypes.LONG) {
    return columnPage.getLong(rowId);
  }
  throw new RuntimeException("internal error: " + this.toString());
}
/**
 * Returns a new column page filled from the index storage's data page, or the input page
 * itself when no index storage is set.
 *
 * @param input page supplying the column spec, data type, compressor name and page size
 *              for the new page
 * @return a new page populated via {@code putDataToPage}, or {@code input} when
 *         {@code indexStorage} is null
 * @throws MemoryException declared for the page allocation and population calls
 */
public ColumnPage getSortedColumnPageIfRequired(ColumnPage input) throws MemoryException {
  if (null == indexStorage) {
    return input;
  }
  Object[] storedData = indexStorage.getDataPage();
  ColumnPageEncoderMeta meta = new ColumnPageEncoderMeta(
      input.getColumnSpec(),
      input.getDataType(),
      input.getColumnPageEncoderMeta().getCompressorName());
  ColumnPage rebuiltPage = ColumnPage.newPage(meta, input.getPageSize());
  putDataToPage(rebuiltPage, storedData);
  return rebuiltPage;
}
/**
 * Creates a local-dictionary column page over an actual data page.
 *
 * <p>The superclass is initialised with the actual page's encoder meta and page size. If the
 * generator's threshold has already been reached, the encoded page's memory is freed and no
 * dictionary state is set up; otherwise a page-level dictionary, the encoded page, a key
 * generator and a one-element dummy key are initialised.
 *
 * @param actualDataColumnPage page holding the actual data
 * @param encodedColumnpage page for the encoded data; freed when the threshold is reached
 * @param localDictionaryGenerator generator consulted for the threshold and used to build
 *                                 the page-level dictionary
 * @param isComplexTypePrimitive passed through to the page-level dictionary
 * @param isDecoderBasedFallBackEnabled stored on this page as-is
 */
protected LocalDictColumnPage(ColumnPage actualDataColumnPage, ColumnPage encodedColumnpage,
    LocalDictionaryGenerator localDictionaryGenerator, boolean isComplexTypePrimitive,
    boolean isDecoderBasedFallBackEnabled) {
  super(actualDataColumnPage.getColumnPageEncoderMeta(), actualDataColumnPage.getPageSize());
  this.actualDataColumnPage = actualDataColumnPage;
  this.isDecoderBasedFallBackEnabled = isDecoderBasedFallBackEnabled;
  if (localDictionaryGenerator.isThresholdReached()) {
    // threshold already reached: the encoded column page is of no use, release its memory
    encodedColumnpage.freeMemory();
  } else {
    // threshold not reached: create the page level dictionary
    // for encoding with local dictionary
    pageLevelDictionary = new PageLevelDictionary(
        localDictionaryGenerator,
        actualDataColumnPage.getColumnSpec().getFieldName(),
        actualDataColumnPage.getDataType(),
        isComplexTypePrimitive,
        actualDataColumnPage.getColumnCompressorName());
    this.encodedDataColumnPage = encodedColumnpage;
    this.keyGenerator = KeyGeneratorFactory
        .getKeyGenerator(new int[] { CarbonCommonConstants.LOCAL_DICTIONARY_MAX + 1 });
    this.dummyKey = new int[1];
  }
}
/**
 * Reads the value at {@code rowId} as a double.
 *
 * <p>BOOLEAN/BYTE, SHORT, SHORT_INT, INT, LONG and FLOAT pages are read in their stored type
 * and passed through {@code converter.decodeDouble}; DOUBLE pages are returned directly from
 * the page.
 *
 * @param rowId row to read
 * @return the double value for the row
 * @throws RuntimeException if the page's data type is none of the handled ones
 */
@Override
public double getDouble(int rowId) {
  DataType pageType = columnPage.getDataType();
  if (pageType == DataTypes.BOOLEAN || pageType == DataTypes.BYTE) {
    return converter.decodeDouble(columnPage.getByte(rowId));
  }
  if (pageType == DataTypes.SHORT) {
    return converter.decodeDouble(columnPage.getShort(rowId));
  }
  if (pageType == DataTypes.SHORT_INT) {
    return converter.decodeDouble(columnPage.getShortInt(rowId));
  }
  if (pageType == DataTypes.INT) {
    return converter.decodeDouble(columnPage.getInt(rowId));
  }
  if (pageType == DataTypes.LONG) {
    return converter.decodeDouble(columnPage.getLong(rowId));
  }
  if (pageType == DataTypes.FLOAT) {
    return converter.decodeDouble(columnPage.getFloat(rowId));
  }
  if (pageType == DataTypes.DOUBLE) {
    return columnPage.getDouble(rowId);
  }
  throw new RuntimeException("internal error: " + this.toString());
}
/**
 * Converts the input page's values with the given converter and returns the compressed
 * bytes of {@code encodedPage}.
 *
 * <p>A fresh {@code encodedPage} is allocated with the input's column spec and compressor
 * name but with {@code targetDataType}. When {@code isInvertedIndex} is set, an index
 * storage is built from the input first, so the page that gets converted is the one returned
 * by {@code getSortedColumnPageIfRequired}.
 *
 * @param input page to encode
 * @param converter value converter applied to the page via {@code convertValue}
 * @param compressor compressor applied to {@code encodedPage}
 * @return the compressed bytes of {@code encodedPage}
 * @throws MemoryException declared for the page allocation calls
 * @throws IOException declared for the conversion and compression calls
 */
public byte[] encodeAndCompressPage(ColumnPage input, ColumnPageValueConverter converter,
    Compressor compressor) throws MemoryException, IOException {
  ColumnPageEncoderMeta targetMeta = new ColumnPageEncoderMeta(
      input.getColumnPageEncoderMeta().getColumnSpec(),
      targetDataType,
      input.getColumnPageEncoderMeta().getCompressorName());
  encodedPage = ColumnPage.newPage(targetMeta, input.getPageSize());
  if (isInvertedIndex) {
    indexStorage = new BlockIndexerStorageForNoDictionary(
        getPageBasedOnDataType(input), input.getDataType(), isInvertedIndex);
  }
  ColumnPage pageToConvert = getSortedColumnPageIfRequired(input);
  pageToConvert.convertValue(converter);
  return encodedPage.compress(compressor);
}
/**
 * Reads the value at {@code rowId} as a {@code BigDecimal}.
 *
 * <p>BYTE, SHORT, SHORT_INT and INT pages are read in their stored width, decoded to a long
 * by {@code converter.decodeLong}, and turned into a decimal by the page's decimal
 * converter. LONG and decimal pages are read directly through the page's
 * {@code getDecimal}.
 *
 * @param rowId row to read
 * @return the decimal value for the row
 * @throws RuntimeException if the page's data type is none of the handled ones
 */
@Override
public BigDecimal getDecimal(int rowId) {
  // The cast is done up front, before the data type is inspected.
  DecimalConverterFactory.DecimalConverter decimalConverter =
      ((DecimalColumnPage) columnPage).getDecimalConverter();
  DataType pageType = columnPage.getDataType();
  if (pageType == DataTypes.BYTE) {
    return decimalConverter.getDecimal(converter.decodeLong(columnPage.getByte(rowId)));
  }
  if (pageType == DataTypes.SHORT) {
    return decimalConverter.getDecimal(converter.decodeLong(columnPage.getShort(rowId)));
  }
  if (pageType == DataTypes.SHORT_INT) {
    return decimalConverter.getDecimal(converter.decodeLong(columnPage.getShortInt(rowId)));
  }
  if (pageType == DataTypes.INT) {
    return decimalConverter.getDecimal(converter.decodeLong(columnPage.getInt(rowId)));
  }
  if (pageType == DataTypes.LONG || DataTypes.isDecimal(pageType)) {
    return columnPage.getDecimal(rowId);
  }
  throw new RuntimeException("internal error: " + this.toString());
}
if (DataTypes.isDecimal(noDictDimensionPages[i].getDataType()) && model .isCompactionFlow() && value != null) { value = DataTypeUtil.getDataTypeConverter().convertFromDecimalToBigDecimal(value); if (DataTypes.isDecimal(measurePages[i].getDataType()) && model.isCompactionFlow() && value != null) {
private byte[] getChunkDataInBytes(int rowId) { ColumnType columnType = columnPage.getColumnSpec().getColumnType(); DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); DataType targetDataType = columnPage.getDataType(); if (null != localDictionary) { return localDictionary