for (int i = 0; i < carbonColumns.length; i++) { CarbonColumn carbonColumn = carbonColumns[i]; if (carbonColumn.isDimension()) { if (carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY)) { DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory .getDirectDictionaryGenerator(carbonColumn.getDataType()); fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(generator.getReturnType()), true, null); } else if (!carbonColumn.hasEncoding(Encoding.DICTIONARY)) { fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(carbonColumn.getDataType()), true, null); } else if (carbonColumn.isComplex()) { fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(carbonColumn.getDataType()), true, null); } else { fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType( org.apache.carbondata.core.metadata.datatype.DataTypes.INT), true, null); } else if (carbonColumn.isMeasure()) { DataType dataType = carbonColumn.getDataType(); if (dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN || dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT || dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT || dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG) { fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(dataType), true, null); } else if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(dataType)) { CarbonMeasure measure = (CarbonMeasure) carbonColumn;
Object convertedValue = this.name2Converters.get(carbonColumn.getColName()).convert( strFilterValue, badRecordLogHolder); if (carbonColumn.isMeasure()) { convertedValue = DataConvertUtil.getNullValueForMeasure(carbonColumn.getDataType(), carbonColumn.getColumnSchema().getScale()); if (carbonColumn.getDataType().equals(DataTypes.BOOLEAN)) { convertedValue = BooleanConvert.boolean2Byte((Boolean)convertedValue); internalFilterValue = CarbonUtil.getValueAsBytes(carbonColumn.getDataType(), convertedValue); } else if (carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY) || carbonColumn.hasEncoding(Encoding.DICTIONARY)) { if (DataTypeUtil.isPrimitiveColumn(carbonColumn.getDataType())) { if (convertedValue == null) { convertedValue = DataConvertUtil.getNullValueForMeasure(carbonColumn.getDataType(), carbonColumn.getColumnSchema().getScale()); CarbonUtil.getValueAsBytes(carbonColumn.getDataType(), convertedValue); } else { internalFilterValue = (byte[]) convertedValue;
/** * compute the column ordinal as per data is stored * * @param segmentProperties * @param column * @return */ public static int getColumnOrdinal(SegmentProperties segmentProperties, CarbonColumn column) { if (column.isMeasure()) { // as measures are stored at the end after all dimensions and complex dimensions hence add // the last dimension ordinal to measure ordinal. Segment properties will store min max // length in one array on the order normal dimension, complex dimension and then measure return segmentProperties.getLastDimensionColOrdinal() + column.getOrdinal(); } else { return column.getOrdinal(); } }
/**
 * Creates a column from its schema and ordinals, and builds the column identifier.
 *
 * @param columnSchema schema object stored on this column
 * @param ordinal value stored as this column's ordinal
 * @param schemaOrdinal value stored as this column's schema ordinal
 */
public CarbonColumn(ColumnSchema columnSchema, int ordinal, int schemaOrdinal) {
  this.columnSchema = columnSchema;
  this.ordinal = ordinal;
  this.schemaOrdinal = schemaOrdinal;
  // The identifier is assembled from this instance's own getters, so it must be created only
  // after the fields above have been assigned.
  // NOTE(review): the getters presumably read from columnSchema - confirm; they are also
  // overridable calls made from a constructor.
  this.columnIdentifier =
      new ColumnIdentifier(getColumnId(), getColumnProperties(), getDataType());
}
public static boolean[] getNoDictionaryMapping(CarbonColumn[] carbonColumns) { List<Boolean> noDictionaryMapping = new ArrayList<Boolean>(); for (CarbonColumn column : carbonColumns) { // for complex type need to break the loop if (column.isComplex()) { break; } if (!column.hasEncoding(Encoding.DICTIONARY) && column.isDimension()) { noDictionaryMapping.add(true); } else if (column.isDimension()) { noDictionaryMapping.add(false); } } return ArrayUtils .toPrimitive(noDictionaryMapping.toArray(new Boolean[noDictionaryMapping.size()])); }
@Override protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) { // no dictionary measure columns will be of original data, so convert it to bytes if (DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) { return CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); } return (byte[]) value; }
directDictionaryGenerators = new DirectDictionaryGenerator[storageColumns.length]; for (int i = 0; i < storageColumns.length; i++) { if (storageColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) { directDictionaryGenerators[i] = DirectDictionaryKeyGeneratorFactory .getDirectDictionaryGenerator(storageColumns[i].getDataType()); measureDataTypes[i] = storageColumns[dimensionCount + i].getDataType(); filterMap = new int[storageColumns.length]; for (int i = 0; i < storageColumns.length; i++) { if (storageColumns[i].isDimension()) { if (isFiltlerDimensions[storageColumns[i].getOrdinal()]) { isRequired[i] = true; isFilterRequired[i] = true; filterMap[i] = storageColumns[i].getOrdinal(); if (isFiltlerMeasures[storageColumns[i].getOrdinal()]) { isRequired[i] = true; isFilterRequired[i] = true; filterMap[i] = carbonTable.getDimensionOrdinalMax() + storageColumns[i].getOrdinal(); for (int j = 0; j < projection.length; j++) { for (int i = 0; i < storageColumns.length; i++) { if (storageColumns[i].getColName().equals(projection[j].getColName())) { isRequired[i] = true; isProjectionRequired[i] = true;
if (dataField.getColumn().isDimension()) { if (dataField.getColumn().hasEncoding(Encoding.DIRECT_DICTIONARY) && !dataField.getColumn().isComplex()) { return new DirectDictionaryFieldConverterImpl(dataField, nullFormat, index, isEmptyBadRecord); } else if (dataField.getColumn().hasEncoding(Encoding.DICTIONARY) && !dataField.getColumn().isComplex()) { DictionaryColumnUniqueIdentifier identifier = null; if (null == dataField.getColumn().getColumnSchema().getParentColumnTableRelations() || dataField.getColumn().getColumnSchema().getParentColumnTableRelations().isEmpty()) { identifier = new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, dataField.getColumn().getColumnIdentifier(), dataField.getColumn().getDataType()); return new DictionaryFieldConverterImpl(dataField.getColumn(), absoluteTableIdentifier.getCarbonTableIdentifier().getTableId(), } else { ParentColumnTableRelation parentColumnTableRelation = dataField.getColumn().getColumnSchema().getParentColumnTableRelations().get(0); RelationIdentifier relationIdentifier = parentColumnTableRelation ColumnIdentifier parentColumnIdentifier = new ColumnIdentifier(parentColumnTableRelation.getColumnId(), null, dataField.getColumn().getDataType()); AbsoluteTableIdentifier parentAbsoluteTableIdentifier = AbsoluteTableIdentifier.from(parentTablePath, parentTableIdentifier); identifier = new DictionaryColumnUniqueIdentifier(parentAbsoluteTableIdentifier, parentColumnIdentifier, dataField.getColumn().getDataType());
@Override public Object transform(Object input) { return ((CarbonColumn) input).getColName(); } });
AbsoluteTableIdentifier absoluteTableIdentifier, DictionaryClient client, Boolean useOnePass, Map<Object, Integer> localCache, int index, String nullFormat, Boolean isEmptyBadRecords) { DataType dataType = carbonColumn.getDataType(); if (DataTypes.isArrayType(dataType) || DataTypes.isMapType(dataType)) { List<CarbonDimension> listOfChildDimensions = new ArrayDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId(), carbonColumn.hasEncoding(Encoding.DICTIONARY)); for (CarbonDimension dimension : listOfChildDimensions) { arrayDataType.addChildren( createComplexType(dimension, carbonColumn.getColName(), absoluteTableIdentifier, client, useOnePass, localCache, index, nullFormat, isEmptyBadRecords)); new StructDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId(), carbonColumn.hasEncoding(Encoding.DICTIONARY)); for (CarbonDimension dimension : dimensions) { structDataType.addChildren( createComplexType(dimension, carbonColumn.getColName(), absoluteTableIdentifier, client, useOnePass, localCache, index, nullFormat, isEmptyBadRecords)); return new PrimitiveDataType(carbonColumn, parentName, carbonColumn.getColumnId(), (CarbonDimension) carbonColumn, absoluteTableIdentifier, client, useOnePass, localCache, nullFormat, isEmptyBadRecords);
/**
 * Converts a literal into its direct-dictionary surrogate key.
 *
 * <p>A {@code null} literal records a failure reason on {@code logHolder} and yields
 * {@code CarbonCommonConstants.DIRECT_DICT_VALUE_NULL}. A literal equal to {@code nullFormat}
 * yields the same constant without recording a reason. Otherwise the key produced by the
 * direct dictionary generator is returned; when that key equals the null constant and the
 * literal is non-empty (or is empty while {@code isEmptyBadRecord} is set), the per-column
 * failure message is looked up in, or added to, the holder's column message map and set as
 * the reason.
 *
 * @param value literal to convert; cast to {@code String}
 * @param logHolder receives the failure reason for values that cannot be converted
 * @return the surrogate key, or {@code CarbonCommonConstants.DIRECT_DICT_VALUE_NULL}
 */
@Override
public Object convert(Object value, BadRecordLogHolder logHolder) throws RuntimeException {
  String literalValue = (String) value;
  if (literalValue == null) {
    logHolder.setReason(
        CarbonDataProcessorUtil.prepareFailureReason(column.getColName(), column.getDataType()));
    return CarbonCommonConstants.DIRECT_DICT_VALUE_NULL;
  }
  if (literalValue.equals(nullFormat)) {
    return CarbonCommonConstants.DIRECT_DICT_VALUE_NULL;
  }

  int surrogateKey = directDictionaryGenerator.generateDirectSurrogateKey(literalValue);
  // An empty literal only counts as a bad record when isEmptyBadRecord is set.
  if (surrogateKey == CarbonCommonConstants.DIRECT_DICT_VALUE_NULL
      && (!literalValue.isEmpty() || isEmptyBadRecord)) {
    String columnName = column.getColName();
    String reason = logHolder.getColumnMessageMap().get(columnName);
    if (reason == null) {
      // build the message once per column and keep it in the holder's map
      reason = CarbonDataProcessorUtil.prepareFailureReason(columnName, column.getDataType());
      logHolder.getColumnMessageMap().put(columnName, reason);
    }
    logHolder.setReason(reason);
  }
  return surrogateKey;
}
/**
 * Prepares the dictionaries and data types of the columns involved in decoding. This
 * initialization is done inside the executor task.
 *
 * <p>The data type of every column is recorded. A forward dictionary is fetched only for
 * columns that have {@link Encoding#DICTIONARY}, do not have
 * {@link Encoding#DIRECT_DICTIONARY} and are not complex; the entry for every other column
 * stays {@code null}.
 *
 * @param carbonColumns column list
 * @param carbonTable table supplying the absolute table identifier and the dictionary path
 *                    table property
 * @throws IOException declared by the overridden method; presumably raised by the dictionary
 *                     cache lookup
 */
@Override
public void initialize(CarbonColumn[] carbonColumns, CarbonTable carbonTable)
    throws IOException {
  this.carbonColumns = carbonColumns;
  int columnCount = carbonColumns.length;
  dictionaries = new Dictionary[columnCount];
  dataTypes = new DataType[columnCount];
  for (int i = 0; i < columnCount; i++) {
    CarbonColumn column = carbonColumns[i];
    dataTypes[i] = column.getDataType();

    boolean needsForwardDictionary = column.hasEncoding(Encoding.DICTIONARY)
        && !column.hasEncoding(Encoding.DIRECT_DICTIONARY)
        && !column.isComplex();
    if (!needsForwardDictionary) {
      continue;
    }

    Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardDictionaryCache =
        CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY);
    String dictionaryPath = carbonTable.getTableInfo().getFactTable().getTableProperties()
        .get(CarbonCommonConstants.DICTIONARY_PATH);
    DictionaryColumnUniqueIdentifier dictionaryKey = new DictionaryColumnUniqueIdentifier(
        carbonTable.getAbsoluteTableIdentifier(), column.getColumnIdentifier(),
        dataTypes[i], dictionaryPath);
    dictionaries[i] = forwardDictionaryCache.get(dictionaryKey);
  }
}
boolean isNull = CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(literalValue); if (literalValue == null || isNull) { String message = logHolder.getColumnMessageMap().get(dataField.getColumn().getColName()); if (null == message) { message = CarbonDataProcessorUtil.prepareFailureReason(dataField.getColumn().getColName(), dataField.getColumn().getDataType()); logHolder.getColumnMessageMap().put(dataField.getColumn().getColName(), message); if (dataField.getColumn().isDimension()) { logHolder.setReason(message); } else if (literalValue.length() == 0) { if (isEmptyBadRecord) { String message = logHolder.getColumnMessageMap().get(dataField.getColumn().getColName()); if (null == message) { message = CarbonDataProcessorUtil.prepareFailureReason(dataField.getColumn().getColName(), dataField.getColumn().getDataType()); logHolder.getColumnMessageMap().put(dataField.getColumn().getColName(), message); if (dataField.getColumn().isDimension()) { String dateFormat = null; if (dataField.getColumn().getDataType() == DataTypes.DATE) { dateFormat = dataField.getDateFormat(); } else if (dataField.getColumn().getDataType() == DataTypes.TIMESTAMP) { dateFormat = dataField.getTimestampFormat(); dataField.getColumn().getDataType(), dataField.getColumn().getColumnSchema().getScale(), dataField.getColumn().getColumnSchema().getPrecision(), true, dateFormat);
CarbonColumn carbonColumn = currentCondExpression.getColumnList().get(0).getCarbonColumn(); if (carbonColumn.hasEncoding(Encoding.IMPLICIT)) { return new ConditionalFilterResolverImpl(expression, isExpressionResolve, true, currentCondExpression.getColumnList().get(0).getCarbonColumn().isMeasure()); if (currentCondExpression.isSingleColumn() && ! column.getDataType().isComplexType()) { if (column.isMeasure()) { if (FilterUtil.checkIfExpressionContainsColumn(currentCondExpression.getLeft()) && FilterUtil.checkIfExpressionContainsColumn(currentCondExpression.getRight()) || ( currentCondExpression.getColumnList().get(0).getCarbonColumn().isMeasure()); .hasEncoding(Encoding.DICTIONARY) || currentCondExpression.getColumnList().get(0) .getCarbonColumn().hasEncoding(Encoding.DIRECT_DICTIONARY)) { if (FilterUtil.checkIfExpressionContainsColumn(currentCondExpression.getLeft()) && FilterUtil.checkIfExpressionContainsColumn(currentCondExpression.getRight()) || ( currentCondExpression.getColumnList().get(0).getCarbonColumn().isMeasure()); currentCondExpression = (BinaryConditionalExpression) expression; column = currentCondExpression.getColumnList().get(0).getCarbonColumn(); if (currentCondExpression.isSingleColumn() && ! column.getDataType().isComplexType()) { if (column.isMeasure()) { if (FilterUtil.checkIfExpressionContainsColumn(currentCondExpression.getLeft()) && FilterUtil.checkIfExpressionContainsColumn(currentCondExpression.getRight()) || ( .hasEncoding(Encoding.DICTIONARY) || currentCondExpression.getColumnList().get(0) .getCarbonColumn().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
/**
 * Creates the query type for every complex column in {@code carbonColumns}.
 *
 * <p>The result has one slot per input column. Slots of non-complex columns stay
 * {@code null}. An array column gets an {@code ArrayQueryType} and a struct column a
 * {@code StructQueryType}, each built with the column name used as both name arguments and
 * the column's index; its children are then filled in through {@code fillChildren}.
 *
 * @param carbontable table passed on to {@code fillChildren}
 * @param carbonColumns columns to inspect
 * @param cache dictionary cache passed on to {@code fillChildren}
 * @return array parallel to {@code carbonColumns} holding the query types of complex columns
 * @throws IOException declared for the {@code fillChildren} call
 * @throws UnsupportedOperationException if a complex column is neither an array nor a struct
 */
public static GenericQueryType[] getComplexDimensions(CarbonTable carbontable,
    CarbonColumn[] carbonColumns, Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache)
    throws IOException {
  GenericQueryType[] queryTypes = new GenericQueryType[carbonColumns.length];
  for (int i = 0; i < carbonColumns.length; i++) {
    CarbonColumn column = carbonColumns[i];
    if (!column.isComplex()) {
      continue;
    }
    String columnName = column.getColName();
    GenericQueryType complexType;
    if (DataTypes.isArrayType(column.getDataType())) {
      complexType = new ArrayQueryType(columnName, columnName, i);
    } else if (DataTypes.isStructType(column.getDataType())) {
      complexType = new StructQueryType(columnName, columnName, i);
    } else {
      throw new UnsupportedOperationException(
          column.getDataType().getName() + " is not supported");
    }
    queryTypes[i] = complexType;
    fillChildren(carbontable, complexType, (CarbonDimension) column, i, cache);
  }
  return queryTypes;
}
try { GenericDataType complextType = dataFieldsWithComplexDataType.get(dataFields[i].getColumn().getOrdinal()); complextType.writeByteArray(data[orderOfData[i]], dataOutputStream, logHolder); dataOutputStream.close(); DataType dataType = dataFields[i].getColumn().getDataType(); if (dataType == DataTypes.DATE && data[orderOfData[i]] instanceof Long) { if (dateDictionaryGenerator == null) {
if (column.getDataType() == DataTypes.DATE) { dataField.setDateFormat(loadModel.getDateFormat()); column.setDateFormat(loadModel.getDateFormat()); } else if (column.getDataType() == DataTypes.TIMESTAMP) { dataField.setTimestampFormat(loadModel.getTimestampformat()); column.setTimestampFormat(loadModel.getTimestampformat()); if (column.isComplex()) { complexDataFields.add(dataField); List<CarbonDimension> childDimensions = for (CarbonColumn column : measures) { if (!(column.getColName().equals("default_dummy_measure"))) { dataFields.add(new DataField(column));
protected void addValue2BloomIndex(int indexColIdx, Object value) { byte[] indexValue; // convert measure to bytes // convert non-dict dimensions to simple bytes without length // convert internal-dict dimensions to simple bytes without any encode if (indexColumns.get(indexColIdx).isMeasure()) { // NULL value of all measures are already processed in `ColumnPage.getData` // or `RawBytesReadSupport.readRow` with actual data type // Carbon stores boolean as byte. Here we convert it for `getValueAsBytes` if (indexColumns.get(indexColIdx).getDataType().equals(DataTypes.BOOLEAN)) { value = BooleanConvert.boolean2Byte((Boolean)value); } indexValue = CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); } else { if (indexColumns.get(indexColIdx).hasEncoding(Encoding.DICTIONARY) || indexColumns.get(indexColIdx).hasEncoding(Encoding.DIRECT_DICTIONARY)) { indexValue = convertDictionaryValue(indexColIdx, value); } else { indexValue = convertNonDictionaryValue(indexColIdx, value); } } if (indexValue.length == 0) { indexValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; } indexBloomFilters.get(indexColIdx).add(new Key(indexValue)); }
else if (carbonColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) { if (DataType.TIMESTAMP == carbonColumns[i].getDataType()) { data[i] = new Timestamp((long) data[i] / 1000L); } else if (DataType.DATE == carbonColumns[i].getDataType()) { data[i] = new Date((long) data[i]);
/** * Match the columns for transactional and non transactional tables * @param isTransactionalTable * @param queryColumn * @param tableColumn * @return */ private static boolean isColumnMatches(boolean isTransactionalTable, CarbonColumn queryColumn, CarbonColumn tableColumn) { // If it is non transactional table just check the column names, no need to validate // column id as multiple sdk's output placed in a single folder doesn't have same // column ID but can have same column name if (tableColumn.getDataType().isComplexType() && !(tableColumn.getDataType().getId() == DataTypes.ARRAY_TYPE_ID)) { if (tableColumn.getColumnId().equalsIgnoreCase(queryColumn.getColumnId())) { return true; } else { return isColumnMatchesStruct(tableColumn, queryColumn); } } else { return (tableColumn.getColumnId().equalsIgnoreCase(queryColumn.getColumnId()) || ( !isTransactionalTable && tableColumn.getColName() .equalsIgnoreCase(queryColumn.getColName()))); } }