/**
 * Returns true when the partition column holds epoch milliseconds: a bigint
 * column with no date pattern configured as the partition date format.
 */
public boolean partitionColumnIsTimeMillis() {
    if (partitionDateColumnRef == null) {
        return false;
    }
    final DataType colType = partitionDateColumnRef.getType();
    return colType.isBigInt() && !DateFormat.isDatePattern(partitionDateFormat);
}
// Exposes the distinct values of a column as a readable table, backed by the
// per-column output directory under factColumnsInputPath (sorted per the
// column's data type).
@Override public IReadableTable getDistinctValuesFor(TblColRef col) { return new SortedColumnDFSFile(factColumnsInputPath + "/" + col.getIdentity(), col.getType()); } }, new DictionaryProvider() {
private void init() { dynGtDataTypes = Lists.newArrayList(); dynGtColBlocks = Lists.newArrayList(); dynDim2gt = Maps.newHashMap(); dynMetrics2gt = Maps.newHashMap(); int gtColIdx = super.getColumnCount(); BitSet rtColBlock = new BitSet(); // dynamic dimensions for (TblColRef rtDim : dynDims) { dynDim2gt.put(rtDim, gtColIdx); dynGtDataTypes.add(rtDim.getType()); rtColBlock.set(gtColIdx); gtColIdx++; } dynamicDims = new ImmutableBitSet(rtColBlock); // dynamic metrics for (DynamicFunctionDesc rtFunc : dynFuncs) { dynMetrics2gt.put(rtFunc, gtColIdx); dynGtDataTypes.add(rtFunc.getReturnDataType()); rtColBlock.set(gtColIdx); gtColIdx++; } dynGtColBlocks.add(new ImmutableBitSet(rtColBlock)); }
/**
 * Decides whether the measure's first parameter column requires a dictionary.
 * Integer-family types other than bigint can be encoded directly and skip the
 * dictionary; every other type (including bigint) needs one.
 */
private boolean needDictionaryColumn(FunctionDesc functionDesc) {
    DataType dataType = functionDesc.getParameter().getColRefs().get(0).getType();
    // Simplified from the original if/return-false/return-true chain.
    return !(dataType.isIntegerFamily() && !dataType.isBigInt());
}
/**
 * Returns true when the partition column stores dates as integers (e.g.
 * 20160101): an int/bigint column whose partition format is a date pattern.
 */
public boolean partitionColumnIsYmdInt() {
    if (partitionDateColumnRef == null) {
        return false;
    }
    final DataType colType = partitionDateColumnRef.getType();
    final boolean integerTyped = colType.isInt() || colType.isBigInt();
    return integerTyped && DateFormat.isDatePattern(partitionDateFormat);
}
/**
 * Rewrites date/time constants during filter serialization. The visitor passes
 * through twice conceptually: a compare filter on a date/time column marks its
 * children in {@code dateCompareTupleChildren}; a constant filter previously
 * marked gets its values converted via {@code formatTime}.
 */
@SuppressWarnings("unchecked")
@Override
public TupleFilter onSerialize(TupleFilter filter) {
    if (filter instanceof CompareTupleFilter) {
        CompareTupleFilter compare = (CompareTupleFilter) filter;
        List<? extends TupleFilter> children = compare.getChildren();
        if (children == null || children.size() < 1) {
            throw new IllegalArgumentException("Illegal compare filter: " + compare);
        }
        TblColRef column = compare.getColumn();
        // Only date/time columns need constant rewriting.
        if (column == null || !column.getType().isDateTimeFamily()) {
            return compare;
        }
        for (TupleFilter child : filter.getChildren()) {
            dateCompareTupleChildren.put(child, column.getType());
        }
    }
    if (filter instanceof ConstantTupleFilter && dateCompareTupleChildren.containsKey(filter)) {
        ConstantTupleFilter constant = (ConstantTupleFilter) filter;
        DataType columnType = dateCompareTupleChildren.get(filter);
        Set<String> converted = Sets.newHashSet();
        for (String raw : (Collection<String>) constant.getValues()) {
            converted.add(formatTime(raw, columnType));
        }
        return new ConstantTupleFilter(converted);
    }
    return filter;
}
// Mapper setup: loads cube metadata and determines which UHC column this input
// split belongs to, based on the split's parent directory name.
@Override protected void doSetup(Context context) throws IOException {
    tmpBuf = ByteBuffer.allocate(4096);
    Configuration conf = context.getConfiguration();
    bindCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeInstance cube = CubeManager.getInstance(config).getCube(conf.get(BatchConstants.CFG_CUBE_NAME));
    List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();
    // The parent directory name of this split identifies the UHC column to process.
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    String colName = fileSplit.getPath().getParent().getName();
    for (int i = 0; i < uhcColumns.size(); i++) {
        if (uhcColumns.get(i).getIdentity().equalsIgnoreCase(colName)) {
            index = i;
            break;
        }
    }
    // NOTE(review): if no UHC column matches colName, 'index' silently keeps its
    // prior value — presumably the input path always names a UHC column; confirm
    // against the job that lays out these directories.
    type = uhcColumns.get(index).getType();
    //for debug
    logger.info("column name: " + colName);
    logger.info("index: " + index);
    logger.info("type: " + type);
}
// Emit the min and max of this column's range, encoded per the column's declared type.
DataType dataType = allCols.get(colIndex).getType();
writeFieldValue(context, dataType, colIndex, rangeInfo.getMin());
writeFieldValue(context, dataType, colIndex, rangeInfo.getMax());
/**
 * Folds every satisfying host record into the caller-provided per-column
 * min/max arrays, comparing values according to each host column's
 * data-type order.
 *
 * @param satisfyingHostRecords records to scan
 * @param hostCols              host columns, positionally aligned with record data
 * @param min                   running minima, updated in place
 * @param max                   running maxima, updated in place
 */
private static void findMinMax(Set<Array<String>> satisfyingHostRecords, TblColRef[] hostCols, String[] min, String[] max) {
    // Resolve each column's comparison order once, up front.
    DataTypeOrder[] columnOrders = new DataTypeOrder[hostCols.length];
    for (int col = 0; col < hostCols.length; col++) {
        columnOrders[col] = hostCols[col].getType().getOrder();
    }
    for (Array<String> record : satisfyingHostRecords) {
        String[] values = record.data;
        for (int col = 0; col < values.length; col++) {
            min[col] = columnOrders[col].min(min[col], values[col]);
            max[col] = columnOrders[col].max(max[col], values[col]);
        }
    }
}
/**
 * Builds a dictionary for every dimension column of the cube (excluding
 * derived columns) from the supplied records. Record cells are matched to
 * columns by position, in the order returned by
 * {@code listDimensionColumnsExcludingDerived(true)}.
 *
 * @return a map from each dictionary column to its freshly built dictionary
 * @throws IOException if dictionary building fails
 */
public static Map<TblColRef, Dictionary<String>> buildDictionary(final CubeInstance cubeInstance, Iterable<List<String>> recordList) throws IOException {
    final List<TblColRef> dictColumns = cubeInstance.getDescriptor()
            .listDimensionColumnsExcludingDerived(true);

    // Map record position -> column so cells can be routed to the right column.
    final HashMap<Integer, TblColRef> positionToColumn = Maps.newHashMap();
    int position = 0;
    for (TblColRef column : dictColumns) {
        positionToColumn.put(position++, column);
    }

    // Collect the distinct values observed for each dictionary column.
    HashMultimap<TblColRef, String> distinctValues = HashMultimap.create();
    for (List<String> record : recordList) {
        for (int i = 0; i < record.size(); i++) {
            TblColRef column = positionToColumn.get(i);
            if (column != null) {
                distinctValues.put(column, record.get(i));
            }
        }
    }

    // Build one dictionary per column from its gathered value set.
    HashMap<TblColRef, Dictionary<String>> dictionaries = Maps.newHashMap();
    for (TblColRef column : distinctValues.keySet()) {
        Dictionary<String> dict = DictionaryGenerator.buildDictionary(column.getType(),
                new IterableDictionaryValueEnumerator(distinctValues.get(column)));
        dictionaries.put(column, dict);
    }
    return dictionaries;
}
/**
 * Resolves the dimension encoding for each TopN literal column: the encoding
 * configured on the measure (with optional version) when one is set, otherwise
 * the column's dictionary encoding.
 *
 * @throws RuntimeException if a configured encoding version is not an integer
 */
private static DimensionEncoding[] getDimensionEncodings(FunctionDesc function, List<TblColRef> literalCols, Map<TblColRef, Dictionary<String>> dictionaryMap) {
    final DimensionEncoding[] dimensionEncodings = new DimensionEncoding[literalCols.size()];
    for (int i = 0; i < literalCols.size(); i++) {
        TblColRef colRef = literalCols.get(i);
        Pair<String, String> topNEncoding = TopNMeasureType.getEncoding(function, colRef);
        String encoding = topNEncoding.getFirst();
        String encodingVersionStr = topNEncoding.getSecond();
        if (StringUtils.isEmpty(encoding) || DictionaryDimEnc.ENCODING_NAME.equals(encoding)) {
            // No explicit encoding (or explicit dict): fall back to the dictionary.
            dimensionEncodings[i] = new DictionaryDimEnc(dictionaryMap.get(colRef));
        } else {
            int encodingVersion = 1;
            if (!StringUtils.isEmpty(encodingVersionStr)) {
                try {
                    encodingVersion = Integer.parseInt(encodingVersionStr);
                } catch (NumberFormatException e) {
                    // FIX: preserve the parse failure as the cause instead of dropping it.
                    throw new RuntimeException(TopNMeasureType.CONFIG_ENCODING_VERSION_PREFIX + colRef.getName()
                            + " has to be an integer", e);
                }
            }
            Object[] encodingConf = DimensionEncoding.parseEncodingConf(encoding);
            String encodingName = (String) encodingConf[0];
            String[] encodingArgs = (String[]) encodingConf[1];
            // Date/time columns may need their encoding args adjusted for the column type.
            encodingArgs = DateDimEnc.replaceEncodingArgs(encoding, encodingArgs, encodingName,
                    literalCols.get(i).getType());
            dimensionEncodings[i] = DimensionEncodingFactory.create(encodingName, encodingArgs, encodingVersion);
        }
    }
    return dimensionEncodings;
}
// Merge two ranges for this column: type-aware minimum of the mins and
// maximum of the maxes.
DataTypeOrder order = model.findColumn(colId).getType().getOrder();
String newMin = order.min(r1.getMin(), r2.getMin());
String newMax = order.max(r1.getMax(), r2.getMax());
public void init(int index, CubeDesc cubeDesc) { bitIndex = index; colRef = cubeDesc.getModel().findColumn(column); column = colRef.getIdentity(); Preconditions.checkArgument(colRef != null, "Cannot find rowkey column %s in cube %s", column, cubeDesc); Preconditions.checkState(StringUtils.isNotEmpty(this.encoding)); Object[] encodingConf = DimensionEncoding.parseEncodingConf(this.encoding); encodingName = (String) encodingConf[0]; encodingArgs = (String[]) encodingConf[1]; if (!DimensionEncodingFactory.isValidEncoding(this.encodingName)) throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'"); // convert date/time dictionary on date/time column to DimensionEncoding implicitly // however date/time dictionary on varchar column is still required DataType type = colRef.getType(); if (DictionaryDimEnc.ENCODING_NAME.equals(encodingName)) { if (type.isDate()) { encoding = encodingName = DateDimEnc.ENCODING_NAME; } if (type.isTimeFamily()) { encoding = encodingName = TimeDimEnc.ENCODING_NAME; } } encodingArgs = DateDimEnc.replaceEncodingArgs(encoding, encodingArgs, encodingName, type); if (encodingName.startsWith(FixedLenDimEnc.ENCODING_NAME) && (type.isIntegerFamily() || type.isNumberFamily())) { logger.warn(colRef + " type is " + type + " and cannot apply fixed_length encoding"); } }
/**
 * Recursively binds dynamic-variable values from the DataContext into compare
 * filters. Values on date/time columns are converted to epoch-millis strings
 * before binding.
 */
private void bindVariable(TupleFilter filter, DataContext dataContext) {
    if (filter == null) {
        return;
    }
    // Depth-first: bind children before this node.
    for (TupleFilter child : filter.getChildren()) {
        bindVariable(child, dataContext);
    }
    if (!(filter instanceof CompareTupleFilter) || dataContext == null) {
        return;
    }
    CompareTupleFilter compare = (CompareTupleFilter) filter;
    for (Map.Entry<String, Object> entry : compare.getVariables().entrySet()) {
        String variable = entry.getKey();
        Object value = dataContext.get(variable);
        if (value == null) {
            continue;
        }
        String bound = value.toString();
        // Date/time columns compare as epoch-millis strings internally.
        if (compare.getColumn().getType().isDateTimeFamily()) {
            bound = String.valueOf(DateFormat.stringToMillis(bound));
        }
        compare.clearPreviousVariableValues(variable);
        compare.bindVariable(variable, bound);
    }
}

// ============================================================================
TblColRef col = comp.getColumn();
// Skip columns whose type does not participate in comparisons.
if (!col.getType().needCompare()) { continue;
/**
 * Returns the data type of the rewritten measure field. Basic measures map
 * max/min to the source column's type, sum to the declared return type (or
 * bigint for constant parameters), and count to bigint; non-basic measures
 * use {@code DataType.ANY}.
 *
 * @throws IllegalArgumentException for an unrecognized basic measure
 */
public DataType getRewriteFieldType() {
    // Non-basic measures carry no specific rewrite type.
    if (!(getMeasureType() instanceof BasicMeasureType)) {
        return DataType.ANY;
    }
    if (isMax() || isMin()) {
        return parameter.getColRefs().get(0).getType();
    }
    if (isSum()) {
        return parameter.isColumnType() ? DataType.getType(returnType) : DataType.getType("bigint");
    }
    if (isCount()) {
        return DataType.getType("bigint");
    }
    throw new IllegalArgumentException("unknown measure type " + getMeasureType());
}
private String tsRangeToStr(long ts, PartitionDesc part) { String value; DataType partitionColType = part.getPartitionDateColumnRef().getType(); if (partitionColType.isDate()) { value = DateFormat.formatToDateStr(ts); } else if (partitionColType.isTimeFamily()) { value = DateFormat.formatToTimeWithoutMilliStr(ts); } else if (partitionColType.isStringFamily() || partitionColType.isIntegerFamily()) {//integer like 20160101 String partitionDateFormat = part.getPartitionDateFormat(); if (StringUtils.isEmpty(partitionDateFormat)) { value = "" + ts; } else { value = DateFormat.formatToDateStr(ts, partitionDateFormat); } } else { throw new RuntimeException("Type " + partitionColType + " is not valid partition column type"); } return value; }
// Reducer setup: each reduce task builds the dictionary for exactly one UHC
// column, selected by the task id. Shard-by columns use the generic builder;
// other columns use the cube's configured global dictionary builder.
@Override protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    Configuration conf = context.getConfiguration();
    mos = new MultipleOutputs(context);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns();
    // Task id doubles as the index into the UHC column list.
    int taskId = context.getTaskAttemptID().getTaskID().getId();
    col = uhcColumns.get(taskId);
    logger.info("column name: " + col.getIdentity());
    if (cube.getDescriptor().getShardByColumns().contains(col)) {
        //for ShardByColumns
        builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
        builder.init(null, 0, null);
    } else {
        //for GlobalDictionaryColumns
        String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR);
        DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype());
        String builderClass = cubeDesc.getDictionaryBuilderClass(col);
        builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
        builder.init(dictionaryInfo, 0, hdfsDir);
    }
}
continue;
// Resolve the i-th column's data type for the handling that follows.
final DataType type = allCols.get(i).getType();
// Saving a dictionary with identical content must return the same cached
// DictionaryInfo instance (deduplication)...
Dictionary<String> dict = DictionaryGenerator.buildDictionary(col.getType(), new IterableDictionaryValueEnumerator("1", "2", "3"));
DictionaryInfo info4 = dictMgr.saveDictionary(col, MockupReadableTable.newNonExistTable("/a/path"), dict);
assertTrue(info1 == info4);
// ...while differing content ("4" added) must yield a distinct DictionaryInfo.
Dictionary<String> dict2 = DictionaryGenerator.buildDictionary(col.getType(), new IterableDictionaryValueEnumerator("1", "2", "3", "4"));
DictionaryInfo info5 = dictMgr.saveDictionary(col, MockupReadableTable.newNonExistTable("/a/path"), dict2);
assertTrue(info1 != info5);