private static void buildSingleColumnRangeCondAsTimeMillis(StringBuilder builder, TblColRef partitionColumn,
        long startInclusive, long endExclusive) {
    // Appends "<col> >= start AND <col> < end" where both bounds are raw
    // epoch-millisecond literals (half-open interval: start inclusive, end exclusive).
    String colName = partitionColumn.getIdentity();
    builder.append(colName).append(" >= ").append(startInclusive)
            .append(" AND ")
            .append(colName).append(" < ").append(endExclusive);
}
public void putDictResPath(TblColRef col, String dictResPath) {
    // Register the dictionary resource path under the column's full identity.
    final String key = col.getIdentity();
    getDictionaries().put(key, dictResPath);
}
public void init(DataModelDesc model) {
    // Resolve the configured partition column names against the model and
    // normalize them to their full identities, then instantiate the
    // partition-condition builder. No-op when no date column is configured.
    if (StringUtils.isEmpty(partitionDateColumn))
        return;

    partitionDateColumnRef = model.findColumn(partitionDateColumn);
    partitionDateColumn = partitionDateColumnRef.getIdentity();

    // The time column is optional; resolve it only when configured.
    // (Was "isBlank(...) == false" — replaced with the idiomatic negation.)
    if (!StringUtils.isBlank(partitionTimeColumn)) {
        partitionTimeColumnRef = model.findColumn(partitionTimeColumn);
        partitionTimeColumn = partitionTimeColumnRef.getIdentity();
    }

    partitionConditionBuilder = (IPartitionConditionBuilder) ClassUtil.newInstance(partitionConditionBuilderClz);
}
private void normalizeColumnNames(String[] names) { if (names == null) return; for (int i = 0; i < names.length; i++) { TblColRef col = cubeDesc.getModel().findColumn(names[i]); names[i] = col.getIdentity(); } // check no dup Set<String> set = new HashSet<>(Arrays.asList(names)); if (set.size() < names.length) throw new IllegalStateException( "Columns in aggrgroup must not contain duplication: " + Arrays.asList(names)); }
private static void buildSingleColumnRangeCondAsYmdInt(StringBuilder builder, TblColRef partitionColumn,
        long startInclusive, long endExclusive, String partitionColumnDateFormat) {
    // Appends "<col> >= start AND <col> < end" with both bounds rendered
    // through the given date format (half-open interval).
    String colName = partitionColumn.getIdentity();
    String lowerBound = DateFormat.formatToDateStr(startInclusive, partitionColumnDateFormat);
    String upperBound = DateFormat.formatToDateStr(endExclusive, partitionColumnDateFormat);
    builder.append(colName).append(" >= ").append(lowerBound)
            .append(" AND ")
            .append(colName).append(" < ").append(upperBound);
}
private Map<String, List<String>> getInfluencedCubesByDims(List<String> dims, List<CubeInstance> cubes) {
    // For every dimension column (including derived) that is NOT listed in
    // 'dims', collect the names of the cubes that use it.
    //
    // Returns: map of column identity -> cube names referencing that column.
    Map<String, List<String>> influencedCubes = new HashMap<>();
    for (CubeInstance cubeInstance : cubes) {
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        for (TblColRef tblColRef : cubeDesc.listDimensionColumnsIncludingDerived()) {
            String identity = tblColRef.getIdentity();
            if (dims.contains(identity))
                continue;
            // computeIfAbsent replaces the get/null-check/put dance and
            // avoids repeated lookups for the same key.
            influencedCubes.computeIfAbsent(identity, k -> new ArrayList<>()).add(cubeInstance.getName());
        }
    }
    return influencedCubes;
}
private Map<String, List<String>> getInfluencedCubesByMeasures(List<String> allCols, List<CubeInstance> cubes) {
    // For every measure column (all columns minus dimension columns) that is
    // NOT listed in 'allCols', collect the names of the cubes that use it.
    //
    // Returns: map of column identity -> cube names referencing that column.
    Map<String, List<String>> influencedCubes = new HashMap<>();
    for (CubeInstance cubeInstance : cubes) {
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        // Measure columns = all columns minus dimension columns (incl. derived).
        Set<TblColRef> tblColRefs = Sets.newHashSet(cubeDesc.listAllColumns());
        tblColRefs.removeAll(cubeDesc.listDimensionColumnsIncludingDerived());
        for (TblColRef tblColRef : tblColRefs) {
            String identity = tblColRef.getIdentity();
            if (allCols.contains(identity))
                continue;
            // computeIfAbsent replaces the get/null-check/put dance and
            // avoids repeated lookups for the same key.
            influencedCubes.computeIfAbsent(identity, k -> new ArrayList<>()).add(cubeInstance.getName());
        }
    }
    return influencedCubes;
}
private void outputDimRangeInfo() throws IOException, InterruptedException { if (col != null && minValue != null) { // output written to baseDir/colName/colName.dci-r-00000 (etc) String dimRangeFileName = col.getIdentity() + "/" + col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX; mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(), new Text(minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName); mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(), new Text(maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName); logger.info("write dimension range info for col : " + col.getName() + " minValue:" + minValue + " maxValue:" + maxValue); } }
private void outputDict(TblColRef col, Dictionary<String> dict) throws IOException, InterruptedException { // output written to baseDir/colName/colName.rldict-r-00000 (etc) String dictFileName = col.getIdentity() + "/" + col.getName() + DICT_FILE_POSTFIX; try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream outputStream = new DataOutputStream(baos);) { outputStream.writeUTF(dict.getClass().getName()); dict.write(outputStream); mos.write(BatchConstants.CFG_OUTPUT_DICT, NullWritable.get(), new ArrayPrimitiveWritable(baos.toByteArray()), dictFileName); } }
private void outputDimRangeInfo(List<Tuple2<String, Tuple3<Writable, Writable, String>>> result) { if (col != null && minValue != null) { // output written to baseDir/colName/colName.dci-r-00000 (etc) String dimRangeFileName = col.getIdentity() + "/" + col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX; result.add(new Tuple2<String, Tuple3<Writable, Writable, String>>(BatchConstants.CFG_OUTPUT_PARTITION, new Tuple3<Writable, Writable, String>(NullWritable.get(), new Text(minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName))); result.add(new Tuple2<String, Tuple3<Writable, Writable, String>>(BatchConstants.CFG_OUTPUT_PARTITION, new Tuple3<Writable, Writable, String>(NullWritable.get(), new Text(maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName))); logger.info("write dimension range info for col : {} minValue:{} maxValue:{}", col.getName(), minValue, maxValue); } }
private void outputDict(TblColRef col, Dictionary<String> dict) throws IOException, InterruptedException {
    // output written to baseDir/colName/colName.rldict-r-00000 (etc)
    String dictFileName = col.getIdentity() + "/" + col.getName() + DICT_FILE_POSTFIX;
    try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
            DataOutputStream outputStream = new DataOutputStream(baos);) {
        // Serialize the dictionary's class name first so the reader can
        // reflectively reconstruct the concrete Dictionary subtype, then
        // the dictionary payload itself.
        outputStream.writeUTF(dict.getClass().getName());
        dict.write(outputStream);
        mos.write(BatchConstants.CFG_OUTPUT_DICT, NullWritable.get(),
                new ArrayPrimitiveWritable(baos.toByteArray()), dictFileName);
    }
    // NOTE(review): MultipleOutputs is closed here, which means this method
    // can only be safely invoked as the final output of the task — confirm
    // callers never invoke it more than once.
    mos.close();
}
}
private void initDimensionsAndMetrics() {
    // Initialize every dimension descriptor, then rewrite each metric name
    // (in place) to the column's full identity.
    for (ModelDimensionDesc dim : dimensions) {
        dim.init(this);
    }
    int idx = 0;
    while (idx < metrics.length) {
        metrics[idx] = findColumn(metrics[idx]).getIdentity();
        idx++;
    }
}
@Override
public IReadableTable getDistinctValuesFor(TblColRef col) {
    // Distinct values for the column are stored on DFS under
    // factColumnsInputPath/<column identity>; expose them as a sorted,
    // readable table typed by the column's data type.
    return new SortedColumnDFSFile(factColumnsInputPath + "/" + col.getIdentity(), col.getType());
}
}, new DictionaryProvider() {
public String getExpressionInSourceDB() {
    // Computed columns are addressed by their defining expression; regular
    // columns by their full identity.
    if (column.isComputedColumn()) {
        return column.getComputedColumnExpr();
    }
    return getIdentity();
}
private void outputDict(TblColRef col, Dictionary<String> dict, List<Tuple2<String, Tuple3<Writable, Writable, String>>> result) throws IOException { // output written to baseDir/colName/colName.rldict-r-00000 (etc) String dictFileName = col.getIdentity() + "/" + col.getName() + DICT_FILE_POSTFIX; try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream outputStream = new DataOutputStream(baos)) { outputStream.writeUTF(dict.getClass().getName()); dict.write(outputStream); result.add(new Tuple2<String, Tuple3<Writable, Writable, String>>(BatchConstants.CFG_OUTPUT_DICT, new Tuple3<Writable, Writable, String>(NullWritable.get(), new ArrayPrimitiveWritable(baos.toByteArray()), dictFileName))); } }
public String getDictResPath(TblColRef col) { String r; String dictKey = col.getIdentity(); r = getDictionaries().get(dictKey); // try Kylin v1.x dict key as well if (r == null) { String v1DictKey = col.getTable() + "/" + col.getName(); r = getDictionaries().get(v1DictKey); } return r; }
private boolean validate() {
    // ensure no dup between dimensions/metrics
    // Any metric that also appears as a dimension column is removed from the
    // metrics array in place (dimensions take precedence); note this mutates
    // the 'metrics' field.
    for (ModelDimensionDesc dim : dimensions) {
        String table = dim.getTable();
        for (String c : dim.getColumns()) {
            TblColRef dcol = findColumn(table, c);
            metrics = ArrayUtils.removeElement(metrics, dcol.getIdentity());
        }
    }
    // Resolve the surviving metric names to column refs for the PK/FK check.
    Set<TblColRef> mcols = new HashSet<>();
    for (String m : metrics) {
        mcols.add(findColumn(m));
    }
    // validate PK/FK are in dimensions
    boolean pkfkDimAmended = false;
    for (Chain chain : joinsTree.getTableChains().values()) {
        // Intentional operand order: validatePkFkDim must run for EVERY
        // chain, so it appears before the short-circuiting '||'.
        pkfkDimAmended = validatePkFkDim(chain.join, mcols) || pkfkDimAmended;
    }
    return pkfkDimAmended;
}
public void init(DataModelDesc model) { expression = expression.toUpperCase(Locale.ROOT); if (expression.equals(PercentileMeasureType.FUNC_PERCENTILE)) { expression = PercentileMeasureType.FUNC_PERCENTILE_APPROX; // for backward compatibility } returnDataType = DataType.getType(returnType); for (ParameterDesc p = parameter; p != null; p = p.getNextParameter()) { if (p.isColumnType()) { TblColRef colRef = model.findColumn(p.getValue()); p.setValue(colRef.getIdentity()); p.setColRef(colRef); } } }
private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo) throws IOException {
    // Persist a column dictionary's resource path and size stats onto the
    // segment, using copy-on-write against the latest cube copy.
    if (dictInfo == null)
        return;

    // work on copy instead of cached objects
    CubeInstance cubeCopy = cubeSeg.getCubeInstance().latestCopyForWrite(); // get a latest copy
    CubeSegment segCopy = cubeCopy.getSegmentById(cubeSeg.getUuid());

    Dictionary<?> dict = dictInfo.getDictionaryObject();
    segCopy.putDictResPath(col, dictInfo.getResourcePath());
    // Record [column identity, cardinality, id byte-size] for rowkey stats.
    segCopy.getRowkeyStats().add(new Object[] { col.getIdentity(), dict.getSize(), dict.getSizeOfId() });

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(segCopy);
    updateCube(update);
}
@Test
public void testNoDupColInDimAndMeasure() {
    // A column used as a dimension must have been stripped from the model's
    // metrics during validation, leaving exactly two metrics.
    DataModelDesc model = DataModelManager.getInstance(getTestConfig())
            .getDataModelDesc("test_kylin_inner_join_model_desc");
    String[] metrics = model.getMetrics();
    TblColRef col = model.findColumn("edw.test_cal_dt.cal_dt");

    assertTrue(metrics.length == 2);
    // Replaced the "== false" anti-pattern with direct negation.
    assertTrue(!ArrayUtils.contains(metrics, col.getIdentity()));
}