/**
 * Creates a builder that assembles N-dimensional cuboid row keys for the given segment.
 *
 * @param cubeSegment           segment whose row key layout drives splitting and encoding
 * @param rowKeyEncoderProvider supplies a row key encoder per target cuboid
 */
public NDCuboidBuilder(CubeSegment cubeSegment, RowKeyEncoderProvider rowKeyEncoderProvider) {
    this.rowKeySplitter = new RowKeySplitter(cubeSegment);
    this.cubeSegment = cubeSegment;
    this.rowKeyEncoderProvider = rowKeyEncoderProvider;
}
/** Returns whether the total number of value combinations across {@code dims} exceeds {@code cap}. */
private static boolean exceedCap(Dim[] dims, long cap) {
    final long combinations = combCount(dims);
    return combinations > cap;
}
public static <K, V> List<Map<K, V>> calculate(Map<K, Set<V>> fuzzyValues, long cap) { Dim<K, V>[] dims = toDims(fuzzyValues); // If a query has many IN clause and each IN clause has many values, then it will easily generate // thousands of fuzzy keys. When there are lots of fuzzy keys, the scan performance is bottle necked // on it. So simply choose to abandon all fuzzy keys in this case. if (exceedCap(dims, cap)) { return Lists.newArrayList(); } else { return combination(dims); } }
@Test public void testWithSlr() throws Exception { //has shard CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY"); RowKeySplitter rowKeySplitter = new RowKeySplitter(cube.getFirstSegment(), 11, 20); // base cuboid rowkey byte[] input = { 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0, -104, -106, -128, 11, 54, -105, 55, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 13, 71, 114, 65, 66, 73, 78, 9, 9, 9, 9, 9, 9, 9, 9, 0, 10, 0 }; rowKeySplitter.split(input); assertEquals(11, rowKeySplitter.getBufferSize()); }
public long decode(byte[] bytes) throws IOException { this.values.clear(); long cuboidId = rowKeySplitter.split(bytes); initCuboid(cuboidId); ByteArray[] splits = rowKeySplitter.getSplitBuffers(); int offset = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboid id part for (int i = 0; i < this.cuboid.getColumns().size(); i++) { TblColRef col = this.cuboid.getColumns().get(i); collectValue(col, splits[offset].array(), splits[offset].offset(), splits[offset].length()); offset++; } return cuboidId; }
/**
 * Re-encodes the incoming cuboid row key for this segment and writes it to the
 * matching named output (base cuboid vs. other cuboids).
 */
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    final long cuboidID = rowKeySplitter.split(key.getBytes());
    final Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidID);

    final int fullKeySize = buildKey(cuboid, rowKeySplitter.getSplitBuffers());
    outputKey.set(newKeyBuf.array(), 0, fullKeySize);

    final String outputPath = (cuboidID == baseCuboid) ? PathNameCuboidBase : PathNameCuboidOld;
    mos.write(outputKey, value, generateFileName(outputPath));
}
/**
 * Forwards only the base cuboid and recommended cuboids, routing each record to the
 * named output that matches its cuboid kind; everything else is dropped.
 */
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    final long cuboidID = rowKeySplitter.split(key.getBytes());
    final boolean keep = (cuboidID == baseCuboid) || recommendCuboids.contains(cuboidID);
    if (!keep) {
        return;
    }
    final String outputPath = (cuboidID == baseCuboid) ? PathNameCuboidBase : PathNameCuboidOld;
    mos.write(key, value, generateFileName(outputPath));
}
@Test public void testLegacyCubeSeg() { // legacy cube segments does not have DimensionRangeInfo, but with TSRange can do some pruning CubeInstance cube = CubeManager.getInstance(getTestConfig()) .getCube("test_kylin_cube_without_slr_left_join_ready_2_segments"); TblColRef col = cube.getModel().findColumn("TEST_KYLIN_FACT.CAL_DT"); CubeSegment seg = cube.getSegments(SegmentStatusEnum.READY).get(0); TSRange tsRange = seg.getTSRange(); long start = tsRange.start.v; try (SetAndUnsetSystemProp sns = new SetAndUnsetSystemProp("kylin.query.skip-empty-segments", "false")) { { TupleFilter f = compare(col, FilterOperatorEnum.LTE, start); SegmentPruner segmentPruner = new SegmentPruner(f); Assert.assertTrue(segmentPruner.check(seg)); } { TupleFilter f = compare(col, FilterOperatorEnum.LT, start); SegmentPruner segmentPruner = new SegmentPruner(f); Assert.assertFalse(segmentPruner.check(seg)); } } } }
@Test public void testEmptySegment() { CubeSegment seg = cube.getFirstSegment(); TblColRef col = cube.getModel().findColumn("CUSTOMER.C_NATION"); // a normal hit TupleFilter f = compare(col, FilterOperatorEnum.EQ, "CHINA"); SegmentPruner segmentPruner = new SegmentPruner(f); Assert.assertTrue(segmentPruner.check(seg)); // make the segment empty, it should be pruned seg.setInputRecords(0); Assert.assertFalse(segmentPruner.check(seg)); }
/**
 * Returns the READY segments of {@code cube} that survive this pruner's check.
 */
public List<CubeSegment> listSegmentsForQuery(CubeInstance cube) {
    List<CubeSegment> accepted = new ArrayList<>();
    for (CubeSegment candidate : cube.getSegments(SegmentStatusEnum.READY)) {
        if (!check(candidate)) {
            continue;
        }
        accepted.add(candidate);
    }
    return accepted;
}
// Creates the shared test metadata store before each test case.
@Before public void setUp() throws Exception { this.createTestMetadata(); }
// Tears down the test metadata store created in setUp().
@After public void after() throws Exception { this.cleanupTestMetadata(); }
/**
 * Concatenates the dimension body buffers belonging to {@code cuboid} into
 * {@code newKeyBodyBuf} and encodes them into a full row key held in {@code newKeyBuf}.
 *
 * @param cuboid       target cuboid whose encoder and dimension count are used
 * @param splitBuffers split buffers produced by the row key splitter
 * @return total byte length of the encoded row key now stored in {@code newKeyBuf}
 */
private int buildKey(Cuboid cuboid, ByteArray[] splitBuffers) {
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(cuboid);
    int startIdx = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
    // One split buffer per dimension; the cuboid id's set bit count equals its dimension count.
    int endIdx = startIdx + Long.bitCount(cuboid.getId());
    int offset = 0;
    for (int i = startIdx; i < endIdx; i++) {
        System.arraycopy(splitBuffers[i].array(), splitBuffers[i].offset(), newKeyBodyBuf, offset, splitBuffers[i].length());
        offset += splitBuffers[i].length();
    }
    // Grow the reusable output buffer geometrically until the encoded key fits.
    // NOTE(review): assumes newKeyBuf is allocated with non-zero capacity elsewhere;
    // a zero-length buffer would never grow here — confirm the initial allocation.
    int fullKeySize = rowkeyEncoder.getBytesLength();
    while (newKeyBuf.array().length < fullKeySize) {
        newKeyBuf = new ByteArray(newKeyBuf.length() * 2);
    }
    newKeyBuf.setLength(fullKeySize);
    rowkeyEncoder.encode(new ByteArray(newKeyBodyBuf, 0, offset), newKeyBuf);
    return fullKeySize;
}
/**
 * Converts the per-column fuzzy value map into an array of Dim entries.
 * A null value set is normalized to an empty set so downstream combination
 * code never needs a null check.
 */
private static <K, V> Dim<K, V>[] toDims(Map<K, Set<V>> fuzzyValues) {
    Dim[] result = new Dim[fuzzyValues.size()];
    int next = 0;
    for (Entry<K, Set<V>> entry : fuzzyValues.entrySet()) {
        Dim<K, V> dim = new Dim<K, V>();
        dim.col = entry.getKey();
        Set<V> vals = entry.getValue();
        dim.values = (vals == null) ? Collections.<V> emptySet() : vals;
        result[next++] = dim;
    }
    return result;
}
// Tears down the test metadata store created in setUp().
@After public void after() { this.cleanupTestMetadata(); }
private DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) { DataModelDesc model = seg.getModel(); PartitionDesc part = model.getPartitionDesc(); if (!part.isPartitioned()) return null; if (!col.equals(part.getPartitionDateColumnRef())) return null; // deduce the dim range from TSRange TSRange tsRange = seg.getTSRange(); if (tsRange.start.isMin || tsRange.end.isMax) return null; // DimensionRangeInfo cannot express infinite String min = tsRangeToStr(tsRange.start.v, part); String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive return new DimensionRangeInfo(min, max); }
@Before
public void setUp() {
    this.createTestMetadata();
    // Cube fixture carrying dimension range info, needed by the pruning tests.
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    cube = CubeManager.getInstance(config).getCube("ssb_cube_with_dimention_range");
}
@Test public void testWithoutSlr() throws Exception { //no shard CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITHOUT_SLR_READY"); RowKeySplitter rowKeySplitter = new RowKeySplitter(cube.getFirstSegment(), 11, 20); // base cuboid rowkey byte[] input = { 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 11, 55, -13, 13, 22, 34, 121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }; rowKeySplitter.split(input); assertEquals(10, rowKeySplitter.getBufferSize()); } }
/**
 * Creates a decoder for row keys produced by the given segment, wiring up the
 * splitter and per-dimension column IO from the segment's encoding map.
 */
public RowKeyDecoder(CubeSegment cubeSegment) {
    this.cubeDesc = cubeSegment.getCubeDesc();
    this.colIO = new RowKeyColumnIO(cubeSegment.getDimensionEncodingMap());
    this.rowKeySplitter = new RowKeySplitter(cubeSegment);
    this.values = new ArrayList<String>();
}
/**
 * Loads the Kylin config from HDFS and initializes the segment-scoped helpers.
 * The config is pinned to the current thread only for the duration of the lookups.
 */
public void init() {
    KylinConfig config = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(config)) {
        CubeInstance instance = CubeManager.getInstance(config).getCube(cubeName);
        this.cubeSegment = instance.getSegmentById(segmentId);
        this.cubeDesc = instance.getDescriptor();
        this.ndCuboidBuilder = new NDCuboidBuilder(cubeSegment, new RowKeyEncoderProvider(cubeSegment));
        this.rowKeySplitter = new RowKeySplitter(cubeSegment);
    }
}