@Test public void testWithSlr() throws Exception { //has shard CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY"); RowKeySplitter rowKeySplitter = new RowKeySplitter(cube.getFirstSegment(), 11, 20); // base cuboid rowkey byte[] input = { 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0, -104, -106, -128, 11, 54, -105, 55, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 13, 71, 114, 65, 66, 73, 78, 9, 9, 9, 9, 9, 9, 9, 9, 0, 10, 0 }; rowKeySplitter.split(input); assertEquals(11, rowKeySplitter.getBufferSize()); }
@Override public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException { long cuboidID = rowKeySplitter.split(key.getBytes()); Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidID); int fullKeySize = buildKey(cuboid, rowKeySplitter.getSplitBuffers()); outputKey.set(newKeyBuf.array(), 0, fullKeySize); String baseOutputPath = PathNameCuboidOld; if (cuboidID == baseCuboid) { baseOutputPath = PathNameCuboidBase; } mos.write(outputKey, value, generateFileName(baseOutputPath)); }
public long decode(byte[] bytes) throws IOException { this.values.clear(); long cuboidId = rowKeySplitter.split(bytes); initCuboid(cuboidId); ByteArray[] splits = rowKeySplitter.getSplitBuffers(); int offset = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboid id part for (int i = 0; i < this.cuboid.getColumns().size(); i++) { TblColRef col = this.cuboid.getColumns().get(i); collectValue(col, splits[offset].array(), splits[offset].offset(), splits[offset].length()); offset++; } return cuboidId; }
@Override public Iterator<Tuple2<ByteArray, Object[]>> call(final Tuple2<ByteArray, Object[]> tuple2) throws Exception { if (initialized == false) { synchronized (SparkCubingByLayer.class) { if (initialized == false) { init(); initialized = true; } } } byte[] key = tuple2._1().array(); long cuboidId = rowKeySplitter.parseCuboid(key); final List<Long> myChildren = cubeSegment.getCuboidScheduler().getSpanningCuboid(cuboidId); // if still empty or null if (myChildren == null || myChildren.size() == 0) { return EMTPY_ITERATOR.iterator(); } rowKeySplitter.split(key); final Cuboid parentCuboid = Cuboid.findForMandatory(cubeDesc, cuboidId); List<Tuple2<ByteArray, Object[]>> tuples = new ArrayList(myChildren.size()); for (Long child : myChildren) { Cuboid childCuboid = Cuboid.findForMandatory(cubeDesc, child); ByteArray result = ndCuboidBuilder.buildKey2(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers()); tuples.add(new Tuple2<>(result, tuple2._2())); } return tuples.iterator(); } }
public NDCuboidBuilder(CubeSegment cubeSegment, RowKeyEncoderProvider rowKeyEncoderProvider) { this.cubeSegment = cubeSegment; this.rowKeyEncoderProvider = rowKeyEncoderProvider; this.rowKeySplitter = new RowKeySplitter(cubeSegment); }
@Override public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException { long cuboidID = rowKeySplitter.split(key.getBytes()); if (cuboidID != baseCuboid && !recommendCuboids.contains(cuboidID)) { return; } String baseOutputPath = PathNameCuboidOld; if (cuboidID == baseCuboid) { baseOutputPath = PathNameCuboidBase; } mos.write(key, value, generateFileName(baseOutputPath)); }
private int buildKey(Cuboid cuboid, ByteArray[] splitBuffers) { RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(cuboid); int startIdx = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId int endIdx = startIdx + Long.bitCount(cuboid.getId()); int offset = 0; for (int i = startIdx; i < endIdx; i++) { System.arraycopy(splitBuffers[i].array(), splitBuffers[i].offset(), newKeyBodyBuf, offset, splitBuffers[i].length()); offset += splitBuffers[i].length(); } int fullKeySize = rowkeyEncoder.getBytesLength(); while (newKeyBuf.array().length < fullKeySize) { newKeyBuf = new ByteArray(newKeyBuf.length() * 2); } newKeyBuf.setLength(fullKeySize); rowkeyEncoder.encode(new ByteArray(newKeyBodyBuf, 0, offset), newKeyBuf); return fullKeySize; }
public RowKeyDecoder(CubeSegment cubeSegment) { this.cubeDesc = cubeSegment.getCubeDesc(); this.rowKeySplitter = new RowKeySplitter(cubeSegment); this.colIO = new RowKeyColumnIO(cubeSegment.getDimensionEncodingMap()); this.values = new ArrayList<String>(); }
@Override public Iterator<Tuple2<ByteArray, Object[]>> call(final Tuple2<ByteArray, Object[]> tuple2) throws Exception { if (initialized == false) { synchronized (SparkCubingByLayer.class) { if (initialized == false) { init(); initialized = true; } } } byte[] key = tuple2._1().array(); long cuboidId = rowKeySplitter.parseCuboid(key); final List<Long> myChildren = cubeSegment.getCuboidScheduler().getSpanningCuboid(cuboidId); // if still empty or null if (myChildren == null || myChildren.size() == 0) { return EMTPY_ITERATOR.iterator(); } rowKeySplitter.split(key); final Cuboid parentCuboid = Cuboid.findForMandatory(cubeDesc, cuboidId); List<Tuple2<ByteArray, Object[]>> tuples = new ArrayList(myChildren.size()); for (Long child : myChildren) { Cuboid childCuboid = Cuboid.findForMandatory(cubeDesc, child); ByteArray result = ndCuboidBuilder.buildKey2(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers()); tuples.add(new Tuple2<>(result, tuple2._2())); } return tuples.iterator(); } }
@Override public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException { long cuboidID = rowKeySplitter.split(key.getBytes()); if (cuboidID != baseCuboid && !recommendCuboids.contains(cuboidID)) { return; } String baseOutputPath = PathNameCuboidOld; if (cuboidID == baseCuboid) { baseOutputPath = PathNameCuboidBase; } mos.write(key, value, generateFileName(baseOutputPath)); }
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers, ByteArray newKeyBodyBuf) { RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid); // rowkey columns long mask = Long.highestOneBit(parentCuboid.getId()); long parentCuboidId = parentCuboid.getId(); long childCuboidId = childCuboid.getId(); long parentCuboidIdActualLength = (long)Long.SIZE - Long.numberOfLeadingZeros(parentCuboid.getId()); int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId for (int i = 0; i < parentCuboidIdActualLength; i++) { if ((mask & parentCuboidId) > 0) {// if the this bit position equals // 1 if ((mask & childCuboidId) > 0) {// if the child cuboid has this // column System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), newKeyBodyBuf.array(), offset, splitBuffers[index].length()); offset += splitBuffers[index].length(); } index++; } mask = mask >> 1; } rowkeyEncoder.fillHeader(newKeyBodyBuf.array()); }
@Override public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException { long cuboidId = rowKeySplitter.split(key.getBytes()); Cuboid parentCuboid = Cuboid.findForMandatory(cubeDesc, cuboidId); for (Long child : myChildren) { Cuboid childCuboid = Cuboid.findForMandatory(cubeDesc, child); result = ndCuboidBuilder.buildKey(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers()); outputKey.set(result.getSecond().array(), 0, result.getFirst()); context.write(outputKey, value);
@Test public void testWithoutSlr() throws Exception { //no shard CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITHOUT_SLR_READY"); RowKeySplitter rowKeySplitter = new RowKeySplitter(cube.getFirstSegment(), 11, 20); // base cuboid rowkey byte[] input = { 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 11, 55, -13, 13, 22, 34, 121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }; rowKeySplitter.split(input); assertEquals(10, rowKeySplitter.getBufferSize()); } }
@Override protected void doSetup(Context context) throws IOException { super.bindCurrentConfiguration(context.getConfiguration()); cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME); segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID); String cuboidModeName = context.getConfiguration().get(BatchConstants.CFG_CUBOID_MODE); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName); cubeDesc = cube.getDescriptor(); cubeSegment = cube.getSegmentById(segmentID); ndCuboidBuilder = new NDCuboidBuilder(cubeSegment); // initialize CubiodScheduler cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(cubeSegment, cuboidModeName); rowKeySplitter = new RowKeySplitter(cubeSegment); }
private Text processKey(Text key) throws IOException { long cuboidID = rowKeySplitter.split(key.getBytes()); Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidID); RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(cuboid); ByteArray[] splittedByteses = rowKeySplitter.getSplitBuffers(); int bufOffset = 0; int bodySplitOffset = rowKeySplitter.getBodySplitOffset();
private int buildKey(Cuboid cuboid, ByteArray[] splitBuffers) { RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(cuboid); int startIdx = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId int endIdx = startIdx + Long.bitCount(cuboid.getId()); int offset = 0; for (int i = startIdx; i < endIdx; i++) { System.arraycopy(splitBuffers[i].array(), splitBuffers[i].offset(), newKeyBodyBuf, offset, splitBuffers[i].length()); offset += splitBuffers[i].length(); } int fullKeySize = rowkeyEncoder.getBytesLength(); while (newKeyBuf.array().length < fullKeySize) { newKeyBuf = new ByteArray(newKeyBuf.length() * 2); } newKeyBuf.setLength(fullKeySize); rowkeyEncoder.encode(new ByteArray(newKeyBodyBuf, 0, offset), newKeyBuf); return fullKeySize; }
public long decode(byte[] bytes) throws IOException { this.values.clear(); long cuboidId = rowKeySplitter.split(bytes, bytes.length); initCuboid(cuboidId); SplittedBytes[] splits = rowKeySplitter.getSplitBuffers(); int offset = 1; // skip cuboid id part for (int i = 0; i < this.cuboid.getColumns().size(); i++) { TblColRef col = this.cuboid.getColumns().get(i); collectValue(col, splits[offset].value, splits[offset].length); offset++; } return cuboidId; }
@Test public void testWithoutSlr() throws Exception { CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITHOUT_SLR_READY"); RowKeySplitter rowKeySplitter = new RowKeySplitter(cube.getFirstSegment(), 10, 20); // base cuboid rowkey byte[] input = { 0, 0, 0, 0, 0, 0, 0, -1, 11, 55, -13, 13, 22, 34, 121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }; rowKeySplitter.split(input, input.length); assertEquals(9, rowKeySplitter.getBufferSize()); } }
@Override protected void doSetup(Context context) throws IOException { super.bindCurrentConfiguration(context.getConfiguration()); mos = new MultipleOutputs(context); String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME); String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); CubeManager cubeManager = CubeManager.getInstance(config); CubeInstance cube = cubeManager.getCube(cubeName); CubeSegment optSegment = cube.getSegmentById(segmentID); CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment); rowKeySplitter = new RowKeySplitter(originalSegment); baseCuboid = cube.getCuboidScheduler().getBaseCuboidId(); recommendCuboids = cube.getCuboidsRecommend(); Preconditions.checkNotNull(recommendCuboids, "The recommend cuboid map could not be null"); }
public long decode(byte[] bytes) throws IOException { this.values.clear(); long cuboidId = rowKeySplitter.split(bytes); initCuboid(cuboidId); ByteArray[] splits = rowKeySplitter.getSplitBuffers(); int offset = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboid id part for (int i = 0; i < this.cuboid.getColumns().size(); i++) { TblColRef col = this.cuboid.getColumns().get(i); collectValue(col, splits[offset].array(), splits[offset].offset(), splits[offset].length()); offset++; } return cuboidId; }