/**
 * Converts fuzzy keys given as GTRecords into the (key, mask) byte-array pairs
 * expected by HBase.
 *
 * For every record one buffer is created by {@code fuzzyKeyEncoder} and one by
 * {@code fuzzyMaskEncoder}; each encoder then encodes the record, together with the
 * primary key of the record's own info, into its buffer.
 *
 * @param fuzzyKeys fuzzy keys in GTRecord form; may be null or empty
 * @return one (key, mask) pair per input record, in input order; an empty list when
 *         {@code fuzzyKeys} is null or empty
 */
private List<Pair<byte[], byte[]>> translateFuzzyKeys(List<GTRecord> fuzzyKeys) {
    boolean nothingToTranslate = fuzzyKeys == null || fuzzyKeys.isEmpty();
    if (nothingToTranslate) {
        return Collections.emptyList();
    }

    List<Pair<byte[], byte[]>> keyMaskPairs = Lists.newArrayList();
    for (GTRecord fuzzyRecord : fuzzyKeys) {
        // Both buffers are created up front: key buffer first, then mask buffer.
        byte[] keyBytes = fuzzyKeyEncoder.createBuf();
        byte[] maskBytes = fuzzyMaskEncoder.createBuf();

        fuzzyKeyEncoder.encode(fuzzyRecord, fuzzyRecord.getInfo().getPrimaryKey(), keyBytes);
        fuzzyMaskEncoder.encode(fuzzyRecord, fuzzyRecord.getInfo().getPrimaryKey(), maskBytes);

        keyMaskPairs.add(Pair.newPair(keyBytes, maskBytes));
    }
    return keyMaskPairs;
}
/**
 * Builds, for each selected HBase column, the list of GT column indexes whose values
 * that HBase column contains. The mapping should be passed down to storage.
 *
 * HBase columns are visited family by family, in the order given by the cube's HBase
 * mapping, and are numbered as column blocks starting from 1. Every measure index of a
 * selected column is shifted by the number of primary-key columns of {@code fullGTInfo}
 * to obtain the GT column index.
 *
 * @param selectedColBlocks bit set of the selected column blocks
 * @return one list of GT column indexes per selected HBase column, in mapping order
 */
protected List<List<Integer>> getHBaseColumnsGTMapping(ImmutableBitSet selectedColBlocks) {
    final List<List<Integer>> mapping = Lists.newArrayList();
    final int pkColumnCount = fullGTInfo.getPrimaryKey().trueBitCount();
    final HBaseMappingDesc mappingDesc = cubeSeg.getCubeDesc().getHbaseMapping();

    // Numbering starts at 1; presumably block 0 is the primary-key block - confirm.
    int blockIdx = 1;
    for (HBaseColumnFamilyDesc family : mappingDesc.getColumnFamily()) {
        for (HBaseColumnDesc column : family.getColumns()) {
            boolean selected = selectedColBlocks.get(blockIdx);
            blockIdx++;
            if (!selected) {
                continue;
            }

            int[] measureIndexes = column.getMeasureIndex();
            Integer[] shifted = new Integer[measureIndexes.length];
            int k = 0;
            for (int measureIndex : measureIndexes) {
                shifted[k++] = measureIndex + pkColumnCount;
            }
            mapping.add(Arrays.asList(shifted));
        }
    }

    // The selection must hold exactly one more set bit than the number of mapped columns.
    Preconditions.checkState(selectedColBlocks.trueBitCount() == mapping.size() + 1);
    return mapping;
}
@Override public void write(long cuboidId, GTRecord record) throws IOException { if (lastCuboidId == null || !lastCuboidId.equals(cuboidId)) { if (lastCuboidId != null) { logger.info("Cuboid " + lastCuboidId + " has " + cuboidRowCount + " rows"); cuboidRowCount = 0; } // output another cuboid initVariables(cuboidId); lastCuboidId = cuboidId; } cuboidRowCount++; rowKeyEncoder.encode(record, record.getInfo().getPrimaryKey(), keyBuf); //output measures valueBuf.clear(); try { record.exportColumns(measureColumns, valueBuf); } catch (BufferOverflowException boe) { valueBuf = ByteBuffer.allocate((int) (record.sizeOf(measureColumns) * 1.5)); record.exportColumns(measureColumns, valueBuf); } outputKey.set(keyBuf, 0, keyBuf.length); outputValue.set(valueBuf.array(), 0, valueBuf.position()); writeAsKeyValue(outputKey, outputValue); }
/**
 * Builds a scan range from a collection of per-column ranges.
 *
 * Ranges on columns that are not part of the primary key of {@code gtInfo} are skipped.
 * For every remaining range, its begin and end values are set on the start and end
 * records, and a non-empty value set is collected for fuzzy key generation.
 *
 * @param andDimRanges the column ranges to combine
 * @return a scan range made of the start record, the end record and the fuzzy keys
 *         returned by {@code buildFuzzyKeys}
 */
protected GTScanRange newScanRange(Collection<ColumnRange> andDimRanges) {
    GTRecord lowerBound = new GTRecord(gtInfo);
    GTRecord upperBound = new GTRecord(gtInfo);
    Map<Integer, Set<ByteArray>> fuzzyByColumn = Maps.newHashMap();

    for (ColumnRange dimRange : andDimRanges) {
        int colIdx = dimRange.column.getColumnDesc().getZeroBasedIndex();
        if (gtInfo.getPrimaryKey().get(colIdx)) {
            lowerBound.set(colIdx, dimRange.begin);
            upperBound.set(colIdx, dimRange.end);

            boolean hasValueSet = dimRange.valueSet != null && !dimRange.valueSet.isEmpty();
            if (hasValueSet) {
                fuzzyByColumn.put(colIdx, dimRange.valueSet);
            }
        }
    }

    return new GTScanRange(lowerBound, upperBound, buildFuzzyKeys(fuzzyByColumn));
}
@Override public int compareTo(ResultMergeSlot o) { long cuboidComp = this.currentCuboidId - o.currentCuboidId; if (cuboidComp != 0) return cuboidComp < 0 ? -1 : 1; // note GTRecord.equals() don't work because the two GTRecord comes from different GridTable ImmutableBitSet pk = this.currentRecord.getInfo().getPrimaryKey(); for (int i = 0; i < pk.trueBitCount(); i++) { int c = pk.trueBitAt(i); int comp = this.currentRecord.get(c).compareTo(o.currentRecord.get(c)); if (comp != 0) return comp; } return 0; }
@Override public int compareTo(MergeSlot o) { long cuboidComp = this.currentCuboidId - o.currentCuboidId; if (cuboidComp != 0) return cuboidComp < 0 ? -1 : 1; // note GTRecord.equals() don't work because the two GTRecord comes from different GridTable ImmutableBitSet pk = this.currentRecord.getInfo().getPrimaryKey(); for (int i = 0; i < pk.trueBitCount(); i++) { int c = pk.trueBitAt(i); int comp = this.currentRecord.get(c).compareTo(o.currentRecord.get(c)); if (comp != 0) return comp; } return 0; }
@Override public void encode(GTRecord record, ImmutableBitSet keyColumns, byte[] buf) { ByteArray byteArray = new ByteArray(buf, getHeaderLength(), 0); GTInfo info = record.getInfo(); byte fill; int pos = 0; for (int i = 0; i < info.getPrimaryKey().trueBitCount(); i++) { int c = info.getPrimaryKey().trueBitAt(i); int colLength = info.getCodeSystem().maxCodeLength(c); if (record.get(c).array() != null) { fill = RowConstants.BYTE_ZERO; } else { fill = RowConstants.BYTE_ONE; } Arrays.fill(byteArray.array(), byteArray.offset() + pos, byteArray.offset() + pos + colLength, fill); pos += colLength; } byteArray.setLength(pos); //fill shard and cuboid fillHeader(buf); }
private RawScan preparedHBaseScan(GTRecord pkStart, GTRecord pkEnd, List<GTRecord> fuzzyKeys, ImmutableBitSet selectedColBlocks) { final List<Pair<byte[], byte[]>> selectedColumns = makeHBaseColumns(selectedColBlocks); LazyRowKeyEncoder encoder = new LazyRowKeyEncoder(cubeSeg, cuboid); byte[] start = encoder.createBuf(); byte[] end = encoder.createBuf(); encoder.setBlankByte(RowConstants.ROWKEY_LOWER_BYTE); encoder.encode(pkStart, pkStart.getInfo().getPrimaryKey(), start); encoder.setBlankByte(RowConstants.ROWKEY_UPPER_BYTE); encoder.encode(pkEnd, pkEnd.getInfo().getPrimaryKey(), end); byte[] temp = new byte[end.length + 1];//append extra 0 to the end key to make it inclusive while scanning System.arraycopy(end, 0, temp, 0, end.length); end = temp; List<Pair<byte[], byte[]>> hbaseFuzzyKeys = translateFuzzyKeys(fuzzyKeys); KylinConfig config = cubeSeg.getCubeDesc().getConfig(); int hbaseCaching = config.getHBaseScanCacheRows(); int hbaseMaxResultSize = config.getHBaseScanMaxResultSize(); // if (isMemoryHungry(selectedColBlocks)) // hbaseCaching /= 10; return new RawScan(start, end, selectedColumns, hbaseFuzzyKeys, hbaseCaching, hbaseMaxResultSize); }
private static RawScan mockFullScan(GTInfo gtInfo, KylinConfig kylinConfig) { final List<Pair<byte[], byte[]>> selectedColumns = Lists.newArrayList(); selectedColumns.add(new Pair<>(FAM, COL_M)); int headerLength = RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN; int bodyLength = 0; ImmutableBitSet primaryKey = gtInfo.getPrimaryKey(); for (int i = 0; i < primaryKey.trueBitCount(); i++) { bodyLength += gtInfo.getCodeSystem().getDimEnc(primaryKey.trueBitAt(i)).getLengthOfEncoding(); } //Mock start key byte[] start = new byte[headerLength + bodyLength]; BytesUtil.writeShort((short) 0, start, 0, RowConstants.ROWKEY_SHARDID_LEN); System.arraycopy(Bytes.toBytes(baseCuboid), 0, start, RowConstants.ROWKEY_SHARDID_LEN, RowConstants.ROWKEY_CUBOIDID_LEN); //Mock end key byte[] end = new byte[headerLength + bodyLength + 1]; for (int i = 0; i < end.length - 1; i++) { end[i] = RowConstants.ROWKEY_UPPER_BYTE; } BytesUtil.writeShort((short) 0, end, 0, RowConstants.ROWKEY_SHARDID_LEN); System.arraycopy(Bytes.toBytes(baseCuboid), 0, end, RowConstants.ROWKEY_SHARDID_LEN, RowConstants.ROWKEY_CUBOIDID_LEN); //Mock fuzzy key List<Pair<byte[], byte[]>> fuzzyKeys = Collections.emptyList(); return new RawScan(start, end, selectedColumns, fuzzyKeys, kylinConfig.getHBaseScanCacheRows(), kylinConfig.getHBaseScanMaxResultSize()); }
public static void prepareTestData() throws Exception { try { util.getHBaseAdmin().disableTable(TABLE); util.getHBaseAdmin().deleteTable(TABLE); } catch (Exception e) { // ignore table not found } Table table = util.createTable(TABLE, FAM); HRegionInfo hRegionInfo = new HRegionInfo(table.getName()); region = util.createLocalHRegion(hRegionInfo, table.getTableDescriptor()); gtInfo = newInfo(); GridTable gridTable = newTable(gtInfo); IGTScanner scanner = gridTable.scan(new GTScanRequestBuilder().setInfo(gtInfo).setRanges(null) .setDimensions(null).setFilterPushDown(null).createGTScanRequest()); for (GTRecord record : scanner) { byte[] value = record.exportColumns(gtInfo.getPrimaryKey()).toBytes(); byte[] key = new byte[RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN + value.length]; System.arraycopy(Bytes.toBytes(baseCuboid), 0, key, RowConstants.ROWKEY_SHARDID_LEN, RowConstants.ROWKEY_CUBOIDID_LEN); System.arraycopy(value, 0, key, RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN, value.length); Put put = new Put(key); put.addColumn(FAM, COL_M, record.exportColumns(gtInfo.getColumnBlock(1)).toBytes()); region.put(put); } }
/**
 * Converts fuzzy keys given as GTRecords into the (key, mask) byte-array pairs
 * expected by HBase.
 *
 * For every record one buffer is created by {@code fuzzyKeyEncoder} and one by
 * {@code fuzzyMaskEncoder}; each encoder then encodes the record, together with the
 * primary key of the record's own info, into its buffer.
 *
 * @param fuzzyKeys fuzzy keys in GTRecord form; may be null or empty
 * @return one (key, mask) pair per input record, in input order; an empty list when
 *         {@code fuzzyKeys} is null or empty
 */
private List<Pair<byte[], byte[]>> translateFuzzyKeys(List<GTRecord> fuzzyKeys) {
    boolean nothingToTranslate = fuzzyKeys == null || fuzzyKeys.isEmpty();
    if (nothingToTranslate) {
        return Collections.emptyList();
    }

    List<Pair<byte[], byte[]>> keyMaskPairs = Lists.newArrayList();
    for (GTRecord fuzzyRecord : fuzzyKeys) {
        // Both buffers are created up front: key buffer first, then mask buffer.
        byte[] keyBytes = fuzzyKeyEncoder.createBuf();
        byte[] maskBytes = fuzzyMaskEncoder.createBuf();

        fuzzyKeyEncoder.encode(fuzzyRecord, fuzzyRecord.getInfo().getPrimaryKey(), keyBytes);
        fuzzyMaskEncoder.encode(fuzzyRecord, fuzzyRecord.getInfo().getPrimaryKey(), maskBytes);

        keyMaskPairs.add(Pair.newPair(keyBytes, maskBytes));
    }
    return keyMaskPairs;
}
/**
 * Builds, for each selected HBase column, the list of GT column indexes whose values
 * that HBase column contains. The mapping should be passed down to storage.
 *
 * HBase columns are visited family by family, in the order given by the cube's HBase
 * mapping, and are numbered as column blocks starting from 1. Every measure index of a
 * selected column is shifted by the number of primary-key columns of {@code fullGTInfo}
 * to obtain the GT column index.
 *
 * @param selectedColBlocks bit set of the selected column blocks
 * @return one list of GT column indexes per selected HBase column, in mapping order
 */
protected List<List<Integer>> getHBaseColumnsGTMapping(ImmutableBitSet selectedColBlocks) {
    final List<List<Integer>> mapping = Lists.newArrayList();
    final int pkColumnCount = fullGTInfo.getPrimaryKey().trueBitCount();
    final HBaseMappingDesc mappingDesc = cubeSeg.getCubeDesc().getHbaseMapping();

    // Numbering starts at 1; presumably block 0 is the primary-key block - confirm.
    int blockIdx = 1;
    for (HBaseColumnFamilyDesc family : mappingDesc.getColumnFamily()) {
        for (HBaseColumnDesc column : family.getColumns()) {
            boolean selected = selectedColBlocks.get(blockIdx);
            blockIdx++;
            if (!selected) {
                continue;
            }

            int[] measureIndexes = column.getMeasureIndex();
            Integer[] shifted = new Integer[measureIndexes.length];
            int k = 0;
            for (int measureIndex : measureIndexes) {
                shifted[k++] = measureIndex + pkColumnCount;
            }
            mapping.add(Arrays.asList(shifted));
        }
    }

    // The selection must hold exactly one more set bit than the number of mapped columns.
    Preconditions.checkState(selectedColBlocks.trueBitCount() == mapping.size() + 1);
    return mapping;
}
@Override public void write(long cuboidId, GTRecord record) throws IOException { if (lastCuboidId == null || !lastCuboidId.equals(cuboidId)) { if (lastCuboidId != null) { logger.info("Cuboid " + lastCuboidId + " has " + cuboidRowCount + " rows"); cuboidRowCount = 0; } // output another cuboid initVariables(cuboidId); lastCuboidId = cuboidId; } cuboidRowCount++; rowKeyEncoder.encode(record, record.getInfo().getPrimaryKey(), keyBuf); //output measures valueBuf.clear(); try { record.exportColumns(measureColumns, valueBuf); } catch (BufferOverflowException boe) { valueBuf = ByteBuffer.allocate((int) (record.sizeOf(measureColumns) * 1.5)); record.exportColumns(measureColumns, valueBuf); } outputKey.set(keyBuf, 0, keyBuf.length); outputValue.set(valueBuf.array(), 0, valueBuf.position()); writeAsKeyValue(outputKey, outputValue); }
/**
 * Builds a scan range from a collection of per-column ranges.
 *
 * Ranges on columns that are not part of the primary key of {@code gtInfo} are skipped.
 * For every remaining range, its begin and end values are set on the start and end
 * records, and a non-empty value set is collected for fuzzy key generation.
 *
 * @param andDimRanges the column ranges to combine
 * @return a scan range made of the start record, the end record and the fuzzy keys
 *         returned by {@code buildFuzzyKeys}
 */
protected GTScanRange newScanRange(Collection<ColumnRange> andDimRanges) {
    GTRecord lowerBound = new GTRecord(gtInfo);
    GTRecord upperBound = new GTRecord(gtInfo);
    Map<Integer, Set<ByteArray>> fuzzyByColumn = Maps.newHashMap();

    for (ColumnRange dimRange : andDimRanges) {
        int colIdx = dimRange.column.getColumnDesc().getZeroBasedIndex();
        if (gtInfo.getPrimaryKey().get(colIdx)) {
            lowerBound.set(colIdx, dimRange.begin);
            upperBound.set(colIdx, dimRange.end);

            boolean hasValueSet = dimRange.valueSet != null && !dimRange.valueSet.isEmpty();
            if (hasValueSet) {
                fuzzyByColumn.put(colIdx, dimRange.valueSet);
            }
        }
    }

    return new GTScanRange(lowerBound, upperBound, buildFuzzyKeys(fuzzyByColumn));
}
@Override public int compareTo(ResultMergeSlot o) { long cuboidComp = this.currentCuboidId - o.currentCuboidId; if (cuboidComp != 0) return cuboidComp < 0 ? -1 : 1; // note GTRecord.equals() don't work because the two GTRecord comes from different GridTable ImmutableBitSet pk = this.currentRecord.getInfo().getPrimaryKey(); for (int i = 0; i < pk.trueBitCount(); i++) { int c = pk.trueBitAt(i); int comp = this.currentRecord.get(c).compareTo(o.currentRecord.get(c)); if (comp != 0) return comp; } return 0; }
@Override public int compareTo(MergeSlot o) { long cuboidComp = this.currentCuboidId - o.currentCuboidId; if (cuboidComp != 0) return cuboidComp < 0 ? -1 : 1; // note GTRecord.equals() don't work because the two GTRecord comes from different GridTable ImmutableBitSet pk = this.currentRecord.getInfo().getPrimaryKey(); for (int i = 0; i < pk.trueBitCount(); i++) { int c = pk.trueBitAt(i); int comp = this.currentRecord.get(c).compareTo(o.currentRecord.get(c)); if (comp != 0) return comp; } return 0; }
@Override public void encode(GTRecord record, ImmutableBitSet keyColumns, byte[] buf) { ByteArray byteArray = new ByteArray(buf, getHeaderLength(), 0); GTInfo info = record.getInfo(); byte fill; int pos = 0; for (int i = 0; i < info.getPrimaryKey().trueBitCount(); i++) { int c = info.getPrimaryKey().trueBitAt(i); int colLength = info.getCodeSystem().maxCodeLength(c); if (record.get(c).array() != null) { fill = RowConstants.BYTE_ZERO; } else { fill = RowConstants.BYTE_ONE; } Arrays.fill(byteArray.array(), byteArray.offset() + pos, byteArray.offset() + pos + colLength, fill); pos += colLength; } byteArray.setLength(pos); //fill shard and cuboid fillHeader(buf); }
private RawScan preparedHBaseScan(GTRecord pkStart, GTRecord pkEnd, List<GTRecord> fuzzyKeys, ImmutableBitSet selectedColBlocks) { final List<Pair<byte[], byte[]>> selectedColumns = makeHBaseColumns(selectedColBlocks); LazyRowKeyEncoder encoder = new LazyRowKeyEncoder(cubeSeg, cuboid); byte[] start = encoder.createBuf(); byte[] end = encoder.createBuf(); encoder.setBlankByte(RowConstants.ROWKEY_LOWER_BYTE); encoder.encode(pkStart, pkStart.getInfo().getPrimaryKey(), start); encoder.setBlankByte(RowConstants.ROWKEY_UPPER_BYTE); encoder.encode(pkEnd, pkEnd.getInfo().getPrimaryKey(), end); byte[] temp = new byte[end.length + 1];//append extra 0 to the end key to make it inclusive while scanning System.arraycopy(end, 0, temp, 0, end.length); end = temp; List<Pair<byte[], byte[]>> hbaseFuzzyKeys = translateFuzzyKeys(fuzzyKeys); KylinConfig config = cubeSeg.getCubeDesc().getConfig(); int hbaseCaching = config.getHBaseScanCacheRows(); int hbaseMaxResultSize = config.getHBaseScanMaxResultSize(); // if (isMemoryHungry(selectedColBlocks)) // hbaseCaching /= 10; return new RawScan(start, end, selectedColumns, hbaseFuzzyKeys, hbaseCaching, hbaseMaxResultSize); }