/** * Constructs a new instance with the given number of buckets for distributed scan. */ public ShardedHBaseQueueStrategy(HBaseTableUtil tableUtil, int distributorBuckets) { this.tableUtil = tableUtil; this.distributorBuckets = distributorBuckets; this.rowKeyDistributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix.OneByteSimpleHash(distributorBuckets)); // Using the "direct handoff" approach, new threads will only be created // if it is necessary and will grow unbounded. This could be bad but in DistributedScanner // we only create as many Runnables as there are buckets data is distributed to. It means // it also scales when buckets amount changes. ThreadPoolExecutor executor = new ThreadPoolExecutor(1, 20, 60, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("queue-consumer-scan")); executor.allowCoreThreadTimeOut(true); this.scansExecutor = executor; }
/**
 * Computes pre-split keys for the v3 metrics table.
 *
 * @param splits desired number of table splits; also used as the bucket count for the
 *               one-byte hash prefix, so splits and buckets line up one-to-one
 * @return the split keys produced by the row-key distributor
 */
private static byte[][] getV3MetricsTableSplits(int splits) {
  return new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(splits)).getSplitKeys(splits, splits);
}
private void testDistribution(int maxBuckets, int countForEachBucket) { RowKeyDistributorByHashPrefix distributor = new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(maxBuckets)); int[] bucketCounts = new int[maxBuckets]; for (int i = 0; i < maxBuckets * countForEachBucket; i++) { byte[] original = Bytes.toBytes(i); byte[] distributed = distributor.getDistributedKey(original); bucketCounts[distributed[0] & 0xff]++; } byte[][] allKeys = distributor.getAllDistributedKeys(new byte[0]); Assert.assertEquals(maxBuckets, allKeys.length); for (int bucketCount : bucketCounts) { // i.e. all buckets expected to have similar amount of values (+- 10%) Assert.assertTrue("Unexpected values count in bucket: " + bucketCount + ", avg: " + countForEachBucket, Math.abs((countForEachBucket - bucketCount) / countForEachBucket) < 0.10); } } }
/**
 * Creates a {@link AbstractRowKeyDistributor} based on the meta data in the given
 * {@link HTableDescriptor}. Falls back to {@link QueueConstants#DEFAULT_ROW_KEY_BUCKETS}
 * when the table carries no explicit bucket setting.
 */
private AbstractRowKeyDistributor createKeyDistributor(HTableDescriptor htd) {
  String configured = htd.getValue(QueueConstants.DISTRIBUTOR_BUCKETS);
  int buckets = (configured == null)
      ? QueueConstants.DEFAULT_ROW_KEY_BUCKETS
      : Integer.parseInt(configured);
  return new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(buckets));
} }
/**
 * Computes pre-split keys for the metrics table.
 *
 * @param splits desired number of table splits; also the bucket count of the one-byte
 *               hash prefix, so each bucket maps to exactly one split
 * @return the split keys produced by the row-key distributor
 */
private static byte[][] getMetricsTableSplits(int splits) {
  return new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(splits)).getSplitKeys(splits, splits);
}
private void initializeVars(CConfiguration cConf, DatasetSpecification spec) { this.scanExecutor = null; this.rowKeyDistributor = null; RejectedExecutionHandler callerRunsPolicy = (r, executor) -> { REJECTION_LOG.info( "No more threads in the HBase scan thread pool. Consider increase {}. Performing scan in caller thread {}", Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS, Thread.currentThread().getName() ); // Runs it from the caller thread if (!executor.isShutdown()) { r.run(); } }; int maxScanThread = cConf.getInt(Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS); // Creates a executor that will shrink to 0 threads if left idle // Uses daemon thread, hence no need to worry about shutdown // When all threads are busy, use the caller thread to execute this.scanExecutor = new ThreadPoolExecutor(0, maxScanThread, 60L, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), Threads.createDaemonThreadFactory("metrics-hbase-scanner-%d"), callerRunsPolicy); this.rowKeyDistributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix. OneByteSimpleHash(spec.getIntProperty(Constants.Metrics.METRICS_HBASE_TABLE_SPLITS, 16))); }
/**
 * Constructs a new instance with the given number of buckets for distributed scan.
 *
 * @param tableUtil utility used for HBase table access
 * @param distributorBuckets number of buckets the row keys are salted across
 */
SaltedHBaseQueueStrategy(HBaseTableUtil tableUtil, int distributorBuckets) {
  this.tableUtil = tableUtil;
  this.distributorBuckets = distributorBuckets;
  this.rowKeyDistributor = new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(distributorBuckets));

  // "Direct handoff" pool: SynchronousQueue spawns a thread whenever none is idle. Growth is
  // effectively bounded because DistributedScanner submits one Runnable per bucket, so the
  // pool scales with the bucket count.
  ThreadPoolExecutor scanThreadPool = new ThreadPoolExecutor(
      1, 20, 60, TimeUnit.SECONDS,
      new SynchronousQueue<Runnable>(),
      Threads.newDaemonThreadFactory("queue-consumer-scan"));
  // Let even the core thread time out so an idle strategy holds no threads.
  scanThreadPool.allowCoreThreadTimeOut(true);
  this.scansExecutor = scanThreadPool;

  // Strip the salt prefix to recover the original row key (lambda over anonymous class;
  // the field is read at invocation time, same as the original inner class did).
  this.rowKeyConverter = input -> rowKeyDistributor.getOriginalKey(input);
}
callerRunsPolicy); this.rowKeyDistributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix. OneByteSimpleHash(spec.getIntProperty(Constants.Metrics.METRICS_HBASE_TABLE_SPLITS, 16)));
/**
 * Runs the distributor test suite against a pass-through {@link IdentityHash} prefix.
 */
public IdentityHashDistributorTestRun() {
  super(new RowKeyDistributorByHashPrefix(new IdentityHash()));
}
/**
 * Runs the distributor test suite against a one-byte simple hash prefix with 15 buckets.
 */
public OneByteSimpleHashDistributorTestRun() {
  super(new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(15)));
} }
/**
 * Runs the distributor test suite against a multi-byte prefix hash.
 */
public MultiBytesPrefixHashDistributorTestRun() {
  super(new RowKeyDistributorByHashPrefix(new MultiBytesPrefixHash()));
}
@Override public void create() throws IOException { // Create the queue table TableDescriptorBuilder tdBuilder = HBaseTableUtil.getTableDescriptorBuilder(tableId, cConf); for (String key : properties.stringPropertyNames()) { tdBuilder.addProperty(key, properties.getProperty(key)); } ColumnFamilyDescriptorBuilder cfdBuilder = HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf); tdBuilder.addColumnFamily(cfdBuilder.build()); // Add coprocessors CoprocessorJar coprocessorJar = createCoprocessorJar(); for (Class<? extends Coprocessor> coprocessor : coprocessorJar.getCoprocessors()) { tdBuilder.addCoprocessor( coprocessorManager.getCoprocessorDescriptor(coprocessor, coprocessorJar.getPriority(coprocessor))); } // Create queue table with splits. The distributor bucket size is the same as splits. int splits = cConf.getInt(QueueConstants.ConfigKeys.QUEUE_TABLE_PRESPLITS); AbstractRowKeyDistributor distributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix.OneByteSimpleHash(splits)); byte[][] splitKeys = HBaseTableUtil.getSplitKeys(splits, splits, distributor); tdBuilder.addProperty(QueueConstants.DISTRIBUTOR_BUCKETS, Integer.toString(splits)); createQueueTable(tdBuilder, splitKeys); }
splits, splits, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(splits))); ddlExecutor.createTableIfNotExists(tdBuilder.build(), splitKeys); hTable, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(getKeyDistributorBuckets(tableId, htd))) );
splits, splits, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(splits))); ddlExecutor.createTableIfNotExists(tdBuilder.build(), splitKeys); hTable, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(getKeyDistributorBuckets(tableId, htd))) );
/**
 * Creates an HBase-backed stream consumer. Ensures the consumer state table exists
 * (pre-split with a one-byte hash-prefix distributor sized by configuration), then opens an
 * {@link HTable} tuned for buffered writes and wires it into a {@link HBaseStreamFileConsumer}.
 *
 * @throws IOException if table creation or the HTable handle fails
 */
@Override
protected StreamConsumer create(TableId tableId, StreamConfig streamConfig, ConsumerConfig consumerConfig,
                                StreamConsumerStateStore stateStore, StreamConsumerState beginConsumerState,
                                FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader,
                                @Nullable ReadFilter extraFilter) throws IOException {
  // Pre-split keys: bucket count equals the configured split count so buckets map 1:1 to regions.
  int presplits = cConf.getInt(Constants.Stream.CONSUMER_TABLE_PRESPLITS);
  AbstractRowKeyDistributor keyDistributor = new RowKeyDistributorByHashPrefix(
    new RowKeyDistributorByHashPrefix.OneByteSimpleHash(presplits));
  byte[][] splitKeys = HBaseTableUtil.getSplitKeys(presplits, presplits, keyDistributor);

  TableId hBaseTableId =
    tableUtil.createHTableId(new NamespaceId(tableId.getNamespace()), tableId.getTableName());
  TableDescriptorBuilder tableBuilder = HBaseTableUtil.getTableDescriptorBuilder(hBaseTableId, cConf);
  ColumnFamilyDescriptorBuilder familyBuilder =
    HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf);
  tableBuilder.addColumnFamily(familyBuilder.build());
  // Record the bucket count on the table so later readers can rebuild the same distributor.
  tableBuilder.addProperty(QueueConstants.DISTRIBUTOR_BUCKETS, Integer.toString(presplits));

  try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
    ddlExecutor.createTableIfNotExists(tableBuilder.build(), splitKeys);
  }

  HTable hTable = tableUtil.createHTable(hConf, hBaseTableId);
  hTable.setWriteBufferSize(Constants.Stream.HBASE_WRITE_BUFFER_SIZE);
  // Buffer mutations client-side; the consumer flushes explicitly.
  hTable.setAutoFlushTo(false);

  return new HBaseStreamFileConsumer(cConf, streamConfig, consumerConfig, tableUtil, hTable,
                                     reader, stateStore, beginConsumerState, extraFilter,
                                     createKeyDistributor(hTable.getTableDescriptor()));
}
@Test public void testGetSplitKeys() { int buckets = 16; AbstractRowKeyDistributor distributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix.OneByteSimpleHash(buckets)); // Number of splits will be no less than user asked. If splits > buckets, the number of splits will bumped to // next multiple of bucket that is no less than user splits requested. // it should return one key less than required splits count, because HBase will take care of the first automatically Assert.assertEquals(getSplitSize(buckets, 12) - 1, HBaseTableUtil.getSplitKeys(12, buckets, distributor).length); Assert.assertEquals(getSplitSize(buckets, 16) - 1, HBaseTableUtil.getSplitKeys(16, buckets, distributor).length); // at least #buckets - 1, but no less than user asked Assert.assertEquals(buckets - 1, HBaseTableUtil.getSplitKeys(6, buckets, distributor).length); Assert.assertEquals(buckets - 1, HBaseTableUtil.getSplitKeys(2, buckets, distributor).length); // "1" can be used for queue tables that we know are not "hot", so we do not pre-split in this case Assert.assertEquals(0, HBaseTableUtil.getSplitKeys(1, buckets, distributor).length); // allows up to 255 * 8 - 1 splits Assert.assertEquals(255 * buckets - 1, HBaseTableUtil.getSplitKeys(255 * buckets, buckets, distributor).length); try { HBaseTableUtil.getSplitKeys(256 * buckets, buckets, distributor); Assert.fail("getSplitKeys(256) should have thrown IllegalArgumentException"); } catch (IllegalArgumentException e) { // expected } try { HBaseTableUtil.getSplitKeys(0, buckets, distributor); Assert.fail("getSplitKeys(0) should have thrown IllegalArgumentException"); } catch (IllegalArgumentException e) { // expected } }