/** * Constructs a new instance with the given number of buckets for distributed scan. */ public ShardedHBaseQueueStrategy(HBaseTableUtil tableUtil, int distributorBuckets) { this.tableUtil = tableUtil; this.distributorBuckets = distributorBuckets; this.rowKeyDistributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix.OneByteSimpleHash(distributorBuckets)); // Using the "direct handoff" approach, new threads will only be created // if it is necessary and will grow unbounded. This could be bad but in DistributedScanner // we only create as many Runnables as there are buckets data is distributed to. It means // it also scales when buckets amount changes. ThreadPoolExecutor executor = new ThreadPoolExecutor(1, 20, 60, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("queue-consumer-scan")); executor.allowCoreThreadTimeOut(true); this.scansExecutor = executor; }
/**
 * Computes pre-split keys for the v3 metrics table.
 *
 * @param splits desired number of table splits; also used as the bucket count for the
 *               one-byte hash prefix, so splits and buckets line up one-to-one
 * @return the split keys produced by the row-key distributor
 */
private static byte[][] getV3MetricsTableSplits(int splits) {
  return new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(splits)).getSplitKeys(splits, splits);
}
private void testDistribution(int maxBuckets, int countForEachBucket) { RowKeyDistributorByHashPrefix distributor = new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(maxBuckets)); int[] bucketCounts = new int[maxBuckets]; for (int i = 0; i < maxBuckets * countForEachBucket; i++) { byte[] original = Bytes.toBytes(i); byte[] distributed = distributor.getDistributedKey(original); bucketCounts[distributed[0] & 0xff]++; } byte[][] allKeys = distributor.getAllDistributedKeys(new byte[0]); Assert.assertEquals(maxBuckets, allKeys.length); for (int bucketCount : bucketCounts) { // i.e. all buckets expected to have similar amount of values (+- 10%) Assert.assertTrue("Unexpected values count in bucket: " + bucketCount + ", avg: " + countForEachBucket, Math.abs((countForEachBucket - bucketCount) / countForEachBucket) < 0.10); } } }
/**
 * Creates a {@link AbstractRowKeyDistributor} based on the meta data in the given
 * {@link HTableDescriptor}. Falls back to {@link QueueConstants#DEFAULT_ROW_KEY_BUCKETS}
 * when the table carries no explicit bucket setting.
 */
private AbstractRowKeyDistributor createKeyDistributor(HTableDescriptor htd) {
  String configured = htd.getValue(QueueConstants.DISTRIBUTOR_BUCKETS);
  int buckets = (configured == null)
      ? QueueConstants.DEFAULT_ROW_KEY_BUCKETS
      : Integer.parseInt(configured);
  return new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(buckets));
} }
/**
 * Computes pre-split keys for the metrics table.
 *
 * @param splits desired number of table splits; also the bucket count of the one-byte
 *               hash prefix, so each bucket maps to exactly one split
 * @return the split keys produced by the row-key distributor
 */
private static byte[][] getMetricsTableSplits(int splits) {
  return new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(splits)).getSplitKeys(splits, splits);
}
private void initializeVars(CConfiguration cConf, DatasetSpecification spec) { this.scanExecutor = null; this.rowKeyDistributor = null; RejectedExecutionHandler callerRunsPolicy = (r, executor) -> { REJECTION_LOG.info( "No more threads in the HBase scan thread pool. Consider increase {}. Performing scan in caller thread {}", Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS, Thread.currentThread().getName() ); // Runs it from the caller thread if (!executor.isShutdown()) { r.run(); } }; int maxScanThread = cConf.getInt(Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS); // Creates a executor that will shrink to 0 threads if left idle // Uses daemon thread, hence no need to worry about shutdown // When all threads are busy, use the caller thread to execute this.scanExecutor = new ThreadPoolExecutor(0, maxScanThread, 60L, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), Threads.createDaemonThreadFactory("metrics-hbase-scanner-%d"), callerRunsPolicy); this.rowKeyDistributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix. OneByteSimpleHash(spec.getIntProperty(Constants.Metrics.METRICS_HBASE_TABLE_SPLITS, 16))); }
/**
 * Constructs a new instance with the given number of buckets for distributed scan.
 *
 * @param tableUtil utility used for HBase table access
 * @param distributorBuckets number of buckets the row keys are salted across
 */
SaltedHBaseQueueStrategy(HBaseTableUtil tableUtil, int distributorBuckets) {
  this.tableUtil = tableUtil;
  this.distributorBuckets = distributorBuckets;
  this.rowKeyDistributor = new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(distributorBuckets));

  // "Direct handoff" pool: SynchronousQueue spawns a thread whenever none is idle. Growth is
  // effectively bounded because DistributedScanner submits one Runnable per bucket, so the
  // pool scales with the bucket count.
  ThreadPoolExecutor scanThreadPool = new ThreadPoolExecutor(
      1, 20, 60, TimeUnit.SECONDS,
      new SynchronousQueue<Runnable>(),
      Threads.newDaemonThreadFactory("queue-consumer-scan"));
  // Let even the core thread time out so an idle strategy holds no threads.
  scanThreadPool.allowCoreThreadTimeOut(true);
  this.scansExecutor = scanThreadPool;

  // Strip the salt prefix to recover the original row key (lambda over anonymous class;
  // the field is read at invocation time, same as the original inner class did).
  this.rowKeyConverter = input -> rowKeyDistributor.getOriginalKey(input);
}
callerRunsPolicy); this.rowKeyDistributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix. OneByteSimpleHash(spec.getIntProperty(Constants.Metrics.METRICS_HBASE_TABLE_SPLITS, 16)));
/**
 * Runs the distributor test suite against a pass-through {@link IdentityHash} prefix.
 */
public IdentityHashDistributorTestRun() {
  super(new RowKeyDistributorByHashPrefix(new IdentityHash()));
}
/**
 * Runs the distributor test suite against a one-byte simple hash prefix with 15 buckets.
 */
public OneByteSimpleHashDistributorTestRun() {
  super(new RowKeyDistributorByHashPrefix(
      new RowKeyDistributorByHashPrefix.OneByteSimpleHash(15)));
} }
/**
 * Runs the distributor test suite against a multi-byte prefix hash.
 */
public MultiBytesPrefixHashDistributorTestRun() {
  super(new RowKeyDistributorByHashPrefix(new MultiBytesPrefixHash()));
}
@Override public void create() throws IOException { // Create the queue table TableDescriptorBuilder tdBuilder = HBaseTableUtil.getTableDescriptorBuilder(tableId, cConf); for (String key : properties.stringPropertyNames()) { tdBuilder.addProperty(key, properties.getProperty(key)); } ColumnFamilyDescriptorBuilder cfdBuilder = HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf); tdBuilder.addColumnFamily(cfdBuilder.build()); // Add coprocessors CoprocessorJar coprocessorJar = createCoprocessorJar(); for (Class<? extends Coprocessor> coprocessor : coprocessorJar.getCoprocessors()) { tdBuilder.addCoprocessor( coprocessorManager.getCoprocessorDescriptor(coprocessor, coprocessorJar.getPriority(coprocessor))); } // Create queue table with splits. The distributor bucket size is the same as splits. int splits = cConf.getInt(QueueConstants.ConfigKeys.QUEUE_TABLE_PRESPLITS); AbstractRowKeyDistributor distributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix.OneByteSimpleHash(splits)); byte[][] splitKeys = HBaseTableUtil.getSplitKeys(splits, splits, distributor); tdBuilder.addProperty(QueueConstants.DISTRIBUTOR_BUCKETS, Integer.toString(splits)); createQueueTable(tdBuilder, splitKeys); }
splits, splits, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(splits))); ddlExecutor.createTableIfNotExists(tdBuilder.build(), splitKeys); hTable, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(getKeyDistributorBuckets(tableId, htd))) );
splits, splits, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(splits))); ddlExecutor.createTableIfNotExists(tdBuilder.build(), splitKeys); hTable, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(getKeyDistributorBuckets(tableId, htd))) );
/**
 * Creates an HBase-backed stream consumer. Ensures the consumer state table exists
 * (pre-split with a one-byte hash-prefix distributor sized by configuration), then opens an
 * {@link HTable} tuned for buffered writes and wires it into a {@link HBaseStreamFileConsumer}.
 *
 * @throws IOException if table creation or the HTable handle fails
 */
@Override
protected StreamConsumer create(TableId tableId, StreamConfig streamConfig, ConsumerConfig consumerConfig,
                                StreamConsumerStateStore stateStore, StreamConsumerState beginConsumerState,
                                FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader,
                                @Nullable ReadFilter extraFilter) throws IOException {
  // Pre-split keys: bucket count equals the configured split count so buckets map 1:1 to regions.
  int presplits = cConf.getInt(Constants.Stream.CONSUMER_TABLE_PRESPLITS);
  AbstractRowKeyDistributor keyDistributor = new RowKeyDistributorByHashPrefix(
    new RowKeyDistributorByHashPrefix.OneByteSimpleHash(presplits));
  byte[][] splitKeys = HBaseTableUtil.getSplitKeys(presplits, presplits, keyDistributor);

  TableId hBaseTableId =
    tableUtil.createHTableId(new NamespaceId(tableId.getNamespace()), tableId.getTableName());
  TableDescriptorBuilder tableBuilder = HBaseTableUtil.getTableDescriptorBuilder(hBaseTableId, cConf);
  ColumnFamilyDescriptorBuilder familyBuilder =
    HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf);
  tableBuilder.addColumnFamily(familyBuilder.build());
  // Record the bucket count on the table so later readers can rebuild the same distributor.
  tableBuilder.addProperty(QueueConstants.DISTRIBUTOR_BUCKETS, Integer.toString(presplits));

  try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
    ddlExecutor.createTableIfNotExists(tableBuilder.build(), splitKeys);
  }

  HTable hTable = tableUtil.createHTable(hConf, hBaseTableId);
  hTable.setWriteBufferSize(Constants.Stream.HBASE_WRITE_BUFFER_SIZE);
  // Buffer mutations client-side; the consumer flushes explicitly.
  hTable.setAutoFlushTo(false);

  return new HBaseStreamFileConsumer(cConf, streamConfig, consumerConfig, tableUtil, hTable,
                                     reader, stateStore, beginConsumerState, extraFilter,
                                     createKeyDistributor(hTable.getTableDescriptor()));
}
@Test public void testGetSplitKeys() { int buckets = 16; AbstractRowKeyDistributor distributor = new RowKeyDistributorByHashPrefix( new RowKeyDistributorByHashPrefix.OneByteSimpleHash(buckets)); // Number of splits will be no less than user asked. If splits > buckets, the number of splits will bumped to // next multiple of bucket that is no less than user splits requested. // it should return one key less than required splits count, because HBase will take care of the first automatically Assert.assertEquals(getSplitSize(buckets, 12) - 1, HBaseTableUtil.getSplitKeys(12, buckets, distributor).length); Assert.assertEquals(getSplitSize(buckets, 16) - 1, HBaseTableUtil.getSplitKeys(16, buckets, distributor).length); // at least #buckets - 1, but no less than user asked Assert.assertEquals(buckets - 1, HBaseTableUtil.getSplitKeys(6, buckets, distributor).length); Assert.assertEquals(buckets - 1, HBaseTableUtil.getSplitKeys(2, buckets, distributor).length); // "1" can be used for queue tables that we know are not "hot", so we do not pre-split in this case Assert.assertEquals(0, HBaseTableUtil.getSplitKeys(1, buckets, distributor).length); // allows up to 255 * 8 - 1 splits Assert.assertEquals(255 * buckets - 1, HBaseTableUtil.getSplitKeys(255 * buckets, buckets, distributor).length); try { HBaseTableUtil.getSplitKeys(256 * buckets, buckets, distributor); Assert.fail("getSplitKeys(256) should have thrown IllegalArgumentException"); } catch (IllegalArgumentException e) { // expected } try { HBaseTableUtil.getSplitKeys(0, buckets, distributor); Assert.fail("getSplitKeys(0) should have thrown IllegalArgumentException"); } catch (IllegalArgumentException e) { // expected } }