@Override
public byte[] apply(byte[] input) {
  return rowKeyDistributor.getOriginalKey(input);
}
};
@Override
public byte[] getActualRowKey(ConsumerConfig consumerConfig, byte[] originalRowKey) {
  return rowKeyDistributor.getDistributedKey(originalRowKey);
}
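// A minimal round-trip sketch, not part of the snippets above (the distributor
// construction is an assumption, using HBaseWD's RowKeyDistributorByHashPrefix):
// whatever getDistributedKey(...) prepends, getOriginalKey(...) must strip, so
// the two callbacks above are exact inverses of each other.
AbstractRowKeyDistributor distributor =
    new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(32));
byte[] originalKey = Bytes.toBytes("user#42");
byte[] distributedKey = distributor.getDistributedKey(originalKey);  // bucket prefix + originalKey
assert Arrays.equals(originalKey, distributor.getOriginalKey(distributedKey));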
byte[][] bucketSplits = keyDistributor.getAllDistributedKeys(Bytes.EMPTY_BYTE_ARRAY);
Preconditions.checkArgument(splits >= 1 && splits <= MAX_SPLIT_COUNT_PER_BUCKET * bucketSplits.length,
                            "Number of pre-splits should be in [1.." +
                            MAX_SPLIT_COUNT_PER_BUCKET * bucketSplits.length + "] range");
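// Illustration of the bound being checked above (assumption: HBaseWD's
// RowKeyDistributorByOneBytePrefix over 4 buckets). getAllDistributedKeys(...)
// on the empty key yields one start key per bucket, so the argument check caps
// the total at MAX_SPLIT_COUNT_PER_BUCKET pre-splits per bucket.
AbstractRowKeyDistributor d = new RowKeyDistributorByOneBytePrefix((byte) 4);
byte[][] starts = d.getAllDistributedKeys(Bytes.EMPTY_BYTE_ARRAY);
// starts.length == 4, with per-bucket start keys {0x00}, {0x01}, {0x02}, {0x03}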
/** Testing simple get. */
@Test
public void testGet() throws IOException, InterruptedException {
  byte[] key = new byte[] {123, 124, 122};
  byte[] distributedKey = keyDistributor.getDistributedKey(key);
  byte[] value = Bytes.toBytes("some");

  hTable.put(new Put(distributedKey).add(CF, QUAL, value));

  Result result = hTable.get(new Get(distributedKey));
  Assert.assertArrayEquals(key, keyDistributor.getOriginalKey(result.getRow()));
  Assert.assertArrayEquals(value, result.getValue(CF, QUAL));
}
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  List<InputSplit> allSplits = new ArrayList<>();
  Scan originalScan = getScan();

  Scan[] scans = rowKeyDistributor.getDistributedScans(originalScan);
  for (Scan scan : scans) {
    // Internally super.getSplits(...) uses the scan object stored in a private field;
    // to reuse the superclass code we temporarily swap in each distributed scan.
    setScan(scan);
    List<InputSplit> splits = super.getSplits(context);
    allSplits.addAll(splits);
  }

  // Restore the original scan.
  setScan(originalScan);
  return allSplits;
}
}
public final Scan[] getDistributedScans(Scan original) throws IOException {
  Pair<byte[], byte[]>[] intervals = getDistributedIntervals(original.getStartRow(), original.getStopRow());

  Scan[] scans = new Scan[intervals.length];
  for (int i = 0; i < intervals.length; i++) {
    scans[i] = new Scan(original);
    scans[i].setStartRow(intervals[i].getFirst());
    scans[i].setStopRow(intervals[i].getSecond());
  }
  return scans;
}
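// Hypothetical caller for getDistributedScans(...) (table, key range, and the
// process(...) hook are assumptions): one logical range scan fans out into one
// scan per bucket, and each returned row key is mapped back to its original
// form before processing.
Scan logicalScan = new Scan(Bytes.toBytes("a"), Bytes.toBytes("z"));
for (Scan bucketScan : keyDistributor.getDistributedScans(logicalScan)) {
  ResultScanner scanner = hTable.getScanner(bucketScan);
  try {
    for (Result r : scanner) {
      byte[] originalKey = keyDistributor.getOriginalKey(r.getRow());
      // process(originalKey, r) ...
    }
  } finally {
    scanner.close();
  }
}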
private ScanBuilder configureRangeScan(ScanBuilder scan, @Nullable byte[] startRow, @Nullable byte[] stopRow,
                                       @Nullable FuzzyRowFilter filter) {
  // TODO: should be configurable
  scan.setCaching(1000);

  if (startRow != null) {
    scan.setStartRow(startRow);
  }
  if (stopRow != null) {
    scan.setStopRow(stopRow);
  }
  scan.addFamily(columnFamily);

  if (filter != null) {
    List<Pair<byte[], byte[]>> fuzzyPairs = Lists.newArrayListWithExpectedSize(filter.getFuzzyKeysData().size());
    for (ImmutablePair<byte[], byte[]> pair : filter.getFuzzyKeysData()) {
      if (rowKeyDistributor != null) {
        fuzzyPairs.addAll(rowKeyDistributor.getDistributedFilterPairs(pair));
      } else {
        // Make a copy of the filter pair because the key and mask get modified in HBase's FuzzyRowFilter.
        fuzzyPairs.add(Pair.newPair(Arrays.copyOf(pair.getFirst(), pair.getFirst().length),
                                    Arrays.copyOf(pair.getSecond(), pair.getSecond().length)));
      }
    }
    scan.setFilter(new org.apache.hadoop.hbase.filter.FuzzyRowFilter(fuzzyPairs));
  }
  return scan;
}
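// Background sketch for the mask convention used above, calling HBase's own
// org.apache.hadoop.hbase.filter.FuzzyRowFilter directly (the byte values are
// illustrative, not from the original): in each (key, mask) pair a mask byte of
// 0 means "must equal the key byte" and 1 means "any byte" — and HBase mutates
// the arrays in place, which is why the method above copies each pair.
Pair<byte[], byte[]> fuzzyPair = Pair.newPair(
    new byte[] {10, 20, 0, 0},  // key: fixed prefix, wildcard positions zeroed
    new byte[] {0, 0, 1, 1});   // mask: 0 = fixed, 1 = any
Filter fuzzy = new org.apache.hadoop.hbase.filter.FuzzyRowFilter(Lists.newArrayList(fuzzyPair));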
private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
                                   int seekIntervalMinValue, int seekIntervalMaxValue)
    throws IOException, InterruptedException, ClassNotFoundException {
  int valuesCountInSeekInterval =
      writeTestData(origKeyPrefix, numValues, startWithValue, seekIntervalMinValue, seekIntervalMaxValue);

  // Reading data
  Configuration conf = new Configuration(testingUtility.getConfiguration());
  conf.set("fs.defaultFS", "file:///");
  conf.set("fs.default.name", "file:///");
  conf.setInt("mapreduce.local.map.tasks.maximum", 16);
  conf.setInt("mapreduce.local.reduce.tasks.maximum", 16);

  Job job = Job.getInstance(conf, "testMapReduceInternal()-Job");
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
                                        ImmutableBytesWritable.class, Result.class, job);

  // Substitute the standard TableInputFormat set by TableMapReduceUtil.initTableMapperJob(...).
  job.setInputFormatClass(WdTableInputFormat.class);
  keyDistributor.addInfo(job.getConfiguration());

  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);

  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);

  long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
  Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);

  // Kill the job after completion: otherwise it can leave the MRAppMaster running.
  // The cause is unclear; it may be a MiniYarnCluster issue.
  job.killJob();
}
@Override
public byte[] getRow() {
  return rowKeyDistributor.getOriginalKey(result.getRow());
}
};
private byte[] createDistributedRowKey(byte[] row) {
  return rowKeyDistributor == null ? row : rowKeyDistributor.getDistributedKey(row);
}
public static DistributedScanner create(HTableInterface hTable, Scan originalScan,
                                        AbstractRowKeyDistributor keyDistributor,
                                        ExecutorService scansExecutor) throws IOException {
  Scan[] scans = keyDistributor.getDistributedScans(originalScan);

  ResultScanner[] rss = new ResultScanner[scans.length];
  for (int i = 0; i < scans.length; i++) {
    rss[i] = hTable.getScanner(scans[i]);
  }

  int caching = originalScan.getCaching();
  // The distributed scan needs the caching value to work efficiently, so when
  // it is not set on the scan we resolve it from the configuration instead.
  if (caching < 1) {
    caching = hTable.getConfiguration().getInt("hbase.client.scanner.caching", 1);
  }
  return new DistributedScanner(keyDistributor, rss, caching, scansExecutor);
}
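// Hypothetical usage of the factory above (executor sizing is an assumption,
// as is iterating DistributedScanner like the ResultScanner instances it
// wraps): results are merged across all per-bucket scanners.
ExecutorService scansExecutor = Executors.newFixedThreadPool(8);
try {
  DistributedScanner scanner = DistributedScanner.create(hTable, new Scan(), keyDistributor, scansExecutor);
  try {
    for (Result r : scanner) {
      byte[] originalKey = keyDistributor.getOriginalKey(r.getRow());
      // process(originalKey, r) ...
    }
  } finally {
    scanner.close();
  }
} finally {
  scansExecutor.shutdown();
}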
byte[][] bucketSplits = getAllDistributedKeys(co.cask.cdap.api.common.Bytes.EMPTY_BYTE_ARRAY);
Preconditions.checkArgument(splits >= 1 && splits <= 0xff * bucketSplits.length,
                            "Number of pre-splits should be in [1.." +
                            0xff * bucketSplits.length + "] range");
@Override
public byte[] getRow() {
  return keyDistributor.getOriginalKey(result.getRow());
}