org.apache.hadoop.mapreduce.lib.partition Java code examples

/**
 * Asserts that the {@link TotalOrderPartitioner} partitions file named in
 * <code>conf</code> no longer exists. The check is only performed when
 * {@link IntegrationTestingUtility#IS_DISTRIBUTED_CLUSTER} is set to true in
 * <code>conf</code>; otherwise this method does nothing.
 * @param conf configuration holding the cluster flag and the partitions file location
 * @throws IOException if the file system cannot be obtained or queried
 */
protected static void validateDeletedPartitionsFile(Configuration conf) throws IOException {
 final boolean distributedCluster =
   conf.getBoolean(IntegrationTestingUtility.IS_DISTRIBUTED_CLUSTER, false);
 if (distributedCluster) {
  final FileSystem fileSystem = FileSystem.get(conf);
  final String partitionFileName = TotalOrderPartitioner.getPartitionFile(conf);
  final boolean stillPresent = fileSystem.exists(new Path(partitionFileName));
  assertFalse("Failed to clean up partitions file.", stillPresent);
 }
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. The partitions file is created under the directory
 * configured as <code>hbase.fs.tmp.dir</code> and is registered with
 * {@link FileSystem#deleteOnExit(Path)} so it is cleaned up after the job exits.
 * @param job the job whose configuration and partitioner are set up
 * @param splitPoints the split points handed to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions
 *         file cannot be written
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException {
  Configuration conf = job.getConfiguration();
  // create the partitions file
  FileSystem fs = FileSystem.get(conf);
  // makeQualified returns the qualified path instead of modifying its argument,
  // so the result must be kept; previously it was discarded and the
  // unqualified path was used everywhere below.
  Path partitionsPath = fs.makeQualified(
      new Path(conf.get("hbase.fs.tmp.dir"), "partitions_" + RandomUtil.randomUUID()));
  writePartitions(conf, partitionsPath, splitPoints);
  fs.deleteOnExit(partitionsPath);
  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

 ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
// the number of partitions is taken from the job's reduce task count
int numPartitions = job.getNumReduceTasks();
// obtain the sample keys from the sampler for this job's input format
K[] samples = (K[])sampler.getSample(inf, job);
LOG.info("Using " + samples.length + " samples");
// NOTE(review): the samples are ordered with the job's grouping comparator;
// confirm this is the same ordering the partitioner applies when it looks keys up.
RawComparator<K> comparator = (RawComparator<K>) job.getGroupingComparator();
Arrays.sort(samples, comparator);
// locate the partition file named in the configuration
Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
FileSystem fs = dst.getFileSystem(conf);
// remove a file already present at that location (non-recursive delete)
if (fs.exists(dst)) fs.delete(dst, false);

/**
 * Builds, from a sorted set of cut points, a trie that locates the correct
 * partition quickly. This is the entry point to the recursive builder: it
 * creates a fresh <code>CarriedTrieNodeRef</code> and hands everything to
 * <code>buildTrieRec</code>.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that has already been checked against
 * @param maxDepth the maximum depth a trie will be built for
 * @return the trie node that will divide the splits correctly
 */
private TrieNode buildTrie(BinaryComparable[] splits, int lower,
    int upper, byte[] prefix, int maxDepth) {
  final CarriedTrieNodeRef carriedRef = new CarriedTrieNodeRef();
  return buildTrieRec(splits, lower, upper, prefix, maxDepth, carriedRef);
}

 /**
  * Applies the given job configuration by passing it to the superclass's
  * <code>setConf</code>.
  * @param job the job configuration forwarded to <code>super.setConf</code>
  */
 public void configure(JobConf job) {
  super.setConf(job);
 }
}

 /**
  * Applies the given job configuration by passing it to the superclass's
  * <code>setConf</code>.
  * @param job the job configuration forwarded to <code>super.setConf</code>
  */
 public void configure(JobConf job) {
  super.setConf(job);
 }
}

/**
 * Returns the partition for <code>key</code> as reported by
 * <code>partitions.findPartition</code>. The <code>value</code> and
 * <code>numPartitions</code> arguments are not consulted by this method.
 * @param key the key to look up
 * @param value unused here
 * @param numPartitions unused here
 * @return the result of <code>partitions.findPartition(key)</code>
 */
@SuppressWarnings("unchecked") // is memcmp-able and uses the trie
public int getPartition(K key, V value, int numPartitions) {
 return partitions.findPartition(key);
}

 /**
  * Checks how a RehashPartitioner spreads a patterned key sequence over
  * PARTITIONS buckets. A bucket counts as bad when its relative deviation from
  * the midpoint of the smallest and largest bucket exceeds MAX_ERROR; the test
  * fails unless fewer than PARTITIONS * MAX_BADBUCKETS buckets are bad.
  */
 @Test
 public void testPatterns() {
  final int[] counts = new int[PARTITIONS];
  final RehashPartitioner<IntWritable, NullWritable> partitioner =
    new RehashPartitioner<IntWritable, NullWritable>();
  // tally the bucket chosen for each key 0, STEP, 2*STEP, ... below END
  int key = 0;
  while (key < END) {
   final int bucket = partitioner.getPartition(new IntWritable(key), null, PARTITIONS);
   counts[bucket]++;
   key += STEP;
  }
  // smallest and largest bucket, and the midpoint between them
  final Integer[] boxedCounts = ArrayUtils.toObject(counts);
  final int min = Collections.min(Arrays.asList(boxedCounts));
  final int max = Collections.max(Arrays.asList(boxedCounts));
  final int avg = (int) Math.round((max + min) / 2.0);
  System.out.println("Dumping buckets distribution: min="+min+" avg="+avg+" max="+max);
  int badbuckets = 0;
  int index = 0;
  for (int count : counts) {
   final double deviation = (count - avg) / (double) avg;
   System.out.println("bucket "+index+" "+count+" items, variance "+deviation);
   if (Math.abs(deviation) > MAX_ERROR) {
    badbuckets++;
   }
   index++;
  }
  System.out.println(badbuckets + " of "+PARTITIONS+" are too small or large buckets");
  assertTrue("too many overflow buckets", badbuckets < PARTITIONS * MAX_BADBUCKETS);
 }
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. The partitions file is created under the directory
 * configured by {@link HConstants#TEMPORARY_FS_DIRECTORY_KEY} (falling back to
 * <code>hbase-staging</code> under the file system's home directory) and is
 * registered with {@link FileSystem#deleteOnExit(Path)} so it is cleaned up
 * after the job exits.
 * @param job the job whose configuration and partitioner are set up
 * @param splitPoints the split points handed to <code>writePartitions</code>
 * @param writeMultipleTables passed through unchanged to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions
 *         file cannot be written
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
    writeMultipleTables)
  throws IOException {
 Configuration conf = job.getConfiguration();
 // create the partitions file
 FileSystem fs = FileSystem.get(conf);
 String hbaseTmpFsDir =
   conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
     fs.getHomeDirectory() + "/hbase-staging");
 // makeQualified returns the qualified path instead of modifying its argument,
 // so the result must be kept; previously it was discarded and the
 // unqualified path was used everywhere below.
 Path partitionsPath =
   fs.makeQualified(new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID()));
 writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
 fs.deleteOnExit(partitionsPath);
 // configure job to use it
 job.setPartitionerClass(TotalOrderPartitioner.class);
 TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Builds, from a sorted set of cut points, a trie that locates the correct
 * partition quickly. This is the entry point to the recursive builder: it
 * creates a fresh <code>CarriedTrieNodeRef</code> and hands everything to
 * <code>buildTrieRec</code>.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that has already been checked against
 * @param maxDepth the maximum depth a trie will be built for
 * @return the trie node that will divide the splits correctly
 */
private TrieNode buildTrie(BinaryComparable[] splits, int lower,
    int upper, byte[] prefix, int maxDepth) {
  final CarriedTrieNodeRef carriedRef = new CarriedTrieNodeRef();
  return buildTrieRec(splits, lower, upper, prefix, maxDepth, carriedRef);
}

  RawComparator.class);
// locate the partition file named in the job's configuration
Path partitionsPath =
  new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
FileSystem fs = FileSystem.get(job.getConfiguration());
// register that file with FileSystem#deleteOnExit so it does not linger
fs.deleteOnExit(partitionsPath);

 /**
  * Applies the given job configuration by passing it to the superclass's
  * <code>setConf</code>.
  * @param job the job configuration forwarded to <code>super.setConf</code>
  */
 public void configure(JobConf job) {
  super.setConf(job);
 }
}

/**
 * Reconfigures the job from an existing hfile partition file. When
 * <code>path</code> exists, its records are counted, the file is installed as
 * the job's {@link TotalOrderPartitioner} partition file, and the number of
 * reduce tasks is set to the record count plus one. When it does not exist,
 * an info message is logged and the job is left untouched.
 * @param conf the configuration used to resolve the file system and read the file
 * @param path the hfile partition file
 * @throws IOException if the file system lookup or reading the partition file fails
 */
@SuppressWarnings("deprecation")
private void reconfigurePartitions(Configuration conf, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  if (!fs.exists(path)) {
    // the closing quote after the path was missing from this message
    logger.info("File '" + path.toString() + "' doesn't exist, will not reconfigure hfile Partitions");
    return;
  }
  try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
    // key/value holders of whatever classes the file declares; only the count matters
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    int partitionCount = 0;
    while (reader.next(key, value)) {
      partitionCount++;
    }
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), path);
    // The reduce tasks should be one more than partition keys
    job.setNumReduceTasks(partitionCount + 1);
  }
}

/**
 * Builds, from a sorted set of cut points, a trie that locates the correct
 * partition quickly. This is the entry point to the recursive builder: it
 * creates a fresh <code>CarriedTrieNodeRef</code> and hands everything to
 * <code>buildTrieRec</code>.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that has already been checked against
 * @param maxDepth the maximum depth a trie will be built for
 * @return the trie node that will divide the splits correctly
 */
private TrieNode buildTrie(BinaryComparable[] splits, int lower,
    int upper, byte[] prefix, int maxDepth) {
  final CarriedTrieNodeRef carriedRef = new CarriedTrieNodeRef();
  return buildTrieRec(splits, lower, upper, prefix, maxDepth, carriedRef);
}

 /**
  * Applies the given job configuration by passing it to the superclass's
  * <code>setConf</code>.
  * @param job the job configuration forwarded to <code>super.setConf</code>
  */
 public void configure(JobConf job) {
  super.setConf(job);
 }
}

/**
 * Builds the job for this hash-table run: generates the partitions file under
 * <code>destPath</code>, sets up the table mapper, and routes map output
 * through a TotalOrderPartitioner into <code>tableHash.numHashFiles</code>
 * reducers that write to a MapFileOutputFormat.
 * @param args not read by this method
 * @return the configured job
 * @throws IOException propagated from partition generation or job setup
 */
public Job createSubmittableJob(String[] args) throws IOException {
 final Path partitionFile = new Path(destPath, PARTITIONS_FILE_NAME);
 generatePartitions(partitionFile);

 // an explicitly configured job name wins over the generated default
 final String defaultJobName = "hashTable_" + tableHash.tableName;
 final String jobName = getConf().get("mapreduce.job.name", defaultJobName);
 final Job job = Job.getInstance(getConf(), jobName);

 final Configuration jobConf = job.getConfiguration();
 jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
 job.setJarByClass(HashTable.class);

 TableMapReduceUtil.initTableMapperJob(
   tableHash.tableName,
   tableHash.initScan(),
   HashMapper.class,
   ImmutableBytesWritable.class,
   ImmutableBytesWritable.class,
   job);

 // use a TotalOrderPartitioner and reducers to group region output into hash files
 job.setPartitionerClass(TotalOrderPartitioner.class);
 TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
 // identity reducer
 job.setReducerClass(Reducer.class);
 job.setNumReduceTasks(tableHash.numHashFiles);

 job.setOutputKeyClass(ImmutableBytesWritable.class);
 job.setOutputValueClass(ImmutableBytesWritable.class);
 job.setOutputFormatClass(MapFileOutputFormat.class);
 final Path hashDataDir = new Path(destPath, HASH_DATA_DIR);
 FileOutputFormat.setOutputPath(job, hashDataDir);
 return job;
}

/**
 * Builds, from a sorted set of cut points, a trie that locates the correct
 * partition quickly. This is the entry point to the recursive builder: it
 * creates a fresh <code>CarriedTrieNodeRef</code> and hands everything to
 * <code>buildTrieRec</code>.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that has already been checked against
 * @param maxDepth the maximum depth a trie will be built for
 * @return the trie node that will divide the splits correctly
 */
private TrieNode buildTrie(BinaryComparable[] splits, int lower,
    int upper, byte[] prefix, int maxDepth) {
  final CarriedTrieNodeRef carriedRef = new CarriedTrieNodeRef();
  return buildTrieRec(splits, lower, upper, prefix, maxDepth, carriedRef);
}

 /**
  * Applies the given job configuration by passing it to the superclass's
  * <code>setConf</code>.
  * @param job the job configuration forwarded to <code>super.setConf</code>
  */
 public void configure(JobConf job) {
  super.setConf(job);
 }
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>tablesStartKeys</code>. The partitions file is created under the
 * directory configured as <code>hadoop.tmp.dir</code> and is registered with
 * {@link FileSystem#deleteOnExit(Path)} so it is cleaned up after the job exits.
 * @param job the job whose configuration and partitioner are set up
 * @param tablesStartKeys the table/row-key pairs handed to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions
 *         file cannot be written
 */
static void configurePartitioner(Job job, Set<TableRowkeyPair> tablesStartKeys)
    throws IOException {

  Configuration conf = job.getConfiguration();
  // create the partitions file
  Path partitionsPath = new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
  FileSystem fs = partitionsPath.getFileSystem(conf);
  // makeQualified returns the qualified path instead of modifying its argument,
  // so the result must be kept; previously it was discarded and the
  // unqualified path was used everywhere below.
  partitionsPath = fs.makeQualified(partitionsPath);
  writePartitions(conf, partitionsPath, tablesStartKeys);
  fs.deleteOnExit(partitionsPath);
  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

// record partitionsPath in the job's configuration as the TotalOrderPartitioner partition file
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);

How to use org.apache.hadoop.mapreduce.lib.partition

Best Java code snippets using org.apache.hadoop.mapreduce.lib.partition (Showing top 20 results out of 315)