/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints,
    boolean writeMultipleTables) throws IOException {
  Configuration conf = job.getConfiguration();
  // create the partitions file
  FileSystem fs = FileSystem.get(conf);
  String hbaseTmpFsDir = conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
      fs.getHomeDirectory() + "/hbase-staging");
  Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
  fs.makeQualified(partitionsPath);
  writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
  fs.deleteOnExit(partitionsPath);

  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
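Most of the results below follow the same pattern this method captures: write the split keys into a SequenceFile, then point TotalOrderPartitioner at it and size the reducer count to match. A minimal self-contained sketch of that pattern (Text keys and the /tmp staging location are illustrative assumptions, not taken from any result on this page):

import java.io.IOException;
import java.util.List;
import java.util.UUID;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSetupSketch {
  /** Writes one split point per record, then wires the partitioner to the file. */
  static void configureTotalOrder(Job job, List<Text> splitPoints) throws IOException {
    Configuration conf = job.getConfiguration();
    // Hypothetical staging location; real jobs use a per-job temp directory.
    Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID());
    FileSystem fs = partitionsPath.getFileSystem(conf);
    try (SequenceFile.Writer w = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(fs.makeQualified(partitionsPath)),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(NullWritable.class))) {
      for (Text split : splitPoints) {
        w.append(split, NullWritable.get());
      }
    }
    fs.deleteOnExit(partitionsPath); // best-effort cleanup when the client JVM exits
    job.setMapOutputKeyClass(Text.class); // keys in the file must match this class
    job.setNumReduceTasks(splitPoints.size() + 1); // N split points => N + 1 reducers
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
  }
}

The invariant to keep in mind: with N split points the job needs N + 1 reducers, or TotalOrderPartitioner.setConf throws "Wrong number of partitions in keyset" (see that result further down).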
    throws IOException {
  TableName tableName = TableName.valueOf(args[0]);
  conf.set(TABLE_NAME, tableName.getNameAsString());
  Path inputDir = new Path(args[1]);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(Importer.class);
  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
  job.setMapperClass(CellSortImporter.class);
  job.setReducerClass(CellReducer.class);
  Path outputDir = new Path(hfileOutPath);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setMapOutputKeyClass(CellWritableComparable.class);
  job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
      CellWritableComparable.CellWritableComparator.class, RawComparator.class);
  Path partitionsPath = new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
  FileSystem fs = FileSystem.get(job.getConfiguration());
  fs.deleteOnExit(partitionsPath);
  job.setPartitionerClass(CellWritableComparablePartitioner.class);
  // regionLocator for the target table is obtained earlier (elided in this excerpt)
  job.setNumReduceTasks(regionLocator.getStartKeys().length);
protected Job createJob(String[] args) throws Exception {
  Job job = new Job();
  job.setJobName("ArchiveCDXGenerator" + "_" + System.currentTimeMillis());
  Configuration conf = job.getConfiguration();
  ...
  } else {
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
        new Path(outputPath, "_partitions.lst"));
    FileSystem fs = input.getFileSystem(conf);
    FileStatus inputStatus = fs.getFileStatus(input);
    FileInputFormat.setMaxInputSplitSize(job,
        inputStatus.getLen() / getNumMapTasks(new Path(this.inputPath), conf));
try {
  this.conf = conf;
  String parts = getPartitionFile(conf);
  final Path partFile = new Path(parts);
  final FileSystem fs = (DEFAULT_PATH.equals(parts))
      ? FileSystem.getLocal(conf)   // assume in DistributedCache
      : partFile.getFileSystem(conf);

  Job job = Job.getInstance(conf);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
  if (splitPoints.length != job.getNumReduceTasks() - 1) {
    throw new IOException("Wrong number of partitions in keyset");
  }
  RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
  boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
  if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
    partitions = buildTrie((BinaryComparable[]) splitPoints, 0,
        splitPoints.length, new byte[0],
        conf.getInt(MAX_TRIE_DEPTH, 200));
  } else {
    partitions = new BinarySearchNode(splitPoints, comparator);
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);

  Job job = Job.getInstance(getConf(),
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);
  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class); // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

  return job;
}
    throws IOException, ClassNotFoundException, InterruptedException {
  LinkedList<K> splits = new LinkedList<K>();
  Configuration conf = job.getConfiguration();
  final InputFormat inf =
      ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
  int numPartitions = job.getNumReduceTasks();
  K[] samples = (K[]) sampler.getSample(inf, job);
  LOG.info("Using " + samples.length + " samples");
  // sort the samples with the job's sort comparator, so the split points
  // follow the same order in which reduce input keys will be sorted
  RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  FileSystem fs = dst.getFileSystem(conf);
  if (fs.exists(dst)) {
    fs.delete(dst, false);
  }
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst,
      job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
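A hedged sketch of how writePartitionFile is typically driven from job setup (the input format, key class, reducer count, and path arguments here are assumptions for illustration, not from the snippet above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class SamplerDriverSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "total-order-sort");
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
    job.setMapOutputKeyClass(Text.class);
    // The sampler writes numReduceTasks - 1 split points, so fix this first.
    job.setNumReduceTasks(8);
    // Register the partition file before sampling; writePartitionFile reads
    // its destination from this configuration setting.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(args[1]));
    // Sample ~1% of input keys, capped at 10000 samples (illustrative values).
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<>(0.01, 10000);
    InputSampler.writePartitionFile(job, sampler);
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }
}

Note the ordering: the reducer count and the partition file location must both be fixed before sampling, since writePartitionFile derives the number of split points from one and its output destination from the other.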
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
  Path inputDir = new Path(args[0]);
  Path outputDir = new Path(args[1]);
  boolean createPartitionFile = Boolean.parseBoolean(args[2]);

  Job job = Job.getInstance(conf, "Import delicious RSS feed into Hush tables.");
  job.setJarByClass(BulkImportJobExample.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.getConfiguration().set("hfile.compression", "gz");

  Path partitionsPath = new Path(job.getWorkingDirectory(),
      "partitions_" + System.currentTimeMillis());
  TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
@SuppressWarnings("unchecked") @Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Path inputPath = new Path(args[0]); Path partitionFile = new Path(args[1] + "_partitions.lst"); Path outputStage = new Path(args[1] + "_staging"); Path outputOrder = new Path(args[1]); Job sampleJob = new Job(conf, "TotalOrderSortingStage"); sampleJob.setJarByClass(TotalOrderSortingStage.class); sampleJob.setMapperClass(LastAccessMapper.class); sampleJob.setNumReduceTasks(0); sampleJob.setOutputKeyClass(Text.class); orderJob.setPartitionerClass(TotalOrderPartitioner.class); TotalOrderPartitioner.setPartitionFile(orderJob.getConfiguration(), partitionFile); orderJob.setOutputKeyClass(Text.class); orderJob.getConfiguration().set( "mapred.textoutputformat.separator", ""); FileSystem.get(new Configuration()).delete(partitionFile, false); FileSystem.get(new Configuration()).delete(outputStage, true); return code;
public void testTotalOrderMemCmp() throws Exception {
  TotalOrderPartitioner<Text, NullWritable> partitioner =
      new TotalOrderPartitioner<Text, NullWritable>();
  Configuration conf = new Configuration();
  Path p = TestTotalOrderPartitioner.<Text>writePartitionFile(
      "totalordermemcmp", conf, splitStrings);
  conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
  try {
    partitioner.setConf(conf);
    NullWritable nw = NullWritable.get();
    for (Check<Text> chk : testStrings) {
      assertEquals(chk.data.toString(), chk.part,
          partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
    }
  } finally {
    p.getFileSystem(conf).delete(p, true);
  }
}
ClusterStatus cluster = client.getClusterStatus();
int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
String sort_reduces = conf.get(REDUCES_PER_HOST);
if (sort_reduces != null) {
  num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
}
...
job = Job.getInstance(conf);
job.setJobName("sorter");
job.setJarByClass(Sort.class);
job.setMapperClass(Mapper.class);
...
FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
job.setPartitionerClass(TotalOrderPartitioner.class);
Path inputDir = FileInputFormat.getInputPaths(job)[0];
FileSystem fs = inputDir.getFileSystem(conf);
inputDir = inputDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
Path partitionFile = new Path(inputDir, "_sortPartitioning");
TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
InputSampler.<K,V>writePartitionFile(job, sampler);
URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
job.addCacheFile(partitionUri);
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, Set<TableRowkeyPair> tablesStartKeys)
    throws IOException {
  Configuration conf = job.getConfiguration();
  // create the partitions file
  Path partitionsPath = new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
  FileSystem fs = partitionsPath.getFileSystem(conf);
  fs.makeQualified(partitionsPath);
  writePartitions(conf, partitionsPath, tablesStartKeys);
  fs.deleteOnExit(partitionsPath);

  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
private int createPartitionFile(String sequenceFileInput, int regionCnt)
    throws IOException, ClassNotFoundException, InterruptedException {
  Path input = new Path(sequenceFileInput);
  Job sampler = new Job(getConf());
  sampler.setNumReduceTasks(regionCnt);
  sampler.setInputFormatClass(SequenceFileInputFormat.class);
  sampler.setOutputFormatClass(SequenceFileOutputFormat.class);
  sampler.setOutputKeyClass(BytesWritable.class);
  SequenceFileInputFormat.addInputPath(sampler, input);

  Configuration config = sampler.getConfiguration();
  InputSampler.Sampler<BytesWritable, NullWritable> inputSampler =
      new InputSampler.RandomSampler<BytesWritable, NullWritable>(
          config.getFloat(SAMPLER_FREQUENCY_KEY, SAMPLER_FREQUENCY_DEFAULT_VALUE),
          config.getInt(SAMPLER_NUM_SAMPLES_KEY, SAMPLER_NUM_SAMPLES_DEFAULT_VALUE));
  Path partitionFile = new Path(
      config.get(OUTPUT_KEYS_FILE_NAME_KEY, OUTPUT_KEYS_FILE_NAME_DEFAULT_VALUE));
  TotalOrderPartitioner.setPartitionFile(config, partitionFile);
  InputSampler.<BytesWritable, NullWritable>writePartitionFile(sampler, inputSampler);
  return 0;
}
Job job = Job.getInstance(getConf());
ArrayList<String> otherArgs = new ArrayList<String>();
Sampler<K,V> sampler = null;
try {
  ...
  if ("-r".equals(args[i])) {
    job.setNumReduceTasks(Integer.parseInt(args[++i]));
  } else if ("-inFormat".equals(args[i])) {
    job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
  } else if ("-keyClass".equals(args[i])) {
    ...
Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
TotalOrderPartitioner.setPartitionFile(getConf(), outf);
for (String s : otherArgs) {
  FileInputFormat.addInputPath(job, new Path(s));
/**
 * Confirm the absence of the {@link TotalOrderPartitioner} partitions file.
 */
protected static void validateDeletedPartitionsFile(Configuration conf) throws IOException {
  if (!conf.getBoolean(IntegrationTestingUtility.IS_DISTRIBUTED_CLUSTER, false)) {
    return;
  }
  FileSystem fs = FileSystem.get(conf);
  Path partitionsFile = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  assertFalse("Failed to clean up partitions file.", fs.exists(partitionsFile));
}
/**
 * Check whether a partition file exists for the hfile output; if so, use it to
 * replace the table splits and raise the job's reducer count to match.
 * @param conf the job configuration
 * @param path the hfile partition file
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private void reconfigurePartitions(Configuration conf, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  if (fs.exists(path)) {
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
      int partitionCount = 0;
      Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
      while (reader.next(key, value)) {
        partitionCount++;
      }
      TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), path);
      // The number of reduce tasks should be one more than the partition keys
      job.setNumReduceTasks(partitionCount + 1);
    }
  } else {
    logger.info("File '" + path.toString() + "' doesn't exist, will not reconfigure hfile partitions");
  }
}
private static Job createSubmittableJob(Configuration conf, String tableName,
    Path inputDir, Path scratchDir, boolean localMode) throws IOException {
  HBaseHCatStorageHandler.setHBaseSerializers(conf);
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(SequenceFileImporter.class);
  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(SequenceFileImporter.class);
  URI partitionURI;
  try {
    partitionURI = new URI(TotalOrderPartitioner.getPartitionFile(job.getConfiguration())
        + "#" + TotalOrderPartitioner.DEFAULT_PATH);
  } catch (URISyntaxException e) {
    ...
  }
  ...
  job.getConfiguration().set(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString());
private static <T extends WritableComparable<?>> Path writePartitionFile(
    String testname, Configuration conf, T[] splits) throws IOException {
  final FileSystem fs = FileSystem.getLocal(conf);
  final Path testdir =
      new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
  Path p = new Path(testdir, testname + "/_partition.lst");
  TotalOrderPartitioner.setPartitionFile(conf, p);
  conf.setInt(MRJobConfig.NUM_REDUCES, splits.length + 1);
  SequenceFile.Writer w = null;
  try {
    w = SequenceFile.createWriter(fs, conf, p, splits[0].getClass(),
        NullWritable.class, SequenceFile.CompressionType.NONE);
    for (int i = 0; i < splits.length; ++i) {
      w.append(splits[i], NullWritable.get());
    }
  } finally {
    if (null != w) {
      w.close();
    }
  }
  return p;
}
private int createPartitionFile(String inputPath, String outputFile, float frequency,
    int samplesCnt) throws IOException, ClassNotFoundException, InterruptedException {
  Path input = new Path(inputPath);
  Job sampler = new Job(getConf());
  TextInputFormat.addInputPath(sampler, input);
  InputSampler.Sampler<LongWritable, Text> inputSampler =
      new InputSampler.RandomSampler<LongWritable, Text>(frequency, samplesCnt);
  Path partitionFile = new Path(outputFile);
  TotalOrderPartitioner.setPartitionFile(sampler.getConfiguration(), partitionFile);
  InputSampler.writePartitionFile(sampler, inputSampler);
  return 0;
}
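Several results above (the Sort driver, the HCatalog importer) append a "#" fragment when building the distributed-cache URI for the partition file. The fragment names the symlink created in each task's working directory; with no explicit partition file configured, TotalOrderPartitioner falls back to reading its DEFAULT_PATH ("_partition.lst") from the local filesystem. A minimal hedged sketch of that wiring, assuming the partition file already exists on HDFS:

import java.net.URI;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class CacheWiringSketch {
  /** Ships an existing partition file to tasks via the distributed cache. */
  static void shipPartitionFile(Job job, Path partitionFile) throws Exception {
    // Symlink the cached copy as "_partition.lst" in each task's working dir;
    // TotalOrderPartitioner reads that default name from the local filesystem
    // when no explicit partition file path is set in the configuration.
    URI cacheUri = new URI(partitionFile.toUri().toString()
        + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    job.addCacheFile(cacheUri);
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }
}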