org.apache.hadoop.mapreduce.Job.setOutputKeyClass java code examples

Refine search

/**
 * Declares the output side of the given job: Text keys, Text values, written
 * through SequenceFileOutputFormat.
 *
 * @param job the job whose output settings are changed
 * @param segment not read by this method
 * @param metaUrl not read by this method
 * @throws Exception declared by the signature
 */
public static void setHadoopConfForCuboid(Job job, CubeSegment segment, String metaUrl) throws Exception {
  // Keys and values share the same type, so name it once.
  final Class<Text> textType = Text.class;

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputValueClass(textType);
  job.setOutputKeyClass(textType);
}

/**
 * Sets up the reduce side of the job: AvroKeyDedupReducer as the reducer,
 * AvroKey / NullWritable as the output key and value classes, and
 * AvroKeyCompactorOutputFormat as the output format. The reducer count is
 * delegated to {@code setNumberOfReducers}.
 *
 * @param job the job to configure
 * @throws IOException declared by the signature
 */
protected void configureReducer(Job job) throws IOException {
 // Reducer implementation and the key/value classes declared for its output.
 job.setReducerClass(AvroKeyDedupReducer.class);
 job.setOutputKeyClass(AvroKey.class);
 job.setOutputValueClass(NullWritable.class);

 // Format used to write the reducer output.
 job.setOutputFormatClass(AvroKeyCompactorOutputFormat.class);

 // Kept last, as in the original ordering.
 setNumberOfReducers(job);
}

  /**
   * Configures the reduce phase of {@code job} and its output location.
   *
   * <p>The number of reduce tasks comes from
   * {@code MapReduceUtil.getCuboidHLLCounterReducerNum} for the segment's cube
   * instance. The output path is registered both with FileOutputFormat and
   * under {@code BatchConstants.CFG_OUTPUT_PATH}, after which
   * {@code deletePath} is invoked for that path.
   *
   * @param output  directory the job writes to
   * @param cubeSeg segment whose cube instance determines the reducer count
   * @throws IOException declared by the signature
   */
  private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
    // Resolved up front so nothing is configured if the lookup fails.
    final int reducerCount = MapReduceUtil.getCuboidHLLCounterReducerNum(cubeSeg.getCubeInstance());

    // Reducer and the shape of its output.
    job.setNumReduceTasks(reducerCount);
    job.setReducerClass(CalculateStatsFromBaseCuboidReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Output location: registered with the output format and in the job conf.
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    deletePath(job.getConfiguration(), output);
  }
}

/**
 * Copies the segment's source partition offsets into the job configuration and
 * configures a map-only job.
 *
 * <p>The smallest and largest partition ids found in the start-offset map are
 * stored under the min/max keys. For every partition in the start-offset map,
 * its start offset and the matching end offset are stored under per-partition
 * keys. The mapper is KafkaFlatTableMapper reading through KafkaInputFormat,
 * the output is BytesWritable / Text in a sequence file, and the reducer
 * count is zero.
 *
 * @param cubeSeg segment providing the partition offset maps
 * @throws IOException declared by the signature
 */
private void setupMapper(CubeSegment cubeSeg) throws IOException {
  final Map<Integer, Long> startOffsets = cubeSeg.getSourcePartitionOffsetStart();
  final Map<Integer, Long> endOffsets = cubeSeg.getSourcePartitionOffsetEnd();

  // Partition id range, taken from the start-offset map's keys.
  final Integer lowestPartition = Collections.min(startOffsets.keySet());
  final Integer highestPartition = Collections.max(startOffsets.keySet());
  job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, lowestPartition.toString());
  job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, highestPartition.toString());

  // One start/end pair per partition present in the start-offset map.
  for (Map.Entry<Integer, Long> start : startOffsets.entrySet()) {
    final Integer partitionId = start.getKey();
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partitionId, start.getValue().toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partitionId, endOffsets.get(partitionId).toString());
  }

  // Input side.
  job.setInputFormatClass(KafkaInputFormat.class);
  job.setMapperClass(KafkaFlatTableMapper.class);

  // Output side; zero reduce tasks.
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(Text.class);
}

/**
 * Builds a job named "testPartitionedVertexScan" with zero reduce tasks. It
 * reads through CassandraInputFormat, uses NullOutputFormat, and declares
 * NullWritable for all map-output and job-output key/value classes. The jar
 * is located via HadoopScanMapper. No mapper class is set here.
 *
 * @param c configuration the job is created from
 * @return the configured, not yet submitted job
 * @throws IOException declared by the signature
 */
private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c) throws IOException {
  final Job vertexJob = Job.getInstance(c, "testPartitionedVertexScan");
  vertexJob.setJarByClass(HadoopScanMapper.class);

  // Input and output formats.
  vertexJob.setInputFormatClass(CassandraInputFormat.class);
  vertexJob.setOutputFormatClass(NullOutputFormat.class);

  // Every key/value slot is NullWritable.
  vertexJob.setMapOutputKeyClass(NullWritable.class);
  vertexJob.setMapOutputValueClass(NullWritable.class);
  vertexJob.setOutputKeyClass(NullWritable.class);
  vertexJob.setOutputValueClass(NullWritable.class);

  vertexJob.setNumReduceTasks(0);
  return vertexJob;
}

// Excerpt (enclosing method not shown): points the job's file output at
// outputDir and declares Text as both the output key and value class.
FileOutputFormat.setOutputPath(job, outputDir);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

/**
 * Runs a job named "Convert Text" with zero reduce tasks. It reads text input
 * from {@code /lol} and writes LongWritable / Text records as a sequence file
 * to {@code /lolz}.
 *
 * <p>The process exits with status 1 when the job reports failure, so
 * callers (shell scripts, schedulers) can detect it. On success the method
 * returns normally.
 *
 * @param args command-line arguments; not read
 * @throws IOException            declared by the signature
 * @throws InterruptedException   declared by the signature
 * @throws ClassNotFoundException declared by the signature
 */
public static void main(String[] args) throws IOException,
   InterruptedException, ClassNotFoundException {
 Configuration conf = new Configuration();
 // Job.getInstance replaces the deprecated Job(Configuration) constructor.
 Job job = Job.getInstance(conf, "Convert Text");
 job.setJarByClass(Mapper.class);
 job.setMapperClass(Mapper.class);
 job.setReducerClass(Reducer.class);
 // increase if you need sorting or a special number of files
 job.setNumReduceTasks(0);
 job.setOutputKeyClass(LongWritable.class);
 job.setOutputValueClass(Text.class);
 job.setOutputFormatClass(SequenceFileOutputFormat.class);
 job.setInputFormatClass(TextInputFormat.class);
 TextInputFormat.addInputPath(job, new Path("/lol"));
 SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));
 // submit and wait for completion; surface a failed job as a non-zero exit
 if (!job.waitForCompletion(true)) {
  System.exit(1);
 }
}

// Excerpt (enclosing method not shown; some lines are elided, so this does
// not compile as written).
// Merges HBase settings into conf and selects TableOutputFormat for the job.
HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
job.setOutputFormatClass(TableOutputFormat.class);
// The reducer is optional: only set when one was supplied.
if (reducer != null) job.setReducerClass(reducer);
conf.set(TableOutputFormat.OUTPUT_TABLE, table);
// NOTE(review): the setStrings(...) call below is cut off — its remaining
// arguments and closing parenthesis are missing from this excerpt.
conf.setStrings("io.serializations", conf.get("io.serializations"),
 conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Writable.class);
// NOTE(review): the body of this if-block is truncated; no closing brace is shown.
if (partitioner == HRegionPartitioner.class) {
 job.setPartitionerClass(HRegionPartitioner.class);

/**
 * Creates the schema, then builds and runs the "Link Generator" job with zero
 * reduce tasks.
 *
 * <p>The job reads {@code tmpOutput} through OneFilePerMapperSFIF, produces no
 * output (NullOutputFormat with NullWritable key/value), and has
 * {@code mapreduce.map.speculative} set to false.
 *
 * @param numMappers     forwarded to {@code setJobConf}
 * @param numNodes       forwarded to {@code setJobConf}
 * @param tmpOutput      input path for this job
 * @param width          forwarded to {@code setJobConf}
 * @param wrapMultiplier forwarded to {@code setJobConf}
 * @param numWalkers     forwarded to {@code setJobConf}
 * @return 0 when {@code jobCompletion} reports success, 1 otherwise
 * @throws Exception declared by the signature
 */
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
  Integer width, Integer wrapMultiplier, Integer numWalkers)
  throws Exception {
 LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
 createSchema();

 job = Job.getInstance(getConf());
 job.setJobName("Link Generator");
 job.setJarByClass(getClass());
 job.setNumReduceTasks(0);

 // Input: the previous step's output.
 FileInputFormat.setInputPaths(job, tmpOutput);
 job.setInputFormatClass(OneFilePerMapperSFIF.class);

 // Declared output types; the output format itself is set further down.
 job.setOutputKeyClass(NullWritable.class);
 job.setOutputValueClass(NullWritable.class);

 setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);
 setMapperForGenerator(job);

 job.setOutputFormatClass(NullOutputFormat.class);
 job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

 // Dependency jars and credentials for the submitted job.
 TableMapReduceUtil.addDependencyJars(job);
 TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
                         AbstractHBaseTool.class);
 TableMapReduceUtil.initCredentials(job);

 if (jobCompletion(job)) {
  return 0;
 }
 return 1;
}

/**
 * Adds an OutputFormat configuration to the Job under an alias name.
 *
 * <p>A new Job is created from this job's current configuration, configured
 * with the given output format and key/value classes, and stored in
 * {@code outputConfigs} under {@code alias}.
 *
 * @param alias the name to be given to the OutputFormat configuration
 * @param outputFormatClass OutputFormat class
 * @param keyClass the key class for the output data
 * @param valueClass the value class for the output data
 * @throws IOException if the job copy cannot be created
 */
public void addOutputFormat(String alias,
  Class<? extends OutputFormat> outputFormatClass,
  Class<?> keyClass, Class<?> valueClass) throws IOException {
 // Job.getInstance replaces the deprecated Job(Configuration) constructor.
 Job copy = Job.getInstance(this.job.getConfiguration());
 outputConfigs.put(alias, copy);
 copy.setOutputFormatClass(outputFormatClass);
 copy.setOutputKeyClass(keyClass);
 copy.setOutputValueClass(valueClass);
}

/**
 * Entry point: parses the command line, builds the "ThrottlingStressTest" job,
 * runs it, and exits with 0 on success or 1 on failure.
 *
 * <p>When the throttling-server option is present, the throttling flag, the
 * resource id (default "MRStressTest") and the server URI are written to the
 * configuration. When the local-QPS option is present, its value is stored
 * as well. Output goes to a {@code /tmp/MRStressTest<millis>} path.
 *
 * @param args command-line arguments
 * @throws Exception declared by the signature
 */
public static void main(String[] args) throws Exception {
 CommandLine commandLine = StressTestUtils.parseCommandLine(OPTIONS, args);
 Configuration baseConf = new Configuration();

 final String throttlingUriOpt = THROTTLING_SERVER_URI.getOpt();
 if (commandLine.hasOption(throttlingUriOpt)) {
  baseConf.setBoolean(USE_THROTTLING_SERVER, true);
  baseConf.set(RESOURCE_ID, commandLine.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest"));

  // Key under which the shared REST client looks up the server URI.
  String serverUriKey = BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(),
    new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME),
    null, SharedRestClientFactory.SERVER_URI_KEY);
  baseConf.set(serverUriKey, commandLine.getOptionValue(throttlingUriOpt));
 }

 final String localQpsOpt = LOCAL_QPS_OPT.getOpt();
 if (commandLine.hasOption(localQpsOpt)) {
  baseConf.set(LOCALLY_ENFORCED_QPS, commandLine.getOptionValue(localQpsOpt));
 }

 Job job = Job.getInstance(baseConf, "ThrottlingStressTest");

 // Settings applied to the job's own configuration.
 Configuration jobConf = job.getConfiguration();
 jobConf.setBoolean("mapreduce.job.user.classpath.first", true);
 jobConf.setBoolean("mapreduce.map.speculative", false);
 jobConf.set(NUM_MAPPERS, commandLine.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
 StressTestUtils.populateConfigFromCli(jobConf, commandLine);

 job.setJarByClass(MRStressTest.class);
 job.setInputFormatClass(MyInputFormat.class);
 job.setMapperClass(StresserMapper.class);
 job.setReducerClass(AggregatorReducer.class);
 job.setOutputKeyClass(LongWritable.class);
 job.setOutputValueClass(DoubleWritable.class);
 FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

 if (job.waitForCompletion(true)) {
  System.exit(0);
 } else {
  System.exit(1);
 }
}

// Excerpt of configureIncrementalLoad; lines are elided, so it does not
// compile as written. The visible part declares ImmutableBytesWritable /
// KeyValue as the job output classes, uses the supplied output format class,
// and picks a sort reducer based on the job's map output value class.
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator,
    Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(cls);
  // NOTE(review): the opening `if (...) {` of this chain is missing from the
  // excerpt; presumably it tests for a KeyValue map output value — confirm
  // against the full source.
    job.setReducerClass(KeyValueSortReducer.class);
  } else if (Put.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(PutSortReducer.class);
  } else if (Text.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(TextSortReducer.class);
  } else {
    // No reducer is chosen for other value types; only a warning is logged.
    LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());

/**
 * Configures the output side of a job for a single table.
 *
 * <p>Output is ImmutableBytesWritable / MapReduceExtendedCell through
 * HFileOutputFormat2. The table name and the serialized per-column-family
 * attributes (compression, block size, bloom type, bloom param, data block
 * encoding) are stored in the job configuration. Dependency jars and
 * credentials are then set up via TableMapReduceUtil.
 *
 * @param job             job to configure
 * @param tableDescriptor descriptor of the target table
 * @throws IOException declared by the signature
 */
public static void configureIncrementalLoadMap(Job job, TableDescriptor tableDescriptor) throws
  IOException {
 Configuration conf = job.getConfiguration();

 job.setOutputFormatClass(HFileOutputFormat2.class);
 job.setOutputKeyClass(ImmutableBytesWritable.class);
 job.setOutputValueClass(MapReduceExtendedCell.class);

 // The serializer helper takes a list, so wrap the single descriptor.
 ArrayList<TableDescriptor> descriptors = new ArrayList<>(1);
 descriptors.add(tableDescriptor);

 conf.set(OUTPUT_TABLE_NAME_CONF_KEY, tableDescriptor.getTableName().getNameAsString());

 // Per-column-family attributes, one configuration key each.
 String compression = serializeColumnFamilyAttribute(compressionDetails, descriptors);
 conf.set(COMPRESSION_FAMILIES_CONF_KEY, compression);

 String blockSize = serializeColumnFamilyAttribute(blockSizeDetails, descriptors);
 conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, blockSize);

 String bloomType = serializeColumnFamilyAttribute(bloomTypeDetails, descriptors);
 conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, bloomType);

 String bloomParam = serializeColumnFamilyAttribute(bloomParamDetails, descriptors);
 conf.set(BLOOM_PARAM_FAMILIES_CONF_KEY, bloomParam);

 String dataBlockEncoding = serializeColumnFamilyAttribute(dataBlockEncodingDetails, descriptors);
 conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY, dataBlockEncoding);

 TableMapReduceUtil.addDependencyJars(job);
 TableMapReduceUtil.initCredentials(job);
 LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured.");
}

// Excerpt (enclosing method not shown): uses the Mapper and Reducer classes
// themselves as the job's mapper and reducer, declares Text / Text output,
// then submits the job and waits for it with verbose=true. The boolean
// result of waitForCompletion is not inspected here.
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
job.setJarByClass(Mapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.waitForCompletion(true);

// Excerpt of the multi-table configureIncrementalLoad; lines are elided, so
// it does not compile as written. The visible part declares
// ImmutableBytesWritable / MapReduceExtendedCell as the job output classes,
// uses the supplied output format class, and picks a sort reducer based on
// the job's map output value class.
static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo,
  Class<? extends OutputFormat<?, ?>> cls) throws IOException {
 Configuration conf = job.getConfiguration();
 job.setOutputKeyClass(ImmutableBytesWritable.class);
 job.setOutputValueClass(MapReduceExtendedCell.class);
 job.setOutputFormatClass(cls);
 // NOTE(review): the opening `if (...) {` of this chain is missing from the
 // excerpt — confirm the first condition against the full source.
  job.setReducerClass(CellSortReducer.class);
 } else if (Put.class.equals(job.getMapOutputValueClass())) {
  job.setReducerClass(PutSortReducer.class);
 } else if (Text.class.equals(job.getMapOutputValueClass())) {
  job.setReducerClass(TextSortReducer.class);
 } else {
  // No reducer is chosen for other value types; only a warning is logged.
  LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());

/**
 * Configures the output side of a job for the given table.
 *
 * <p>Output is ImmutableBytesWritable / KeyValue through HFileOutputFormat3.
 * Compression, bloom type, block size and data block encoding settings are
 * derived from the table's descriptor and written to the job configuration.
 * Dependency jars and credentials are then set up via TableMapReduceUtil.
 *
 * @param job   job to configure
 * @param table table whose descriptor drives the per-family settings
 * @throws IOException if the table descriptor cannot be obtained or a
 *         configuration step fails
 */
public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
  Configuration conf = job.getConfiguration();
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(HFileOutputFormat3.class);

  // Fetch the descriptor once and reuse it, so every configure* step sees the
  // same descriptor instance and the lookup is not repeated.
  // NOTE(review): presumably each getTableDescriptor() call is a remote
  // round trip — confirm against the HBase client in use.
  HTableDescriptor tableDescriptor = table.getTableDescriptor();

  // Set compression algorithms based on column families
  configureCompression(conf, tableDescriptor);
  configureBloomType(tableDescriptor, conf);
  configureBlockSize(tableDescriptor, conf);
  configureDataBlockEncoding(tableDescriptor, conf);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  LOG.info("Incremental table " + table.getName() + " output configured.");
}

/**
 * Builds the job without submitting it.
 *
 * <p>{@code args[0]} is the table name and {@code args[1]} the output
 * directory. An optional {@code args[2]} is the report separator, defaulting
 * to ":", and is stored in the configuration under "ReportSeparator".
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
  throws IOException {
 final String tableName = args[0];
 final Path outputDir = new Path(args[1]);

 String separator = ":";
 if (args.length > 2) {
  separator = args[2];
 }
 conf.set("ReportSeparator", separator);

 final String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName);
 Job job = Job.getInstance(conf, jobName);
 job.setJarByClass(CellCounter.class);

 // Map side: table scan fed into CellCounterMapper.
 Scan scan = getConfiguredScanForJob(conf, args);
 TableMapReduceUtil.initTableMapperJob(tableName, scan,
   CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
 // Set after initTableMapperJob, which was passed different key/value classes.
 job.setMapOutputKeyClass(Text.class);
 job.setMapOutputValueClass(IntWritable.class);

 // Reduce side: a single IntSumReducer task writing Text / IntWritable text output.
 job.setReducerClass(IntSumReducer.class);
 job.setNumReduceTasks(1);
 job.setOutputKeyClass(Text.class);
 job.setOutputValueClass(IntWritable.class);
 job.setOutputFormatClass(TextOutputFormat.class);
 FileOutputFormat.setOutputPath(job, outputDir);

 return job;
}

// Excerpt (enclosing method not shown): CheckerMapper is the mapper and
// CheckerReducer is both combiner and reducer. Output is Text / Text written
// to mOutputFilePath, and input comes through EmptyInputFormat.
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(EmptyInputFormat.class);
FileOutputFormat.setOutputPath(job, mOutputFilePath);
 // A failed job makes the enclosing method return 1.
 // NOTE(review): the closing brace of this if-block is cut off in the excerpt.
 if (!job.waitForCompletion(true)) {
  return 1;

// Excerpt (enclosing method not shown): a job with zero reduce tasks that
// uses the supplied mapper class and input format, NullOutputFormat, and
// NullWritable for every map-output and job-output key/value class. The
// outcome of the blocking run is captured in `success`.
job.setJarByClass(mapperClass);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);
job.setMapperClass(mapperClass);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(inputFormat);
boolean success = job.waitForCompletion(true);

/**
 * Generates the partitions file under {@code destPath} and builds the
 * hash-table job without submitting it.
 *
 * <p>The job name is taken from "mapreduce.job.name", defaulting to
 * "hashTable_" plus the table name. HashMapper output is partitioned by a
 * TotalOrderPartitioner using the generated partitions file and passed
 * through the base Reducer class into {@code tableHash.numHashFiles} reduce
 * tasks. Output is written as map files below
 * {@code destPath/HASH_DATA_DIR}.
 *
 * @param args command-line arguments; not read by this method
 * @return the configured job
 * @throws IOException declared by the signature
 */
public Job createSubmittableJob(String[] args) throws IOException {
 final Path partitionsFile = new Path(destPath, PARTITIONS_FILE_NAME);
 generatePartitions(partitionsFile);

 final String jobName = getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName);
 Job job = Job.getInstance(getConf(), jobName);
 job.setJarByClass(HashTable.class);

 Configuration jobConf = job.getConfiguration();
 jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);

 // Map side: scan the table through HashMapper.
 TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
   HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

 // use a TotalOrderPartitioner and reducers to group region output into hash files
 TotalOrderPartitioner.setPartitionFile(jobConf, partitionsFile);
 job.setPartitionerClass(TotalOrderPartitioner.class);
 job.setNumReduceTasks(tableHash.numHashFiles);
 job.setReducerClass(Reducer.class);  // identity reducer

 // Output side.
 job.setOutputFormatClass(MapFileOutputFormat.class);
 job.setOutputKeyClass(ImmutableBytesWritable.class);
 job.setOutputValueClass(ImmutableBytesWritable.class);
 FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

 return job;
}

Javadoc

Set the key class for the job output data.

Popular methods of Job

getConfiguration
setMapperClass
Set the Mapper for the job.
waitForCompletion
Submit the job to the cluster and wait for it to finish.
setInputFormatClass
Set the InputFormat for the job.
setJarByClass
Set the Jar by finding where a given class came from.
setOutputFormatClass
Set the OutputFormat for the job.
setOutputValueClass
Set the value class for job outputs.
setNumReduceTasks
Set the number of reduce tasks for the job.
setReducerClass
Set the Reducer for the job.
setMapOutputKeyClass
Set the key class for the map output data. This allows the user to specify the map output key class
setMapOutputValueClass
Set the value class for the map output data. This allows the user to specify the map output value cl
<init>

Popular in Java

Running tasks concurrently on multiple threads
scheduleAtFixedRate (ScheduledExecutorService)
setContentView (Activity)
getSharedPreferences (Context)
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass
ConcurrentHashMap (java.util.concurrent)
A plug-in replacement for JDK1.5 java.util.concurrent.ConcurrentHashMap. This version is based on or
Pattern (java.util.regex)
Patterns are compiled regular expressions. In many cases, convenience methods such as String#matches
ZipFile (java.util.zip)
This class provides random read access to a zip file. You pay more to read the zip file's central di
Logger (org.apache.log4j)
This is the central class in the log4j package. Most logging operations, except configuration, are d
Annotation (javassist.bytecode.annotation)
The annotation structure. An instance of this class is returned by getAnnotations() in AnnotationsAttr
Github Copilot alternatives

How to use the setOutputKeyClass method in org.apache.hadoop.mapreduce.Job

Best Java code snippets using org.apache.hadoop.mapreduce.Job.setOutputKeyClass (Showing top 20 results out of 1,899)

Refine search

How to use
setOutputKeyClass
method
in
org.apache.hadoop.mapreduce.Job