// NOTE(review): this fragment appears garbled — the `if (work.getNumMapTasks() != null)`
// block is followed directly by a `catch (ClassNotFoundException e)` clause with no
// visible `try`; presumably the original wrapped job.setInputFormat(JavaUtils.loadClass(...))
// (the only call here that can raise ClassNotFoundException) in a try/catch.
// Code left byte-identical — confirm against the full source file.
// Visible behavior: prepares Hive job output, sets the Hive output format, the work's
// mapper, NullWritable map-output values, and (when configured) the map task count,
// zero reducers, and a dynamically loaded input format.
HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); job.setMapOutputValueClass(NullWritable.class); if(work.getNumMapTasks() != null) { job.setNumMapTasks(work.getNumMapTasks()); job.setNumReduceTasks(0); job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e);
public static JobConf configureJob(JobConf conf, String[] args) { conf.set(KEY_INPUT_FILE, args[0]) ; conf.set(KEY_LANG_FILE, args[1]) ; conf.set(KEY_LANG_CODE, args[2]) ; conf.set(KEY_SENTENCE_MODEL, args[3]) ; conf.set(KEY_OUTPUT_DIR, args[4]) ; //set a reasonable number of maps. This is going to be ignored for very large inputs (e.g. the en wiki dump) anyway. conf.setNumMapTasks(16) ; //force one reducer by default. These don't take very long, and multiple reducers would make finalise file functions more complicated. conf.setNumReduceTasks(1) ; //many of our tasks require pre-loading lots of data, may as well reuse this as much as we can. //conf.setNumTasksToExecutePerJvm(-1) ; //conf.setInt("mapred.tasktracker.map.tasks.maximum", 2) ; //conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1) ; //TODO: really don't want this hard coded. conf.set("mapred.child.java.opts", "-Xmx500M -Dapple.awt.UIElement=true") ; //conf.setBoolean("mapred.used.genericoptionsparser", true) ; return conf ; }
private void configure(JobConf conf, Path inDir, Path outDir, String input, Class<? extends Mapper> map, Class<? extends Reducer> reduce) throws IOException { // set up the input file system and write input text. FileSystem inFs = inDir.getFileSystem(conf); FileSystem outFs = outDir.getFileSystem(conf); outFs.delete(outDir, true); if (!inFs.mkdirs(inDir)) { throw new IOException("Mkdirs failed to create " + inDir.toString()); } { // write input into input file DataOutputStream file = inFs.create(new Path(inDir, "part-0")); file.writeBytes(input); file.close(); } // configure the mapred Job which creates a tempfile in map. conf.setJobName("testmap"); conf.setMapperClass(map); conf.setReducerClass(reduce); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data", "/tmp")).toString().replace(' ', '+'); conf.set("test.build.data", TEST_ROOT_DIR); }
/**
 * Writes {@code numMaps} identical text part files into {@code inDir},
 * configures a text-input job with the requested task counts, and submits
 * it without waiting for completion.
 *
 * @return the submitted (still running) job
 * @throws IOException on filesystem or submission failure
 */
static RunningJob runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
    int numReds) throws IOException {
  FileSystem fs = FileSystem.get(conf);

  // Fresh output directory; ensure the input directory exists.
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }

  // One copy of the sample text per map task.
  String input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";
  for (int part = 0; part < numMaps; ++part) {
    DataOutputStream out = fs.create(new Path(inDir, "part-" + part));
    out.writeBytes(input);
    out.close();
  }

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  return new JobClient(conf).submitJob(conf);
}
/** * When no input dir is specified, generate random data. */ protected static void confRandom(JobConf job) throws IOException { // from RandomWriter job.setInputFormat(RandomInputFormat.class); job.setMapperClass(RandomMapOutput.class); final ClusterStatus cluster = new JobClient(job).getClusterStatus(); int numMapsPerHost = job.getInt(RandomTextWriter.MAPS_PER_HOST, 10); long numBytesToWritePerMap = job.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { throw new IOException( "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0"); } long totalBytesToWrite = job.getLong(RandomTextWriter.TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite); } job.setNumMapTasks(numMaps); }
// Fragment of a job-setup routine (enclosing method not visible): wipes the test
// directory, then configures a SequenceFile-input job with the given input/output
// paths, IntWritable map-output values, and two map tasks.
// NOTE(review): conf, testdir, inDir, and outDir are declared outside this fragment.
FileSystem fs = FileSystem.get(conf); fs.delete(testdir, true); conf.setInputFormat(SequenceFileInputFormat.class); FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); conf.setMapOutputValueClass(IntWritable.class); conf.setNumMapTasks(2);
/**
 * Validates the required configuration entries, then wires the sample
 * mapper/reducer and the example input/output paths into the action
 * configuration (single map task).
 *
 * @param actionConf the Oozie action configuration to populate
 * @throws OozieActionConfiguratorException if the user, {@code examples.root},
 *     or {@code output.dir.name} is missing
 */
@Override
public void configure(JobConf actionConf) throws OozieActionConfiguratorException {
  if (actionConf.getUser() == null) {
    throw new OozieActionConfiguratorException("No user set");
  }
  // Both properties are mandatory; checked in the same order as before.
  for (String required : new String[] {"examples.root", "output.dir.name"}) {
    if (actionConf.get(required) == null) {
      throw new OozieActionConfiguratorException(required + " not set");
    }
  }

  actionConf.setMapperClass(SampleMapper.class);
  actionConf.setReducerClass(SampleReducer.class);
  actionConf.setNumMapTasks(1);

  String examplesHome =
      "/user/" + actionConf.getUser() + "/" + actionConf.get("examples.root");
  FileInputFormat.setInputPaths(actionConf,
      new Path(examplesHome + "/input-data/text"));
  FileOutputFormat.setOutputPath(actionConf,
      new Path(examplesHome + "/output-data/" + actionConf.get("output.dir.name")));
}
}
// NOTE(review): garbled fragment (duplicate of an earlier snippet) — the
// `if (work.getNumMapTasks() != null)` block is followed directly by a
// `catch (ClassNotFoundException e)` clause with no visible `try`; the original
// presumably wrapped job.setInputFormat(JavaUtils.loadClass(...)) in a try/catch.
// Left byte-identical — confirm against the full source file.
HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); job.setMapOutputValueClass(NullWritable.class); if(work.getNumMapTasks() != null) { job.setNumMapTasks(work.getNumMapTasks()); job.setNumReduceTasks(0); job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e);
/** * @param args */ @SuppressWarnings("deprecation") public static void main(String[] args) { JobConf conf = new JobConf(HBitextCompiler.class); conf.set(OUTPUT_BASENAME, "/shared/bitexts/ep700k+nc.de-en/ep700k+nc"); conf.set(FR_PATH, "filt.lc.de"); conf.set(EN_PATH, "filt.lc.en"); conf.set(AL_PATH, ""); ///user/redpony/model-5M/aligned.grow-diag-final"); conf.setJobName("bitext.compile"); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(BitextCompilerMapper.class); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); FileInputFormat.setInputPaths(conf, new Path("dummy")); try { FileSystem.get(conf).delete(new Path("dummy.out")); FileOutputFormat.setOutputPath(conf, new Path("dummy.out")); conf.setOutputFormat(SequenceFileOutputFormat.class); JobClient.runJob(conf); } catch (IOException e) { System.err.println("Caught " + e); e.printStackTrace(); } }
// Fragment (enclosing method not visible): points the client at the given
// JobTracker address and configures a text-input "wordcount" job with the
// requested numbers of map and reduce tasks.
// NOTE(review): conf, jobTracker, inDir, outDir, numMaps, and numReduces are
// declared outside this fragment.
conf.set(JTConfig.JT_IPC_ADDRESS, jobTracker); conf.setJobName("wordcount"); conf.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); conf.setNumMapTasks(numMaps); conf.setNumReduceTasks(numReduces);
// Fragment: a single map task and no reducers — i.e. a map-only job.
conf.setNumMapTasks(1); conf.setNumReduceTasks(0);
/** * When no input dir is specified, generate random data. */ protected static void confRandom(JobConf job) throws IOException { // from RandomWriter job.setInputFormat(RandomInputFormat.class); job.setMapperClass(RandomMapOutput.class); final ClusterStatus cluster = new JobClient(job).getClusterStatus(); int numMapsPerHost = job.getInt(RandomTextWriter.MAPS_PER_HOST, 10); long numBytesToWritePerMap = job.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { throw new IOException( "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0"); } long totalBytesToWrite = job.getLong(RandomTextWriter.TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite); } job.setNumMapTasks(numMaps); }
// NOTE(review): garbled fragment (duplicate of an earlier snippet) — the
// `if (work.getNumMapTasks() != null)` block is followed directly by a
// `catch (ClassNotFoundException e)` clause with no visible `try`; the original
// presumably wrapped job.setInputFormat(JavaUtils.loadClass(...)) in a try/catch.
// Left byte-identical — confirm against the full source file.
HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); job.setMapOutputValueClass(NullWritable.class); if(work.getNumMapTasks() != null) { job.setNumMapTasks(work.getNumMapTasks()); job.setNumReduceTasks(0); job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e);
public static void main(String[] args) { JobConf conf = new JobConf(HSymAlign.class); conf.setJobName("alignment-sym"); conf.setOutputKeyClass(IntWritable.class); // the keys are words (strings) conf.setOutputValueClass(Text.class); // the values are counts (ints) conf.setMapperClass(MapClass.class); conf.setReducerClass(Reduce.class); conf.setNumMapTasks(1); conf.setNumReduceTasks(500); String filename="infiles"; String outputPath="align"; FileInputFormat.setInputPaths(conf, new Path(filename)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); try{ JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } } }
private static void distributedCopy(Path inputPath, Path outputPath, OperationsParams params) throws IOException { JobConf job = new JobConf(params, DistributedCopy.class); job.setJobName("distcp3"); // Set input job.setInputFormat(BlockInputFormat.class); BlockInputFormat.addInputPath(job, inputPath); // Set output job.setOutputFormat(BlockOutputFormat.class); BlockOutputFormat.setOutputPath(job, outputPath); job.setOutputCommitter(BlockOutputCommitter.class); // Set number of mappers/reducers ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setNumReduceTasks(0); // Run the job JobClient.runJob(job); }
/**
 * Builds the JobConf for a SampleDataForSplitPoints job: serialises the
 * store schema, records the mapper generator, validation flag and sample
 * proportion, applies an optional map-task count, and fixes the reducer
 * count at one.
 *
 * @param operation the sampling operation supplying options
 * @param mapperGeneratorClassName mapper generator to record in the conf
 * @param store the (Accumulo) store whose schema and key package are used
 * @return the fully populated job configuration
 * @throws IOException if the schema cannot be serialised
 */
@Override
public JobConf createJobConf(final SampleDataForSplitPoints operation,
    final String mapperGeneratorClassName, final Store store) throws IOException {
  final JobConf jobConf = new JobConf(new Configuration());
  LOGGER.info("Setting up job conf");

  // Serialise the schema once; previously toCompactJson() was invoked a
  // second time just to build the log message.
  final String schemaJson =
      new String(store.getSchema().toCompactJson(), CommonConstants.UTF_8);
  jobConf.set(SCHEMA, schemaJson);
  LOGGER.info("Added {} {} to job conf", SCHEMA, schemaJson);

  jobConf.set(MAPPER_GENERATOR, mapperGeneratorClassName);
  LOGGER.info("Added {} of {} to job conf", MAPPER_GENERATOR, mapperGeneratorClassName);
  jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
  LOGGER.info("Added {} option of {} to job conf", VALIDATE, operation.isValidate());
  jobConf.set(PROPORTION_TO_SAMPLE, String.valueOf(operation.getProportionToSample()));
  LOGGER.info("Added {} option of {} to job conf", PROPORTION_TO_SAMPLE,
      String.valueOf(operation.getProportionToSample()));

  // Map-task count is optional; only applied when explicitly provided.
  final Integer numTasks = operation.getNumMapTasks();
  if (null != numTasks) {
    jobConf.setNumMapTasks(numTasks);
    LOGGER.info("Set number of map tasks to {} on job conf", numTasks);
  }
  // Exactly one reducer, as in the original configuration.
  jobConf.setNumReduceTasks(1);
  LOGGER.info("Set number of reduce tasks to 1 on job conf");

  jobConf.set(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS,
      ((AccumuloStore) store).getKeyPackage().getKeyConverter().getClass().getName());
  return jobConf;
}
/**
 * Runs the demo: a job with no real input ({@code NullInputFormat}) and no
 * real output ({@code NullOutputFormat}), executing {@code MyMapper} across
 * ten map-only tasks.
 *
 * @param args unused
 * @throws IOException if the job fails
 */
public static void main(String[] args) throws IOException {
  JobConf conf = new JobConf(DemoMapredNullInput.class);
  conf.setJobName("DemoMapredNullInput");

  conf.setNumMapTasks(10);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(MyMapper.class);

  JobClient.runJob(conf);
}
}
/**
 * Creates a simple copy job.
 *
 * @param indirs List of input directories.
 * @param outdir Output directory.
 * @return JobConf initialised for a simple copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
static JobConf createCopyJob(List<Path> indirs, Path outdir) throws Exception {
  JobConf copyJob = new JobConf(new Configuration(), TestJobControl.class);
  copyJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(copyJob, indirs.toArray(new Path[0]));
  FileOutputFormat.setOutputPath(copyJob, outdir);

  // DataCopy serves as both mapper and reducer; keys and values are Text.
  copyJob.setMapperClass(DataCopy.class);
  copyJob.setReducerClass(DataCopy.class);
  copyJob.setOutputKeyClass(Text.class);
  copyJob.setOutputValueClass(Text.class);

  copyJob.setNumMapTasks(12);
  copyJob.setNumReduceTasks(4);
  return copyJob;
}
/**
 * Writes {@code numMaps} copies of {@code input} as text part files into
 * {@code inDir}, configures a text-input job with the requested task
 * counts, and submits it without waiting for completion.
 *
 * @return the submitted (still running) job
 * @throws IOException on filesystem or submission failure
 */
static RunningJob runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
    int numReds, String input) throws IOException {
  FileSystem fs = FileSystem.get(conf);

  // Fresh output directory; ensure the input directory exists.
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }

  // One copy of the caller-supplied text per map task.
  for (int part = 0; part < numMaps; ++part) {
    DataOutputStream out = fs.create(new Path(inDir, "part-" + part));
    out.writeBytes(input);
    out.close();
  }

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  return new JobClient(conf).submitJob(conf);
}
private void testKilledJob(JobConf job, MyListener myListener) throws IOException { LOG.info("Testing job-kill"); Path inDir = new Path(TEST_ROOT_DIR + "/jiplistenerkilljob/input"); Path outDir = new Path(TEST_ROOT_DIR + "/jiplistenerkilljob/output"); job.setNumMapTasks(1); job.setNumReduceTasks(0); // submit and kill the job RunningJob rJob = UtilsForTests.runJobKill(job, inDir, outDir); JobID id = rJob.getID(); // check if the job failure was notified assertFalse("Missing event notification on killing a running job", myListener.contains(id)); // check if killed assertEquals("Job failed!", JobStatus.KILLED, rJob.getJobState()); }