public int run(String[] args) throws Exception {
  if (args.length != 3)
    Utils.croak("USAGE: GenerateData input-file output-dir value-size");
  JobConf conf = new JobConf(getConf(), GenerateData.class);
  conf.setJobName("generate-data");

  conf.setMapperClass(GenerateDataMapper.class);
  conf.setNumReduceTasks(0); // map-only job, so no reducer is needed

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  // the job emits raw bytes for both key and value
  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);

  Path inputPath = new Path(args[0]);
  FileInputFormat.setInputPaths(conf, inputPath);
  Path outputPath = new Path(args[1]);
  // delete output path if it already exists
  FileSystem fs = outputPath.getFileSystem(conf);
  if (fs.exists(outputPath))
    fs.delete(outputPath, true);
  FileOutputFormat.setOutputPath(conf, outputPath);
  conf.setInt("value.size", Integer.parseInt(args[2]));

  JobClient.runJob(conf);
  return 0;
}
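// GenerateDataMapper is referenced above but not shown. A minimal sketch of
// what such a mapper might look like -- an assumption, not the actual
// implementation (usual org.apache.hadoop.mapred imports assumed): it keys
// each record on the input line's bytes and pads the value to "value.size".
public static class GenerateDataMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, BytesWritable, BytesWritable> {
  private int valueSize;

  @Override
  public void configure(JobConf job) {
    // read the value size set by run() above; 1024 is a hypothetical default
    valueSize = job.getInt("value.size", 1024);
  }

  @Override
  public void map(LongWritable key, Text value,
                  OutputCollector<BytesWritable, BytesWritable> output,
                  Reporter reporter) throws IOException {
    // key: the input line's bytes; value: a fixed-size zero-filled buffer
    output.collect(new BytesWritable(value.copyBytes()),
                   new BytesWritable(new byte[valueSize]));
  }
}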
@Test
public void testNonAvroMapper() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);

  // configure input for non-Avro sequence file
  job.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(job, file().toURI().toString());

  // use a hadoop mapper that emits Avro output
  job.setMapperClass(NonAvroMapper.class);

  // reducer is default, identity

  // configure output for avro
  FileOutputFormat.setOutputPath(job, outputPath);
  AvroJob.setOutputSchema(job, SCHEMA);

  JobClient.runJob(job);

  checkFile(new DataFileReader<>(
      new File(outputPath.toString() + "/part-00000.avro"),
      new SpecificDatumReader<>()));
}
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
  JobConf job = new JobConf();
  String inputPath = INPUT_DIR.getRoot().getPath();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);

  WordCountUtil.writeLinesBytesFile(inputPath);

  job.setInputFormat(AvroAsTextInputFormat.class);
  job.setOutputFormat(AvroTextOutputFormat.class);
  job.setOutputKeyClass(Text.class);

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputPath);

  JobClient.runJob(job);

  WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  Configuration conf = UTIL.getConfiguration();
  final JobConf job = new JobConf(conf);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw instances of the filtered-out row.", 0,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
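// ExampleVerifier is referenced above but not shown. A plausible sketch,
// assuming the usual org.apache.hadoop.hbase.mapred types: a map-only task
// that tallies one counter per row key, column family, and cell value --
// the same counters the assertions above inspect.
static class ExampleVerifier extends MapReduceBase
    implements Mapper<ImmutableBytesWritable, Result, NullWritable, NullWritable> {
  @Override
  public void map(ImmutableBytesWritable key, Result value,
                  OutputCollector<NullWritable, NullWritable> output,
                  Reporter reporter) throws IOException {
    for (Cell cell : value.listCells()) {
      reporter.incrCounter(TestTableInputFormat.class.getName() + ":row",
          Bytes.toString(CellUtil.cloneRow(cell)), 1);
      reporter.incrCounter(TestTableInputFormat.class.getName() + ":family",
          Bytes.toString(CellUtil.cloneFamily(cell)), 1);
      reporter.incrCounter(TestTableInputFormat.class.getName() + ":value",
          Bytes.toString(CellUtil.cloneValue(cell)), 1);
    }
  }
}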
private static void runIOTest(Class<? extends Mapper> mapperClass, Path outputDir)
    throws IOException {
  JobConf job = new JobConf(fsConfig, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
@Test
public void testNonAvroMapOnly() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);

  // configure input for non-Avro sequence file
  job.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(job, file().toURI().toString());

  // use a hadoop mapper that emits Avro output
  job.setMapperClass(NonAvroOnlyMapper.class);

  // configure output for avro
  job.setNumReduceTasks(0); // map-only
  FileOutputFormat.setOutputPath(job, outputPath);
  AvroJob.setOutputSchema(job, SCHEMA);

  JobClient.runJob(job);

  checkFile(new DataFileReader<>(
      new File(outputPath.toString() + "/part-00000.avro"),
      new SpecificDatumReader<>()));
}
/** Runs the demo. */
public static void main(String[] args) throws IOException {
  JobConf conf = new JobConf(DemoMapredNullInput.class);
  conf.setJobName("DemoMapredNullInput");
  conf.setNumMapTasks(10);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(MyMapper.class);
  JobClient.runJob(conf);
}
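// MyMapper is referenced above but not shown. A minimal sketch, assuming
// NullInputFormat hands each of the 10 map tasks a single NullWritable
// record, so all real work happens inside map() with no input data:
static class MyMapper extends MapReduceBase
    implements Mapper<NullWritable, NullWritable, NullWritable, NullWritable> {
  @Override
  public void map(NullWritable key, NullWritable value,
                  OutputCollector<NullWritable, NullWritable> output,
                  Reporter reporter) throws IOException {
    // do side-effecting work here; nothing is read from or written to HDFS
    reporter.setStatus("running with no input");
  }
}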
protected static void runIOTest(
    @SuppressWarnings("rawtypes") Class<? extends Mapper> mapperClass,
    Path outputDir, JobConf job) throws IOException {
  FileInputFormat.setInputPaths(job, DfsioeConfig.getInstance().getControlDir(fsConfig));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
@Test
public void testJob() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(DIR.getRoot().getPath() + "/out");
  outputPath.getFileSystem(job).delete(outputPath);

  job.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(job, DIR.getRoot().getPath() + "/in");

  job.setMapperClass(AvroTestConverter.class);
  job.setNumReduceTasks(0);

  FileOutputFormat.setOutputPath(job, outputPath);
  System.out.println(createSchema());
  AvroJob.setOutputSchema(job,
      Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
  job.setOutputFormat(AvroOutputFormat.class);

  JobClient.runJob(job);
}
public long produceSamples(Path samplePath, boolean textOutput) throws Exception {
  Path input = new Path(samplePath.toString() + "-seeds");
  this.numSamples = writeSeeds(input);
  LOG.info("Generating " + this.numSamples + " samples");

  JobConf jobConf = getJobConf();
  jobConf.set("genkmeansdataset.dimensions", Integer.toString(dimension));

  FileInputFormat.setInputPaths(jobConf, input);
  FileOutputFormat.setOutputPath(jobConf, samplePath);
  jobConf.setMapperClass(MapClass.class);

  // input format and key/value classes are identical in both modes;
  // only the output format differs between text and sequence-file output
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  if (textOutput) {
    jobConf.setOutputFormat(TextOutputFormat.class);
  } else {
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  }
  jobConf.setOutputKeyClass(LongWritable.class);
  jobConf.setOutputValueClass(VectorWritable.class);

  jobConf.setNumReduceTasks(0);
  JobClient.runJob(jobConf);
  return this.numSamples;
}
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
    throws IOException, InterruptedException, ClassNotFoundException {
  JobConf job = new JobConf(TEST_UTIL.getConfiguration());

  job.setJobName(jobName);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);

  TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans),
      Mapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true,
      restoreDir);
  TableMapReduceUtil.addDependencyJars(job);

  job.setNumReduceTasks(1); // one to get final "first" and "last" key

  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  LOG.info("Started " + job.getJobName());

  // runJob blocks until the job completes, so no extra waitForCompletion is needed
  RunningJob runningJob = JobClient.runJob(job);
  assertTrue(runningJob.isSuccessful());
  LOG.info("After map/reduce completion - job " + jobName);
}
public static void main(String[] args) throws Exception {
  long startTime = System.currentTimeMillis();
  Configuration conf = new Configuration();
  // parse generic Hadoop options; the remaining arguments carry the paths
  // (otherArgs was referenced below but never declared in the original)
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

  JobConf jobconf = new JobConf(conf, SecondarySortProjectionDriver.class);
  jobconf.setJobName("SecondarySortProjectionDriver");
  jobconf.setMapperClass(SecondarySortProjectionMapper.class);
  jobconf.setReducerClass(SecondarySortProjectionReducer.class);
  jobconf.setOutputKeyClass(Text.class);
  jobconf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(jobconf, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(jobconf, new Path(otherArgs[1]));
  jobconf.setInputFormat(TextInputFormat.class);
  jobconf.setOutputFormat(TextOutputFormat.class);
  jobconf.setCompressMapOutput(true);

  // group reducer input by the natural key only, enabling secondary sort
  jobconf.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);

  JobClient.runJob(jobconf); // blocks until the job completes
  System.out.println("Elapsed time: " + (System.currentTimeMillis() - startTime) + " ms");
}
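// NaturalKeyGroupingComparator is referenced above but not shown. A sketch of
// the usual secondary-sort grouping comparator, assuming the composite key is
// a Text of the form "naturalKey,sortField" (the delimiter is an assumption):
// only the natural-key prefix decides which reduce group a record joins.
public static class NaturalKeyGroupingComparator extends WritableComparator {
  protected NaturalKeyGroupingComparator() {
    super(Text.class, true); // create underlying Text instances for comparison
  }

  @Override
  public int compare(WritableComparable a, WritableComparable b) {
    // compare only the natural key, ignoring the sort field after the comma
    String naturalA = a.toString().split(",")[0];
    String naturalB = b.toString().split(",")[0];
    return naturalA.compareTo(naturalB);
  }
}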
@Test
public void testNonAvroReducer() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);

  // configure input for Avro from sequence file
  AvroJob.setInputSequenceFile(job);
  AvroJob.setInputSchema(job, SCHEMA);
  FileInputFormat.setInputPaths(job, file().toURI().toString());

  // mapper is default, identity

  // use a hadoop reducer that consumes Avro input
  AvroJob.setMapOutputSchema(job, SCHEMA);
  job.setReducerClass(NonAvroReducer.class);

  // configure output for non-Avro SequenceFile
  job.setOutputFormat(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  // output key/value classes are default, LongWritable/Text

  JobClient.runJob(job);

  checkFile(new SequenceFileReader<>(new File(outputPath.toString() + "/part-00000")));
}
JobConf job = new JobConf(PagerankData.class);
String jobname = "Create pagerank links";
job.setJobName(jobname); // jobname was built but never applied in the original
Path fout = new Path(options.getResultPath(), EDGES_DIR_NAME);

job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setInputFormat(NLineInputFormat.class);
job.setMapperClass(DummyToPageRankLinksMapper.class);
// note: the NLine input paths are configured outside this excerpt
FileOutputFormat.setOutputPath(job, fout);

JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);
@Test
public void testSequenceFileInputFormat() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);

  // configure input for Avro from sequence file
  AvroJob.setInputSequenceFile(job);
  FileInputFormat.setInputPaths(job, file().toURI().toString());
  AvroJob.setInputSchema(job, SCHEMA);

  // mapper is default, identity
  // reducer is default, identity

  // configure output for avro
  AvroJob.setOutputSchema(job, SCHEMA);
  FileOutputFormat.setOutputPath(job, outputPath);

  JobClient.runJob(job);

  checkFile(new DataFileReader<>(
      new File(outputPath.toString() + "/part-00000.avro"),
      new SpecificDatumReader<>()));
}
JobConf job = new JobConf();
job.setInputFormat(NLineInputFormat.class);
job.setMapperClass(CreateBayesPages.class);
job.setNumReduceTasks(0);

FileOutputFormat.setOutputPath(job, fout);
job.setOutputFormat(SequenceFileOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);
/** Uses default mapper with no reduces for a map-only identity job. */
@Test
@SuppressWarnings("deprecation")
public void testMapOnly() throws Exception {
  JobConf job = new JobConf();
  String inDir = System.getProperty("share.dir", "../../../share") + "/test/data";
  Path input = new Path(inDir + "/weather.avro");
  Path output = new Path("target/test/weather-ident");
  output.getFileSystem(job).delete(output);

  job.setJobName("identity map weather");

  AvroJob.setInputSchema(job, Weather.SCHEMA$);
  AvroJob.setOutputSchema(job, Weather.SCHEMA$);

  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setCompressOutput(job, true);

  job.setNumReduceTasks(0); // map-only

  JobClient.runJob(job);

  // check output is correct
  DatumReader<Weather> reader = new SpecificDatumReader<>();
  DataFileReader<Weather> check =
      new DataFileReader<>(new File(inDir + "/weather.avro"), reader);
  DataFileReader<Weather> sorted =
      new DataFileReader<>(new File(output.toString() + "/part-00000.avro"), reader);
  for (Weather w : sorted)
    assertEquals(check.next(), w);
  check.close();
  sorted.close();
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  // parse generic Hadoop options; the remaining arguments carry the paths
  // (otherArgs was referenced below but never declared in the original)
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

  JobConf jobconf = new JobConf(conf, SortByMRF_MovingAverageDriver.class);
  jobconf.setJobName("SortByMRF_MovingAverageDriver");
  jobconf.setMapperClass(SortByMRF_MovingAverageMapper.class);
  jobconf.setReducerClass(SortByMRF_MovingAverageReducer.class);
  jobconf.setOutputKeyClass(Text.class);
  jobconf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(jobconf, new Path(otherArgs[1]));
  FileOutputFormat.setOutputPath(jobconf, new Path(otherArgs[2]));
  jobconf.setInputFormat(TextInputFormat.class);
  jobconf.setOutputFormat(TextOutputFormat.class);
  jobconf.setCompressMapOutput(true);

  // group reducer input by the natural key only, enabling secondary sort
  jobconf.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);

  JobClient.runJob(jobconf);
}
@SuppressWarnings("deprecation") public void testJobNoreducer() throws Exception { JobConf job = new JobConf(); job.setNumReduceTasks(0); Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath()); outputPath.getFileSystem(job).delete(outputPath); WordCountUtil.writeLinesFile(new File(INPUT_DIR.getRoot(),"lines.avro")); job.setJobName("AvroMultipleOutputs_noreducer"); AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema()); AvroJob.setMapperClass(job, MapImpl.class); FileInputFormat.setInputPaths(job, new Path(INPUT_DIR.getRoot().toString())); FileOutputFormat.setOutputPath(job, outputPath); FileOutputFormat.setCompressOutput(job, false); AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class, Schema.create(Schema.Type.STRING)); JobClient.runJob(job); }
JobConf job = new JobConf(NutchData.class);
Path urls = new Path(options.getWorkPath(), URLS_DIR_NAME);
Utils.checkHdfsPath(urls);

job.setInputFormat(NLineInputFormat.class);
job.setMapperClass(CreateUrlHash.class);
job.setNumReduceTasks(0);

job.setMapOutputValueClass(Text.class);
job.setOutputFormat(MapFileOutputFormat.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
MapFileOutputFormat.setOutputPath(job, urls);

// note: the input path (the pages file logged below) is configured outside
// this excerpt
log.info("Pages file " + dummy.getPath() + " as input");
log.info("Rankings file " + urls + " as output");

JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);