// NOTE(review): this fragment appears garbled — the `if (work.getNumMapTasks() != null)`
// block is followed directly by a `catch (ClassNotFoundException e)` clause with no
// visible `try`; presumably the original wrapped job.setInputFormat(JavaUtils.loadClass(...))
// (the only call here that can raise ClassNotFoundException) in a try/catch.
// Code left byte-identical — confirm against the full source file.
// Visible behavior: prepares Hive job output, sets the Hive output format, the work's
// mapper, NullWritable map-output values, and (when configured) the map task count,
// zero reducers, and a dynamically loaded input format.
HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); job.setMapOutputValueClass(NullWritable.class); if(work.getNumMapTasks() != null) { job.setNumMapTasks(work.getNumMapTasks()); job.setNumReduceTasks(0); job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e);
public static JobConf configureJob(JobConf conf, String[] args) { conf.set(KEY_INPUT_FILE, args[0]) ; conf.set(KEY_LANG_FILE, args[1]) ; conf.set(KEY_LANG_CODE, args[2]) ; conf.set(KEY_SENTENCE_MODEL, args[3]) ; conf.set(KEY_OUTPUT_DIR, args[4]) ; //set a reasonable number of maps. This is going to be ignored for very large inputs (e.g. the en wiki dump) anyway. conf.setNumMapTasks(16) ; //force one reducer by default. These don't take very long, and multiple reducers would make finalise file functions more complicated. conf.setNumReduceTasks(1) ; //many of our tasks require pre-loading lots of data, may as well reuse this as much as we can. //conf.setNumTasksToExecutePerJvm(-1) ; //conf.setInt("mapred.tasktracker.map.tasks.maximum", 2) ; //conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1) ; //TODO: really don't want this hard coded. conf.set("mapred.child.java.opts", "-Xmx500M -Dapple.awt.UIElement=true") ; //conf.setBoolean("mapred.used.genericoptionsparser", true) ; return conf ; }
private void configure(JobConf conf, Path inDir, Path outDir, String input, Class<? extends Mapper> map, Class<? extends Reducer> reduce) throws IOException { // set up the input file system and write input text. FileSystem inFs = inDir.getFileSystem(conf); FileSystem outFs = outDir.getFileSystem(conf); outFs.delete(outDir, true); if (!inFs.mkdirs(inDir)) { throw new IOException("Mkdirs failed to create " + inDir.toString()); } { // write input into input file DataOutputStream file = inFs.create(new Path(inDir, "part-0")); file.writeBytes(input); file.close(); } // configure the mapred Job which creates a tempfile in map. conf.setJobName("testmap"); conf.setMapperClass(map); conf.setReducerClass(reduce); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data", "/tmp")).toString().replace(' ', '+'); conf.set("test.build.data", TEST_ROOT_DIR); }
/**
 * Writes {@code numMaps} identical text part files into {@code inDir},
 * configures a text-input job with the requested task counts, and submits
 * it without waiting for completion.
 *
 * @return the submitted (still running) job
 * @throws IOException on filesystem or submission failure
 */
static RunningJob runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
    int numReds) throws IOException {
  FileSystem fs = FileSystem.get(conf);

  // Fresh output directory; ensure the input directory exists.
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }

  // One copy of the sample text per map task.
  String input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";
  for (int part = 0; part < numMaps; ++part) {
    DataOutputStream out = fs.create(new Path(inDir, "part-" + part));
    out.writeBytes(input);
    out.close();
  }

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  return new JobClient(conf).submitJob(conf);
}
/** * When no input dir is specified, generate random data. */ protected static void confRandom(JobConf job) throws IOException { // from RandomWriter job.setInputFormat(RandomInputFormat.class); job.setMapperClass(RandomMapOutput.class); final ClusterStatus cluster = new JobClient(job).getClusterStatus(); int numMapsPerHost = job.getInt(RandomTextWriter.MAPS_PER_HOST, 10); long numBytesToWritePerMap = job.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { throw new IOException( "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0"); } long totalBytesToWrite = job.getLong(RandomTextWriter.TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite); } job.setNumMapTasks(numMaps); }
// Fragment of a job-setup routine (enclosing method not visible): wipes the test
// directory, then configures a SequenceFile-input job with the given input/output
// paths, IntWritable map-output values, and two map tasks.
// NOTE(review): conf, testdir, inDir, and outDir are declared outside this fragment.
FileSystem fs = FileSystem.get(conf); fs.delete(testdir, true); conf.setInputFormat(SequenceFileInputFormat.class); FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); conf.setMapOutputValueClass(IntWritable.class); conf.setNumMapTasks(2);
/**
 * Validates the required configuration entries, then wires the sample
 * mapper/reducer and the example input/output paths into the action
 * configuration (single map task).
 *
 * @param actionConf the Oozie action configuration to populate
 * @throws OozieActionConfiguratorException if the user, {@code examples.root},
 *     or {@code output.dir.name} is missing
 */
@Override
public void configure(JobConf actionConf) throws OozieActionConfiguratorException {
  if (actionConf.getUser() == null) {
    throw new OozieActionConfiguratorException("No user set");
  }
  // Both properties are mandatory; checked in the same order as before.
  for (String required : new String[] {"examples.root", "output.dir.name"}) {
    if (actionConf.get(required) == null) {
      throw new OozieActionConfiguratorException(required + " not set");
    }
  }

  actionConf.setMapperClass(SampleMapper.class);
  actionConf.setReducerClass(SampleReducer.class);
  actionConf.setNumMapTasks(1);

  String examplesHome =
      "/user/" + actionConf.getUser() + "/" + actionConf.get("examples.root");
  FileInputFormat.setInputPaths(actionConf,
      new Path(examplesHome + "/input-data/text"));
  FileOutputFormat.setOutputPath(actionConf,
      new Path(examplesHome + "/output-data/" + actionConf.get("output.dir.name")));
}
}
// NOTE(review): garbled fragment (duplicate of an earlier snippet) — the
// `if (work.getNumMapTasks() != null)` block is followed directly by a
// `catch (ClassNotFoundException e)` clause with no visible `try`; the original
// presumably wrapped job.setInputFormat(JavaUtils.loadClass(...)) in a try/catch.
// Left byte-identical — confirm against the full source file.
HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); job.setMapOutputValueClass(NullWritable.class); if(work.getNumMapTasks() != null) { job.setNumMapTasks(work.getNumMapTasks()); job.setNumReduceTasks(0); job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e);
/** * @param args */ @SuppressWarnings("deprecation") public static void main(String[] args) { JobConf conf = new JobConf(HBitextCompiler.class); conf.set(OUTPUT_BASENAME, "/shared/bitexts/ep700k+nc.de-en/ep700k+nc"); conf.set(FR_PATH, "filt.lc.de"); conf.set(EN_PATH, "filt.lc.en"); conf.set(AL_PATH, ""); ///user/redpony/model-5M/aligned.grow-diag-final"); conf.setJobName("bitext.compile"); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(BitextCompilerMapper.class); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); FileInputFormat.setInputPaths(conf, new Path("dummy")); try { FileSystem.get(conf).delete(new Path("dummy.out")); FileOutputFormat.setOutputPath(conf, new Path("dummy.out")); conf.setOutputFormat(SequenceFileOutputFormat.class); JobClient.runJob(conf); } catch (IOException e) { System.err.println("Caught " + e); e.printStackTrace(); } }
// Fragment (enclosing method not visible): points the client at the given
// JobTracker address and configures a text-input "wordcount" job with the
// requested numbers of map and reduce tasks.
// NOTE(review): conf, jobTracker, inDir, outDir, numMaps, and numReduces are
// declared outside this fragment.
conf.set(JTConfig.JT_IPC_ADDRESS, jobTracker); conf.setJobName("wordcount"); conf.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); conf.setNumMapTasks(numMaps); conf.setNumReduceTasks(numReduces);
// Fragment: a single map task and no reducers — i.e. a map-only job.
conf.setNumMapTasks(1); conf.setNumReduceTasks(0);
/** * When no input dir is specified, generate random data. */ protected static void confRandom(JobConf job) throws IOException { // from RandomWriter job.setInputFormat(RandomInputFormat.class); job.setMapperClass(RandomMapOutput.class); final ClusterStatus cluster = new JobClient(job).getClusterStatus(); int numMapsPerHost = job.getInt(RandomTextWriter.MAPS_PER_HOST, 10); long numBytesToWritePerMap = job.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { throw new IOException( "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0"); } long totalBytesToWrite = job.getLong(RandomTextWriter.TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite); } job.setNumMapTasks(numMaps); }
// NOTE(review): garbled fragment (duplicate of an earlier snippet) — the
// `if (work.getNumMapTasks() != null)` block is followed directly by a
// `catch (ClassNotFoundException e)` clause with no visible `try`; the original
// presumably wrapped job.setInputFormat(JavaUtils.loadClass(...)) in a try/catch.
// Left byte-identical — confirm against the full source file.
HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); job.setMapOutputValueClass(NullWritable.class); if(work.getNumMapTasks() != null) { job.setNumMapTasks(work.getNumMapTasks()); job.setNumReduceTasks(0); job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e);
public static void main(String[] args) { JobConf conf = new JobConf(HSymAlign.class); conf.setJobName("alignment-sym"); conf.setOutputKeyClass(IntWritable.class); // the keys are words (strings) conf.setOutputValueClass(Text.class); // the values are counts (ints) conf.setMapperClass(MapClass.class); conf.setReducerClass(Reduce.class); conf.setNumMapTasks(1); conf.setNumReduceTasks(500); String filename="infiles"; String outputPath="align"; FileInputFormat.setInputPaths(conf, new Path(filename)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); try{ JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } } }
private static void distributedCopy(Path inputPath, Path outputPath, OperationsParams params) throws IOException { JobConf job = new JobConf(params, DistributedCopy.class); job.setJobName("distcp3"); // Set input job.setInputFormat(BlockInputFormat.class); BlockInputFormat.addInputPath(job, inputPath); // Set output job.setOutputFormat(BlockOutputFormat.class); BlockOutputFormat.setOutputPath(job, outputPath); job.setOutputCommitter(BlockOutputCommitter.class); // Set number of mappers/reducers ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5); job.setNumReduceTasks(0); // Run the job JobClient.runJob(job); }
/**
 * Builds the JobConf for a SampleDataForSplitPoints job: serialises the
 * store schema, records the mapper generator, validation flag and sample
 * proportion, applies an optional map-task count, and fixes the reducer
 * count at one.
 *
 * @param operation the sampling operation supplying options
 * @param mapperGeneratorClassName mapper generator to record in the conf
 * @param store the (Accumulo) store whose schema and key package are used
 * @return the fully populated job configuration
 * @throws IOException if the schema cannot be serialised
 */
@Override
public JobConf createJobConf(final SampleDataForSplitPoints operation,
    final String mapperGeneratorClassName, final Store store) throws IOException {
  final JobConf jobConf = new JobConf(new Configuration());
  LOGGER.info("Setting up job conf");

  // Serialise the schema once; previously toCompactJson() was invoked a
  // second time just to build the log message.
  final String schemaJson =
      new String(store.getSchema().toCompactJson(), CommonConstants.UTF_8);
  jobConf.set(SCHEMA, schemaJson);
  LOGGER.info("Added {} {} to job conf", SCHEMA, schemaJson);

  jobConf.set(MAPPER_GENERATOR, mapperGeneratorClassName);
  LOGGER.info("Added {} of {} to job conf", MAPPER_GENERATOR, mapperGeneratorClassName);
  jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
  LOGGER.info("Added {} option of {} to job conf", VALIDATE, operation.isValidate());
  jobConf.set(PROPORTION_TO_SAMPLE, String.valueOf(operation.getProportionToSample()));
  LOGGER.info("Added {} option of {} to job conf", PROPORTION_TO_SAMPLE,
      String.valueOf(operation.getProportionToSample()));

  // Map-task count is optional; only applied when explicitly provided.
  final Integer numTasks = operation.getNumMapTasks();
  if (null != numTasks) {
    jobConf.setNumMapTasks(numTasks);
    LOGGER.info("Set number of map tasks to {} on job conf", numTasks);
  }
  // Exactly one reducer, as in the original configuration.
  jobConf.setNumReduceTasks(1);
  LOGGER.info("Set number of reduce tasks to 1 on job conf");

  jobConf.set(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS,
      ((AccumuloStore) store).getKeyPackage().getKeyConverter().getClass().getName());
  return jobConf;
}
/**
 * Runs the demo: a job with no real input ({@code NullInputFormat}) and no
 * real output ({@code NullOutputFormat}), executing {@code MyMapper} across
 * ten map-only tasks.
 *
 * @param args unused
 * @throws IOException if the job fails
 */
public static void main(String[] args) throws IOException {
  JobConf conf = new JobConf(DemoMapredNullInput.class);
  conf.setJobName("DemoMapredNullInput");

  conf.setNumMapTasks(10);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(MyMapper.class);

  JobClient.runJob(conf);
}
}
/**
 * Creates a simple copy job.
 *
 * @param indirs List of input directories.
 * @param outdir Output directory.
 * @return JobConf initialised for a simple copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
static JobConf createCopyJob(List<Path> indirs, Path outdir) throws Exception {
  JobConf copyJob = new JobConf(new Configuration(), TestJobControl.class);
  copyJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(copyJob, indirs.toArray(new Path[0]));
  FileOutputFormat.setOutputPath(copyJob, outdir);

  // DataCopy serves as both mapper and reducer; keys and values are Text.
  copyJob.setMapperClass(DataCopy.class);
  copyJob.setReducerClass(DataCopy.class);
  copyJob.setOutputKeyClass(Text.class);
  copyJob.setOutputValueClass(Text.class);

  copyJob.setNumMapTasks(12);
  copyJob.setNumReduceTasks(4);
  return copyJob;
}
/**
 * Writes {@code numMaps} copies of {@code input} as text part files into
 * {@code inDir}, configures a text-input job with the requested task
 * counts, and submits it without waiting for completion.
 *
 * @return the submitted (still running) job
 * @throws IOException on filesystem or submission failure
 */
static RunningJob runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
    int numReds, String input) throws IOException {
  FileSystem fs = FileSystem.get(conf);

  // Fresh output directory; ensure the input directory exists.
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }

  // One copy of the caller-supplied text per map task.
  for (int part = 0; part < numMaps; ++part) {
    DataOutputStream out = fs.create(new Path(inDir, "part-" + part));
    out.writeBytes(input);
    out.close();
  }

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  return new JobClient(conf).submitJob(conf);
}
private void testKilledJob(JobConf job, MyListener myListener) throws IOException { LOG.info("Testing job-kill"); Path inDir = new Path(TEST_ROOT_DIR + "/jiplistenerkilljob/input"); Path outDir = new Path(TEST_ROOT_DIR + "/jiplistenerkilljob/output"); job.setNumMapTasks(1); job.setNumReduceTasks(0); // submit and kill the job RunningJob rJob = UtilsForTests.runJobKill(job, inDir, outDir); JobID id = rJob.getID(); // check if the job failure was notified assertFalse("Missing event notification on killing a running job", myListener.contains(id)); // check if killed assertEquals("Job failed!", JobStatus.KILLED, rJob.getJobState()); }