/**
 * Create the {@link Job} to run in this task.
 * @return the {@link Job} to run. If this method returns null, no job will be run and the task
 *         will be marked as successful.
 */
protected Job createJob() throws IOException {
  Job job = Job.getInstance(new Configuration());
  for (Map.Entry<Object, Object> entry : this.taskContext.getTaskState().getProperties().entrySet()) {
    if (entry.getKey() instanceof String && ((String) entry.getKey()).startsWith(JOB_CONFIGURATION_PREFIX)) {
      String actualKey = ((String) entry.getKey()).substring(JOB_CONFIGURATION_PREFIX.length());
      job.getConfiguration().set(actualKey, (String) entry.getValue());
    }
  }
  return job;
}
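
// A standalone sketch of the prefix-stripping pattern used above. The prefix value
// and class name here are assumptions for illustration; only keys beginning with the
// prefix are copied into the Hadoop configuration, with the prefix removed.
public class PrefixedJobConfDemo {
  private static final String JOB_CONFIGURATION_PREFIX = "job.conf."; // assumed value

  public static Configuration toJobConfiguration(Properties props) {
    Configuration conf = new Configuration();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
      if (entry.getKey() instanceof String
          && ((String) entry.getKey()).startsWith(JOB_CONFIGURATION_PREFIX)) {
        // e.g. "job.conf.mapreduce.job.reduces" -> "mapreduce.job.reduces"
        conf.set(((String) entry.getKey()).substring(JOB_CONFIGURATION_PREFIX.length()),
            (String) entry.getValue());
      }
    }
    return conf;
  }
}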
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
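
// A minimal driver sketch for the method above; the table name, output path and
// report separator are hypothetical values, not taken from the original source.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = createSubmittableJob(conf, new String[] { "myTable", "/tmp/cellcount", ";" });
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}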
// The original excerpt begins at the throws clause; the signature below is a
// hypothetical reconstruction from how the parameters are used.
private static Job createMapOnlyJob(Configuration hadoopConf, Class<? extends Mapper> mapperClass,
    Class<? extends InputFormat> inputFormat)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(hadoopConf);
  job.setJarByClass(mapperClass);
  job.setNumReduceTasks(0); // map-only job
  job.setMapperClass(mapperClass);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(inputFormat);
  return job;
}
private void exportHBaseConfiguration(String hbaseTableName) throws IOException {
  Configuration hbaseConf = HBaseConnection.getCurrentHBaseConfiguration();
  HadoopUtil.healSickConfig(hbaseConf);
  Job job = Job.getInstance(hbaseConf, hbaseTableName);
  HTable table = new HTable(hbaseConf, hbaseTableName);
  HFileOutputFormat2.configureIncrementalLoadMap(job, table);

  // SLF4J substitutes "{}", not "{0}", so the original placeholder would have
  // printed literally.
  logger.info("Saving HBase configuration to {}", hbaseConfPath);
  FileSystem fs = HadoopUtil.getWorkingFileSystem();
  FSDataOutputStream out = null;
  try {
    out = fs.create(new Path(hbaseConfPath));
    job.getConfiguration().writeXml(out);
  } finally {
    IOUtils.closeQuietly(out);
  }
}
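
// The write/close dance above can also be expressed with try-with-resources;
// a sketch reusing the variables from the method above (FSDataOutputStream is
// Closeable, so the stream is closed on all paths):
try (FSDataOutputStream out = fs.create(new Path(hbaseConfPath))) {
  job.getConfiguration().writeXml(out);
}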
this.mrJobDir = new Path(
    new Path(this.jobProps.getProperty(ConfigurationKeys.MR_JOB_ROOT_DIR_KEY),
        this.jobContext.getJobName()),
    this.jobContext.getJobId());
if (this.fs.exists(this.mrJobDir)) {
  // excerpt elides the handling of a pre-existing job working directory
}

this.job = Job.getInstance(this.conf, JOB_NAME_PREFIX + this.jobContext.getJobName());
Job job = Job.getInstance(c, jobName);
TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
job.setReducerClass(ScanReducer.class);
job.setNumReduceTasks(1); // one to get final "first" and "last" key
FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
LOG.info("Started " + job.getJobName());
assertTrue(job.waitForCompletion(true));
Path restoreDir = new Path(peerSnapshotTmpDir, UUID.randomUUID().toString());
peerSnapshotTmpDir = restoreDir.toString();
conf.set(NAME + ".peerSnapshotTmpDir", peerSnapshotTmpDir);

Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
job.setJarByClass(VerifyReplication.class);

Path snapshotTempPath = new Path(sourceSnapshotTmpDir);
LOG.info("Using source snapshot-" + sourceSnapshotName + " with temp dir:" + sourceSnapshotTmpDir);
/**
 * Test job submission.
 *
 * @param conf Hadoop configuration.
 * @throws Exception If failed.
 */
private void checkJobSubmit(Configuration conf) throws Exception {
  final Job job = Job.getInstance(conf);

  try {
    job.setJobName(JOB_NAME);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@" + PATH_INPUT));

    job.submit();
    job.waitForCompletion(false);

    assert job.getStatus().getState() == JobStatus.State.SUCCEEDED : job.getStatus().getState();
  } finally {
    job.getCluster().close();
  }
}
job = Job.getInstance(getConf(), jobName);

attachCubeMetadataWithDict(cube, job.getConfiguration());

job.setMapperClass(MergeCuboidMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);

job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
  LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
  createSchema();
  job = Job.getInstance(getConf());

  job.setJobName("Link Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, tmpOutput);
  job.setInputFormatClass(OneFilePerMapperSFIF.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  setMapperForGenerator(job);

  job.setOutputFormatClass(NullOutputFormat.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c) throws IOException {
  Job job = Job.getInstance(c);

  job.setJarByClass(HadoopScanMapper.class);
  job.setJobName("testPartitionedVertexScan");
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(CassandraInputFormat.class);

  return job;
}
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  Triple<TableName, Scan, Path> arguments = ExportUtils.getArgumentsFromCommandLine(conf, args);
  String tableName = arguments.getFirst().getNameAsString();
  Path outputDir = arguments.getThird();
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJobName(NAME + "_" + tableName);
  job.setJarByClass(Export.class);
  // Set optional scan parameters
  Scan s = arguments.getSecond();
  IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
  // No reducers. Just write straight to output files.
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Result.class);
  // job conf doesn't contain the conf so doesn't have a default fs.
  FileOutputFormat.setOutputPath(job, outputDir);
  return job;
}
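
// Sketch of the round trip: the companion Import tool consumes the SequenceFiles
// that this Export job produces. Table name and path here are hypothetical.
Configuration conf = HBaseConfiguration.create();
Job importJob = Import.createSubmittableJob(conf, new String[] { "myTable", "/tmp/export" });
System.exit(importJob.waitForCompletion(true) ? 0 : 1);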
public static void main(String[] args) throws Exception {
  CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);

  Configuration configuration = new Configuration();
  if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) {
    configuration.setBoolean(USE_THROTTLING_SERVER, true);
    String resourceLimited = cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest");
    configuration.set(RESOURCE_ID, resourceLimited);
    configuration.set(
        BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(),
            new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME), null,
            SharedRestClientFactory.SERVER_URI_KEY),
        cli.getOptionValue(THROTTLING_SERVER_URI.getOpt()));
  }
  if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) {
    configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt()));
  }

  Job job = Job.getInstance(configuration, "ThrottlingStressTest");
  job.getConfiguration().setBoolean("mapreduce.job.user.classpath.first", true);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.getConfiguration().set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
  StressTestUtils.populateConfigFromCli(job.getConfiguration(), cli);

  job.setJarByClass(MRStressTest.class);
  job.setMapperClass(StresserMapper.class);
  job.setReducerClass(AggregatorReducer.class);
  job.setInputFormatClass(MyInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(DoubleWritable.class);
  FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
parseOptions(options, args);

job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
String job_id = getOptionValue(OPTION_CUBING_JOB_ID);
String cubeName = getOptionValue(OPTION_CUBE_NAME);
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));

attachCubeMetadata(cube, job.getConfiguration());

Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
if (HadoopUtil.getFileSystem(path).exists(path)) {
  FileInputFormat.addInputPath(job, path);
}

setupReducer(output, reducerCount);

job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, job_id);
job.getConfiguration().set(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR,
    KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory());
job.getConfiguration().set(BatchConstants.CFG_MAPRED_OUTPUT_COMPRESS, "false");
@Test
public void testGetSplits() throws Exception {
  URI baseUri = new URI(GobblinWorkUnitsInputFormatTest.class.getSimpleName() + "://testGetSplits");
  Configuration configuration = new Configuration();

  Path workUnitsDir = new Path(new Path(baseUri), "/workUnits");

  FileSystem fs = Mockito.mock(FileSystem.class);
  FileStatus[] statuses = createFileStatuses(20, workUnitsDir);
  Mockito.when(fs.listStatus(workUnitsDir)).thenReturn(statuses);
  Mockito.when(fs.makeQualified(Mockito.any(Path.class))).thenAnswer(new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock invocation) throws Throwable {
      return (Path) invocation.getArguments()[0];
    }
  });

  FileSystemTestUtils.addFileSystemForTest(baseUri, configuration, fs);

  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();

  Job job = Job.getInstance(configuration);
  FileInputFormat.addInputPath(job, workUnitsDir);

  List<InputSplit> splits = inputFormat.getSplits(job);

  Assert.assertEquals(splits.size(), 20);
  verifyPaths(splits, statuses);
}
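
// The test relies on a createFileStatuses helper that is not part of this excerpt;
// a hypothetical sketch of what it might look like (all values assumed), fabricating
// `count` FileStatus entries under `dir`:
private FileStatus[] createFileStatuses(int count, Path dir) {
  FileStatus[] statuses = new FileStatus[count];
  for (int i = 0; i < count; i++) {
    // length, isDirectory, replication, blockSize, modificationTime, path
    statuses[i] = new FileStatus(100L, false, 1, 128L, 0L, new Path(dir, "workUnit" + i + ".wu"));
  }
  return statuses;
}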
@Test
public void testSetOutputPathException() throws Exception {
  Job job = Job.getInstance();
  try {
    // Give it an invalid filesystem so it'll throw an exception
    FileOutputFormat.setOutputPath(job, new Path("foo:///bar"));
    fail("Should have thrown a RuntimeException with an IOException inside");
  } catch (RuntimeException re) {
    assertTrue(re.getCause() instanceof IOException);
  }
}
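
// For reference, a sketch of why the IOException surfaces as a RuntimeException:
// setOutputPath(Job, Path) declares no throws clause, so qualifying the path against
// its FileSystem forces an unchecked wrap, roughly like this (paraphrased, not the
// exact Hadoop source):
public static void setOutputPath(Job job, Path outputDir) {
  try {
    outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
  } catch (IOException e) {
    throw new RuntimeException(e); // what the test above asserts on
  }
  job.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir", outputDir.toString());
}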
job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, cubingJobId);

String shrunkenDictPath = getOptionValue(OPTION_DICTIONARY_SHRUNKEN_PATH);
if (shrunkenDictPath != null) {
  job.getConfiguration().set(BatchConstants.ARG_SHRUNKEN_DICT_PATH, shrunkenDictPath);
}

attachSegmentMetadataWithAll(segment, job.getConfiguration());

job.setMapperClass(this.mapperClass);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
public static void ensurePaths(HadoopDruidIndexerConfig config) {
  authenticate(config);
  // config.addInputPaths() can have side-effects ( boo! :( ), so this stuff needs
  // to be done before anything else
  try {
    Job job = Job.getInstance(
        new Configuration(),
        StringUtils.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals())
    );

    job.getConfiguration().set("io.sort.record.percent", "0.19");
    injectSystemProperties(job);
    config.addJobProperties(job);

    config.addInputPaths(job);
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
}
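
// Guava has since deprecated Throwables.propagate; a sketch of the equivalent
// plain-Java handling (job name string here is hypothetical):
try {
  Job job = Job.getInstance(new Configuration(), "determine_partitions");
} catch (IOException e) {
  throw new RuntimeException(e); // plain unchecked wrap instead of Throwables.propagate
}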
Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, jobName));
job.setJarByClass(CopyTable.class);
Scan scan = new Scan();

job.getConfiguration().set(TableInputFormat.SHUFFLE_MAPS, "true");

Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
conf.set(WIDTH_CONF, "" + findWidth(beginNS, endNS, numReducers));
filterImpl.addToConfig(fields, conf);

Job job = Job.getInstance(conf);
jobName.ifPresent(job::setJobName);
job.setJarByClass(PcapJob.class);
job.setMapperClass(PcapJob.PcapMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(BytesWritable.class);