@VisibleForTesting
static void serializeJobState(FileSystem fs, Path mrJobDir, Configuration conf, JobState jobState, Job job)
    throws IOException {
  Path jobStateFilePath = new Path(mrJobDir, JOB_STATE_FILE_NAME);
  // Write the job state with an empty task set (work units are read by the mapper from a different file)
  try (DataOutputStream dataOutputStream = new DataOutputStream(fs.create(jobStateFilePath))) {
    jobState.write(dataOutputStream, false,
        conf.getBoolean(SERIALIZE_PREVIOUS_WORKUNIT_STATES_KEY, DEFAULT_SERIALIZE_PREVIOUS_WORKUNIT_STATES));
  }

  job.getConfiguration().set(ConfigurationKeys.JOB_STATE_FILE_PATH_KEY, jobStateFilePath.toString());

  DistributedCache.addCacheFile(jobStateFilePath.toUri(), job.getConfiguration());
  job.getConfiguration().set(ConfigurationKeys.JOB_STATE_DISTRIBUTED_CACHE_NAME, jobStateFilePath.getName());
}
Configuration conf = new Configuration();
String numMaps = new GenericOptionsParser(conf, args).getRemainingArgs()[0];
conf.set(MRJobConfig.NUM_MAPS, numMaps);
createHdfsFilesystem(conf);

Job job = Job.getInstance(conf, "MapReduceIntegrationChecker");
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(EmptyInputFormat.class);
FileOutputFormat.setOutputPath(job, mOutputFilePath);

try {
  if (!job.waitForCompletion(true)) {
    return 1;
  }
  // The computation of resultStatus and the leading branch of this return were elided in the
  // original snippet; mapping SUCCESS to exit code 0 is assumed here.
  return resultStatus.equals(Status.SUCCESS)
      ? 0 : (resultStatus.equals(Status.FAIL_TO_FIND_CLASS) ? 2 : 1);
} finally {
  if (mFileSystem.exists(mOutputFilePath)) {
    mFileSystem.delete(mOutputFilePath, true);
  }
  mFileSystem.close();
}
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);

  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
public static void setHadoopConfForCuboid(Job job, CubeSegment segment, String metaUrl) throws Exception {
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
private void setupMapper() throws IOException {
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(UHCDictionaryMapper.class);
  job.setMapOutputKeyClass(SelfDefineSortableKey.class);
  job.setMapOutputValueClass(NullWritable.class);
}
public HadoopOutputFormatBase(org.apache.hadoop.mapreduce.OutputFormat<K, V> mapreduceOutputFormat, Job job) {
  super(job.getCredentials());
  this.mapreduceOutputFormat = mapreduceOutputFormat;
  this.configuration = job.getConfiguration();
  HadoopUtils.mergeHadoopConf(configuration);
}
/**
 * Prepare job with mappers to cancel.
 *
 * @return Configuration of the fully configured job.
 * @throws Exception If fails.
 */
private Configuration prepareJobForCancelling() throws Exception {
  prepareFile("/testFile", 1500);

  executedTasks.set(0);
  cancelledTasks.set(0);
  failMapperId.set(0);
  splitsCount.set(0);

  Configuration cfg = new Configuration();

  setupFileSystems(cfg);

  Job job = Job.getInstance(cfg);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  job.setMapperClass(CancellingTestMapper.class);

  job.setNumReduceTasks(0);

  job.setInputFormatClass(InFormat.class);

  FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
  FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));

  job.setJarByClass(getClass());

  return job.getConfiguration();
}
@Test
public void testAvroSpecificOutput() throws Exception {
  Job job = new Job();
  FileInputFormat.setInputPaths(job, new Path(getClass()
      .getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt")
      .toURI().toString()));
  job.setInputFormatClass(TextInputFormat.class);

  job.setMapperClass(LineCountMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  AvroMultipleOutputs.addNamedOutput(job, "myavro3", AvroKeyOutputFormat.class, TextStats.SCHEMA$, null);

  job.setReducerClass(SpecificStatsReducer.class);
  AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$);

  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific");
  outputPath.getFileSystem(job.getConfiguration()).delete(outputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  Assert.assertTrue(job.waitForCompletion(true));

  FileSystem fileSystem = FileSystem.get(job.getConfiguration());
  FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro3-*"));
  Assert.assertEquals(1, outputFiles.length);

  DataFileReader<TextStats> reader = new DataFileReader<>(
      new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
      new SpecificDatumReader<>());
  Map<String, Integer> counts = new HashMap<>();
@Test conf.set("hive.io.file.read.all.columns", "false"); conf.set("hive.io.file.readcolumn.ids", "1,3"); Job job = new Job(conf, "orc test"); job.setInputFormatClass(OrcNewInputFormat.class); job.setJarByClass(TestNewInputOutputFormat.class); job.setMapperClass(OrcTestMapper1.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(HiveTestUtils .getFileFromClasspath("orc-file-11-format.orc"))); Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt"); localFs.delete(outputPath, true); FileOutputFormat.setOutputPath(job, outputPath); boolean result = job.waitForCompletion(true); assertTrue(result); Path outputFilePath = new Path(outputPath, "part-m-00000"); new InputStreamReader(localFs.open(outputFilePath))); String line=reader.readLine(); "null, null, null, null, null, null, null}"); localFs.delete(outputPath, true);
readRecords.clear();

Configuration conf = new Configuration();
conf.set(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname, "true");

Job job = new Job(conf, "hcat mapreduce read test");
job.setJarByClass(this.getClass());
job.setMapperClass(HCatMapReduceTest.MapRead.class);

job.setInputFormatClass(HCatInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

job.setMapOutputKeyClass(BytesWritable.class);
job.setMapOutputValueClass(Text.class);

job.setNumReduceTasks(0);

Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput");
if (fs.exists(path)) {
  fs.delete(path, true);
}

job.waitForCompletion(true);

Assert.assertEquals(readCount, MapRead.readCount);
private boolean runJob(float badRecordThreshold) throws Exception {
  Configuration conf = new Configuration();

  conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);

  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setMapperClass(MyMapper.class);

  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, "default", "test_bad_records");

  job.setMapOutputKeyClass(HCatRecord.class);
  job.setMapOutputValueClass(HCatRecord.class);

  job.setNumReduceTasks(0);

  Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
  if (path.getFileSystem(conf).exists(path)) {
    path.getFileSystem(conf).delete(path, true);
  }
  TextOutputFormat.setOutputPath(job, path);

  return job.waitForCompletion(true);
}
/**
 * @throws Exception If failed.
 */
@Test
public void testSimpleTaskSubmit() throws Exception {
  String testInputFile = "/test";

  prepareTestFile(testInputFile);

  Configuration cfg = new Configuration();

  setupFileSystems(cfg);

  Job job = Job.getInstance(cfg);

  job.setMapperClass(TestMapper.class);
  job.setCombinerClass(TestReducer.class);
  job.setReducerClass(TestReducer.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  job.setNumReduceTasks(1);

  FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/" + testInputFile));
  FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/output"));

  job.setJarByClass(getClass());

  IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
      createJobInfo(job.getConfiguration(), null));

  fut.get();
}
groupByJob = Job.getInstance(
    new Configuration(),
    StringUtils.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals())
);

groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
groupByJob.setMapOutputKeyClass(BytesWritable.class);
groupByJob.setMapOutputValueClass(NullWritable.class);
groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
groupByJob.setOutputKeyClass(BytesWritable.class);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    // the remaining setupClasspath arguments were elided in the original snippet;
    // passing the job as the final argument is assumed here
    groupByJob
);

groupByJob.submit();
log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());

if (groupByJob.getJobID() != null) {
  JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
}

if (!groupByJob.waitForCompletion(true)) {
  log.error("Job failed: %s", groupByJob.getJobID());
  failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
  return false;
}

final Job dimSelectionJob = Job.getInstance(
    new Configuration(),
@Test conf.set("hive.exec.orc.default.compress", "SNAPPY"); Job job = new Job(conf, "orc test"); job.setOutputFormatClass(OrcNewOutputFormat.class); job.setJarByClass(TestNewInputOutputFormat.class); job.setMapperClass(OrcTestMapper2.class); job.setNumReduceTasks(0); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(OrcSerdeRow.class); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); boolean result = job.waitForCompletion(true); assertTrue(result); Path outputFilePath = new Path(outputPath, "part-m-00000"); Reader reader = OrcFile.createReader(outputFilePath, OrcFile.readerOptions(conf).filesystem(localFs)); assertEquals(reader.getCompression(), CompressionKind.SNAPPY); localFs.delete(outputPath, true);
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());

  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);

  job.waitForCompletion(true);
}
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);

  Job job = Job.getInstance(getConf(),
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);

  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class); // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

  return job;
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<String> dirs = Splitter.on(",").splitToList(state.getProp(INPUT_DIRECTORIES_KEY));
  String outputBase = state.getProp(OUTPUT_LOCATION);

  List<WorkUnit> workUnits = Lists.newArrayList();
  for (String dir : dirs) {
    try {
      Path input = new Path(dir);
      Path output = new Path(outputBase, input.getName());

      WorkUnit workUnit = new WorkUnit();
      TaskUtils.setTaskFactoryClass(workUnit, MRTaskFactory.class);

      Configuration conf = new Configuration();
      Job job = Job.getInstance(conf, "WordCount_" + input.getName());
      job.setJarByClass(MRTaskFactoryTest.class);
      job.setMapperClass(TokenizerMapper.class);
      job.setCombinerClass(IntSumReducer.class);
      job.setReducerClass(IntSumReducer.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, input);
      FileOutputFormat.setOutputPath(job, output);

      MRTask.serializeJobToState(workUnit, job);
      workUnits.add(workUnit);
    } catch (IOException ioe) {
      log.error("Failed to create MR job for " + dir, ioe);
    }
  }

  return workUnits;
}
Job job = Job.getInstance();

job.setInputFormatClass(InFormat.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);

job.setMapperClass(Mapper.class);
job.setNumReduceTasks(0);

setupFileSystems(job.getConfiguration());

// The submission call was elided in the original snippet; based on the other Ignite Hadoop
// tests in this collection, a grid(0).hadoop().submit(...) call is assumed here.
grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
    createJobInfo(job.getConfiguration(), null)).get(180000);

job = Job.getInstance();

setupFileSystems(job.getConfiguration());

job.getConfiguration().set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
    JavaSerialization.class.getName() + "," + WritableSerialization.class.getName());

job.setSortComparatorClass(JavaSerializationComparator.class);

job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);

job.setNumReduceTasks(2);

job.setMapOutputKeyClass(UUID.class);
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints,
    boolean writeMultipleTables) throws IOException {
  Configuration conf = job.getConfiguration();
  // create the partitions file
  FileSystem fs = FileSystem.get(conf);
  String hbaseTmpFsDir =
      conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, fs.getHomeDirectory() + "/hbase-staging");
  Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
  fs.makeQualified(partitionsPath);
  writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
  fs.deleteOnExit(partitionsPath);

  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
public void checkOutputFormat() throws Exception {
  Job job = new Job();

  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyTest", "part-r-00000");

  wordCountUtil.writeLinesFile();

  AvroJob.setInputKeySchema(job, STRING);
  AvroJob.setOutputKeySchema(job, Pair.getPairSchema(STRING, LONG));

  job.setMapperClass(WordCountMapper.class);
  job.setReducerClass(WordCountReducer.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);

  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
  FileOutputFormat.setCompressOutput(job, true);

  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setOutputFormatClass(AvroTrevniKeyOutputFormat.class);

  job.waitForCompletion(true);

  wordCountUtil.validateCountsFile();
}