@Test
public void testAvroSpecificOutput() throws Exception {
  Job job = new Job();
  FileInputFormat.setInputPaths(job, new Path(getClass()
      .getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt")
      .toURI().toString()));
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(LineCountMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  Path outputPath = new Path(DIR.getRoot().getPath() + "/testAvroSpecificOutput");
  outputPath.getFileSystem(job.getConfiguration()).delete(outputPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  Assert.assertTrue(job.waitForCompletion(true));
  FileSystem fileSystem = FileSystem.get(job.getConfiguration());
  FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro3-*"));
  Assert.assertEquals(1, outputFiles.length);
  Map<String, Integer> counts = new HashMap<>();
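This test clears the output directory with the deprecated single-argument FileSystem.delete. A minimal sketch of the guarded recursive-delete idiom that several of the other results below use; the class and method names here are illustrative, not part of any snippet:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class OutputDirs {
  private OutputDirs() {}

  /** Deletes the job output directory recursively if it already exists. */
  public static void clean(Configuration conf, Path outputPath) throws IOException {
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
      fs.delete(outputPath, true); // 'true' = recursive
    }
  }
}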
this.isSpeculativeEnabled = isSpeculativeExecutionEnabled(
    HadoopUtils.getStateFromConf(context.getConfiguration()).getProperties());
this.fs = FileSystem.get(context.getConfiguration());
this.taskStateStore = new FsStateStore<>(this.fs,
    FileOutputFormat.getOutputPath(context).toUri().getPath(), TaskState.class);
String jobStateFileName = context.getConfiguration().get(ConfigurationKeys.JOB_STATE_DISTRIBUTED_CACHE_NAME);
boolean foundStateFile = false;
for (Path dcPath : DistributedCache.getLocalCacheFiles(context.getConfiguration())) {
  if (dcPath.getName().equals(jobStateFileName)) {
    SerializationUtils.deserializeStateFromInputStream(
        closer.register(new FileInputStream(dcPath.toUri().getPath())), this.jobState);
    foundStateFile = true;
    break;
  }
}
// ...
configuration.set(entry.getKey(), entry.getValue().unwrapped().toString());
// ...
    configuration.get(ConfigurationKeys.METRICS_ENABLED_KEY,
        ConfigurationKeys.DEFAULT_METRICS_ENABLED))) {
  this.jobMetrics = Optional.of(JobMetrics.get(this.jobState));
  this.jobMetrics.get()
parseOptions(options, args);
job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
String cubeName = getOptionValue(OPTION_CUBE_NAME);
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
CubeInstance cube = cubeMgr.getCube(cubeName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentId);
logger.info("Starting: " + job.getJobName());
job.getConfiguration().addResource(new Path(jobEngineConfig.getHadoopJobConfFilePath(null)));
KafkaConsumerProperties kafkaConsumerProperties = KafkaConsumerProperties.getInstanceFromEnv();
job.getConfiguration().addResource(new Path(kafkaConsumerProperties.getKafkaConsumerHadoopJobConf()));
appendKafkaOverrideProperties(cube.getConfig(), job.getConfiguration());
job.getConfiguration().set(CONFIG_KAFKA_BROKERS, brokers);
setupMapper(cube.getSegmentById(segmentId));
job.setNumReduceTasks(0);
FileOutputFormat.setOutputPath(job, output);
FileOutputFormat.setCompressOutput(job, true);
org.apache.log4j.Logger.getRootLogger().info("Output hdfs location: " + output);
org.apache.log4j.Logger.getRootLogger().info("Output hdfs compression: " + true);
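The snippet above enables output compression but leaves the codec to the cluster default. A minimal sketch of pinning the codec explicitly; the SnappyCodec choice and the helper class are assumptions for illustration, not part of the snippet:

import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

class CompressionSetup {
  /** Compresses job output and pins the codec rather than relying on
   *  mapreduce.output.fileoutputformat.compress.codec from the cluster config. */
  static void enableSnappyOutput(Job job) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  }
}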
/**
 * Constructor.
 * @param context the TaskAttemptContext supplying the writer with information from the job configuration
 */
public AvroTrevniRecordWriterBase(TaskAttemptContext context) throws IOException {
  schema = initSchema(context);
  meta = filterMetadata(context.getConfiguration());
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());

  Path outputPath = FileOutputFormat.getOutputPath(context);
  String dir = FileOutputFormat.getUniqueFile(context, "part", "");
  dirPath = new Path(outputPath.toString() + "/" + dir);
  fs = dirPath.getFileSystem(context.getConfiguration());
  fs.mkdirs(dirPath);

  blockSize = fs.getDefaultBlockSize();
}
public void checkOutputFormat() throws Exception {
  Job job = new Job();
  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyTest", "part-r-00000");
  wordCountUtil.writeLinesFile();
  AvroJob.setInputKeySchema(job, STRING);
  AvroJob.setOutputKeySchema(job, Pair.getPairSchema(STRING, LONG));
  job.setMapperClass(WordCountMapper.class);
  job.setReducerClass(WordCountReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
  FileOutputFormat.setCompressOutput(job, true);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setOutputFormatClass(AvroTrevniKeyOutputFormat.class);
  job.waitForCompletion(true);
  wordCountUtil.validateCountsFile();
}
LOGGER.info("Starting {}", getClass().getSimpleName()); if (fs.exists(outputDir)) { LOGGER.warn("Found the output folder {}, deleting it", _outputDir); fs.delete(outputDir, true); fs.mkdirs(outputDir); Job job = Job.getInstance(getConf()); job.setJarByClass(SegmentCreationJob.class); job.setJobName(_jobName); job.getConfiguration().set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); job.setMapOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(_stagingDir + "/input/")); FileOutputFormat.setOutputPath(job, new Path(_stagingDir + "/output/")); job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size()); if (_dataSchema != null) { job.getConfiguration().set(JobConfigConstants.SCHEMA, _dataSchema.toString()); setOutputPath(job.getConfiguration());
/**
 * Prepare job with mappers to cancel.
 * @return Fully configured job.
 * @throws Exception If fails.
 */
private Configuration prepareJobForCancelling() throws Exception {
  prepareFile("/testFile", 1500);
  executedTasks.set(0);
  cancelledTasks.set(0);
  failMapperId.set(0);
  splitsCount.set(0);
  Configuration cfg = new Configuration();
  setupFileSystems(cfg);
  Job job = Job.getInstance(cfg);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(CancellingTestMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(InFormat.class);
  FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
  FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
  job.setJarByClass(getClass());
  return job.getConfiguration();
}
job.setMapperClass(CellSortImporter.class);
job.setReducerClass(CellReducer.class);
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
job.setMapOutputKeyClass(CellWritableComparable.class);
job.setMapOutputValueClass(MapReduceExtendedCell.class);
// ...
    RawComparator.class);
Path partitionsPath = new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
FileSystem fs = FileSystem.get(job.getConfiguration());
fs.deleteOnExit(partitionsPath);
job.setPartitionerClass(CellWritableComparablePartitioner.class);
job.setNumReduceTasks(regionLocator.getStartKeys().length);
// ...
job.setReducerClass(CellSortReducer.class);
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(MapReduceExtendedCell.class);
@Test conf.set("hive.io.file.read.all.columns", "false"); conf.set("hive.io.file.readcolumn.ids", "1,3"); Job job = new Job(conf, "orc test"); job.setInputFormatClass(OrcNewInputFormat.class); job.setJarByClass(TestNewInputOutputFormat.class); job.setMapperClass(OrcTestMapper1.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(HiveTestUtils .getFileFromClasspath("orc-file-11-format.orc"))); Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt"); localFs.delete(outputPath, true); FileOutputFormat.setOutputPath(job, outputPath); boolean result = job.waitForCompletion(true); assertTrue(result); Path outputFilePath = new Path(outputPath, "part-m-00000"); new InputStreamReader(localFs.open(outputFilePath))); String line=reader.readLine(); "null, null, null, null, null, null, null}"); localFs.delete(outputPath, true);
private void runLinkedListMRJob(int iteration) throws Exception {
  String jobName = IntegrationTestBulkLoad.class.getSimpleName() + " - "
      + EnvironmentEdgeManager.currentTime();
  Configuration conf = new Configuration(util.getConfiguration());
  Path p = null;
  if (conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY) == null) {
    p = util.getDataTestDirOnTestFS(getTablename() + "-" + iteration);
  } else {
    p = new Path(conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY));
  }
  conf.setInt(ROUND_NUM_KEY, iteration);
  Job job = new Job(conf);
  job.setJobName(jobName);
  job.setInputFormatClass(ITBulkLoadInputFormat.class);
  FileOutputFormat.setOutputPath(job, p);
  try (Connection conn = ConnectionFactory.createConnection(conf);
      Admin admin = conn.getAdmin()) {
    // ...
  }
  util.getTestFileSystem().delete(p, true);
Configuration conf = new Configuration();
String numMaps = new GenericOptionsParser(conf, args).getRemainingArgs()[0];
conf.set(MRJobConfig.NUM_MAPS, numMaps);
createHdfsFilesystem(conf);
Job job = Job.getInstance(conf, "MapReduceIntegrationChecker");
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(EmptyInputFormat.class);
FileOutputFormat.setOutputPath(job, mOutputFilePath);
// ...
      : (resultStatus.equals(Status.FAIL_TO_FIND_CLASS) ? 2 : 1);
} finally {
  if (mFileSystem.exists(mOutputFilePath)) {
    mFileSystem.delete(mOutputFilePath, true);
  }
  mFileSystem.close();
}
job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
logger.info("Starting: " + job.getJobName());
attachSegmentMetadataWithAll(cubeSeg, job.getConfiguration());
job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidModeName);
job.getConfiguration().set(BatchConstants.CFG_UPDATE_SHARD, ifNeedUpdateBaseCuboidShard);
FileInputFormat.setInputPaths(job, new Path(input));
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputValueClass(Text.class);
Path outputPath = new Path(output);
FileOutputFormat.setOutputPath(job, outputPath);
HadoopUtil.deletePath(job.getConfiguration(), outputPath);
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
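A factory method like this is typically launched from a small driver. A minimal sketch; the standalone driver class is an assumption for illustration (the real CellCounter runs through ToolRunner):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;

public class CellCounterDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Build the fully configured job, run it, and map success to exit code 0.
    Job job = CellCounter.createSubmittableJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}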
/**
 * @throws Exception If failed.
 */
@Test
public void testSimpleTaskSubmit() throws Exception {
  String testInputFile = "/test";
  prepareTestFile(testInputFile);
  Configuration cfg = new Configuration();
  setupFileSystems(cfg);
  Job job = Job.getInstance(cfg);
  job.setMapperClass(TestMapper.class);
  job.setCombinerClass(TestReducer.class);
  job.setReducerClass(TestReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(1);
  FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/" + testInputFile));
  FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/output"));
  job.setJarByClass(getClass());
  IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
      createJobInfo(job.getConfiguration(), null));
  fut.get();
}
groupByJob = Job.getInstance(
    new Configuration(),
    StringUtils.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals())
);
config.addJobProperties(groupByJob);
groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
groupByJob.setMapOutputKeyClass(BytesWritable.class);
groupByJob.setMapOutputValueClass(NullWritable.class);
groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());
groupByJob.submit();
// ...
dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());
dimSelectionJob.submit();
// ...
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
if (fileSystem == null) {
  fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
}
@Test conf.set("hive.exec.orc.default.compress", "SNAPPY"); Job job = new Job(conf, "orc test"); job.setOutputFormatClass(OrcNewOutputFormat.class); job.setJarByClass(TestNewInputOutputFormat.class); job.setMapperClass(OrcTestMapper2.class); job.setNumReduceTasks(0); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(OrcSerdeRow.class); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); boolean result = job.waitForCompletion(true); assertTrue(result); Path outputFilePath = new Path(outputPath, "part-m-00000"); Reader reader = OrcFile.createReader(outputFilePath, OrcFile.readerOptions(conf).filesystem(localFs)); assertEquals(reader.getCompression(), CompressionKind.SNAPPY); localFs.delete(outputPath, true);
/**
 * Tests an MR Scan initialized from properties set in the Configuration.
 */
protected void testScanFromConfiguration(String start, String stop, String last)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty")
      + "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString());
  c.set(TableInputFormat.SCAN_COLUMN_FAMILY,
      Bytes.toString(INPUT_FAMILYS[0]) + ", " + Bytes.toString(INPUT_FAMILYS[1]));
  c.set(KEY_STARTROW, start != null ? start : "");
  c.set(KEY_LASTROW, last != null ? last : "");
  if (start != null) {
    c.set(TableInputFormat.SCAN_ROW_START, start);
  }
  if (stop != null) {
    c.set(TableInputFormat.SCAN_ROW_STOP, stop);
  }
  Job job = Job.getInstance(c, jobName);
  job.setMapperClass(ScanMapper.class);
  job.setReducerClass(ScanReducer.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setInputFormatClass(TableInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  TableMapReduceUtil.addDependencyJars(job);
  assertTrue(job.waitForCompletion(true));
}
parseOptions(options, args);
job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
String segmentID = getOptionValue(OPTION_SEGMENT_ID);
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);
logger.info("Starting: " + job.getJobName());
job.setMapperClass(UpdateOldCuboidShardMapper.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
attachSegmentsMetadataWithDict(Lists.newArrayList(optSegment, originalSegment), job.getConfiguration());
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);

  Job job = Job.getInstance(getConf(),
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);
  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class); // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

  return job;
}
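Because the job writes through MapFileOutputFormat, its output can be read back with the matching reader API, one MapFile per reduce partition. A minimal sketch; the class name and traversal loop are illustrative, not part of HashTable:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;

class HashFileScanner {
  /** Opens one MapFile.Reader per reduce partition under the job's output dir. */
  static void scan(Configuration conf, Path hashDataDir) throws IOException {
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(hashDataDir, conf);
    try {
      ImmutableBytesWritable key = new ImmutableBytesWritable();
      ImmutableBytesWritable value = new ImmutableBytesWritable();
      for (MapFile.Reader reader : readers) {
        while (reader.next(key, value)) {
          // inspect key/value here ...
        }
      }
    } finally {
      for (MapFile.Reader reader : readers) {
        reader.close();
      }
    }
  }
}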
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<String> dirs = Splitter.on(",").splitToList(state.getProp(INPUT_DIRECTORIES_KEY));
  String outputBase = state.getProp(OUTPUT_LOCATION);
  List<WorkUnit> workUnits = Lists.newArrayList();
  for (String dir : dirs) {
    try {
      Path input = new Path(dir);
      Path output = new Path(outputBase, input.getName());
      WorkUnit workUnit = new WorkUnit();
      TaskUtils.setTaskFactoryClass(workUnit, MRTaskFactory.class);
      Configuration conf = new Configuration();
      Job job = Job.getInstance(conf, "WordCount_" + input.getName());
      job.setJarByClass(MRTaskFactoryTest.class);
      job.setMapperClass(TokenizerMapper.class);
      job.setCombinerClass(IntSumReducer.class);
      job.setReducerClass(IntSumReducer.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, input);
      FileOutputFormat.setOutputPath(job, output);
      MRTask.serializeJobToState(workUnit, job);
      workUnits.add(workUnit);
    } catch (IOException ioe) {
      log.error("Failed to create MR job for " + dir, ioe);
    }
  }
  return workUnits;
}