jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
final Job job = Job.getInstance(jobConf);
job.setInputFormatClass(ConfigInputFormat.class);
job.setMapperClass(ConvertingMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// ... (job submission and completion elided; counters are only available after the job has run)
loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
final List<Path> goodPaths = new ArrayList<>();
while (it.hasNext()) {
    final LocatedFileStatus locatedFileStatus = it.next(); // was missing; the loop variable must come from the iterator
    if (locatedFileStatus.isFile()) {
        final Path myPath = locatedFileStatus.getPath();
        if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
            goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
@Override
protected void handleFailure(Counters counters) throws IOException {
    try (Connection conn = ConnectionFactory.createConnection(job.getConfiguration())) {
        TableName tableName = TableName.valueOf(COMMON_TABLE_NAME);
        CounterGroup g = counters.getGroup("undef");
        Iterator<Counter> it = g.iterator();
        while (it.hasNext()) {
            String keyString = it.next().getName();
            byte[] key = Bytes.toBytes(keyString);
            HRegionLocation loc = conn.getRegionLocator(tableName).getRegionLocation(key, true);
            LOG.error("undefined row " + keyString + ", " + loc);
        }
        g = counters.getGroup("unref");
        it = g.iterator();
        while (it.hasNext()) {
            String keyString = it.next().getName();
            byte[] key = Bytes.toBytes(keyString);
            HRegionLocation loc = conn.getRegionLocator(tableName).getRegionLocation(key, true);
            LOG.error("unreferred row " + keyString + ", " + loc);
        }
    }
}
public void updateJobCounter() {
    try {
        Counters counters = job.getCounters();
        if (counters == null) {
            String errorMsg = "no counters for job " + getMrJobId();
            output.append(errorMsg);
        } else {
            this.output.append(counters.toString()).append("\n");
            logger.debug(counters.toString());
            mapInputRecords = String.valueOf(counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue());
            rawInputBytesRead = String.valueOf(counters.findCounter(RawDataCounter.BYTES).getValue());
            String outputFolder = job.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir",
                    KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory());
            logger.debug("outputFolder is " + outputFolder);
            Path outputPath = new Path(outputFolder);
            String fsScheme = outputPath.getFileSystem(job.getConfiguration()).getScheme();
            long bytesWritten = counters.findCounter(fsScheme, FileSystemCounter.BYTES_WRITTEN).getValue();
            if (bytesWritten == 0) {
                logger.debug("Seems no counter found for " + fsScheme);
                bytesWritten = counters.findCounter("FileSystemCounters", "HDFS_BYTES_WRITTEN").getValue();
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "file:///");
FileSystem fs = FileSystem.getLocal(conf);
fs.delete(new Path(OUT_DIR), true);
Job job = Job.getInstance(conf);
job.setMapperClass(ValMapper.class);
job.setReducerClass(Reducer.class);
job.setMapOutputKeyClass(DateCol.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(1);
job.getConfiguration().setInt("mapreduce.map.tasks", 2);
FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, null, null);
// ... (job execution elided; the counter read below was the tail of a larger expression,
// e.g. an assertion, whose beginning is truncated in the snippet)
job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
DOMConfigurator.configure("log4j.xml");
FileSystem fs = FileSystem.get(job.getConfiguration());
Path execBasePath = new Path(props.getProperty(ETL_EXECUTION_BASE_PATH));
Path execHistory = new Path(props.getProperty(ETL_EXECUTION_HISTORY_PATH));
if (!fs.exists(execBasePath)) {
    log.info("The execution base path does not exist. Creating the directory");
    fs.mkdirs(execBasePath);
}
if (!fs.exists(execHistory)) {
    // ... (history-directory handling elided; the lines below come from a later loop that prunes
    // old executions, where `stat` appears to be a FileStatus for one old execution directory)
    log.info("removing old execution: " + stat.getPath().getName());
    ContentSummary execContent = fs.getContentSummary(stat.getPath());
    currentCount -= execContent.getFileCount() + execContent.getDirectoryCount();
}
// ... (job setup continues)
job.setMapperClass(EtlMapper.class);
job.setInputFormatClass(inputFormatClass);
// ... (job execution elided)
Counters counters = job.getCounters();
for (String groupName : counters.getGroupNames()) {
    CounterGroup group = counters.getGroup(groupName);
    log.info("Group: " + group.getDisplayName());
    for (Counter counter : group) {
        log.info(counter.getDisplayName() + ":\t" + counter.getValue());
Path outputPath = getOutputPath();
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
}
Job job = Job.getInstance();
job.setMapperClass(GCMapper.class);
job.setNumReduceTasks(0);
job.getConfiguration().set("io.sort.mb", "25");
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
boolean ret = job.waitForCompletion(true);
Counter gcCounter = job.getCounters().findCounter(TaskCounter.GC_TIME_MILLIS);
assertNotNull(gcCounter);
assertTrue("No time spent in gc", gcCounter.getValue() > 0);
MapCreate.writeCount = 0;
Configuration conf = new Configuration();
Job job = new Job(conf, "hcat mapreduce write test");
job.setJarByClass(this.getClass());
job.setMapperClass(HCatMapReduceTest.MapCreate.class);
Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
createInputFile(path, writeCount);
TextInputFormat.setInputPaths(job, path);
// The lines below appear to come from a second test variant; in a single method the
// redeclaration of `path` would not compile.
Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
createInputFile(path, writeCount / 2);
Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
createInputFile(path2, (writeCount - writeCount / 2));
job.getConfiguration().set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
assertTrue(job.getCounters().getGroup("FileSystemCounters")
    .findCounter("FILE_BYTES_READ").getValue() > 0);
Path output = new Path(cmdline.getOptionValue(OUTPUT_OPTION));
String pattern = cmdline.getOptionValue(PATTERN_OPTION);
LOG.info(" - output: " + output);
Job job = Job.getInstance(getConf(), FindWarcUrls.class.getSimpleName() + ":" + input);
job.setJarByClass(FindWarcUrls.class);
job.setNumReduceTasks(1);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
FileOutputFormat.setOutputPath(job, output);
job.setInputFormatClass(WacWarcInputFormat.class);
job.setMapperClass(MyMapper.class);
job.getConfiguration().set(PATTERN_OPTION, pattern);
FileSystem fs = FileSystem.get(getConf());
if (fs.exists(output)) { // reuse the FileSystem obtained above instead of fetching it again
    fs.delete(output, true);
}
// ... (job execution elided)
Counters counters = job.getCounters();
int numDocs = (int) counters.findCounter(Records.TOTAL).getValue();
LOG.info("Read " + numDocs + " records.");
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);
    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);
    job.setMapOutputKeyClass(GramKey.class);
    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // ... (job execution elided)
    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}
job = Job.getInstance(
    new Configuration(),
    StringUtils.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.23");
JobHelper.injectDruidProperties(job.getConfiguration(), config.getAllowedHadoopPrefix());
job.setMapperClass(IndexGeneratorMapper.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
// ... (job execution elided; the null check below is implied by the `else` branch that follows)
Counters counters = job.getCounters();
if (counters == null) {
    log.info("No counters found for job [%s]", job.getJobName());
} else {
    Counter invalidRowCount = counters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
    if (invalidRowCount != null) {
        jobStats.setInvalidRowCount(invalidRowCount.getValue());
    } else {
        log.info("No invalid row counter found for job [%s]", job.getJobName());
LOG.info(" - docno mapping file: " + mappingFile); Job job = new Job(getConf(), CountMedlineCitations.class.getSimpleName() + ":" + inputPath); job.setJarByClass(CountMedlineCitations.class); job.setNumReduceTasks(0); job.getConfiguration().set("DocnoMappingClass", MedlineDocnoMapping.class.getCanonicalName()); DistributedCache.addCacheFile(new URI(mappingFile), job.getConfiguration()); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); FileOutputFormat.setCompressOutput(job, false); job.setInputFormatClass(MedlineCitationInputFormat.class); FileSystem.get(job.getConfiguration()).delete(new Path(outputPath), true); job.waitForCompletion(true); Counters counters = job.getCounters(); int numDocs = (int) counters.findCounter(Count.DOCS).getValue(); LOG.info("Read " + numDocs + " docs.");
@SuppressWarnings("deprecation") @Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args) .getRemainingArgs(); Job job = new Job(conf, "StackOverflow Number of Users by State"); job.setJarByClass(CountNumUsersByState.class); job.setMapperClass(CountNumUsersByStateMapper.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); Path outputDir = new Path(otherArgs[1]); FileOutputFormat.setOutputPath(job, outputDir); boolean success = job.waitForCompletion(true); for (Counter counter : job.getCounters().getGroup( CountNumUsersByStateMapper.STATE_COUNTER_GROUP)) { System.out.println(counter.getDisplayName() + "\t" + counter.getValue()); FileSystem.get(conf).delete(outputDir);
CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
Path tmpPath = configurator.getMrOutputPath();
Path dstPath = new Path(result.getDstAbsoluteDir());
long oldTotalRecords = helper.readRecordCount(new Path(result.getDstAbsoluteDir()));
long executeCount = helper.readExecutionCount(new Path(result.getDstAbsoluteDir()));
// ... (per-file handling elided; `fileName` and `filePath` come from that elided context)
Path outPath = new Path(dstPath, fileName);
if (!this.fs.rename(filePath, outPath)) {
    throw new IOException(
        String.format("Unable to move %s to %s", filePath.toString(), outPath.toString()));
}
this.fs.delete(dstPath, true);
FsPermission permission = HadoopUtils.deserializeFsPermission(this.state,
    MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION /* trailing argument truncated in the snippet */);
WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath.getParent(), permission);
if (!this.fs.rename(tmpPath, dstPath)) {
    throw new IOException(
        String.format("Unable to move %s to %s", tmpPath, dstPath));
}
Counter counter = job.getCounters().findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
newTotalRecords = counter.getValue();
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    setConf(new Configuration());
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters()
        .getGroup(TaskCounter.class.getCanonicalName())
        .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil(totalWords / 2.0);
    int medianIndex2 = (int) Math.floor(totalWords / 2.0);
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return (result ? 0 : 1);
}
        String targetNode) throws Exception {
    Configuration conf = new Configuration();
    conf.set(TARGET_NODE, targetNode);
    Job job = new Job(conf);
    job.setJarByClass(Main.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    if (!job.waitForCompletion(true)) {
        // ... (failure handling elided in the snippet)
    }
    Counter counter = job.getCounters()
        .findCounter(Reduce.PathCounter.TARGET_NODE_DISTANCE_COMPUTED);
    if (counter != null && counter.getValue() > 0) {
        CounterGroup group = job.getCounters().getGroup(Reduce.PathCounter.PATH.toString());
        Iterator<Counter> iter = group.iterator();
        iter.hasNext();
        String path = iter.next().getName();
        System.out.println("==========================================");
        System.out.println("= Shortest path found, details as follows.");
final Job job = Job.getInstance(conf);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(TestCountingMapper.class);
job.setCombinerClass(TestCountingCombiner.class);
FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@" + PATH_INPUT));
FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@" + PATH_OUTPUT));
job.submit();
final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);
assertEquals(0, cntr.getValue());
cntr.increment(10);
assertEquals(10, cntr.getValue());
// ... (job completion and retrieval of the final `counters` snapshot elided in the snippet)
assertEquals("wrong counters count", 3, counters.countCounters());
assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
Job job = new Job(conf);
job.setJobName(jobName);
job.setJarByClass(getClass());
// ... (the beginning of the mapper-initialization call is truncated in the snippet;
// only its final arguments survive)
    job, true, new Path(restoreDir)
);
// ... (job execution elided)
Counters counters = job.getCounters();
long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
double throughput = (double) totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
double throughputRows = (double) numRows / scanTimer.elapsed(TimeUnit.SECONDS);
public static double calcPageRank(Path inputPath, Path outputPath, int numNodes)
        throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(Reduce.CONF_NUM_NODES_GRAPH, numNodes);
    Job job = new Job(conf);
    job.setJarByClass(Main.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    if (!job.waitForCompletion(true)) {
        throw new Exception("Job failed");
    }
    long summedConvergence = job.getCounters().findCounter(Reduce.Counter.CONV_DELTAS).getValue();
    double convergence =
        ((double) summedConvergence / Reduce.CONVERGENCE_SCALING_FACTOR) / (double) numNodes;
    System.out.println("======================================");
    System.out.println("= Num nodes: " + numNodes);
    System.out.println("= Summed convergence: " + summedConvergence);
    System.out.println("= Convergence: " + convergence);
    System.out.println("======================================");
    return convergence;
}
public static String[] takeSample(Path[] files, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    FileSystem fs = files[0].getFileSystem(params);
    Path tempPath;
    do {
        tempPath = new Path(String.format("temp_sample_%06d", (int) (Math.random() * 1000000)));
    } while (fs.exists(tempPath));
    Job job = sampleMapReduce(files, tempPath, params);
    job.waitForCompletion(false);
    int outputSize = (int) job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
    // Read the file back
    String[] lines = Head.head(fs, tempPath, outputSize);
    // Delete the temporary path with all its contents
    fs.delete(tempPath, true);
    return lines;
}
private void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = new Path(HBaseTestUtil.getMROutputDir(TEST_NAME), "verify-output");
    Job job = new Job(conf);
    job.setJarByClass(this.getClass());
    job.setJobName(TEST_NAME + " Verification for " + htd.getNameAsString());
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(
        htd.getNameAsString(), scan, VerifyMapper.class,
        BytesWritable.class, BytesWritable.class, job);
    int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
    TableMapReduceUtil.setScannerCaching(job, scannerCaching); // pass the configured value rather than the constant
    job.setReducerClass(VerifyReducer.class);
    job.setNumReduceTasks(NUM_REDUCE_TASKS);
    FileOutputFormat.setOutputPath(job, outputDir);
    assertTrue(job.waitForCompletion(true));
    long numOutputRecords = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
    assertEquals(0, numOutputRecords);
}