public void run(JobConf conf) throws Exception {
  _runningJob = new JobClient(conf).submitJob(conf);
  info("See " + _runningJob.getTrackingURL() + " for details.");
  _runningJob.waitForCompletion();
  if (!_runningJob.isSuccessful()) {
    throw new Exception("Hadoop job:" + getId() + " failed!");
  }

  // dump all counters
  Counters counters = _runningJob.getCounters();
  for (String groupName : counters.getGroupNames()) {
    Counters.Group group = counters.getGroup(groupName);
    info("Group: " + group.getDisplayName());
    for (Counter counter : group) {
      info(counter.getDisplayName() + ":\t" + counter.getValue());
    }
  }
}
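// Hedged usage sketch (not from the original snippet): one way the run(JobConf) helper above
// could be invoked. The enclosing class name "HadoopJobRunner", the identity mapper, and the
// input/output paths are assumptions made only for illustration.
JobConf exampleConf = new JobConf(HadoopJobRunner.class);  // hypothetical enclosing class
exampleConf.setJobName("counter-dump-example");
exampleConf.setMapperClass(org.apache.hadoop.mapred.lib.IdentityMapper.class);
exampleConf.setNumReduceTasks(0);
FileInputFormat.setInputPaths(exampleConf, new Path("/tmp/example/in"));    // assumed path
FileOutputFormat.setOutputPath(exampleConf, new Path("/tmp/example/out"));  // assumed path
new HadoopJobRunner().run(exampleConf);  // submits, blocks until completion, then logs every counter group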
process.inputClient.configure(TaskType.MAP, job.get(AvroJob.INPUT_SCHEMA),
    AvroJob.getMapOutputSchema(job).toString());
process.inputClient.partitions(job.getNumReduceTasks());
while (recordReader.next(data, NullWritable.get())) {
  process.inputClient.input(data.buffer(), data.count());
  inputRecordCounter.increment(data.count() - 1);
  if (process.outputService.isFinished()) {
    break;
  }
}
private Map<String, Double> extractAllCounterValues(Counters counters) {
  Map<String, Double> extractedCounters = new HashMap<String, Double>();
  for (Counters.Group cg : counters) {
    for (Counter c : cg) {
      extractedCounters.put(cg.getName() + "::" + c.getName(), new Double(c.getCounter()));
    }
  }
  return extractedCounters;
}
sLogger.info(" - number of reducers: " + 0); Path inputDocumentFiles = new Path(inputDocument); Path outputDocumentFiles = new Path(outputDocument); Path termIndexPath = new Path(termIndex); Path titleIndexPath = new Path(titleIndex); JobConf conf = new JobConf(configuration, ParseCorpus.class); FileSystem fs = FileSystem.get(conf); conf.setJobName(ParseCorpus.class.getSimpleName() + " - index document"); Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files..."); DistributedCache.addCacheFile(termIndexPath.toUri(), conf); Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files..."); DistributedCache.addCacheFile(titleIndexPath.toUri(), conf); conf.setNumMapTasks(numberOfMappers); conf.setNumReduceTasks(0); conf.setMapperClass(IndexDocumentMapper.class); Counters counters = job.getCounters(); int collapsedDocuments = (int) counters.findCounter(MyCounter.COLLAPSED_DOCUMENTS).getCounter(); sLogger.info("Total number of collapsed documnts: " + collapsedDocuments); int leftOverDocuments = (int) counters.findCounter(MyCounter.LEFT_OVER_DOCUMENTS).getCounter(); sLogger.info("Total number of left-over documents: " + leftOverDocuments);
public int runTool(Configuration config, String collectionPath, String outputPath,
    String indexFile, String mappingFile) throws Exception {
  JobConf conf = new JobConf(config, DemoCountAquaint2Documents.class);
  FileSystem fs = FileSystem.get(config);

  sLogger.info(" - mapping file: " + mappingFile);

  conf.setJobName("BuildAquaint2ForwardIndex");
  conf.set("mapred.child.java.opts", "-Xmx1024m");
  conf.setNumReduceTasks(1);

  FileInputFormat.setInputPaths(conf, new Path(collectionPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  FileOutputFormat.setCompressOutput(conf, false);

  FileSystem.get(conf).delete(new Path(outputPath), true);

  RunningJob job = JobClient.runJob(conf);
  Counters counters = job.getCounters();
  int numDocs = (int) counters.findCounter(Count.DOCS).getCounter();
sLogger.info(" - maximum document count: " + maximumDocumentCount); Path inputTermFiles = new Path(inputTerms); Path outputTermFile = new Path(outputTerm); JobConf conf = new JobConf(configuration, ParseCorpus.class); FileSystem fs = FileSystem.get(conf); conf.setJobName(ParseCorpus.class.getSimpleName() + " - index term"); conf.setFloat("corpus.maximum.document.count", maximumDocumentCount); fs.rename(new Path(outputString + Path.SEPARATOR + "part-00000"), outputTermFile); sLogger.info("Successfully index all the terms at " + outputTermFile); Counters counters = job.getCounters(); int lowDocumentFrequencyTerms = (int) counters.findCounter( MyCounter.LOW_DOCUMENT_FREQUENCY_TERMS).getCounter(); sLogger.info("Removed " + lowDocumentFrequencyTerms + " low frequency terms."); int highDocumentFrequencyTerms = (int) counters.findCounter( MyCounter.HIGH_DOCUMENT_FREQUENCY_TERMS).getCounter(); sLogger.info("Removed " + highDocumentFrequencyTerms + " high frequency terms."); int leftOverTerms = (int) counters.findCounter(MyCounter.LEFT_OVER_TERMS).getCounter(); sLogger.info("Total number of left-over terms: " + leftOverTerms); } finally {
@SuppressWarnings("unchecked") private <T> T getSplitDetails(Path file, long offset) throws IOException { FileSystem fs = file.getFileSystem(conf); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = Text.readString(inFile); Class<T> cls; try { cls = (Class<T>) conf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(conf); Deserializer<T> deserializer = (Deserializer<T>) factory.getDeserializer(cls); deserializer.open(inFile); T split = deserializer.deserialize(null); long pos = inFile.getPos(); getCounters().findCounter( TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset); inFile.close(); return split; }
@SuppressWarnings({ "deprecation", "unchecked" }) public OldTrackingRecordWriter(ReduceTask reduce, JobConf job, TaskReporter reporter, String finalName) throws IOException { this.reduceOutputCounter = reduce.reduceOutputCounter; this.fileOutputByteCounter = reduce.fileOutputByteCounter; List<Statistics> matchedStats = null; if (job.getOutputFormat() instanceof FileOutputFormat) { matchedStats = getFsStatistics(FileOutputFormat.getOutputPath(job), job); } fsStats = matchedStats; FileSystem fs = FileSystem.get(job); long bytesOutPrev = getOutputBytes(fsStats); this.real = job.getOutputFormat().getRecordWriter(fs, job, finalName, reporter); long bytesOutCurr = getOutputBytes(fsStats); fileOutputByteCounter.increment(bytesOutCurr - bytesOutPrev); }
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  Configuration conf = UTIL.getConfiguration();
  final JobConf job = new JobConf(conf);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
@SuppressWarnings( { "unchecked" }) public void testMergeShouldReturnProperProgress( List<Segment<Text, Text>> segments) throws IOException { Path tmpDir = new Path("localpath"); Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass(); Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass(); RawComparator<Text> comparator = jobConf.getOutputKeyComparator(); Counter readsCounter = new Counter(); Counter writesCounter = new Counter(); Progress mergePhase = new Progress(); RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
@Test
public void testShufflePermissions() throws Exception {
  JobConf conf = new JobConf();
  conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
  conf.set(MRConfig.LOCAL_DIR, TEST_ROOT_DIR.getAbsolutePath());
  MapOutputFile mof = new MROutputFiles();
  mof.setConf(conf);
  doReturn(new Progress()).when(mockTask).getSortPhase();
  TaskReporter mockReporter = mock(TaskReporter.class);
  doReturn(new Counter()).when(mockReporter).getCounter(any(TaskCounter.class));
  MapOutputCollector.Context ctx = new MapOutputCollector.Context(mockTask,

  mob.close();
  Path outputFile = mof.getOutputFile();
  FileSystem lfs = FileSystem.getLocal(conf);
  FsPermission perms = lfs.getFileStatus(outputFile).getPermission();
  Assert.assertEquals("Incorrect output file perms", (short) 0640, perms.toShort());
  Path indexFile = mof.getOutputIndexFile();
  perms = lfs.getFileStatus(indexFile).getPermission();
  Assert.assertEquals("Incorrect index file perms", (short) 0640, perms.toShort());
private static void runJvmReuseTest(JobConf job, boolean reuse) throws IOException {
  // setup a map-only job that reads the input and only sets the counters
  // based on how many times the jvm was reused.
  job.setInt(JobContext.JVM_NUMTASKS_TORUN, reuse ? -1 : 1);
  FileInputFormat.setInputPaths(job, SORT_INPUT_PATH);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ReuseDetector.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumMapTasks(24);
  job.setNumReduceTasks(0);

  RunningJob result = JobClient.runJob(job);
  long uses = result.getCounters().findCounter("jvm", "use").getValue();
  int maps = job.getNumMapTasks();
  if (reuse) {
    assertTrue("maps = " + maps + ", uses = " + uses, maps < uses);
  } else {
    assertEquals("uses should be number of maps", job.getNumMapTasks(), uses);
  }
}
/** Verify that at least one segment does not hit disk */
public void testReduceFromPartialMem() throws Exception {
  final int MAP_TASKS = 7;
  JobConf job = mrCluster.createJobConf();
  job.setNumMapTasks(MAP_TASKS);
  job.setInt(JobContext.REDUCE_MERGE_INMEM_THRESHOLD, 0);
  job.set(JobContext.REDUCE_INPUT_BUFFER_PERCENT, "1.0");
  job.setInt(JobContext.SHUFFLE_PARALLEL_COPIES, 1);
  job.setInt(JobContext.IO_SORT_MB, 10);
  job.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx128m");
  job.setLong(JobContext.REDUCE_MEMORY_TOTAL_BYTES, 128 << 20);
  job.set(JobContext.SHUFFLE_INPUT_BUFFER_PERCENT, "0.14");
  job.set(JobContext.SHUFFLE_MERGE_PERCENT, "1.0");

  Counters c = runJob(job);
  final long out = c.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getCounter();
  final long spill = c.findCounter(TaskCounter.SPILLED_RECORDS).getCounter();
  assertTrue("Expected some records not spilled during reduce (" + spill + ")",
      spill < 2 * out); // spilled map records, some records at the reduce
}
final boolean localMode = ShimLoader.getHadoopShims().isLocalMode(job);
while (!rj.isComplete()) {
  if (th.getContext() != null) {
    th.getContext().checkHeartbeaterLockException();

  if (initializing && rj.getJobState() == JobStatus.PREP) {
    TaskReport[] mappers = jc.getMapTaskReports(rj.getID());
    if (mappers == null) {
      logMapper = "no information for number of mappers; ";

  RunningJob newRj = jc.getJob(rj.getID());
  if (newRj == null) {

  Counter counterCpuMsec = ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter",
      "CPU_MILLISECONDS");
  if (counterCpuMsec != null) {
    long newCpuMSec = counterCpuMsec.getValue();
    if (newCpuMSec > 0) {
      cpuMsec = newCpuMSec;

  Counter counterCpuMsec = ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter",
      "CPU_MILLISECONDS");
  if (counterCpuMsec != null) {
    long newCpuMSec = counterCpuMsec.getValue();
    if (newCpuMSec > cpuMsec) {
      cpuMsec = newCpuMSec;
public static void main(String[] args) throws Exception {
  if (args.length != 1) {
    System.out.println("Usage : java PrintCounters <job id>");
    System.exit(1);
  }

  RunningJob job = new JobClient().getJob(JobID.forName(args[0]));
  // RunningJob job = new JobClient().getJob(new JobID("job_201309211454", 9));
  // RunningJob job = new JobClient().getJob(args[0]);
  Counters counters = job.getCounters();
  for (Counters.Group group : counters) {
    System.out.println("- Counter Group: " + group.getDisplayName()
        + " (" + group.getName() + ")");
    System.out.println("  number of counters in this group: " + group.size());
    for (Counters.Counter counter : group) {
      System.out.println("  - " + counter.getDisplayName() + ": " + counter.getName());
    }
  }
}
/**
 * Constructs counter groups from job runtime statistics. Hive mangles Hadoop Counter data,
 * forming counter names with the format "$groupName::$counterName".
 *
 * @param counterNameToValue mangled hadoop counters from hive.
 * @return counter groups by name.
 */
public static Map<String, CounterGroup> counterGroupInfoMap(
    Map<String, Double> counterNameToValue) {
  Counters counters = new Counters();
  for (Map.Entry<String, ? extends Number> entry : counterNameToValue.entrySet()) {
    String key = entry.getKey();
    Number value = entry.getValue();
    String[] cNames = key.split("::");
    String groupName = cNames[0];
    String counterName = cNames[1];
    Counter counter = counters.findCounter(groupName, counterName);
    counter.setValue(value.longValue());
  }
  return CounterGroup.counterGroupsByName(counters);
}
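// Hedged usage sketch (not from the original snippet): feeding counterGroupInfoMap with keys in
// the "$groupName::$counterName" format described in the javadoc above. The sample group and
// counter names are standard Hadoop names chosen only as illustrative data.
Map<String, Double> mangled = new HashMap<String, Double>();
mangled.put("org.apache.hadoop.mapred.Task$Counter::MAP_INPUT_RECORDS", 1000d);
mangled.put("FileSystemCounters::FILE_BYTES_READ", 4096d);
Map<String, CounterGroup> groupsByName = counterGroupInfoMap(mangled);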
public void finish(RunningJob runningJob) throws IOException {
  super.finish(runningJob);
  counts = new HashMap<Counts, Long>();
  for (Counts count : Counts.values()) {
    Counters.Counter counter = runningJob.getCounters().findCounter(count);
    if (counter != null) {
      counts.put(count, counter.getCounter());
    } else {
      counts.put(count, 0L);
    }
  }
  saveCounts();
}
@SuppressWarnings("deprecation") @Test public void testWriteWithLegacyNames() { Counters counters = new Counters(); counters.incrCounter(Task.Counter.MAP_INPUT_RECORDS, 1); counters.incrCounter(JobInProgress.Counter.DATA_LOCAL_MAPS, 1); counters.findCounter("FileSystemCounters", "FILE_BYTES_READ").increment(1); checkLegacyNames(counters); }
/**
 * Returns the count for the given counter name in the counter group
 * 'MultiStoreCounters'.
 *
 * @param job the MR job
 * @param jobClient the Hadoop job client
 * @param counterName the counter name
 * @return the count of the given counter name
 */
public static long getMultiStoreCount(Job job, JobClient jobClient, String counterName) {
  long value = -1;
  try {
    RunningJob rj = jobClient.getJob(job.getAssignedJobID());
    if (rj != null) {
      Counters.Counter counter = rj.getCounters().getGroup(
          MULTI_STORE_COUNTER_GROUP).getCounterForName(counterName);
      value = counter.getValue();
    }
  } catch (IOException e) {
    LOG.warn("Failed to get the counter for " + counterName, e);
  }
  return value;
}
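// Hedged usage sketch (not from the original snippet): reading a multi-store counter once the
// job has an assigned Hadoop job id. The counter name below is an assumption for illustration;
// actual names depend on the store locations used by the script.
long records = getMultiStoreCount(job, jobClient, "Output records in: /tmp/out1");  // assumed name
if (records < 0) {
  LOG.warn("Multi-store counter not available; the job may not have an assigned id yet.");
}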