void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  Configuration conf = UTIL.getConfiguration();
  final JobConf job = new JobConf(conf);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
/**
 * Constructs counter groups from job runtime statistics. Hive mangles Hadoop Counter data,
 * forming counter names with the format "$groupName::$counterName".
 *
 * @param counterNameToValue mangled Hadoop counters from Hive.
 * @return counter groups by name.
 */
public static Map<String, CounterGroup> counterGroupInfoMap(
    Map<String, Double> counterNameToValue) {
  Counters counters = new Counters();
  for (Map.Entry<String, ? extends Number> entry : counterNameToValue.entrySet()) {
    String key = entry.getKey();
    Number value = entry.getValue();
    String[] cNames = key.split("::");
    String groupName = cNames[0];
    String counterName = cNames[1];
    Counter counter = counters.findCounter(groupName, counterName);
    counter.setValue(value.longValue());
  }
  return CounterGroup.counterGroupsByName(counters);
}
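A minimal usage sketch for the decoder above. The input keys are illustrative, and CounterGroup is assumed to be the project's own wrapper type rather than a Hadoop class:

// Hive-style mangled counter names: "$groupName::$counterName" -> value.
Map<String, Double> hiveCounters = new HashMap<String, Double>();
hiveCounters.put("org.apache.hadoop.mapred.Task$Counter::MAP_INPUT_RECORDS", 1234.0);
hiveCounters.put("org.apache.hadoop.mapred.Task$Counter::REDUCE_OUTPUT_RECORDS", 567.0);

// Each key is split on "::" and folded back into per-group Counter objects.
Map<String, CounterGroup> groups = counterGroupInfoMap(hiveCounters);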
public void run(JobConf conf) throws Exception {
  _runningJob = new JobClient(conf).submitJob(conf);
  info("See " + _runningJob.getTrackingURL() + " for details.");
  _runningJob.waitForCompletion();

  if (!_runningJob.isSuccessful()) {
    throw new Exception("Hadoop job:" + getId() + " failed!");
  }

  // dump all counters
  Counters counters = _runningJob.getCounters();
  for (String groupName : counters.getGroupNames()) {
    Counters.Group group = counters.getGroup(groupName);
    info("Group: " + group.getDisplayName());
    for (Counter counter : group) {
      info(counter.getDisplayName() + ":\t" + counter.getValue());
    }
  }
}
private Counters getEnumCounters(Enum[] keys) {
  Counters counters = new Counters();
  for (Enum key : keys) {
    for (long i = 0; i < MAX_VALUE; ++i) {
      counters.incrCounter(key, i);
    }
  }
  return counters;
}
sLogger.info(" - number of reducers: " + 0); Path inputDocumentFiles = new Path(inputDocument); Path outputDocumentFiles = new Path(outputDocument); Path termIndexPath = new Path(termIndex); Path titleIndexPath = new Path(titleIndex); JobConf conf = new JobConf(configuration, ParseCorpus.class); FileSystem fs = FileSystem.get(conf); conf.setJobName(ParseCorpus.class.getSimpleName() + " - index document"); DistributedCache.addCacheFile(titleIndexPath.toUri(), conf); conf.setNumMapTasks(numberOfMappers); conf.setNumReduceTasks(0); conf.setMapperClass(IndexDocumentMapper.class); Counters counters = job.getCounters(); int collapsedDocuments = (int) counters.findCounter(MyCounter.COLLAPSED_DOCUMENTS).getCounter(); sLogger.info("Total number of collapsed documnts: " + collapsedDocuments); int leftOverDocuments = (int) counters.findCounter(MyCounter.LEFT_OVER_DOCUMENTS).getCounter(); sLogger.info("Total number of left-over documents: " + leftOverDocuments);
JobConf conf = prepareJobConf(baseJobConf);
FileSystem fs = outputDir.getFileSystem(conf);
if (fs.exists(outputDir)) {
  info("Deleting previous output in " + outputDir + " for building store "
       + this.storeDef.getName());
  fs.delete(outputDir, true);
}

conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
// value reconstructed: the store definition is serialized the same way the cluster is above
conf.set("stores.xml",
         new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
conf.set("final.output.dir", outputDir.toString());
FileOutputFormat.setOutputPath(conf, tempDir);

// submit and monitor the job (the submission call is reconstructed from context)
RunningJob runningJob = jc.submitJob(conf);
if (!jc.monitorAndPrintJob(conf, runningJob)) {
  throw new VoldemortException("Store-building job failed.");
}

// Sanity-check the average chunk size per partition using the map-output byte counter.
Counters counters = runningJob.getCounters();
long mapOutputBytes = counters.getCounter(Task.Counter.MAP_OUTPUT_BYTES);
long averageNumberOfBytesPerChunk = mapOutputBytes / numChunks / cluster.getNumberOfPartitions();
if (averageNumberOfBytesPerChunk > HadoopStoreWriter.DEFAULT_CHUNK_SIZE) {
  // handling of oversized chunks is elided in the original fragment
}

long numberOfRecords = counters.getCounter(Task.Counter.REDUCE_INPUT_GROUPS);
logger.info("Number of collisions in the job - "
            + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
logger.info("Maximum number of collisions for one entry - "
            + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
edge_path = new Path(args[0] + "/pr_edge_block");
vector_path = new Path(args[0] + "/pr_iv_block");
tempmv_path = new Path(args[0] + "/pr_tempmv_block");
output_path = new Path(args[0] + "/pr_output_block");
vector_unfold_path = new Path(args[0] + "/pr_vector");

JobClient.runJob(configStage1());
RunningJob job = JobClient.runJob(configStage2());
Counters c = job.getCounters();
long changed = c.getCounter(PrCounters.CONVERGE_CHECK);
System.out.println("Iteration = " + i + ", changed reducer = " + changed);
JobClient.runJob(configStage25());
public int runTool(Configuration config, String collectionPath, String outputPath,
    String indexFile, String mappingFile) throws Exception {
  JobConf conf = new JobConf(config, DemoCountAquaint2Documents.class);
  FileSystem fs = FileSystem.get(config);

  sLogger.info(" - mapping file: " + mappingFile);

  conf.setJobName("BuildAquaint2ForwardIndex");
  conf.set("mapred.child.java.opts", "-Xmx1024m");
  conf.setNumReduceTasks(1);

  FileInputFormat.setInputPaths(conf, new Path(collectionPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  FileOutputFormat.setCompressOutput(conf, false);

  FileSystem.get(conf).delete(new Path(outputPath), true);

  RunningJob job = JobClient.runJob(conf);
  Counters counters = job.getCounters();
  int numDocs = (int) counters.findCounter(Count.DOCS).getCounter();

  // tail of the method reconstructed: return the document count read from the counter
  return numDocs;
}
@SuppressWarnings("deprecation") public static void main(String[] args) throws IOException { int mapTasks = 15; JobConf conf = new JobConf(M1ViterbiMapper.class); conf.setJobName("m1viterbi"); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(M1ViterbiMapper.class); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(0); conf.setInputFormat(SequenceFileInputFormat.class); FileInputFormat.setInputPaths(conf, new Path(bitext)); FileOutputFormat.setOutputPath(conf, new Path("somealigns.test")); RunningJob rj = JobClient.runJob(conf); Counters cs = rj.getCounters(); double lp = (double)cs.getCounter(CrossEntropyCounters.LOGPROB); double wc = (double)cs.getCounter(CrossEntropyCounters.WORDCOUNT); double ce = (lp / wc) / Math.log(2.0); System.out.println("Viterbi cross-entropy: " + ce + " perplexity: " + Math.pow(2.0, ce)); }
configureJob(jc, "kill job with abort()", 1, 0, outDir); jc.setMapperClass(UtilsForTests.KillMapper.class); jc.setOutputCommitter(committer); JobClient jobClient = new JobClient(jc); RunningJob job = jobClient.submitJob(jc); JobID id = job.getID(); Counters counters = job.getCounters(); if (counters.getCounter(JobCounter.TOTAL_LAUNCHED_MAPS) == 1) { break; counters = job.getCounters(); Path testFile = new Path(outDir, fileName); assertTrue("File " + testFile + " missing for job " + id, fileSys.exists(testFile)); Path file = new Path(outDir, ex); assertFalse("File " + file + " should not be present for killed job " + id, fileSys.exists(file));
@SuppressWarnings("unchecked") private <T> T getSplitDetails(Path file, long offset) throws IOException { FileSystem fs = file.getFileSystem(conf); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = Text.readString(inFile); Class<T> cls; try { cls = (Class<T>) conf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(conf); Deserializer<T> deserializer = (Deserializer<T>) factory.getDeserializer(cls); deserializer.open(inFile); T split = deserializer.deserialize(null); long pos = inFile.getPos(); getCounters().findCounter( TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset); inFile.close(); return split; }
/** Verify that at least one segment does not hit disk */
public void testReduceFromPartialMem() throws Exception {
  final int MAP_TASKS = 7;
  JobConf job = mrCluster.createJobConf();
  job.setNumMapTasks(MAP_TASKS);
  job.setInt(JobContext.REDUCE_MERGE_INMEM_THRESHOLD, 0);
  job.set(JobContext.REDUCE_INPUT_BUFFER_PERCENT, "1.0");
  job.setInt(JobContext.SHUFFLE_PARALLEL_COPIES, 1);
  job.setInt(JobContext.IO_SORT_MB, 10);
  job.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx128m");
  job.setLong(JobContext.REDUCE_MEMORY_TOTAL_BYTES, 128 << 20);
  job.set(JobContext.SHUFFLE_INPUT_BUFFER_PERCENT, "0.14");
  job.set(JobContext.SHUFFLE_MERGE_PERCENT, "1.0");

  Counters c = runJob(job);
  final long out = c.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getCounter();
  final long spill = c.findCounter(TaskCounter.SPILLED_RECORDS).getCounter();
  assertTrue("Expected some records not spilled during reduce (" + spill + ")",
      spill < 2 * out); // spilled map records, some records at the reduce
}
private static void runJvmReuseTest(JobConf job, boolean reuse) throws IOException {
  // setup a map-only job that reads the input and only sets the counters
  // based on how many times the jvm was reused.
  job.setInt(JobContext.JVM_NUMTASKS_TORUN, reuse ? -1 : 1);
  FileInputFormat.setInputPaths(job, SORT_INPUT_PATH);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ReuseDetector.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumMapTasks(24);
  job.setNumReduceTasks(0);

  RunningJob result = JobClient.runJob(job);
  long uses = result.getCounters().findCounter("jvm", "use").getValue();
  int maps = job.getNumMapTasks();
  if (reuse) {
    assertTrue("maps = " + maps + ", uses = " + uses, maps < uses);
  } else {
    assertEquals("uses should be number of maps", job.getNumMapTasks(), uses);
  }
}
// Head of the call reconstructed from the fragment: the stats object is built from the
// running job's map/reduce task counts and CPU time.
MapRedStats mapRedStats = new MapRedStats(
    job, numMap, numReduce, cpuMsec, false, rj.getID().toString());
updateMapRedTaskWebUIStatistics(mapRedStats, rj);

if (initializing && rj.getJobState() == JobStatus.PREP) {
  RunningJob newRj = jc.getJob(rj.getID());
  if (newRj == null) {
    // handling of a job that can no longer be found is elided in the original fragment
  }
}

// While polling, keep the most recent CPU time reported under the Task counter group.
Counter counterCpuMsec = ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter",
    "CPU_MILLISECONDS");
if (counterCpuMsec != null) {
  long newCpuMSec = counterCpuMsec.getValue();
  if (newCpuMSec > 0) {
    cpuMsec = newCpuMSec;
  }
}

// After the job finishes, take the final CPU time if it exceeds the last value seen.
counterCpuMsec = ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter",
    "CPU_MILLISECONDS");
if (counterCpuMsec != null) {
  long newCpuMSec = counterCpuMsec.getValue();
  if (newCpuMSec > cpuMsec) {
    cpuMsec = newCpuMSec;
  }
}

// Number of rows written by FileSinkOperator, read from the configured Hive counter group.
Counter counter = ctrs.findCounter(
    ss.getConf().getVar(HiveConf.ConfVars.HIVECOUNTERGROUP),
    FileSinkOperator.TOTAL_TABLE_ROWS_WRITTEN);
if (counter != null) {
  mapRedStats.setNumModifiedRows(counter.getValue());
}
public Task() {
  taskStatus = TaskStatus.createTaskStatus(isMapTask());
  taskId = new TaskAttemptID();
  spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
  failedShuffleCounter = counters.findCounter(TaskCounter.FAILED_SHUFFLE);
  mergedMapOutputsCounter = counters.findCounter(TaskCounter.MERGED_MAP_OUTPUTS);
  gcUpdater = new GcTimeUpdater();
}
@SuppressWarnings("deprecation") @Test public void testWriteWithLegacyNames() { Counters counters = new Counters(); counters.incrCounter(Task.Counter.MAP_INPUT_RECORDS, 1); counters.incrCounter(JobInProgress.Counter.DATA_LOCAL_MAPS, 1); counters.findCounter("FileSystemCounters", "FILE_BYTES_READ").increment(1); checkLegacyNames(counters); }
public void finish(RunningJob runningJob) throws IOException {
  super.finish(runningJob);
  counts = new HashMap<Counts, Long>();
  for (Counts count : Counts.values()) {
    Counters.Counter counter = runningJob.getCounters().findCounter(count);
    if (counter != null) {
      counts.put(count, counter.getCounter());
    } else {
      counts.put(count, 0L);
    }
  }
  saveCounts();
}
/**
 * Convenience method for computing the sum of two sets of counters.
 *
 * @param a the first counters
 * @param b the second counters
 * @return a new summed counters object
 */
public static Counters sum(Counters a, Counters b) {
  Counters counters = new Counters();
  counters.incrAllCounters(a);
  counters.incrAllCounters(b);
  return counters;
}
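A brief usage sketch for sum(), assuming the method is in scope (for example via a static import of its utility class); the group and counter names below are placeholders:

Counters first = new Counters();
first.findCounter("example-group", "records").increment(2);   // placeholder group/counter names

Counters second = new Counters();
second.findCounter("example-group", "records").increment(3);

// sum() returns a fresh Counters object holding the combined totals of both inputs.
Counters total = sum(first, second);
assert total.findCounter("example-group", "records").getValue() == 5;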
@Test
public void testLegacyGetGroupNames() {
  Counters counters = new Counters();
  // create 2 filesystem counter groups
  counters.findCounter("fs1", FileSystemCounter.BYTES_READ).increment(1);
  counters.findCounter("fs2", FileSystemCounter.BYTES_READ).increment(1);
  counters.incrCounter("group1", "counter1", 1);

  HashSet<String> groups = new HashSet<String>(counters.getGroupNames());
  HashSet<String> expectedGroups = new HashSet<String>();
  expectedGroups.add("group1");
  expectedGroups.add("FileSystemCounters"); // legacy name
  expectedGroups.add("org.apache.hadoop.mapreduce.FileSystemCounter");

  assertEquals(expectedGroups, groups);
}
public void readFields(DataInput in) throws IOException {
  this.taskid.readFields(in);
  setProgress(in.readFloat());
  this.numSlots = in.readInt();
  this.runState = WritableUtils.readEnum(in, State.class);
  setDiagnosticInfo(StringInterner.weakIntern(Text.readString(in)));
  setStateString(StringInterner.weakIntern(Text.readString(in)));
  this.phase = WritableUtils.readEnum(in, Phase.class);
  this.startTime = in.readLong();
  this.finishTime = in.readLong();
  counters = new Counters();
  this.includeAllCounters = in.readBoolean();
  this.outputSize = in.readLong();
  counters.readFields(in);
  nextRecordRange.readFields(in);
}