JobConf conf = prepareJobConf(baseJobConf);
FileSystem fs = outputDir.getFileSystem(conf);
if (fs.exists(outputDir)) {
    info("Deleting previous output in " + outputDir + " for building store "
        + this.storeDef.getName());
    fs.delete(outputDir, true);
}
FileSystem outputFs = outputDir.getFileSystem(conf);
if (outputFs.exists(outputDir)) {
    throw new IOException("Final output directory already exists.");
}
JobClient jc = new JobClient(conf);
RunningJob runningJob = jc.submitJob(conf);
Counters counters;
// monitorAndPrintJob() returns true on success, so the success branch comes first
if (jc.monitorAndPrintJob(conf, runningJob)) {
    counters = runningJob.getCounters();
    // (a logging statement ending in "=" + suggestedTargetChunkSize is truncated here)
} else {
    logger.error("Job Failed: " + runningJob.getFailureInfo());
    counters = runningJob.getCounters();
}
long numberOfRecords = counters.getCounter(Task.Counter.REDUCE_INPUT_GROUPS);
@Test
public void testNonAvroMapOnly() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath);
    // configure input for a non-Avro sequence file
    job.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, file().toURI().toString());
    // use a plain Hadoop mapper that emits Avro output
    job.setMapperClass(NonAvroOnlyMapper.class);
    // configure output for Avro; map-only, so no reducers
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroJob.setOutputSchema(job, SCHEMA);
    JobClient.runJob(job);
    checkFile(new DataFileReader<>(
        new File(outputPath.toString() + "/part-00000.avro"),
        new SpecificDatumReader<>()));
}
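A hypothetical sketch of the mapper this test assumes: a plain old-API Hadoop mapper that wraps each output datum in AvroWrapper so the Avro output machinery can serialize it. The value type (Utf8, i.e. SCHEMA taken to be a string schema) is an assumption, not taken from the source.

// Hedged sketch; NonAvroOnlyMapper's real key/value types are not shown in the
// original, so a string output schema is assumed here.
static class NonAvroOnlyMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, AvroWrapper<Utf8>, NullWritable> {
  @Override
  public void map(LongWritable key, Text value,
                  OutputCollector<AvroWrapper<Utf8>, NullWritable> out,
                  Reporter reporter) throws IOException {
    // wrap the plain Text value as an Avro datum
    out.collect(new AvroWrapper<>(new Utf8(value.toString())), NullWritable.get());
  }
}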
private void logJob(String logDir, String jobID, PrintWriter listWriter) throws IOException {
    RunningJob rj = jobClient.getJob(JobID.forName(jobID));
    String jobURLString = rj.getTrackingURL();
    Path jobDir = new Path(logDir, jobID);
    fs.mkdirs(jobDir);
    try {
        logJobConf(jobID, jobURLString, jobDir.toString());
    } catch (IOException e) {
        System.err.println("Cannot retrieve job.xml.html for " + jobID);
    }
    listWriter.println("job: " + jobID + "(" + "name=" + rj.getJobName() + ","
        + "status=" + JobStatus.getJobRunState(rj.getJobState()) + ")");
    // the enumeration of attempts is truncated in the original fragment;
    // the loop header below is a reconstruction
    for (AttemptInfo attempt : attempts) {
        logAttempt(jobID, attempt, jobDir.toString());
        listWriter.println("  attempt:" + attempt.id + "(" + "type=" + attempt.type + ","
            + "status=" + attempt.status + ")"); // further fields truncated in the original
    }
}
public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier>
        getJobTrackerDelegationToken(Configuration conf, String userName) throws Exception {
    JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class));
    Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t =
        jcl.getDelegationToken(new Text(userName));
    return t;
}
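A minimal usage sketch, not from the source: a hypothetical caller fetches the JobTracker delegation token for the current user and attaches it to a job's credentials so the submitted job can authenticate. The JobConf variable `job` and the token alias are assumptions.

// Hedged caller sketch; "job" is an assumed in-scope JobConf.
Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> token =
    getJobTrackerDelegationToken(conf, UserGroupInformation.getCurrentUser().getUserName());
job.getCredentials().addToken(new Text("jobtracker.delegation.token"), token);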
/**
 * Return status information about the Map-Reduce cluster.
 */
public ClusterStatus getClusterStatus() throws Exception {
    ClusterStatus cs;
    try {
        JobConf job = new JobConf(conf);
        JobClient jc = new JobClient(job);
        cs = jc.getClusterStatus();
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    }
    LOG.info("Returning cluster status: " + cs.toString());
    return cs;
}
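A short usage sketch of what ClusterStatus exposes; the caller context is assumed.

// Hedged sketch: print a few standard ClusterStatus fields.
ClusterStatus status = getClusterStatus();
System.out.println("trackers=" + status.getTaskTrackers()
    + " running maps=" + status.getMapTasks()
    + " map capacity=" + status.getMaxMapTasks()
    + " running reduces=" + status.getReduceTasks()
    + " reduce capacity=" + status.getMaxReduceTasks());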
success = true;
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
if (work.getNumMapTasks() != null) {
    job.setNumMapTasks(work.getNumMapTasks()); // body reconstructed; truncated in the original
}
Path tempOutPath = Utilities.toTempPath(outputPath);
try {
    FileSystem fs = tempOutPath.getFileSystem(job);
    if (!fs.exists(tempOutPath)) {
        fs.mkdirs(tempOutPath);
    }
    HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
    JobClient jc = new JobClient(job);
    rj = jc.submitJob(job);
    this.jobID = rj.getJobID();
    returnVal = jobExecHelper.progress(rj, jc, ctx);
    success = (returnVal == 0);
} finally {
    String mesg = rj != null ? ("Ended Job = " + rj.getJobID()) : "Job Submission failed";
    // the original kills the submitted job on failure
    if (!success && rj != null) {
        rj.killJob();
    }
}
DOMConfigurator.configure("log4j.xml");
FileSystem fs = FileSystem.get(job.getConfiguration());
Path execBasePath = new Path(props.getProperty(ETL_EXECUTION_BASE_PATH));
Path execHistory = new Path(props.getProperty(ETL_EXECUTION_HISTORY_PATH));
if (!fs.exists(execBasePath)) {
    log.info("The execution base path does not exist. Creating the directory");
    fs.mkdirs(execBasePath);
}
if (!fs.exists(execHistory)) {
    log.info("The execution history path does not exist. Creating the directory");
    fs.mkdirs(execHistory);
}
// quota cleanup over prior executions; the loop header and delete call are
// reconstructions, since the original fragment is truncated here
for (FileStatus stat : fs.listStatus(execHistory)) {
    log.info("removing old execution: " + stat.getPath().getName());
    ContentSummary execContent = fs.getContentSummary(stat.getPath());
    currentCount -= execContent.getFileCount() + execContent.getDirectoryCount();
    fs.delete(stat.getPath(), true);
}
JobClient client = new JobClient(new JobConf(job.getConfiguration()));
for (TaskReport task : client.getMapTaskReports(tasks[0].getTaskAttemptId().getJobID())) {
    if (task.getCurrentStatus().equals(TIPStatus.FAILED)) {
        for (String s : task.getDiagnostics()) {
            System.err.println("task error: " + s); // body reconstructed; truncated in the original
        }
    }
}
try {
    FileSystem fs = emptyScratchDir.getFileSystem(job);
    fs.mkdirs(emptyScratchDir);
} catch (IOException e) {
    e.printStackTrace();
}
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapRunnerClass(ExecMapRunner.class);
job.setMapperClass(ExecMapper.class);
Path hdfsPath = mWork.getTmpHDFSPath();
hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
jc = new JobClient(job);
rj = jc.submitJob(job);
this.jobID = rj.getJobID();
updateStatusInQueryDisplay();
returnVal = jobExecHelper.progress(rj, jc, ctx);
// cleanup path in the original: kill the job, record its ID, close the client
killJob();
jobID = rj.getID().toString();
jc.close();
public int run(String[] args) throws Exception {
    if (args.length != 3)
        Utils.croak("USAGE: GenerateData input-file output-dir value-size");
    JobConf conf = new JobConf(getConf(), GenerateData.class);
    conf.setJobName("generate-data");
    conf.setMapperClass(GenerateDataMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setNumReduceTasks(0);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    // the job emits a BytesWritable/BytesWritable sequence file
    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);
    Path inputPath = new Path(args[0]);
    FileInputFormat.setInputPaths(conf, inputPath);
    Path outputPath = new Path(args[1]);
    // delete the output path if it already exists
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInt("value.size", Integer.parseInt(args[2]));
    JobClient.runJob(conf);
    return 0;
}
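A minimal driver sketch, assuming GenerateData implements Tool (as the run(String[]) signature above suggests); the main() itself is not part of the original fragment.

// Hedged sketch: standard ToolRunner entry point for a Tool implementation.
public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new GenerateData(), args));
}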
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
    JobConf job = new JobConf();
    String inputPath = INPUT_DIR.getRoot().getPath();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath);
    WordCountUtil.writeLinesBytesFile(inputPath);
    job.setInputFormat(AvroAsTextInputFormat.class);
    job.setOutputFormat(AvroTextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputPath);
    JobClient.runJob(job);
    WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}
@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();
    Path inputPath1 = new Path(INPUT_DIR_1.getRoot().getPath());
    Path inputPath2 = new Path(INPUT_DIR_2.getRoot().getPath());
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath);
    writeNamesFiles(new File(inputPath1.toUri().getPath()));
    writeBalancesFiles(new File(inputPath2.toUri().getPath()));
    job.setJobName("multiple-inputs-join");
    AvroMultipleInputs.addInputPath(job, inputPath1, NamesMapImpl.class,
        ReflectData.get().getSchema(NamesRecord.class));
    AvroMultipleInputs.addInputPath(job, inputPath2, BalancesMapImpl.class,
        ReflectData.get().getSchema(BalancesRecord.class));
    Schema keySchema = ReflectData.get().getSchema(KeyRecord.class);
    Schema valueSchema = ReflectData.get().getSchema(JoinableRecord.class);
    AvroJob.setMapOutputSchema(job, Pair.getPairSchema(keySchema, valueSchema));
    AvroJob.setOutputSchema(job, ReflectData.get().getSchema(CompleteRecord.class));
    AvroJob.setReducerClass(job, ReduceImpl.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroJob.setReflect(job);
    JobClient.runJob(job);
    validateCompleteFile(new File(OUTPUT_DIR.getRoot(), "part-00000.avro"));
}
public LogRetriever(String statusDir, JobType jobType, Configuration conf) throws IOException {
    this.statusDir = statusDir;
    this.jobType = jobType;
    attemptDetailPattern = Pattern.compile(attemptDetailPatternInString);
    attemptLogPattern = Pattern.compile(attemptLogPatternInString);
    attemptIDPattern = Pattern.compile(attemptIDPatternInString);
    attemptStartTimePattern = Pattern.compile(attemptStartTimePatternInString);
    attemptEndTimePattern = Pattern.compile(attemptEndTimePatternInString);
    Path statusPath = new Path(statusDir);
    fs = statusPath.getFileSystem(conf);
    jobClient = new JobClient(new JobConf(conf));
    this.conf = conf;
}
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
        throws IOException, InterruptedException, ClassNotFoundException {
    JobConf job = new JobConf(TEST_UTIL.getConfiguration());
    job.setJobName(jobName);
    job.setMapperClass(Mapper.class);
    TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans),
        Mapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true,
        restoreDir);
    TableMapReduceUtil.addDependencyJars(job);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key
    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    LOG.info("Started " + job.getJobName());
    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();
    assertTrue(runningJob.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
}
public void testInputFormat() throws Exception {
    JobConf job = new JobConf();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
    Schema subSchema = Schema.parse("{\"type\":\"record\","
        + "\"name\":\"PairValue\","
        + "\"fields\": [ "
        + "{\"name\":\"value\", \"type\":\"long\"}"
        + "]}");
    AvroJob.setInputSchema(job, subSchema);
    AvroJob.setMapperClass(job, Counter.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
    job.setInputFormat(AvroTrevniInputFormat.class);
    job.setNumReduceTasks(0);                     // map-only
    job.setOutputFormat(NullOutputFormat.class);  // ignore output
    total = 0;
    JobClient.runJob(job);
    assertEquals(WordCountUtil.TOTAL, total);
}
public void testOutputFormat() throws Exception {
    JobConf job = new JobConf();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
    wordCountUtil.writeLinesFile();
    AvroJob.setInputSchema(job, STRING);
    AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));
    AvroJob.setMapperClass(job, MapImpl.class);
    AvroJob.setCombinerClass(job, ReduceImpl.class);
    AvroJob.setReducerClass(job, ReduceImpl.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
    FileOutputFormat.setCompressOutput(job, true);
    job.setOutputFormat(AvroTrevniOutputFormat.class);
    JobClient.runJob(job);
    wordCountUtil.validateCountsFile();
}
// (the method name and leading parameters are truncated in the original fragment)
        int curDirNumber, int obsoleteDirNumber, HiveConf hiveConf, IMetaStoreClient msc,
        long id, String jobName) throws IOException {
    job.setBoolean(IS_MAJOR, compactionType == CompactionType.MAJOR);
    if (dirsToSearch == null) {
        dirsToSearch = new StringableList();
    }
    if (baseDir != null) {
        job.set(BASE_DIR, baseDir.toString());
    }
    job.set(DELTA_DIRS, deltaDirs.toString());
    job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
    job.setLong(MIN_TXN, minTxn);
    JobClient jc = null;
    try {
        jc = new JobClient(job);
        RunningJob rj = jc.submitJob(job);
        LOG.info("Submitted compaction job '" + job.getJobName()
            + "' with jobID=" + rj.getID() + " compaction ID=" + id);
        try {
            msc.setHadoopJobid(rj.getID().toString(), id);
        } catch (TException e) {
            LOG.warn("Error setting hadoop job, jobId=" + rj.getID().toString()
                + " compactionId=" + id, e);
        }
    } finally {
        if (jc != null) {
            jc.close();
        }
    }
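A hedged follow-up sketch, not part of the original fragment: after submitJob(), a caller can block on the compaction job and surface failure explicitly before the client is closed.

// Hedged sketch using the standard RunningJob API; placement inside the
// try block above is an assumption.
rj.waitForCompletion();
if (!rj.isSuccessful()) {
    throw new IOException("Compaction job " + rj.getID() + " failed");
}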
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
    Configuration cfg = UTIL.getConfiguration();
    JobConf jobConf = new JobConf(cfg);
    try {
        jobConf.setJobName("process row task");
        jobConf.setNumReduceTasks(1);
        TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
            ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
        TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf);
        RunningJob job = JobClient.runJob(jobConf);
        assertTrue(job.isSuccessful());
    } finally {
        if (jobConf != null) {
            FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
        }
    }
}
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
    Configuration conf = UTIL.getConfiguration();
    final JobConf job = new JobConf(conf);
    job.setInputFormat(clazz);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(ExampleVerifier.class);
    job.setNumReduceTasks(0);
    LOG.debug("submitting job.");
    final RunningJob run = JobClient.runJob(job);
    assertTrue("job failed!", run.isSuccessful());
    assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
    assertEquals("Saw any instances of the filtered out row.", 0,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
    assertEquals("Saw the wrong number of instances of columnA.", 1,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
    assertEquals("Saw the wrong number of instances of columnB.", 1,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
    assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
    assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
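A hedged sketch of how a mapper like ExampleVerifier might bump the counters asserted above, using the old-API Reporter. The map signature and the row-counter group name mirror the assertions; the family and value counters would be incremented analogously.

// Hedged sketch; ExampleVerifier's actual body is not in the original.
public void map(ImmutableBytesWritable key, Result value,
                OutputCollector<NullWritable, NullWritable> out,
                Reporter reporter) throws IOException {
  // one increment per row seen, keyed by the row's string form ("aaa", "bbb", ...)
  reporter.incrCounter(TestTableInputFormat.class.getName() + ":row",
      Bytes.toString(key.get()), 1);
}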
    JobConf jobConf = new JobConf(util.getConfiguration());
    jobConf.setJarByClass(util.getClass());
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf,
        TestTableSnapshotInputFormat.class);
    jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setOutputFormat(NullOutputFormat.class);
    RunningJob job = JobClient.runJob(jobConf);
    Assert.assertTrue(job.isSuccessful());
} finally {
    if (!shutdownCluster) {
        // (cleanup body truncated in the original fragment)
    }
}