/**
 * Runs a map-only job that reads the {@code default.test_bad_records} table through
 * {@code HCatInputFormat} and writes text output under {@code TEST_DATA_DIR}.
 *
 * <p>Any existing output directory is removed recursively before the job is submitted.
 *
 * @param badRecordThreshold value stored in the job configuration under
 *        {@code HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}
 * @return whatever {@code Job.waitForCompletion(true)} reports
 * @throws Exception if configuring, cleaning up for, or running the job fails
 */
private boolean runJob(float badRecordThreshold) throws Exception {
    Configuration jobConf = new Configuration();
    jobConf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);

    Job badRecordJob = new Job(jobConf);
    badRecordJob.setJarByClass(this.getClass());

    // Map-only pipeline: HCatalog input, plain text output.
    badRecordJob.setMapperClass(MyMapper.class);
    badRecordJob.setInputFormatClass(HCatInputFormat.class);
    badRecordJob.setOutputFormatClass(TextOutputFormat.class);
    HCatInputFormat.setInput(badRecordJob, "default", "test_bad_records");
    badRecordJob.setMapOutputKeyClass(HCatRecord.class);
    badRecordJob.setMapOutputValueClass(HCatRecord.class);
    badRecordJob.setNumReduceTasks(0);

    // Clear any previous output directory before pointing the job at it.
    Path outputDir = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
    boolean staleOutputPresent = outputDir.getFileSystem(jobConf).exists(outputDir);
    if (staleOutputPresent) {
        outputDir.getFileSystem(jobConf).delete(outputDir, true);
    }
    TextOutputFormat.setOutputPath(badRecordJob, outputDir);

    boolean completed = badRecordJob.waitForCompletion(true);
    return completed;
}
// Registers `path` as the output location of `job`.
// NOTE(review): isolated statement; `job` and `path` are declared outside this excerpt.
TextOutputFormat.setOutputPath(job, path);
/**
 * Runs the given test command as a MapReduce job named "HBase Performance Evaluation".
 *
 * <p>The input file is produced by {@code writeInputFile}; results are written as text to an
 * {@code outputs} directory next to the input directory. The outcome returned by
 * {@code waitForCompletion} is not inspected.
 *
 * @param cmd test class whose name is stored under {@code EvaluationMapTask.CMD_KEY}
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration peConf = getConf();
    Path inputDir = writeInputFile(peConf);

    // These keys must be in the configuration before the Job is created from it.
    peConf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    peConf.set(EvaluationMapTask.PE_KEY, getClass().getName());

    Job peJob = Job.getInstance(peConf);
    peJob.setJobName("HBase Performance Evaluation");
    peJob.setJarByClass(PerformanceEvaluation.class);

    // Input side.
    peJob.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(peJob, inputDir);

    // Map/reduce side: a single reducer using LongSumReducer.
    peJob.setMapperClass(EvaluationMapTask.class);
    peJob.setReducerClass(LongSumReducer.class);
    peJob.setNumReduceTasks(1);
    peJob.setOutputKeyClass(LongWritable.class);
    peJob.setOutputValueClass(LongWritable.class);

    // Output side.
    peJob.setOutputFormatClass(TextOutputFormat.class);
    Path outputDir = new Path(inputDir.getParent(), "outputs");
    TextOutputFormat.setOutputPath(peJob, outputDir);

    TableMapReduceUtil.addDependencyJars(peJob);
    TableMapReduceUtil.initCredentials(peJob);

    peJob.waitForCompletion(true);
}
// Points the job's output at an "outputs" directory that is a sibling of `inputDir`.
// NOTE(review): isolated statement; `job` and `inputDir` are declared outside this excerpt.
TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
/**
 * Configures and runs the "Link Verifier" job and writes its results as text to {@code outputDir}.
 *
 * <p>The table scan is restricted to {@code FAMILY_NAME:COLUMN_PREV} and carries two authorization
 * labels taken from the comma-separated {@code labels} field at positions
 * {@code labelIndex * 2} and {@code labelIndex * 2 + 1}.
 *
 * @param outputDir directory the job writes to
 * @param numReducers number of reduce tasks
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
private int doVerify(Path outputDir, int numReducers)
        throws IOException, InterruptedException, ClassNotFoundException {
    job = new Job(getConf());
    job.setJobName("Link Verifier");
    job.setNumReduceTasks(numReducers);
    job.setJarByClass(getClass());
    setJobScannerConf(job);

    Scan prevColumnScan = new Scan();
    prevColumnScan.addColumn(FAMILY_NAME, COLUMN_PREV);
    prevColumnScan.setCaching(10000);
    prevColumnScan.setCacheBlocks(false);

    // Each label index owns a pair of adjacent entries in the label list.
    String[] labelParts = labels.split(COMMA);
    int firstLabelPos = this.labelIndex * 2;
    Authorizations scanAuths =
            new Authorizations(labelParts[firstLabelPos], labelParts[firstLabelPos + 1]);
    prevColumnScan.setAuthorizations(scanAuths);

    TableMapReduceUtil.initTableMapperJob(tableName.getName(), prevColumnScan, VerifyMapper.class,
            BytesWritable.class, BytesWritable.class, job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);

    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

    job.setReducerClass(VerifyReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
// Declares BytesWritable as both the output key and output value class of `job`, then registers
// `outputDir` as the job's output location.
// NOTE(review): isolated statements; `job` and `outputDir` are declared outside this excerpt.
job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); TextOutputFormat.setOutputPath(job, outputDir);
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: WordCount <input path> <result path>"); return; } final String inputPath = args[0]; final String outputPath = args[1]; final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // Set up the Hadoop Input Format Job job = Job.getInstance(); HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job); TextInputFormat.addInputPath(job, new Path(inputPath)); // Create a Flink job with it DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat); // Tokenize the line and convert from Writable "Text" to String for better handling DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer()); // Sum up the words DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1); // Convert String back to Writable "Text" for use with Hadoop Output Format DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper()); // Set up Hadoop Output Format HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job); hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " "); hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test TextOutputFormat.setOutputPath(job, new Path(outputPath)); // Output & Execute hadoopResult.output(hadoopOutputFormat); env.execute("Word Count"); }
// NOTE(review): this excerpt appears to begin mid-statement; whatever receives the newly
// constructed HadoopOutputFormat is not visible here.
// The remaining statements set "mapred.textoutputformat.separator" to a single space in the
// job's configuration and register `resultPath` as the job's output location.
new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), job); job.getConfiguration().set("mapred.textoutputformat.separator", " "); TextOutputFormat.setOutputPath(job, new Path(resultPath));
/**
 * Configures and runs the "QuestionAnswerBuilder" job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return 0 if the job completed successfully, 2 otherwise
 * @throws Exception if the job cannot be configured or executed
 */
@Override
public int run(String[] args) throws Exception {
    Job builderJob = new Job(new Configuration(), "QuestionAnswerBuilder");
    builderJob.setJarByClass(QuestionAnswerBuilder.class);

    // Map side.
    builderJob.setMapperClass(HierarchyMapper.class);
    FileInputFormat.addInputPath(builderJob, new Path(args[0]));

    // Reduce side, written out as text.
    builderJob.setReducerClass(QuestionAnswerReducer.class);
    builderJob.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(builderJob, new Path(args[1]));
    builderJob.setOutputKeyClass(IntWritable.class);
    builderJob.setOutputValueClass(Text.class);

    boolean succeeded = builderJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 2;
}
/**
 * Configures and runs the post/comment hierarchy job, which takes two text inputs handled by
 * different mappers and feeds them into one reducer.
 *
 * @param args {@code args[0]} is the input read by {@code PostMapper}, {@code args[1]} is the
 *        input read by {@code CommentMapper}, {@code args[2]} is the output path
 * @return 0 if the job completed successfully, 2 otherwise
 * @throws Exception if the job cannot be configured or executed
 */
@Override
public int run(String[] args) throws Exception {
    Job hierarchyJob = new Job(new Configuration(), "PostCommentHeirarchy");
    hierarchyJob.setJarByClass(PostCommentHierarchy.class);

    // Each input path is bound to its own mapper.
    MultipleInputs.addInputPath(hierarchyJob, new Path(args[0]), TextInputFormat.class,
            PostMapper.class);
    MultipleInputs.addInputPath(hierarchyJob, new Path(args[1]), TextInputFormat.class,
            CommentMapper.class);

    hierarchyJob.setReducerClass(PostCommentHierarchyReducer.class);

    hierarchyJob.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(hierarchyJob, new Path(args[2]));
    hierarchyJob.setOutputKeyClass(Text.class);
    hierarchyJob.setOutputValueClass(Text.class);

    boolean succeeded = hierarchyJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 2;
}
/**
 * Runs a map-only sampling job that reads text from {@code sequenceFileInput} and writes text
 * to {@code outputPath}.
 *
 * <p>The sampling frequency is stored under {@code SAMPLE_FREQUENCY} in the configuration
 * returned by {@code getConf()}, so that configuration object is modified by this call.
 *
 * @param sequenceFileInput path of the input to sample
 * @param outputPath path the sampler writes to
 * @param frequency sampling frequency handed to the job through the configuration
 * @return 0 if the sampling job completed successfully, 1 if it failed
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException if waiting for the job is interrupted
 */
private int createParitionFile(String sequenceFileInput, String outputPath, float frequency)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration config = getConf();
    config.setFloat(SAMPLE_FREQUENCY, frequency);

    Job sampler = new Job(config);
    sampler.setInputFormatClass(TextInputFormat.class);
    sampler.setOutputFormatClass(TextOutputFormat.class);
    sampler.setOutputKeyClass(Text.class);
    sampler.setOutputValueClass(NullWritable.class);
    sampler.setNumReduceTasks(0);
    sampler.setMapperClass(Map.class);

    TextInputFormat.addInputPath(sampler, new Path(sequenceFileInput));
    TextOutputFormat.setOutputPath(sampler, new Path(outputPath));

    // Report the job's real outcome; previously a failed sampler still returned 0.
    return sampler.waitForCompletion(true) ? 0 : 1;
}
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.out.println("usage : need <input path> <output path>"); return 1; } Path inputPath = new Path(args[0]); Path outputPath = new Path(args[1]); Configuration conf = getConf(); Job job = new Job(conf, getClass().getName() + "--<your_name>"); // TODO job.setJarByClass(ItemizedBilling.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setMapOutputValueClass(IntWritable.class); job.setMapOutputKeyClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, outputPath); return job.waitForCompletion(true) ? 0 : 1; }
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.out.println("usage : need <input path> <output path>"); return 1; } Path inputPath = new Path(args[0]); Path outputPath = new Path(args[1]); Configuration conf = getConf(); Job job = new Job(conf, getClass().getName() + "--<your_name>"); // TODO job.setJarByClass(BillingTotal.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setMapOutputValueClass(IntWritable.class); job.setMapOutputKeyClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, outputPath); return job.waitForCompletion(true) ? 0 : 1; }
/**
 * Runs a job over the sequence files matched by {@code <tables dir>/<tableName>/*&#47;*&#47;data}
 * and writes its result to the given output path.
 *
 * @param args exactly two entries: the table name followed by the output path
 * @return 1 if the argument count is wrong or the job was not successful, 0 otherwise
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length != 2) {
        System.out.println("Usage: CountRowKeys tableName outputPath");
        return 1;
    }

    Job countJob = new Job(getConf(), this.getClass().getName());
    countJob.setJarByClass(this.getClass());

    // Input: glob over the data files below the table's directory.
    countJob.setInputFormatClass(SequenceFileInputFormat.class);
    Path tableDataGlob = new Path(ServerConstants.getTablesDir() + "/" + args[0] + "/*/*/data");
    SequenceFileInputFormat.addInputPath(countJob, tableDataGlob);

    countJob.setMapperClass(MyMapper.class);
    countJob.setMapOutputKeyClass(Text.class);
    countJob.setMapOutputValueClass(NullWritable.class);
    countJob.setReducerClass(MyReducer.class);

    TextOutputFormat.setOutputPath(countJob, new Path(args[1]));

    countJob.waitForCompletion(true);
    if (countJob.isSuccessful()) {
        return 0;
    }
    return 1;
}
/**
 * Runs a map-only job that reads a table through {@code AccumuloInputFormat}, filters entries
 * with a {@code RegExFilter} iterator, and writes the surviving entries as text.
 *
 * <p>Arguments, in order: {@code args[0]} and {@code args[1]} are passed to
 * {@code setZooKeeperInstance}; {@code args[2]}, {@code args[3]} and {@code args[4]} are passed
 * to {@code setInputInfo} ({@code args[3]} as bytes); {@code args[5]}..{@code args[8]} are the
 * row, column family, column qualifier and value regular expressions; {@code args[9]} is the
 * output path.
 *
 * @param args the ten positional arguments described above
 * @return 0 if the job was successful, 1 otherwise
 * @throws Exception if the job cannot be configured or executed
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    // Encode the credential bytes as UTF-8 explicitly; the no-argument getBytes() uses the
    // platform default charset, which makes the resulting bytes depend on the host's locale.
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2],
            args[3].getBytes(java.nio.charset.Charset.forName("UTF-8")), args[4],
            new Authorizations());

    IteratorSetting regex = new IteratorSetting(50, "regex", RegExFilter.class);
    RegExFilter.setRegexs(regex, args[5], args[6], args[7], args[8], false);
    AccumuloInputFormat.addIterator(job.getConfiguration(), regex);

    job.setMapperClass(RegexMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Map-only: mapper output goes straight to the text output format.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[9]));

    System.out.println("setRowRegex: " + args[5]);
    System.out.println("setColumnFamilyRegex: " + args[6]);
    System.out.println("setColumnQualifierRegex: " + args[7]);
    System.out.println("setValueRegex: " + args[8]);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
/**
 * Configures and runs the "Link Verifier" job against the table named by the configuration,
 * reading the two key columns and the two "prev" columns, and writes text output.
 *
 * @param outputDir directory the job writes to
 * @param numReducers number of reduce tasks
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or executed
 */
public int run(Path outputDir, int numReducers) throws Exception {
    LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);

    job = new Job(getConf());
    job.setJobName("Link Verifier");
    job.setNumReduceTasks(numReducers);
    job.setJarByClass(getClass());

    // Comma-separated list of the columns the input format should read.
    String projectedColumns = Joiner.on(",")
            .join(COLUMN_KEY_ONE, COLUMN_KEY_TWO, COLUMN_PREV_ONE, COLUMN_PREV_TWO);
    new KuduTableMapReduceUtil.TableInputFormatConfiguratorWithCommandLineParser(
            job, getTableName(getConf()), projectedColumns)
            .configure();

    job.setMapperClass(VerifyMapper.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

    job.setReducerClass(VerifyReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
/**
 * Parses the command-line options and runs a single-reducer job that reads sequence files
 * through {@code RecordIndexMapper} and writes uncompressed text output.
 *
 * @param args command-line arguments handed to {@code SequenceFileIndexerOptions}
 * @return 0 if the job completed successfully, 1 if it failed
 * @throws Exception if option handling, job setup, or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    SequenceFileIndexerOptions options = new SequenceFileIndexerOptions(args);
    options.prepare();

    Job job = new Job(getConf());
    job.setJarByClass(SequenceFileIndexer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(RecordIndexMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, options.getInputPaths());
    TextOutputFormat.setOutputPath(job, options.getOutputPath());
    TextOutputFormat.setCompressOutput(job, false);
    // SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    // SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Report the job's real outcome; previously a failed job still produced exit code 0.
    return job.waitForCompletion(true) ? 0 : 1;
}

public static void main(String [] args) throws Exception {
/**
 * Records {@code file} under the {@code SINGLEFILE} configuration key and points the job's
 * output at the directory returned by {@code getTempDirectorySingle}, deleting that directory
 * first if it already exists.
 *
 * @param job job whose configuration and output path are updated
 * @param file path stored, as a string, under {@code SINGLEFILE}
 * @throws IOException if checking for, deleting, or registering the temporary directory fails
 */
public static void setSingleOutput(Job job, Path file) throws IOException {
    job.getConfiguration().set(SINGLEFILE, file.toString());

    // Remove whatever an earlier run may have left behind.
    HDFSPath staleTempDir = getTempDirectorySingle(job.getConfiguration());
    if (staleTempDir.exists()) {
        staleTempDir.delete();
    }

    TextOutputFormat.setOutputPath(job, getTempDirectorySingle(job.getConfiguration()));
}
/**
 * Submits the given test command as the "HBase Performance Evaluation" MapReduce job and
 * waits for it to finish.
 *
 * <p>Input comes from the file written by {@code writeInputFile}; text output goes to an
 * {@code outputs} directory beside the input directory. The value returned by
 * {@code waitForCompletion} is not checked.
 *
 * @param cmd test class whose name is stored under {@code EvaluationMapTask.CMD_KEY}
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration evalConf = getConf();
    Path inputDir = writeInputFile(evalConf);

    // Both keys are written before the Job is instantiated from this configuration.
    evalConf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    evalConf.set(EvaluationMapTask.PE_KEY, getClass().getName());

    Job evalJob = Job.getInstance(evalConf);
    evalJob.setJobName("HBase Performance Evaluation");
    evalJob.setJarByClass(PerformanceEvaluation.class);

    evalJob.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(evalJob, inputDir);

    evalJob.setMapperClass(EvaluationMapTask.class);
    evalJob.setReducerClass(LongSumReducer.class);
    evalJob.setNumReduceTasks(1);
    evalJob.setOutputKeyClass(LongWritable.class);
    evalJob.setOutputValueClass(LongWritable.class);

    evalJob.setOutputFormatClass(TextOutputFormat.class);
    Path resultsDir = new Path(inputDir.getParent(), "outputs");
    TextOutputFormat.setOutputPath(evalJob, resultsDir);

    TableMapReduceUtil.addDependencyJars(evalJob);
    TableMapReduceUtil.initCredentials(evalJob);

    evalJob.waitForCompletion(true);
}
/**
 * Configures the job to emit human-readable text.
 *
 * <p>The output path is resolved from {@code destination} by
 * {@code MRReasoningUtils.getOutputPath}; the output format is installed through
 * {@code LazyOutputFormat}; five named text outputs are registered and counters are enabled
 * for them.
 *
 * @param destination value passed to {@code MRReasoningUtils.getOutputPath} to resolve the
 *        output path
 */
protected void configureTextOutput(String destination) {
    Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // These outputs all use a NullWritable key and a Text value.
    String[] valueOnlyOutputs = {
        MRReasoningUtils.INTERMEDIATE_OUT,
        MRReasoningUtils.TERMINAL_OUT,
        MRReasoningUtils.SCHEMA_OUT,
        MRReasoningUtils.INCONSISTENT_OUT
    };
    for (String outputName : valueOnlyOutputs) {
        MultipleOutputs.addNamedOutput(job, outputName, TextOutputFormat.class,
                NullWritable.class, Text.class);
    }

    // The debug output is the only one keyed by Text.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class,
            Text.class, Text.class);

    MultipleOutputs.setCountersEnabled(job, true);
}