public static void setHadoopConfForCuboid(Job job, CubeSegment segment, String metaUrl) throws Exception {
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJobName("Convert Text");
    job.setJarByClass(Mapper.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    // increase if you need sorting or a special number of files
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setInputFormatClass(TextInputFormat.class);

    TextInputFormat.addInputPath(job, new Path("/lol"));
    SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));

    // submit and wait for completion
    job.waitForCompletion(true);
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);
    conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);
    String[] fields = new String[args.length - 2];
    System.arraycopy(args, 2, fields, 0, fields.length);
    conf.setStrings("index.fields", fields);
    Job job = new Job(conf, tableName);
    job.setJarByClass(IndexBuilder.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}
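For context, a minimal driver that invokes configureJob could look like the sketch below. The argument layout (table name, column family, then the indexed fields) follows the method above, but the main itself is an assumption and not part of the original snippet.

// Hypothetical driver for configureJob (illustrative, not the original IndexBuilder main).
// Assumes args = { tableName, columnFamily, field1, field2, ... }.
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = configureJob(conf, args);
    // Map-only job: MultiTableOutputFormat writes the index rows directly.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}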
@Override
public Job createSubmittableJob(String[] args) throws IOException {
    Job job = super.createSubmittableJob(args);
    // Call my class instead.
    job.setJarByClass(WALMapperSearcher.class);
    job.setMapperClass(WALMapperSearcher.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    return job;
  }
}
private int doVerify(Path outputDir, int numReducers)
        throws IOException, InterruptedException, ClassNotFoundException {
    job = new Job(getConf());
    job.setJobName("Link Verifier");
    job.setNumReduceTasks(numReducers);
    job.setJarByClass(getClass());
    setJobScannerConf(job);

    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_PREV);
    scan.setCaching(10000);
    scan.setCacheBlocks(false);
    String[] split = labels.split(COMMA);
    scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
        split[(this.labelIndex * 2) + 1]));

    TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
        BytesWritable.class, BytesWritable.class, job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

    job.setReducerClass(VerifyReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
protected HCatSchema getTableSchema() throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce read schema test");
    job.setJarByClass(this.getClass());

    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    HCatInputFormat.setInput(job, dbName, tableName);
    return HCatInputFormat.getTableSchema(job.getConfiguration());
}
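As a hedged follow-up, the returned schema could be inspected field by field; this loop is illustrative only and assumes the standard HCatSchema/HCatFieldSchema accessors.

// Illustrative use of the returned schema (assumes standard HCatSchema accessors).
HCatSchema schema = getTableSchema();
for (HCatFieldSchema field : schema.getFields()) {
    System.out.println(field.getName() + " : " + field.getTypeString());
}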
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // Note: this second call overrides the TextOutputFormat set on the line above.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
private void setupMapper(CubeSegment cubeSeg) throws IOException {
    // set the segment's offset info to job conf
    Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
    Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();

    Integer minPartition = Collections.min(offsetStart.keySet());
    Integer maxPartition = Collections.max(offsetStart.keySet());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());

    for (Integer partition : offsetStart.keySet()) {
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, offsetStart.get(partition).toString());
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, offsetEnd.get(partition).toString());
    }

    job.setMapperClass(KafkaFlatTableMapper.class);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(0);
}
Job job = new Job(conf);
job.setJobName(jobname);
job.setJarByClass(ExportSnapshot.class);
TableMapReduceUtil.addDependencyJars(job);
job.setMapperClass(ExportMapper.class);
job.setInputFormatClass(ExportSnapshotInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapSpeculativeExecution(false);
job.setNumReduceTasks(0);

if (!job.waitForCompletion(true)) {
    throw new ExportSnapshotException(job.getStatus().getFailureInfo());
/**
 * Adds an OutputFormat configuration to the Job under an alias name.
 *
 * @param alias the name to be given to the OutputFormat configuration
 * @param outputFormatClass OutputFormat class
 * @param keyClass the key class for the output data
 * @param valueClass the value class for the output data
 * @throws IOException
 */
public void addOutputFormat(String alias, Class<? extends OutputFormat> outputFormatClass,
        Class<?> keyClass, Class<?> valueClass) throws IOException {
    Job copy = new Job(this.job.getConfiguration());
    outputConfigs.put(alias, copy);
    copy.setOutputFormatClass(outputFormatClass);
    copy.setOutputKeyClass(keyClass);
    copy.setOutputValueClass(valueClass);
}
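A hedged usage sketch: assuming this method lives on a multi-output job wrapper (here called multiJob, an illustrative name), two output configurations could be registered under different aliases. The alias names and key/value classes below are assumptions, not part of the original API.

// Illustrative only: "textOut"/"seqOut" aliases and the multiJob instance are assumptions.
multiJob.addOutputFormat("textOut", TextOutputFormat.class, Text.class, IntWritable.class);
multiJob.addOutputFormat("seqOut", SequenceFileOutputFormat.class, Text.class, BytesWritable.class);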
    Scan scan = new Scan(startRow, endRow); // limit the scan

    job.setJarByClass(util.getClass());
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
        TestTableSnapshotInputFormat.class);

    job.setOutputFormatClass(NullOutputFormat.class);

    Assert.assertTrue(job.waitForCompletion(true));
  } finally {
    if (!shutdownCluster) {
job.setMapperClass(Mapper.class);
job.setJarByClass(Mapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.waitForCompletion(true);
    job.setJarByClass(CompactionTool.class);
    job.setMapperClass(CompactionMapper.class);
    job.setInputFormatClass(CompactionInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setMapSpeculativeExecution(false);
    job.setNumReduceTasks(0);

    return job.waitForCompletion(true) ? 0 : 1;
  } finally {
    fs.delete(stagingDir, true);
protected void configureReducer(Job job) throws IOException {
    job.setOutputFormatClass(AvroKeyCompactorOutputFormat.class);
    job.setReducerClass(AvroKeyDedupReducer.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);
    setNumberOfReducers(job);
}
private Job configureSubmittableJob(Job job, Path outputPath) throws Exception {
    Configuration conf = job.getConfiguration();
    conf.setBoolean("mapreduce.job.user.classpath.first", true);
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setJarByClass(IndexScrutinyTool.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (outputInvalidRows && OutputFormat.FILE.equals(outputFormat)) {
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, outputPath);
    }
    job.setMapperClass(IndexScrutinyMapper.class);
    job.setNumReduceTasks(0);
    // Set the Output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    TableMapReduceUtil.addDependencyJars(job);
    return job;
}
job.setJarByClass(mapperClass);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);
job.setMapperClass(mapperClass);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(inputFormat);

boolean success = job.waitForCompletion(true);
job.setJarByClass(SegmentCreationJob.class);
job.setJobName(_jobName);

job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

job.waitForCompletion(true);
if (!job.isSuccessful()) {
    throw new RuntimeException("Job failed : " + job);
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
    int hllShardBase = MapReduceUtil.getCuboidHLLCounterReducerNum(cubeSeg.getCubeInstance());

    job.setReducerClass(CalculateStatsFromBaseCuboidReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(hllShardBase);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
    deletePath(job.getConfiguration(), output);
  }
}
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    String reportSeparatorString = (args.length > 2) ? args[2] : ":";
    conf.set("ReportSeparator", reportSeparatorString);
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(CellCounter.class);
    Scan scan = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class,
        ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setReducerClass(IntSumReducer.class);
    return job;
}
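As a hedged illustration, a driver around createSubmittableJob might parse the command line, build the job, and wait for completion. The usage message and exit codes below are assumptions for this sketch, not the original CellCounter main.

// Hypothetical driver (illustrative, not the original CellCounter main).
// Expects args = { tableName, outputDir [, reportSeparator] }.
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    if (args.length < 2) {
        System.err.println("Usage: CellCounter <tablename> <outputDir> [reportSeparator]");
        System.exit(-1);
    }
    Job job = createSubmittableJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}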