SequenceFileOutputFormat.setOutputPath(job, new Path(dictOutputPath));
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(job, jobOutputPath);
return job;
public static void initJoinMRJob(Job job, String prospectsPath, String spoPath,
        Class<? extends Mapper<CompositeType, TripleCard, ?, ?>> mapperClass,
        String outPath, String auths) throws AccumuloSecurityException {
    MultipleInputs.addInputPath(job, new Path(prospectsPath),
            SequenceFileInputFormat.class, mapperClass);
    MultipleInputs.addInputPath(job, new Path(spoPath),
            SequenceFileInputFormat.class, mapperClass);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);

    SequenceFileOutputFormat.setOutputPath(job, new Path(outPath));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(TripleEntry.class);
    job.setOutputValueClass(CardList.class);
}
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length < 3) {
        logger.error("Usage: NLMExtractionJob <input_seqfile> <output_dir> <max_PDF_size>");
        logger.error("  (max_PDF_size -- size in MB; greater files will be ignored)");
        return 1;
    }
    conf.set(MAX_PDF_SIZE, args[2]);

    Job job = new Job(conf);
    job.setJarByClass(NLMExtractionJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ExtractMap.class);
    job.setNumReduceTasks(0); // map-only job

    // Launch job
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
private boolean secondJobExecution(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.clear();
    new GenericOptionsParser(conf, args);

    Job wordPerDocJob = new Job(conf);
    wordPerDocJob.setJobName(NAME + " WordPerDocCount");
    wordPerDocJob.setJarByClass(TfidfJob_Proto.class);

    wordPerDocJob.setMapperClass(WordPerDocCountMapper.class);
    wordPerDocJob.setMapOutputKeyClass(Text.class);
    wordPerDocJob.setMapOutputValueClass(StringListIntListWritable.class);
    wordPerDocJob.setInputFormatClass(SequenceFileInputFormat.class);

    wordPerDocJob.setReducerClass(WordPerDocCountReducer.class);
    wordPerDocJob.setOutputKeyClass(TextArrayWritable.class);
    wordPerDocJob.setOutputValueClass(StringListIntListWritable.class);
    wordPerDocJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Chained via the auxiliary directory: reads job1's output, writes job2's input.
    SequenceFileInputFormat.addInputPath(wordPerDocJob, new Path(AUXIL_PATH + "job1"));
    SequenceFileOutputFormat.setOutputPath(wordPerDocJob, new Path(AUXIL_PATH + "job2"));

    // Launch job, timing it with the current thread's CPU clock (nanoseconds)
    long startTime = ManagementFactory.getThreadMXBean()
            .getThreadCpuTime(Thread.currentThread().getId());
    boolean success = wordPerDocJob.waitForCompletion(true);
    long endTime = ManagementFactory.getThreadMXBean()
            .getThreadCpuTime(Thread.currentThread().getId());
    double duration = (endTime - startTime) / Math.pow(10, 9);
    logger.info("=== Job2 Finished in " + duration + " seconds "
            + (success ? "(success)" : "(failure)"));
    return success;
}
SequenceFileOutputFormat.setOutputPath(job, outpath);
SequenceFileOutputFormat.setCompressOutput(job, false);
job.waitForCompletion(true);

SequenceFileOutputFormat.setOutputPath(job, outpath);
SequenceFileOutputFormat.setCompressOutput(job, false);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(BytesWritable.class);
SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

public static void initTabToSeqFileJob(Job job, String intable, String outpath, String auths)
        throws AccumuloSecurityException {
    Configuration conf = job.getConfiguration();
    String username = conf.get(USERNAME);
    String password = conf.get(PASSWORD);
    String instance = conf.get(INSTANCE);
    String zookeepers = conf.get(ZOOKEEPERS);

    System.out.println("Zookeepers are " + zookeepers);
    if (zookeepers != null) {
        AccumuloInputFormat.setZooKeeperInstance(job, instance, zookeepers);
    } else {
        throw new IllegalArgumentException("Must specify either mock or zookeepers");
    }

    AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password));
    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths));
    AccumuloInputFormat.setInputTableName(job, intable);
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);

    // OUTPUT
    SequenceFileOutputFormat.setOutputPath(job, new Path(outpath));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(CompositeType.class);
    job.setOutputValueClass(TripleCard.class);
}
public static Job createJob(Path[] inputPaths, Path outputPath,
        Map<String, String> metadata, Configuration config) throws IOException {
    final Job job = new Job(config);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);

    SequenceFileInputFormat.setInputPaths(job, inputPaths);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    if (metadata != null) {
        MetadataConfiguration.setMetadata(metadata, job.getConfiguration());
    }
    return job;
}
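For context, a minimal sketch of a driver invoking the createJob factory above; the paths, the metadata entry, and the exit-code handling are illustrative assumptions, not part of the original.

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Illustrative paths; a real driver would take these from args.
    Path[] inputs = { new Path("/data/seq-in") };
    Path output = new Path("/data/seq-out");

    Map<String, String> metadata = new HashMap<>();
    metadata.put("source", "example"); // hypothetical metadata entry

    Job job = createJob(inputs, output, metadata, conf);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}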
partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);

private boolean thirdJobExecution(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.clear();
    new GenericOptionsParser(conf, args);
    conf.set("DOCS_NUM", getDOCS_NUM() + "");

    Job tfidfJob = new Job(conf);
    tfidfJob.setJobName(NAME + " Tfidf");
    tfidfJob.setJarByClass(TfidfJob_Proto.class);

    tfidfJob.setMapperClass(TfidfMapper.class);
    tfidfJob.setMapOutputKeyClass(Text.class);
    tfidfJob.setMapOutputValueClass(StringListIntListWritable.class);
    tfidfJob.setInputFormatClass(SequenceFileInputFormat.class);

    tfidfJob.setReducerClass(TfidfReducer.class);
    tfidfJob.setOutputKeyClass(TextArrayWritable.class);
    tfidfJob.setOutputValueClass(DoubleWritable.class);
    tfidfJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(tfidfJob, new Path(AUXIL_PATH + "job2"));
    // Random suffix keeps repeated runs from colliding on the same output directory
    SequenceFileOutputFormat.setOutputPath(tfidfJob,
            new Path(FINAL_PATH + (int) (Math.random() * Integer.MAX_VALUE)));

    // Launch job, timing it with the current thread's CPU clock (nanoseconds)
    long startTime = ManagementFactory.getThreadMXBean()
            .getThreadCpuTime(Thread.currentThread().getId());
    boolean success = tfidfJob.waitForCompletion(true);
    long endTime = ManagementFactory.getThreadMXBean()
            .getThreadCpuTime(Thread.currentThread().getId());
    double duration = (endTime - startTime) / Math.pow(10, 9);
    logger.info("=== Job3 Finished in " + duration + " seconds "
            + (success ? "(success)" : "(failure)"));
    return success;
}
/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj",
            SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
            TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
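A reducer feeding the named outputs declared above would write through MultipleOutputs rather than the context. A minimal sketch, assuming the surrounding class's imports; the reducer name and the way values arrive are illustrative, only the "schemaobj" named output and its key/value types come from the snippet above.

public static class SchemaReducer
        extends Reducer<NullWritable, SchemaWritable, NullWritable, SchemaWritable> {
    private MultipleOutputs<NullWritable, SchemaWritable> mout;

    @Override
    protected void setup(Context context) {
        mout = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(NullWritable key, Iterable<SchemaWritable> values, Context context)
            throws IOException, InterruptedException {
        for (SchemaWritable schema : values) {
            // Route to the "schemaobj" named output registered in configureSchemaOutput()
            mout.write("schemaobj", NullWritable.get(), schema);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mout.close(); // flush and close all named outputs
    }
}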
SequenceFileOutputFormat.setOutputPath(job, new Path(jobOutputDir));
wordCountJob.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(wordCountJob, new Path(AUXIL_PATH + "job1"));

protected void setupOutput(final Job job, final SampleDataForSplitPoints operation, final Store store)
        throws IOException {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
    if (null != operation.getCompressionCodec()) {
        if (GzipCodec.class.isAssignableFrom(operation.getCompressionCodec())
                && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
            LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
        } else {
            SequenceFileOutputFormat.setCompressOutput(job, true);
            SequenceFileOutputFormat.setOutputCompressorClass(job, operation.getCompressionCodec());
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        }
    }
}
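The guard above exists because GzipCodec has no pure-Java compressor, so SequenceFile compression with it requires the native Hadoop libraries. A standalone sketch of the same probe; the fallback to DefaultCodec is an illustrative policy, not from the original.

// Probe for native zlib before choosing GzipCodec; fall back to DefaultCodec otherwise.
Class<? extends CompressionCodec> codec;
if (NativeCodeLoader.isNativeCodeLoaded() && ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
    codec = GzipCodec.class;
} else {
    codec = DefaultCodec.class; // has a pure-Java zlib implementation
}
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, codec);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);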
@Override
public int run(String[] args) throws Exception {
    final HadoopDownloaderOptions options = new HadoopDownloaderOptions(args);
    options.prepare(true);

    final Job job = new Job(getConf());
    job.setJarByClass(HadoopDownloader.class);
    job.setJobName("Hadoop Downloader Utility");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    if (options.getNumberOfThreads() <= 1) {
        job.setMapperClass(DownloadMapper.class);
    } else {
        job.setMapperClass(MultithreadedMapper.class);
        MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
        MultithreadedMapper.setNumberOfThreads(job, options.getNumberOfThreads());
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(options.getNumberOfReducers());
    job.getConfiguration().setStrings(ARGS_KEY, args);

    FileInputFormat.setInputPaths(job, options.getInputPaths());
    SequenceFileOutputFormat.setOutputPath(job, options.getOutputPath());
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Propagate the job's status as the exit code rather than always returning 0
    return job.waitForCompletion(true) ? 0 : 1;
}
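Since run() follows Hadoop's Tool contract, the usual entry point hands argument parsing to ToolRunner. A minimal sketch; the main method is assumed, not shown in the original.

// Hypothetical entry point; assumes HadoopDownloader implements org.apache.hadoop.util.Tool.
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new HadoopDownloader(), args);
    System.exit(exitCode);
}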
// The opening of this snippet is reconstructed: outPath is assumed to come from a
// helper building a per-iteration output path, since the fragment begins mid-call.
Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(),
        MRReasoningUtils.OUTPUT_BASE + iteration);
SequenceFileOutputFormat.setOutputPath(job, outPath);
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT, ...