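All of the hits below exercise the same small surface of org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat: declare it as the job's output format, point it at an output path, and optionally enable compression with a CompressionType and a codec. As a minimal self-contained sketch of that shared pattern (the class name, paths, and codec choice here are illustrative, not taken from any hit below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class SeqFileWriteExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "seqfile-write-example");
        job.setJarByClass(SeqFileWriteExample.class);

        // Identity, map-only job: copy text records into a SequenceFile.
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // The recurring SequenceFileOutputFormat pattern: output format,
        // output path, then compression (BLOCK amortizes codec overhead
        // over many records) and codec.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}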
boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);

        job.setCombinerClass(HadoopWordCount2Combiner.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
Segments<CubeSegment> mergingSeg = cube.getMergingSegments(segment);

job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
job.getConfiguration().set(BatchConstants.ARG_CUBE_NAME, cubeName);
job.getConfiguration().set(OPTION_META_URL.getOpt(), metaUrl);
job.getConfiguration().set(OPTION_SEGMENT_ID.getOpt(), segmentId);
job.getConfiguration().set(OPTION_MERGE_SEGMENT_IDS.getOpt(), segmentIds);
job.getConfiguration().set(OPTION_OUTPUT_PATH_STAT.getOpt(), statOutputPath);

HadoopUtil.deletePath(job.getConfiguration(), new Path(dictOutputPath));

job.setMapperClass(MergeDictionaryMapper.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.NONE);
SequenceFileOutputFormat.setOutputPath(job, new Path(dictOutputPath));

logger.info("Starting: " + job.getJobName());
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    sequenceFileOutputFormat.checkOutputSpecs(context);
}
Job job = new Job(oldApiJob);
job.setJobName("ABt-job");
job.setJarByClass(ABtDenseOutJob.class);

job.setInputFormatClass(SequenceFileInputFormat.class);
FileOutputFormat.setOutputPath(job, outputPath);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

job.setReducerClass(QRReducer.class);

job.getConfiguration().setInt(QJob.PROP_AROWBLOCK_SIZE, aBlockRows);
job.getConfiguration().setInt(BtJob.PROP_OUTER_PROD_BLOCK_HEIGHT, outerProdBlockHeight);
job.getConfiguration().setInt(QRFirstStep.PROP_K, k);
job.getConfiguration().setInt(QRFirstStep.PROP_P, p);
job.getConfiguration().set(PROP_BT_PATH, inputBtGlob.toString());
job.getConfiguration().set(PROP_XI_PATH, xiPath.toString());
job.getConfiguration().set(PROP_SB_PATH, sbPath.toString());
job.getConfiguration().set(PROP_SQ_PATH, sqPath.toString());
public void run(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath,
        Path outputPath, int k, int numReduceTasks, Class<? extends Writable> labelClass,
        SSVDSolver.OutputScalingEnum outputScaling)
        throws ClassNotFoundException, InterruptedException, IOException {

    job = new Job(conf);
    job.setJobName("U-job");
    job.setJarByClass(UJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, inputPathQ);
    FileOutputFormat.setOutputPath(job, outputPath);

    // WARN: tight hadoop integration here:
    job.getConfiguration().set("mapreduce.output.basename", OUTPUT_U);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapperClass(UMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setOutputKeyClass(labelClass);
    job.setOutputValueClass(VectorWritable.class);

    job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString());
    job.getConfiguration().set(PROP_SIGMA_PATH, sigmaPath.toString());
    job.getConfiguration().set(PROP_OUTPUT_SCALING, outputScaling.name());
    job.getConfiguration().setInt(PROP_K, k);
    job.setNumReduceTasks(0);
    job.submit();
}
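The "tight hadoop integration" warning above refers to mapreduce.output.basename: FileOutputFormat reads that property as the base name for its part files (the default is "part"), so setting it to OUTPUT_U makes this map-only job write files named like OUTPUT_U-m-00000, which downstream steps can then locate by prefix.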
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length < 3) {
        logger.error("Usage: NLMExtractionJob <input_seqfile> <output_dir> <max_PDF_size>");
        logger.error("  (max_PDF_size -- size in MB; greater files will be ignored)");
        return 1;
    }
    conf.set(MAX_PDF_SIZE, args[2]);

    Job job = new Job(conf);
    job.setJarByClass(NLMExtractionJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ExtractMap.class);
    job.setNumReduceTasks(0);

    /*
     * Launch job
     */
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Path outpath = new Path(SequenceFileUtility.convertToURI(tmpFile.getAbsolutePath()).toString());
System.out.println("It is all going to: " + outpath);

Configuration conf = new Configuration();
Job job = new Job(conf, "featureselect");
job.setNumReduceTasks(1);
job.setJarByClass(SequenceFileByteDataSelector.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(BytesWritable.class);

job.getConfiguration().setStrings(FeatureSelect.FILETYPE_KEY, new String[] { fileType });
job.getConfiguration().setStrings(FeatureSelect.NFEATURE_KEY, new String[] { "" + k });

SequenceFileOutputFormat.setOutputPath(job, outpath);
SequenceFileOutputFormat.setCompressOutput(job, false);

job.waitForCompletion(true);
conf.set("mapred.child.java.opts", "-Xmx3072m"); conf.setInt("mapred.task.timeout", 60000000); Job job = new Job(conf); int numReducers = conf.getInt("Cloud9.Reducers", 200); String inputPath = conf.get("Cloud9.InputPath"); if (!fs.exists(new Path(mappingFile))) DistributedCache.addCacheFile(new Path(mappingFile).toUri(), job.getConfiguration()); job.setJobName("ExtractLinks"); job.setNumReduceTasks(numReducers); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); FileOutputFormat.setOutputPath(job, new Path(outputPath));
private boolean secondJobExecution(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    conf.clear();
    new GenericOptionsParser(conf, args);

    Job wordPerDocJob = new Job(conf);
    wordPerDocJob.setJobName(NAME + " WordPerDocCount");
    wordPerDocJob.setJarByClass(TfidfJob_Proto.class);

    wordPerDocJob.setMapperClass(WordPerDocCountMapper.class);
    wordPerDocJob.setMapOutputKeyClass(Text.class);
    wordPerDocJob.setMapOutputValueClass(StringListIntListWritable.class);
    wordPerDocJob.setInputFormatClass(SequenceFileInputFormat.class);

    wordPerDocJob.setReducerClass(WordPerDocCountReducer.class);
    wordPerDocJob.setOutputKeyClass(TextArrayWritable.class);
    wordPerDocJob.setOutputValueClass(StringListIntListWritable.class);
    wordPerDocJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(wordPerDocJob, new Path(AUXIL_PATH + "job1"));
    SequenceFileOutputFormat.setOutputPath(wordPerDocJob, new Path(AUXIL_PATH + "job2"));

    /*
     * Launch job, timing it on this thread's CPU clock.
     */
    long startTime = ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId());
    boolean success = wordPerDocJob.waitForCompletion(true);
    long endTime = ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId());
    double duration = (endTime - startTime) / Math.pow(10, 9);
    logger.info("=== Job2 finished in " + duration + " seconds " + (success ? "(success)" : "(failure)"));
    return success;
}
FileSystem.get(getConf()).delete(new Path(args[1]), true);

Job job1 = Job.getInstance(getConf());
job1.setJobName("sequence collection conversion (phase 1)");
job1.setJarByClass(this.getClass());
job1.getConfiguration().setStrings("de.mpii.tools.itemSeparator", itemSeparator);
FileInputFormat.setInputPaths(job1, DfsUtils.traverse(new Path(input), job1.getConfiguration()));
TextOutputFormat.setOutputPath(job1, new Path(output + "/wc"));
job1.getConfiguration().set("mapreduce.cluster.mapmemory.mb", "4096");
job1.getConfiguration().set("mapreduce.cluster.reducememory.mb", "4096");

// ... (construction of job2, the phase-2 job, elided in this snippet) ...
FileInputFormat.setInputPaths(job2, DfsUtils.traverse(new Path(input), job2.getConfiguration()));
SequenceFileOutputFormat.setOutputPath(job2, new Path(output + "/raw"));
SequenceFileOutputFormat.setCompressOutput(job2, false);
Job[] jobs = new Job[2];

Job job = Job.getInstance(config);
job.setJarByClass(JobFactory.class);
job.setJobName("RDF Triples Characteristic Set (Generation)");
job.setOutputFormatClass(SequenceFileOutputFormat.class);
FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
SequenceFileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

// ... (setup of the second job in the chain elided in this snippet) ...
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(job, intermediateOutputPath);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
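Chaining the two jobs through a BLOCK-compressed, BZip2-coded SequenceFile is a deliberate choice here: SequenceFiles stay splittable at their sync markers regardless of codec, so the second job can still parallelize over the compact intermediate output before emitting plain text.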
@Override
public int run(String[] args) throws Exception {
    final CmdLineParser parser = new CmdLineParser(this);
    try {
        parser.parseArgument(args);
    } catch (final CmdLineException e) {
        System.err.println(e.getMessage());
        System.err.println("Usage: hadoop jar HadoopImageIndexer.jar [options]");
        parser.printUsage(System.err);
        return -1;
    }

    final Path[] paths = SequenceFileUtility.getFilePaths(input, "part");
    final Path outputPath = new Path(output);

    if (outputPath.getFileSystem(this.getConf()).exists(outputPath) && replace)
        outputPath.getFileSystem(this.getConf()).delete(outputPath, true);

    final Job job = TextBytesJobUtil.createJob(paths, outputPath, null, this.getConf());
    job.setJarByClass(this.getClass());
    job.setMapperClass(PcaVladMapper.class);
    job.setNumReduceTasks(0);

    DistributedCache.addFileToClassPath(new Path(indexerData), job.getConfiguration());
    job.getConfiguration().set(VLAD_INDEXER_DATA_PATH_KEY, new Path(indexerData).getName());

    SequenceFileOutputFormat.setCompressOutput(job, !dontcompress);

    job.waitForCompletion(true);
    return 0;
}
@SuppressWarnings("unchecked") @Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Path inputPath = new Path(args[0]); Path partitionFile = new Path(args[1] + "_partitions.lst"); Path outputStage = new Path(args[1] + "_staging"); Path outputOrder = new Path(args[1]); Job sampleJob = new Job(conf, "TotalOrderSortingStage"); sampleJob.setJarByClass(TotalOrderSortingStage.class); sampleJob.setMapperClass(LastAccessMapper.class); sampleJob.setNumReduceTasks(0); sampleJob.setOutputKeyClass(Text.class); sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(sampleJob, outputStage); int code = sampleJob.waitForCompletion(true) ? 0 : 1; orderJob.getConfiguration().set( "mapred.textoutputformat.separator", ""); FileSystem.get(new Configuration()).delete(partitionFile, false); FileSystem.get(new Configuration()).delete(outputStage, true); return code;
FileSystem.get(getConf()).delete(new Path(args[1]), true);

Job job1 = new Job(getConf());
job1.setJobName("document collection conversion (phase 1)");
job1.getConfiguration().setInt("org.apache.mahout.fsm.maxdocs", maxdocs);
job1.setJarByClass(this.getClass());
FileInputFormat.setInputPaths(job1, DfsUtils.traverse(new Path(input), job1.getConfiguration()));
TextOutputFormat.setOutputPath(job1, new Path(output + "/wc"));

// ... (creation of job2 elided in this snippet) ...
job2.setJobName("document collection conversion (phase 2)");
job2.getConfiguration().setInt("org.apache.mahout.fsm.maxdocs", maxdocs);
job2.setJarByClass(this.getClass());
FileInputFormat.setInputPaths(job2, DfsUtils.traverse(new Path(input), job2.getConfiguration()));
SequenceFileOutputFormat.setOutputPath(job2, new Path(output + "/raw"));
SequenceFileOutputFormat.setCompressOutput(job2, false);
private int runPartitionerJob() throws Exception {
    Job partitionerJob = new Job(getConf(), "Partition Wikipedia");
    Configuration partitionerConf = partitionerJob.getConfiguration();
    partitionerConf.set("mapred.map.tasks.speculative.execution", "false");

    SortedSet<String> languages = new TreeSet<String>();
    FileSystem fs = FileSystem.get(partitionerConf);
    Path parent = new Path(partitionerConf.get("wikipedia.input"));
    listFiles(parent, fs, inputPaths, languages);

    partitionerJob.setMapperClass(WikipediaPartitioner.class);
    partitionerJob.setNumReduceTasks(0);

    partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
    SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
    SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);

    return partitionerJob.waitForCompletion(true) ? 0 : 1;
}
Job job = Job.getInstance(getConf());
job.setJarByClass(RepackWikipedia.class);
job.setJobName(String.format("RepackWikipedia[%s: %s, %s: %s, %s: %s, %s: %s]",
        INPUT_OPTION, inputPath, OUTPUT_OPTION, outputPath,
        COMPRESSION_TYPE_OPTION, compressionType, LANGUAGE_OPTION, language));

job.getConfiguration().set(DOCNO_MAPPING_FIELD, mappingFile);
job.setNumReduceTasks(0);

FileInputFormat.addInputPath(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, new Path(outputPath));

// Configure output compression according to the compressionType option.
if ("none".equals(compressionType)) {
    FileOutputFormat.setCompressOutput(job, false);
} else {
    FileOutputFormat.setCompressOutput(job, true);
    if ("record".equals(compressionType)) {
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.RECORD);
    } else {
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        job.getConfiguration().setInt("io.seqfile.compress.blocksize", blocksize);
    }
}

job.getConfiguration().set("wiki.language", language);

// Clear any stale output before running.
FileSystem.get(getConf()).delete(new Path(outputPath), true);

return job.waitForCompletion(true) ? 0 : -1;
Job job = Job.getInstance(getConf());
job.setJarByClass(RepackWikipedia.class);
job.setJobName(String.format("RepackWikipedia[%s: %s, %s: %s, %s: %s, %s: %s]",
        INPUT_OPTION, inputPath, OUTPUT_OPTION, outputPath,
        COMPRESSION_TYPE_OPTION, compressionType, LANGUAGE_OPTION, language));

job.getConfiguration().set(DOCNO_MAPPING_FIELD, mappingFile);
job.setNumReduceTasks(0);

SequenceFileInputFormat.addInputPath(job, new Path(inputPath));
SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));

// Configure output compression according to the compressionType option.
if ("none".equals(compressionType)) {
    SequenceFileOutputFormat.setCompressOutput(job, false);
} else {
    SequenceFileOutputFormat.setCompressOutput(job, true);
    if ("record".equals(compressionType)) {
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.RECORD);
    } else {
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        job.getConfiguration().setInt("io.seqfile.compress.blocksize", blocksize);
    }
}

job.getConfiguration().set("wiki.language", language);

FileSystem.get(getConf()).delete(new Path(outputPath), true);
job.waitForCompletion(true);
@SuppressWarnings({"unchecked", "rawtypes"}) @Override public int runTool() throws Exception { Configuration conf = getConf(); Job job = new Job(conf); String inputPath = conf.get("Cloud9.InputPath"); String inputFormat = conf.get("Cloud9.InputFormat"); String outputPath = conf.get("Cloud9.OutputPath"); String tag = conf.get("Cloud9.TargetTag"); job.setJobName("ExtractFieldCollection"); job.setJarByClass(ExtractHTMLFieldCollection.class); job.setMapperClass(MyMapper.class); job.setReducerClass(Reducer.class); job.setNumReduceTasks(200); job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(inputFormat)); recursivelyAddInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(TextDocument.class); LOG.info("ExtractFieldCollection - " + tag); LOG.info(" - Input path: " + inputPath); LOG.info(" - Input format: " + inputFormat); LOG.info(" - Output path: " + outputPath); LOG.info(" - Target tag: " + tag); job.waitForCompletion(true); return 0; }
public static void runJob(String input, String output) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf);
    job.setJarByClass(SequenceFileStockMapReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StockPriceWritable.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);

    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);
    outPath.getFileSystem(conf).delete(outPath, true);

    job.waitForCompletion(true);
}
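Note the order of the last few calls: the output directory is deleted just before submission because FileOutputFormat refuses to start a job whose output directory already exists (its checkOutputSpecs throws a FileAlreadyExistsException). Several other snippets on this page use the same FileSystem.delete idiom for the same reason.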
private boolean firstJobExecution(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    conf.clear();
    new GenericOptionsParser(conf, args);
    conf.set(TableInputFormat.INPUT_TABLE, INPUT_TABLE);

    Job wordCountJob = new Job(conf);
    wordCountJob.setJobName(NAME + " WordCount");
    wordCountJob.setJarByClass(TfidfJob_Proto.class);

    wordCountJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(wordCountJob, new Path(AUXIL_PATH + "job1"));