// Write job output as a SequenceFile under dictOutputPath.
job.setOutputFormatClass(SequenceFileOutputFormat.class);
// CompressionType.NONE: records are stored uncompressed.
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.NONE);
SequenceFileOutputFormat.setOutputPath(job, new Path(dictOutputPath));
// Write job output as a SequenceFile with block compression
// (batches of records compressed together).
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
public static void runJob(String input, String output) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf); job.setJarByClass(SequenceFileStockMapReduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(StockPriceWritable.class); job.setInputFormatClass( SequenceFileInputFormat.class); //<co id="ch03_comment_seqfile_mr1"/> job.setOutputFormatClass(SequenceFileOutputFormat.class); //<co id="ch03_comment_seqfile_mr2"/> SequenceFileOutputFormat.setCompressOutput(job, true); //<co id="ch03_comment_seqfile_mr3"/> SequenceFileOutputFormat.setOutputCompressionType(job, //<co id="ch03_comment_seqfile_mr4"/> SequenceFile.CompressionType.BLOCK); SequenceFileOutputFormat.setOutputCompressorClass(job, //<co id="ch03_comment_seqfile_mr5"/> DefaultCodec.class); FileInputFormat.setInputPaths(job, new Path(input)); Path outPath = new Path(output); FileOutputFormat.setOutputPath(job, outPath); outPath.getFileSystem(conf).delete(outPath, true); job.waitForCompletion(true); } }
/**
 * Configures the job's output location and, when compression is requested,
 * enables SequenceFile output compression.
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    // Key/value classes come from instance fields configured elsewhere.
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    if (compressionType != null && compressionCodecClass != null) {
        // NOTE(review): although compressionCodecClass is null-checked above,
        // the codec actually used is read back from the job configuration
        // (falling back to DefaultCodec) — presumably the field was pushed
        // into the job conf before this point; confirm, otherwise the
        // null-check is misleading.
        Class<? extends CompressionCodec> codecClass =
            FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
        SequenceFileOutputFormat.setOutputCompressorClass(job, codecClass);
        SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.valueOf(compressionType));
    }
    FileOutputFormat.setOutputPath(job, new Path(location));
}
/**
 * Configures the job to emit {@code NullWritable -> DefaultTuple} output at
 * the given location, with optional SequenceFile compression.
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(DefaultTuple.class);
    if (compressionType != null && compressionCodecClass != null) {
        // NOTE(review): the codec is read back from the job configuration
        // (default DefaultCodec) rather than from the compressionCodecClass
        // field checked above — presumably set into the conf earlier; verify.
        Class<? extends CompressionCodec> codecClass =
            FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
        SequenceFileOutputFormat.setOutputCompressorClass(job, codecClass);
        SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.valueOf(compressionType));
    }
    FileOutputFormat.setOutputPath(job, new Path(location));
}
/**
 * Runs the HTML-field extraction job described by the {@code Cloud9.*}
 * configuration keys, writing a block-compressed SequenceFile of
 * {@code LongWritable -> TextDocument}.
 *
 * @return 0 on job success, 1 on job failure
 * @throws Exception if the input format class cannot be loaded or the job
 *                   cannot be run
 */
@SuppressWarnings({"unchecked", "rawtypes"})
@Override
public int runTool() throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);

    String inputPath = conf.get("Cloud9.InputPath");
    String inputFormat = conf.get("Cloud9.InputFormat");
    String outputPath = conf.get("Cloud9.OutputPath");
    String tag = conf.get("Cloud9.TargetTag");

    job.setJobName("ExtractFieldCollection");
    job.setJarByClass(ExtractHTMLFieldCollection.class);
    job.setMapperClass(MyMapper.class);
    // Base Reducer class acts as an identity reducer.
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(200);

    // Input format name is supplied at runtime via configuration.
    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(inputFormat));
    recursivelyAddInputPaths(job, inputPath);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(TextDocument.class);

    LOG.info("ExtractFieldCollection - " + tag);
    LOG.info(" - Input path: " + inputPath);
    LOG.info(" - Input format: " + inputFormat);
    LOG.info(" - Output path: " + outputPath);
    LOG.info(" - Target tag: " + tag);

    // BUG FIX: the success flag was previously ignored and 0 was always
    // returned; propagate job failure to the ToolRunner caller.
    return job.waitForCompletion(true) ? 0 : 1;
}
public void run(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath, int k, int numReduceTasks, Class<? extends Writable> labelClass, SSVDSolver.OutputScalingEnum outputScaling) throws ClassNotFoundException, InterruptedException, IOException { job = new Job(conf); job.setJobName("U-job"); job.setJarByClass(UJob.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(job, inputPathQ); FileOutputFormat.setOutputPath(job, outputPath); // WARN: tight hadoop integration here: job.getConfiguration().set("mapreduce.output.basename", OUTPUT_U); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setMapperClass(UMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setOutputKeyClass(labelClass); job.setOutputValueClass(VectorWritable.class); job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString()); job.getConfiguration().set(PROP_SIGMA_PATH, sigmaPath.toString()); job.getConfiguration().set(PROP_OUTPUT_SCALING, outputScaling.name()); job.getConfiguration().setInt(PROP_K, k); job.setNumReduceTasks(0); job.submit(); }
public void run(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath, int k, int numReduceTasks, Class<? extends Writable> labelClass, SSVDSolver.OutputScalingEnum outputScaling) throws ClassNotFoundException, InterruptedException, IOException { job = new Job(conf); job.setJobName("U-job"); job.setJarByClass(UJob.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(job, inputPathQ); FileOutputFormat.setOutputPath(job, outputPath); // WARN: tight hadoop integration here: job.getConfiguration().set("mapreduce.output.basename", OUTPUT_U); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setMapperClass(UMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setOutputKeyClass(labelClass); job.setOutputValueClass(VectorWritable.class); job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString()); job.getConfiguration().set(PROP_SIGMA_PATH, sigmaPath.toString()); job.getConfiguration().set(PROP_OUTPUT_SCALING, outputScaling.name()); job.getConfiguration().setInt(PROP_K, k); job.setNumReduceTasks(0); job.submit(); }
// Route job output to outputPath and block-compress the SequenceFile records.
FileOutputFormat.setOutputPath(job, outputPath);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
public void run(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath, int k, int numReduceTasks, Class<? extends Writable> labelClass, SSVDSolver.OutputScalingEnum outputScaling) throws ClassNotFoundException, InterruptedException, IOException { job = new Job(conf); job.setJobName("U-job"); job.setJarByClass(UJob.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(job, inputPathQ); FileOutputFormat.setOutputPath(job, outputPath); // WARN: tight hadoop integration here: job.getConfiguration().set("mapreduce.output.basename", OUTPUT_U); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setMapperClass(UMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setOutputKeyClass(labelClass); job.setOutputValueClass(VectorWritable.class); job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString()); job.getConfiguration().set(PROP_SIGMA_PATH, sigmaPath.toString()); job.getConfiguration().set(PROP_OUTPUT_SCALING, outputScaling.name()); job.getConfiguration().setInt(PROP_K, k); job.setNumReduceTasks(0); job.submit(); }
// Send output to outputPath; SequenceFile records are block-compressed.
FileOutputFormat.setOutputPath(job, outputPath);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
// Output location plus block-mode SequenceFile compression.
FileOutputFormat.setOutputPath(job, outputPath);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
/**
 * Builds (but does not submit) a job that copies SequenceFile input to a
 * metadata-carrying, block-compressed SequenceFile of
 * {@code Text -> BytesWritable}.
 *
 * @param inputPaths sequence files to read
 * @param outputPath destination directory for the output sequence file
 * @param metadata   optional metadata to attach to the output (may be null)
 * @param config     base Hadoop configuration for the job
 * @return the fully configured job
 * @throws IOException if the job cannot be created
 */
public static Job createJob(Path[] inputPaths, Path outputPath,
    Map<String, String> metadata, Configuration config) throws IOException {
  final Job job = new Job(config);

  // Input side: plain sequence files.
  job.setInputFormatClass(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, inputPaths);

  // Output side: metadata-aware sequence file, block-compressed with the
  // default codec.
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

  if (metadata != null) {
    MetadataConfiguration.setMetadata(metadata, job.getConfiguration());
  }
  return job;
}
}
/**
 * Creates a ready-to-run job that reads the given sequence files and writes
 * a block-compressed {@code Text -> BytesWritable} sequence file, attaching
 * the supplied metadata when present. The job is returned unsubmitted.
 *
 * @param inputPaths input sequence files
 * @param outputPath output directory
 * @param metadata   key/value metadata for the output, or null for none
 * @param config     Hadoop configuration to base the job on
 * @return the configured (unsubmitted) job
 * @throws IOException on job-creation failure
 */
public static Job createJob(Path[] inputPaths, Path outputPath,
    Map<String, String> metadata, Configuration config) throws IOException {
  final Job job = new Job(config);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, inputPaths);

  job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(BytesWritable.class);

  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  // Compress with the default codec in BLOCK mode.
  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

  if (metadata != null) {
    MetadataConfiguration.setMetadata(metadata, job.getConfiguration());
  }
  return job;
}
}
// Partitioner output: compressed SequenceFile using RECORD mode
// (each record compressed individually, unlike BLOCK which batches records).
SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
// Use BLOCK compression: groups of records are compressed together,
// typically giving a better ratio than per-record compression.
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
// Enable output compression with bzip2, in BLOCK mode.
SequenceFileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
// Compress the SequenceFile output with the bzip2 codec, batching records
// in blocks.
SequenceFileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
/**
 * Points the job at a SequenceFile output directory and, when the operation
 * supplies a codec, enables block compression with it. Gzip is special-cased:
 * without native hadoop/zlib support SequenceFiles cannot use it, so a
 * warning is logged and the output is left uncompressed.
 */
protected void setupOutput(final Job job, final SampleDataForSplitPoints operation, final Store store) throws IOException {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));

    if (null == operation.getCompressionCodec()) {
        // No codec requested — leave output uncompressed.
        return;
    }

    final boolean gzipWithoutNativeSupport =
            GzipCodec.class.isAssignableFrom(operation.getCompressionCodec())
                    && !NativeCodeLoader.isNativeCodeLoaded()
                    && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration());
    if (gzipWithoutNativeSupport) {
        LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
    } else {
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressorClass(job, operation.getCompressionCodec());
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    }
}
}
/**
 * Runs the downloader job: reads input from text files, fetches each entry
 * with {@code DownloadMapper} (optionally multithreaded), and writes
 * {@code Text -> BytesWritable} results to a block-compressed SequenceFile.
 *
 * @param args command-line arguments, parsed by HadoopDownloaderOptions
 * @return 0 if the job succeeds, 1 if it fails
 * @throws Exception if option parsing or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final HadoopDownloaderOptions options = new HadoopDownloaderOptions(args);
    options.prepare(true);

    final Job job = new Job(getConf());
    job.setJarByClass(HadoopDownloader.class);
    job.setJobName("Hadoop Downloader Utility");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    if (options.getNumberOfThreads() <= 1) {
        job.setMapperClass(DownloadMapper.class);
    } else {
        // Wrap the mapper so multiple downloads run concurrently per task.
        job.setMapperClass(MultithreadedMapper.class);
        MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
        MultithreadedMapper.setNumberOfThreads(job, options.getNumberOfThreads());
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(options.getNumberOfReducers());
    // Stash the raw args in the configuration — presumably re-parsed on the
    // task side; confirm against DownloadMapper.
    job.getConfiguration().setStrings(ARGS_KEY, args);

    FileInputFormat.setInputPaths(job, options.getInputPaths());
    SequenceFileOutputFormat.setOutputPath(job, options.getOutputPath());
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // BUG FIX: previously the success flag was discarded and 0 was returned
    // unconditionally; report failure through the Tool exit code.
    return job.waitForCompletion(true) ? 0 : 1;
}