Refine search
public RecordWriter<Text, NutchParse> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress) throws IOException { final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job); Path out = FileOutputFormat.getOutputPath(job);
/**
 * Creates a Hive record writer for a file sink, configuring output compression
 * from the sink descriptor before delegating to {@code getRecordWriter}.
 *
 * <p>When compression is requested, the original {@link JobConf} is cloned and
 * all compression settings are applied to the clone, so the caller's conf is
 * never mutated.
 *
 * @param jc job configuration
 * @param tableInfo table descriptor used to resolve the Hive output format
 * @param outputClass value class written by the sink
 * @param conf file sink descriptor carrying the compression settings
 * @param outPath destination path for the writer
 * @param reporter progress reporter
 * @return a record writer bound to {@code outPath}
 * @throws HiveException wrapping any failure during writer creation
 */
public static RecordWriter getHiveRecordWriter(JobConf jc, TableDesc tableInfo,
    Class<? extends Writable> outputClass, FileSinkDesc conf, Path outPath,
    Reporter reporter) throws HiveException {
  HiveOutputFormat<?, ?> hiveOutputFormat = getHiveOutputFormat(jc, tableInfo);
  try {
    boolean isCompressed = conf.getCompressed();
    JobConf jc_output = jc;
    if (isCompressed) {
      jc_output = new JobConf(jc);
      String codecStr = conf.getCompressCodec();
      if (codecStr != null && !codecStr.trim().equals("")) {
        Class<? extends CompressionCodec> codec =
            (Class<? extends CompressionCodec>) JavaUtils.loadClass(codecStr);
        FileOutputFormat.setOutputCompressorClass(jc_output, codec);
      }
      String type = conf.getCompressType();
      if (type != null && !type.trim().equals("")) {
        CompressionType style = CompressionType.valueOf(type);
        // BUG FIX: set the compression type on the cloned jc_output, not on jc.
        // The old code mutated the caller's JobConf and left the clone (which is
        // the conf actually passed to getRecordWriter) without the requested type.
        SequenceFileOutputFormat.setOutputCompressionType(jc_output, style);
      }
    }
    return getRecordWriter(jc_output, hiveOutputFormat, outputClass, isCompressed,
        tableInfo.getProperties(), outPath, reporter);
  } catch (Exception e) {
    // Preserve the original cause when wrapping into the Hive-level exception.
    throw new HiveException(e);
  }
}
public int runTool() throws Exception { JobConf conf = new JobConf(getConf(), ComputeWeight.class); FileSystem fs = FileSystem.get(conf); int numMappers = conf.getInt("Cloud9.Mappers", 1); int numReducers = conf.getInt("Cloud9.Reducers", 200); String inputPath = conf.get("Cloud9.InputPath"); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(conf, true); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); FileOutputFormat.setOutputPath(conf, new Path(outputPath));
JobConf job = new JobConf(defaults, DataJoinJob.class); job.setJobName("DataJoinJob: " + jobName); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormat(inputFormat); job.setMapperClass(mapper); FileOutputFormat.setOutputPath(job, new Path(outputDir)); job.setOutputFormat(outputFormat); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); job.setMapOutputKeyClass(Text.class);
JobConf conf = new JobConf(RepackClueWarcRecords.class); conf.setJobName("RepackClueWarcRecords:segment" + segment); conf.set("DocnoMappingDataFile", data); SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath)); SequenceFileOutputFormat.setCompressOutput(conf, false); } else { SequenceFileOutputFormat.setCompressOutput(conf, true); .setOutputCompressionType(conf, SequenceFile.CompressionType.RECORD); } else { SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); conf.setInt("io.seqfile.compress.blocksize", blocksize);
+ " input files."); FileOutputFormat.setOutputPath(conf, new Path(this.outputPath)); conf.setJobName(this.inputPath + "_" + System.currentTimeMillis()); conf.setInputFormat(SequenceFileInputFormat.class); conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(MDXReduplicatingReducer.class); conf.setOutputValueClass(Text.class); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK); LOG.info("Used " + numReducers + " reducers.");
boolean includeCombine ) throws Exception { JobConf conf = new JobConf(TestMapRed.class); Path testdir = new Path("build/test/test.mapred.compress"); Path inDir = new Path(testdir, "in"); fs.delete(testdir, true); FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); conf.setMapperClass(MyMap.class); conf.setReducerClass(MyReduce.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setCompressMapOutput(compressMapOutputs); SequenceFileOutputFormat.setOutputCompressionType(conf, redCompression); try { if (!fs.mkdirs(testdir)) {
String outputPath = args[1]; JobConf job = new JobConf(conf); SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath)); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.getOutputCompressorClass(job, GzipCodec.class); } else { System.err.println("No output path found."); job.setJarByClass(SegmentCombiner.class); job.setInputFormat(SequenceFileInputFormat.class);
JobConf job = new JobConf(conf); SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath)); SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); SequenceFileOutputFormat.setCompressOutput(job, true); } else { System.err.println("No output path found."); job.setJarByClass(WikiReverse.class); job.setInputFormat(WarcFileInputFormat.class);
/**
 * Records the {@link CompressionType} to use for the job's output
 * {@link SequenceFile}, turning on output compression as a side effect.
 *
 * @param conf  the {@link JobConf} to modify
 * @param style the {@link CompressionType} for the output {@link SequenceFile}
 */
public static void setOutputCompressionType(JobConf conf, CompressionType style) {
  // Compression type is meaningless unless output compression is enabled.
  setCompressOutput(conf, true);
  String key =
      org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_TYPE;
  conf.set(key, style.toString());
}
throws IOException { Path file = FileOutputFormat.getTaskOutputPath(job, name); if (getCompressOutput(job)) { compressionType = getOutputCompressionType(job); Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, job); job.getOutputKeyClass(), job.getOutputValueClass(), compressionType, codec,
/**
 * Builds the bulk-load record writer: installs the HBase serializers and the
 * key/value classes on the job, resolves the current output revision, and
 * wraps the base format's writer with it.
 */
@Override
public RecordWriter<WritableComparable<?>, Object> getRecordWriter(
    FileSystem ignored, JobConf job, String name, Progressable progress)
    throws IOException {
  HBaseHCatStorageHandler.setHBaseSerializers(job);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  long revision = HBaseRevisionManagerUtil.getOutputRevision(job);
  RecordWriter<WritableComparable<?>, Object> baseWriter =
      baseOutputFormat.getRecordWriter(ignored, job, name, progress);
  return new HBaseBulkRecordWriter(baseWriter, revision);
}
throws IOException { Path file = FileOutputFormat.getTaskOutputPath(job, name); if (getCompressOutput(job)) { compressionType = SequenceFileOutputFormat.getOutputCompressionType(job); job.getOutputKeyClass().asSubclass(WritableComparable.class), job.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, progress);
/**
 * Lazily creates the shared {@link SequenceFileOutputFormat} delegate and
 * forwards record-writer creation to it.
 */
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs, JobConf job,
    String name, Progressable arg3) throws IOException {
  SequenceFileOutputFormat<K, V> delegate = theSequenceFileOutputFormat;
  if (delegate == null) {
    delegate = new SequenceFileOutputFormat<K, V>();
    theSequenceFileOutputFormat = delegate;
  }
  return delegate.getRecordWriter(fs, job, name, arg3);
}
}
/**
 * Creates a {@link SequenceFile.Writer} for the given path, using the
 * caller-supplied compression flag (rather than reading it from the job
 * configuration). When compression is enabled, the compression type and codec
 * are resolved from the job configuration.
 *
 * @param jc job configuration supplying compression type and codec settings
 * @param fs file system to create the file in
 * @param file path to be created
 * @param keyClass Java class for keys
 * @param valClass Java class for values
 * @param isCompressed whether the output should be compressed
 * @param progressable progress callback passed to the writer
 * @return output stream over the created sequencefile
 * @throws IOException if the file cannot be created
 */
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
    Class<?> keyClass, Class<?> valClass, boolean isCompressed, Progressable progressable)
    throws IOException {
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (isCompressed) {
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(jc);
    // Parameterized Class (instead of the old raw Class) removes the
    // unchecked cast and scopes the variable to the only branch using it.
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
    codec = (CompressionCodec) ReflectionUtil.newInstance(codecClass, jc);
  }
  return SequenceFile.createWriter(fs, jc, file, keyClass, valClass, compressionType,
      codec, progressable);
}
/**
 * Validates the output specification via the wrapped base output format, then
 * calls the helpers that add the HBase and JobTracker delegation tokens to the
 * job. Call order matters: the spec check runs before any token is attached.
 */
@Override public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { baseOutputFormat.checkOutputSpecs(ignored, job); HBaseUtil.addHBaseDelegationToken(job); addJTDelegationToken(job); }
/**
 * Constructs the bulk output format, delegating actual record writing to a
 * {@link SequenceFileOutputFormat} over comparable keys and arbitrary values.
 */
public HBaseBulkOutputFormat() { baseOutputFormat = new SequenceFileOutputFormat<WritableComparable<?>, Object>(); }
public int runTool() throws Exception { JobConf conf = new JobConf(getConf(), CollectHostnames.class); FileSystem fs = FileSystem.get(conf); int numMappers = conf.getInt("Cloud9.Mappers", 1); int numReducers = conf.getInt("Cloud9.Reducers", 200); String inputPath = conf.get("Cloud9.InputPath"); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(conf, true); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); FileOutputFormat.setOutputPath(conf, new Path(outputPath));
JobConf job = new JobConf(defaults, DataJoinJob.class); job.setJobName("DataJoinJob: " + jobName); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormat(inputFormat); job.setMapperClass(mapper); FileOutputFormat.setOutputPath(job, new Path(outputDir)); job.setOutputFormat(outputFormat); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); job.setMapOutputKeyClass(Text.class);
JobConf conf = new JobConf(getConf()); FileSystem fs = FileSystem.get(conf); int numReducers = Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_NUMBER_OF_REDUCERS)); if(DriverUtil.argExists(args, DriverUtil.CL_MAX_LENGTH)) { conf.setInt("Cloud9.maxContentLength", Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_MAX_LENGTH))); conf.set("Cloud9.DocnoMappingClass", docnoMappingClass); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(conf, true); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); SequenceFileInputFormat.setInputPaths(conf, new Path(collectionPath)); SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setOutputKeyClass(IntWritable.class);