/**
 * Launches the stage-1 metastore comparison MapReduce job.
 *
 * @param output directory that receives the gzip-compressed job output
 * @return 0 if the job completed successfully, 1 otherwise
 */
private int runMetastoreCompareJob(Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job compareJob = Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job");
  compareJob.setJarByClass(this.getClass());

  // Scan the metastore directly; tables fan out to the mapper.
  compareJob.setInputFormatClass(MetastoreScanInputFormat.class);
  compareJob.setMapperClass(Stage1ProcessTableMapper.class);
  compareJob.setReducerClass(Stage1PartitionCompareReducer.class);

  compareJob.setOutputKeyClass(LongWritable.class);
  compareJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(compareJob, output);
  // setOutputCompressorClass also switches output compression on.
  FileOutputFormat.setOutputCompressorClass(compareJob, GzipCodec.class);

  return compareJob.waitForCompletion(true) ? 0 : 1;
}
/**
 * Runs the stage-1 job that compares table/partition metadata from the
 * metastore, writing gzip-compressed results under {@code output}.
 *
 * @return 0 on success, 1 on failure
 */
private int runMetastoreCompareJob(Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(MetastoreScanInputFormat.class);
  job.setMapperClass(Stage1ProcessTableMapper.class);
  job.setReducerClass(Stage1PartitionCompareReducer.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  final boolean finishedOk = job.waitForCompletion(true);
  if (!finishedOk) {
    return 1;
  }
  return 0;
}
/**
 * Configures output compression from the target path's file extension:
 * ".bz2"/".bz" selects bzip2, ".gz" selects gzip, anything else disables
 * compression entirely.
 */
private void setCompression(Path path, Job job) {
  final String fileName = path.getName();
  final boolean wantsBzip = fileName.endsWith(".bz2") || fileName.endsWith(".bz");
  final boolean wantsGzip = fileName.endsWith(".gz");

  FileOutputFormat.setCompressOutput(job, wantsBzip || wantsGzip);
  if (wantsBzip) {
    FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
  } else if (wantsGzip) {
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}
/**
 * Configures the job's output location, key/value classes, and optional
 * compression driven by the {@code output.compression.*} properties.
 *
 * @param location output directory for the store
 * @param job      job being configured
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  job.setOutputKeyClass(this.keyClass);
  // BUG FIX: the original called setOutputKeyClass(this.keyClass) twice
  // (copy/paste) and never registered the value class with the job.
  job.setOutputValueClass(this.valueClass);
  Configuration conf = job.getConfiguration();
  if ("true".equals(conf.get("output.compression.enabled"))) {
    FileOutputFormat.setCompressOutput(job, true);
    String codec = conf.get("output.compression.codec");
    FileOutputFormat.setOutputCompressorClass(job,
        PigContext.resolveClassName(codec).asSubclass(CompressionCodec.class));
  }
  FileOutputFormat.setOutputPath(job, new Path(location));
}
@Override public void setStoreLocation(String location, Job job) throws IOException { job.getConfiguration().set(MRConfiguration.TEXTOUTPUTFORMAT_SEPARATOR, ""); FileOutputFormat.setOutputPath(job, new Path(location)); if( "true".equals( job.getConfiguration().get( "output.compression.enabled" ) ) ) { FileOutputFormat.setCompressOutput( job, true ); String codec = job.getConfiguration().get( "output.compression.codec" ); try { FileOutputFormat.setOutputCompressorClass( job, (Class<? extends CompressionCodec>) Class.forName( codec ) ); } catch (ClassNotFoundException e) { throw new RuntimeException("Class not found: " + codec ); } } else { // This makes it so that storing to a directory ending with ".gz" or ".bz2" works. setCompression(new Path(location), job); } }
private int runDirectoryComparisonJob(Path source, Path destination, Path output, String compareOption) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(getConf(), "Directory Comparison Job"); job.setJarByClass(getClass()); job.setInputFormatClass(DirScanInputFormat.class); job.setMapperClass(ListFileMapper.class); job.setReducerClass(DirectoryCompareReducer.class); // last directory is destination, all other directories are source directories job.getConfiguration().set(SRC_PATH_CONF, source.toString()); job.getConfiguration().set(DST_PATH_CONF, destination.toString()); job.getConfiguration().set(FileInputFormat.INPUT_DIR, Joiner.on(",").join(source, destination)); job.getConfiguration().set(COMPARE_OPTION_CONF, compareOption); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FileStatus.class); FileOutputFormat.setOutputPath(job, output); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
private int runDirectoryComparisonJob(Path source, Path destination, Path output, String compareOption) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(getConf(), "Directory Comparison Job"); job.setJarByClass(getClass()); job.setInputFormatClass(DirScanInputFormat.class); job.setMapperClass(ListFileMapper.class); job.setReducerClass(DirectoryCompareReducer.class); // last directory is destination, all other directories are source directories job.getConfiguration().set(SRC_PATH_CONF, source.toString()); job.getConfiguration().set(DST_PATH_CONF, destination.toString()); job.getConfiguration().set(FileInputFormat.INPUT_DIR, Joiner.on(",").join(source, destination)); job.getConfiguration().set(COMPARE_OPTION_CONF, compareOption); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FileStatus.class); FileOutputFormat.setOutputPath(job, output); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
/**
 * Builds (but does not submit) the visual-analysis job, clearing any
 * pre-existing output directory first.
 *
 * @param inputPath  text input location
 * @param outputPath output location; deleted first if it already exists
 * @return the fully configured, unsubmitted job
 */
private Job createJob(String inputPath, String outputPath) throws Exception {
  Configuration conf = getConf();
  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  Job job = Job.getInstance(conf);
  job.setJarByClass(VisualJob.class);
  job.setNumReduceTasks(90);

  // MapReduce refuses to run if the output path exists; remove it up front.
  FileSystem fs = FileSystem.get(new URI(outputPath), conf);
  Path output = new Path(outputPath);
  if (fs.exists(output)) {
    fs.delete(output, true);
  }

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(FloatArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(VisualThreadedMapper.class);
  job.setReducerClass(VisualReducer.class);
  return job;
}
/**
 * Runs the HDFS synchronization job that copies data from {@code source}
 * to {@code destination}, staging through {@code tmpDir}.
 *
 * @param source      source root recorded in the job configuration
 * @param destination destination root recorded in the job configuration
 * @param tmpDir      staging directory for in-flight copies
 * @param input       text file(s) listing the work items
 * @param output      gzip-compressed job output directory
 * @return 0 on success, 1 on failure
 */
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Job.getInstance replaces the deprecated Job(Configuration, String) ctor.
  Job job = Job.getInstance(getConf(), "HDFS Sync job");
  job.setJarByClass(getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(HdfsSyncMapper.class);
  job.setReducerClass(HdfsSyncReducer.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  job.getConfiguration().set(SRC_PATH_CONF, source.toString());
  job.getConfiguration().set(DST_PATH_CONF, destination.toString());
  job.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  // Small split ceiling spreads the copy list across many mappers.
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  // Pass the Path directly; wrapping it in new Path(output.toString()) was redundant.
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Executes the HDFS sync MapReduce job and reports its outcome.
 *
 * @return 0 if the job completed successfully, 1 otherwise
 */
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job syncJob = new Job(getConf(), "HDFS Sync job");
  syncJob.setJarByClass(getClass());
  syncJob.setInputFormatClass(TextInputFormat.class);
  syncJob.setMapperClass(HdfsSyncMapper.class);
  syncJob.setReducerClass(HdfsSyncReducer.class);
  syncJob.setOutputKeyClass(LongWritable.class);
  syncJob.setOutputValueClass(Text.class);

  // Record the sync endpoints and staging area for mappers/reducers.
  syncJob.getConfiguration().set(SRC_PATH_CONF, source.toString());
  syncJob.getConfiguration().set(DST_PATH_CONF, destination.toString());
  syncJob.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

  FileInputFormat.setInputPaths(syncJob, input);
  FileInputFormat.setInputDirRecursive(syncJob, true);
  final long maxSplitSize = this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L);
  FileInputFormat.setMaxInputSplitSize(syncJob, maxSplitSize);

  FileOutputFormat.setOutputPath(syncJob, new Path(output.toString()));
  FileOutputFormat.setOutputCompressorClass(syncJob, GzipCodec.class);

  final boolean completed = syncJob.waitForCompletion(true);
  return completed ? 0 : 1;
}
/**
 * Variant of the stage-1 metastore comparison that reads the table list
 * from a text input file instead of scanning the metastore directly.
 *
 * @param input  text file listing tables to process
 * @param output gzip-compressed job output directory
 * @return 0 on success, 1 on failure
 */
private int runMetastoreCompareJobWithTextInput(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job =
      Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job with Input List");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage1ProcessTableMapperWithTextInput.class);
  job.setReducerClass(Stage1PartitionCompareReducer.class);

  FileInputFormat.setInputPaths(job, input);
  // Keep splits small so the table list fans out across many mappers.
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  job.setNumReduceTasks(
      getConf().getInt(ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM, 150));

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Runs the stage-1 metastore comparison against an explicit table list.
 *
 * @return 0 if the job succeeded, 1 otherwise
 */
private int runMetastoreCompareJobWithTextInput(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job compareJob =
      Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job with Input List");
  compareJob.setJarByClass(this.getClass());
  compareJob.setInputFormatClass(TextInputFormat.class);
  compareJob.setMapperClass(Stage1ProcessTableMapperWithTextInput.class);
  compareJob.setReducerClass(Stage1PartitionCompareReducer.class);

  FileInputFormat.setInputPaths(compareJob, input);
  long splitCeiling = this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L);
  FileInputFormat.setMaxInputSplitSize(compareJob, splitCeiling);

  compareJob.setOutputKeyClass(LongWritable.class);
  compareJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(compareJob, output);
  FileOutputFormat.setOutputCompressorClass(compareJob, GzipCodec.class);

  // Reducer parallelism is configurable; defaults to 150.
  int reducers = getConf().getInt(ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM, 150);
  compareJob.setNumReduceTasks(reducers);

  boolean ok = compareJob.waitForCompletion(true);
  if (ok) {
    return 0;
  }
  return 1;
}
@SuppressWarnings("unchecked") @Override public void setStoreLocation(String location, Job job) throws IOException { ensureUDFContext(job.getConfiguration()); job.setOutputKeyClass(keyClass); job.setOutputValueClass(valueClass); FileOutputFormat.setOutputPath(job, new Path(location)); if ("true".equals(job.getConfiguration().get( "output.compression.enabled"))) { FileOutputFormat.setCompressOutput(job, true); String codec = job.getConfiguration().get( "output.compression.codec"); FileOutputFormat .setOutputCompressorClass( job, PigContext.resolveClassName(codec).asSubclass( CompressionCodec.class)); } else { // This makes it so that storing to a directory ending with ".gz" or // ".bz2" works. setCompression(new Path(location), job); } }
public void run(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath, int k, int numReduceTasks, Class<? extends Writable> labelClass, SSVDSolver.OutputScalingEnum outputScaling) throws ClassNotFoundException, InterruptedException, IOException { job = new Job(conf); job.setJobName("U-job"); job.setJarByClass(UJob.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(job, inputPathQ); FileOutputFormat.setOutputPath(job, outputPath); // WARN: tight hadoop integration here: job.getConfiguration().set("mapreduce.output.basename", OUTPUT_U); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setMapperClass(UMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setOutputKeyClass(labelClass); job.setOutputValueClass(VectorWritable.class); job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString()); job.getConfiguration().set(PROP_SIGMA_PATH, sigmaPath.toString()); job.getConfiguration().set(PROP_OUTPUT_SCALING, outputScaling.name()); job.getConfiguration().setInt(PROP_K, k); job.setNumReduceTasks(0); job.submit(); }
if (codecName != null) { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, CompressionUtils.getHadoopCodec(codecName));
/**
 * Runs the stage-2 HDFS copy job and, on success, logs the Hive commands
 * that expose the job's logging data.
 *
 * @param input  text file(s) listing the directories to copy
 * @param output gzip-compressed job output directory
 * @return 0 on success, 1 on failure
 */
private int runHdfsCopyJob(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException,
    TemplateRenderException {
  LOG.info("Starting job for step 2...");

  final Job copyJob = Job.getInstance(this.getConf(), "Stage 2: HDFS Copy Job");
  copyJob.setJarByClass(this.getClass());
  copyJob.setInputFormatClass(TextInputFormat.class);
  copyJob.setMapperClass(Stage2DirectoryCopyMapper.class);
  copyJob.setReducerClass(Stage2DirectoryCopyReducer.class);

  FileInputFormat.setInputPaths(copyJob, input);
  FileInputFormat.setInputDirRecursive(copyJob, true);
  FileInputFormat.setMaxInputSplitSize(copyJob,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  copyJob.setOutputKeyClass(LongWritable.class);
  copyJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(copyJob, output);
  FileOutputFormat.setOutputCompressorClass(copyJob, GzipCodec.class);
  copyJob.setNumReduceTasks(
      getConf().getInt(ConfigurationKeys.BATCH_JOB_COPY_PARALLELISM, 150));

  final boolean succeeded = copyJob.waitForCompletion(true);
  if (succeeded) {
    LOG.info("Job for step 2 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP2_HQL_TEMPLATE, velocityContext) + "\n");
  }
  return succeeded ? 0 : 1;
}
/**
 * Enables output compression when the path's extension maps to a codec
 * known to the Hadoop {@code CompressionCodecFactory}; otherwise disables
 * compression.
 *
 * @param path output path whose extension is inspected
 * @param job  job whose output settings are updated
 */
private void setCompression(Path path, Job job) {
  CompressionCodec codec =
      new CompressionCodecFactory(job.getConfiguration()).getCodec(path);
  if (codec == null) {
    FileOutputFormat.setCompressOutput(job, false);
    return;
  }
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, codec.getClass());
}
@SuppressWarnings("unchecked") @Override public void setStoreLocation(String location, Job job) throws IOException { Configuration conf = HadoopCompat.getConfiguration(job); ensureUDFContext(conf); verifyWritableClass(config.keyClass, true, config.keyConverter); verifyWritableClass(config.valueClass, false, config.valueConverter); job.setOutputKeyClass(config.keyClass); job.setOutputValueClass(config.valueClass); super.setStoreLocation(location, job); if ("true".equals(conf.get("output.compression.enabled"))) { FileOutputFormat.setCompressOutput(job, true); String codec = conf.get("output.compression.codec"); FileOutputFormat.setOutputCompressorClass(job, PigContext.resolveClassName(codec).asSubclass(CompressionCodec.class)); } else { // This makes it so that storing to a directory ending with ".gz" or ".bz2" works. setCompression(new Path(location), job); } }
/**
 * Turns output compression on or off based on whether Hadoop's codec
 * factory recognizes the path's file extension.
 *
 * @param path output path whose extension selects the codec
 * @param job  job whose output settings are updated
 */
private void setCompression(Path path, Job job) {
  CompressionCodecFactory factory =
      new CompressionCodecFactory(HadoopCompat.getConfiguration(job));
  CompressionCodec matched = factory.getCodec(path);
  boolean compress = matched != null;
  FileOutputFormat.setCompressOutput(job, compress);
  if (compress) {
    FileOutputFormat.setOutputCompressorClass(job, matched.getClass());
  }
}
/**
 * Runs a map-only VCF round-trip job against {@code inputPath} and returns
 * the (freshly cleared) output directory.
 *
 * @param inputPath   VCF input to read
 * @param writeHeader whether the output format should emit the VCF header
 * @return the qualified output path ("target/out")
 */
private Path doMapReduce(final Path inputPath, final boolean writeHeader) throws Exception {
  final FileSystem fs = FileSystem.get(conf);
  final Path outputPath = fs.makeQualified(new Path("target/out"));
  // Start from a clean output directory; MapReduce refuses to overwrite.
  fs.delete(outputPath, true);

  final Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(VCFInputFormat.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(VariantContextWritable.class);
  if (writeHeader) {
    job.setOutputFormatClass(VCFTestWithHeaderOutputFormat.class);
  } else {
    job.setOutputFormatClass(VCFTestNoHeaderOutputFormat.class);
  }
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VariantContextWritable.class);
  // Map-only: records pass straight through to the output format.
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);
  if (codecClass != null) {
    FileOutputFormat.setOutputCompressorClass(job, codecClass);
  }

  final boolean success = job.waitForCompletion(true);
  assertTrue(success);
  return outputPath;
}