public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException
{
  if (!FileOutputFormat.getCompressOutput(job)) {
    return fs.exists(inputPath);
  } else {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    return fs.exists(new Path(inputPath + codec.getDefaultExtension()));
  }
}

public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem) throws IOException
{
  if (!FileOutputFormat.getCompressOutput(job)) {
    return fileSystem.open(inputPath);
  } else {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    inputPath = new Path(inputPath + codec.getDefaultExtension());
    return codec.createInputStream(fileSystem.open(inputPath));
  }
}

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting) throws IOException
{
  OutputStream retVal;
  FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
  Class<? extends CompressionCodec> codecClass;
  CompressionCodec codec = null;

  if (FileOutputFormat.getCompressOutput(job)) {
    codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    outputPath = new Path(outputPath + codec.getDefaultExtension());
  }

  if (fs.exists(outputPath)) {
    if (deleteExisting) {
      fs.delete(outputPath, false);
    } else {
      throw new ISE("outputPath[%s] must not exist.", outputPath);
    }
  }

  if (FileOutputFormat.getCompressOutput(job)) {
    retVal = codec.createOutputStream(fs.create(outputPath, false));
  } else {
    retVal = fs.create(outputPath, false);
  }

  return retVal;
}

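// Hedged usage sketch, not part of the original snippets: one way the three helper
// methods above might be exercised together in a round trip. The method name, paths,
// and unqualified calls (assuming the helpers live on the same class) are illustrative
// assumptions; standard Hadoop imports plus java.nio.charset.StandardCharsets and
// Java 9+ (for InputStream.readAllBytes) are assumed.
static void roundTripExample(Job job, Path path) throws IOException {
  // Opt in to compressed output; the helpers then transparently append the
  // codec's default extension (e.g. ".gz") to every path they touch.
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  try (OutputStream out = makePathAndOutputStream(job, path, true)) {
    out.write("hello".getBytes(StandardCharsets.UTF_8));
  }

  FileSystem fs = path.getFileSystem(job.getConfiguration());
  if (exists(job, fs, path)) {
    try (InputStream in = openInputStream(job, path, fs)) {
      // Reads back the uncompressed bytes; the codec stream is applied inside the helper.
      System.out.println(new String(in.readAllBytes(), StandardCharsets.UTF_8));
    }
  }
}
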
@Override
public boolean isHadoopCompressionSet()
{
  return FileOutputFormat.getCompressOutput(context);
}

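// Hedged sketch, not from the original source: the override above simply delegates to
// FileOutputFormat's compress flag ("mapreduce.output.fileoutputformat.compress"), which
// a job driver toggles like this (the Job parameter is an assumption for illustration):
static void enableHadoopCompression(Job job) {
  FileOutputFormat.setCompressOutput(job, true); // isHadoopCompressionSet() now reports true
}
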
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException
{
  if (!FileOutputFormat.getCompressOutput(job)) {
    return fs.exists(inputPath);
  } else {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    return fs.exists(new Path(inputPath.toString() + codec.getDefaultExtension()));
  }
}

public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem) throws IOException
{
  if (!FileOutputFormat.getCompressOutput(job)) {
    return fileSystem.open(inputPath);
  } else {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    inputPath = new Path(inputPath.toString() + codec.getDefaultExtension());
    return codec.createInputStream(fileSystem.open(inputPath));
  }
}

public StringRecordWriterProvider(TaskAttemptContext context) {
  Configuration conf = context.getConfiguration();
  if (recordDelimiter == null) {
    recordDelimiter = conf.get(ETL_OUTPUT_RECORD_DELIMITER, DEFAULT_RECORD_DELIMITER);
  }
  isCompressed = FileOutputFormat.getCompressOutput(context);
  if (isCompressed) {
    Class<? extends CompressionCodec> codecClass = null;
    if ("snappy".equals(EtlMultiOutputFormat.getEtlOutputCodec(context))) {
      codecClass = SnappyCodec.class;
    } else if ("gzip".equals(EtlMultiOutputFormat.getEtlOutputCodec(context))) {
      codecClass = GzipCodec.class;
    } else {
      codecClass = DefaultCodec.class;
    }
    codec = ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
}

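// Hedged driver sketch, not in the original: one way the snappy/gzip branches in
// StringRecordWriterProvider above might be selected. The "etl.output.codec" property
// name is an assumption about what EtlMultiOutputFormat.getEtlOutputCodec(context)
// reads; verify it against the library's actual constant before relying on it.
static void configureSnappyStringOutput(Job job) {
  FileOutputFormat.setCompressOutput(job, true);            // makes isCompressed true
  job.getConfiguration().set("etl.output.codec", "snappy"); // assumed key; selects SnappyCodec
}
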
static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job,
    String codecName, int deflateLevel) throws UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  if (FileOutputFormat.getCompressOutput(job)) {
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(deflateLevel)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : conf) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    }
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
  }
}

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting) throws IOException
{
  OutputStream retVal;
  FileSystem fs = outputPath.getFileSystem(job.getConfiguration());

  if (fs.exists(outputPath)) {
    if (deleteExisting) {
      fs.delete(outputPath, false);
    } else {
      throw new ISE("outputPath[%s] must not exist.", outputPath);
    }
  }

  if (!FileOutputFormat.getCompressOutput(job)) {
    retVal = fs.create(outputPath, false);
  } else {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());
    retVal = codec.createOutputStream(fs.create(outputPath, false));
  }

  return retVal;
}

static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job)
    throws UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = conf.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : conf) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    }
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
  }
}

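// Hedged sketch, not from the original: driver-side settings that the two Avro
// configureDataFileWriter variants above read back. AvroJob.OUTPUT_CODEC resolves to
// Avro's "avro.output.codec" key; the literal deflate-level key below is an assumption,
// so prefer the library's DEFLATE_LEVEL_KEY constant where it is visible.
static void configureAvroDeflateOutput(Job job) {
  FileOutputFormat.setCompressOutput(job, true);                 // enables the codec branch
  job.getConfiguration().set(AvroJob.OUTPUT_CODEC, "deflate");   // matches DEFLATE_CODEC
  job.getConfiguration().setInt("avro.mapred.deflate.level", 6); // assumed key for the level
}
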
public WARCWriter(TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  CompressionCodec codec = getCompressOutput(context) ? WARCFileWriter.getGzipCodec(conf) : null;
  Path workFile = getDefaultWorkFile(context, "");
  this.writer = new WARCFileWriter(conf, codec, workFile);
}

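// Hedged sketch, not from the original: enabling compression for the WARCWriter above.
// With the flag set, getCompressOutput(context) returns true and the writer is constructed
// with WARCFileWriter's gzip codec, so records are written gzip-compressed.
static void enableWarcGzip(Job job) {
  FileOutputFormat.setCompressOutput(job, true); // codec != null in the WARCWriter constructor
}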