/**
 * Opens {@code inputPath} for reading, transparently decompressing it when output
 * compression is enabled for the job.
 * <p>
 * When compression is enabled, the codec configured for the job (falling back to
 * {@link GzipCodec}) is instantiated, its default extension is appended to
 * {@code inputPath}, and the resulting file is opened through the codec.
 * </p>
 * The caller is responsible for closing the returned stream.
 *
 * @param job        job context supplying the compression settings and configuration
 * @param inputPath  path of the file, without the codec extension
 * @param fileSystem file system used to open the file
 * @return a stream over the (decompressed) file contents
 * @throws IOException if the file cannot be opened or the codec cannot wrap it
 */
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    }

    Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    inputPath = new Path(inputPath + codec.getDefaultExtension());

    InputStream raw = fileSystem.open(inputPath);
    boolean wrapped = false;
    try {
        InputStream decompressed = codec.createInputStream(raw);
        wrapped = true;
        return decompressed;
    } finally {
        // If the codec failed to wrap the stream, nobody else holds a reference to it:
        // close it here so the underlying file handle is not leaked.
        if (!wrapped) {
            try {
                raw.close();
            } catch (IOException ignored) {
                // The failure from createInputStream is the one worth reporting.
            }
        }
    }
}
/**
 * Creates {@code outputPath} and returns a stream for writing to it, compressing the
 * data when output compression is enabled for the job.
 * <p>
 * When compression is enabled, the codec configured for the job (falling back to
 * {@link GzipCodec}) is instantiated and its default extension is appended to
 * {@code outputPath} before the existence check and the create call.
 * </p>
 * The caller is responsible for closing the returned stream.
 *
 * @param job            job context supplying the compression settings and configuration
 * @param outputPath     path of the file to create, without the codec extension
 * @param deleteExisting whether an already existing file should be deleted first
 * @return a stream writing to the newly created file
 * @throws IOException if the file system operations or the codec fail
 * @throws ISE         if the target exists and {@code deleteExisting} is {@code false}
 */
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting) throws IOException {
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());

    // A non-null codec is the single source of truth for "compression is on";
    // the setting is read once so both decisions below always agree.
    CompressionCodec codec = null;
    if (FileOutputFormat.getCompressOutput(job)) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath + codec.getDefaultExtension());
    }

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    OutputStream raw = fs.create(outputPath, false);
    if (codec == null) {
        return raw;
    }

    boolean wrapped = false;
    try {
        OutputStream compressed = codec.createOutputStream(raw);
        wrapped = true;
        return compressed;
    } finally {
        // If the codec could not wrap the stream, close it here so it is not leaked.
        if (!wrapped) {
            try {
                raw.close();
            } catch (IOException ignored) {
                // The failure from createOutputStream is the one worth reporting.
            }
        }
    }
}
/**
 * Obtains a {@link Compressor} for the given {@link CompressionCodec}: a pooled
 * instance when one can be borrowed, otherwise a newly created one.
 * <p>
 * A borrowed compressor is re-initialised with {@code conf}; a newly created one is not.
 * Unless the compressor's class is annotated with {@link DoNotPool}, its lease
 * count is incremented by one.
 * </p>
 *
 * @param codec the <code>CompressionCodec</code> for which to get the
 *              <code>Compressor</code>
 * @param conf  the <code>Configuration</code> used to reinit a recycled compressor
 * @return <code>Compressor</code> for the given <code>CompressionCodec</code>,
 *         either recycled from the pool or brand new
 */
public static Compressor getCompressor(CompressionCodec codec, Configuration conf) {
    Compressor leased = borrow(compressorPool, codec.getCompressorType());

    if (leased != null) {
        // Recycled instance: bring it back to a clean state for this configuration.
        leased.reinit(conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Got recycled compressor");
        }
    } else {
        leased = codec.createCompressor();
        LOG.info("Got brand-new compressor ["+codec.getDefaultExtension()+"]");
    }

    final boolean tracked = leased != null
        && !leased.getClass().isAnnotationPresent(DoNotPool.class);
    if (tracked) {
        updateLeaseCount(compressorCounts, leased, 1);
    }
    return leased;
}
/**
 * Obtains a {@link Compressor} for the given {@link CompressionCodec}: a pooled
 * instance when one can be borrowed, otherwise a newly created one.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Compressor</code>
 * @return <code>Compressor</code> for the given <code>CompressionCodec</code>,
 *         either recycled from the pool or brand new
 */
public static Compressor getCompressor(CompressionCodec codec) {
    final Compressor pooled = borrow(COMPRESSOR_POOL, codec.getCompressorType());
    if (pooled != null) {
        LOG.debug("Got recycled compressor");
        return pooled;
    }

    // Nothing available in the pool: ask the codec for a fresh instance.
    final Compressor created = codec.createCompressor();
    LOG.info("Got brand-new compressor");
    return created;
}
/**
 * Obtains a {@link Decompressor} for the given {@link CompressionCodec}: a pooled
 * instance when one can be borrowed, otherwise a newly created one.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Decompressor</code>
 * @return <code>Decompressor</code> for the given <code>CompressionCodec</code>,
 *         either recycled from the pool or brand new
 */
public static Decompressor getDecompressor(CompressionCodec codec) {
    final Decompressor pooled = borrow(DECOMPRESSOR_POOL, codec.getDecompressorType());
    if (pooled != null) {
        LOG.debug("Got recycled decompressor");
        return pooled;
    }

    // Nothing available in the pool: ask the codec for a fresh instance.
    final Decompressor created = codec.createDecompressor();
    LOG.info("Got brand-new decompressor");
    return created;
}
try { final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue(); final Path configuredRootDirPath = new Path(dirValue); final long blockSize = blockSizeProp != null ? blockSizeProp.longValue() : hdfs.getDefaultBlockSize(configuredRootDirPath); final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue() : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT); .getDefaultReplication(configuredRootDirPath); ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension() : putFlowFile.getAttribute(CoreAttributes.FILENAME.key()); final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename); final Path copyFile = new Path(configuredRootDirPath, filename); if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) { throw new IOException(configuredRootDirPath.toString() + " already exists and is not a directory");
/**
 * Checks whether the file for {@code inputPath} exists, taking the job's output
 * compression into account.
 * <p>
 * Without compression the path is checked as given. With compression, the default
 * extension of the job's codec (falling back to {@link GzipCodec}) is appended
 * to the path before the check.
 * </p>
 *
 * @param job       job context supplying the compression settings and configuration
 * @param fs        file system to query
 * @param inputPath path of the file, without the codec extension
 * @return {@code true} if the (possibly extension-suffixed) path exists
 * @throws IOException if the file system query fails
 */
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    Path target = inputPath;
    if (FileOutputFormat.getCompressOutput(job)) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        target = new Path(inputPath + codec.getDefaultExtension());
    }
    return fs.exists(target);
}
codec = getCompressionCodec(context, getConfiguration()); final String originalFilename = file.getName(); final String relativePath = getPathDifference(rootDir, file); stream = getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> hdfs.open(file, bufferSize)); codec = compressionCodecFactory.getCodec(file); stream = codec.createInputStream(stream); outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension()); } else { outputFilename = originalFilename; flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename); if (!keepSourceFiles && !getUserGroupInformation().doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.delete(file, false))) { getLogger().warn("Could not remove {} from HDFS. Not ingesting this file ...", new Object[]{file}); session.getProvenanceReporter().receive(flowFile, file.toString()); session.transfer(flowFile, REL_SUCCESS); getLogger().info("retrieved {} from HDFS {} in {} milliseconds at a rate of {}",
compressionCodecs = new CompressionCodecFactory(job); codec = compressionCodecs.getCodec(file); FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(fileSplit.getPath()); if (isCompressedInput()) { LOG.info(format("Input file is compressed. Using compression code %s", codec.getClass().getName())); in = codec.createInputStream(fileIn); } else { LOG.info("The input file is not compressed");
@Override public void open(String filePath, CompressionCodec codec, CompressionType cType) throws IOException { Configuration conf = new Configuration(); Path dstPath = new Path(filePath); FileSystem hdfs = dstPath.getFileSystem(conf); if (useRawLocalFileSystem) { if (hdfs instanceof LocalFileSystem) { if (conf.getBoolean("hdfs.append.support", false) == true && hdfs.isFile(dstPath)) { fsOut = hdfs.append(dstPath); appending = true; } else { fsOut = hdfs.create(dstPath); cmpOut = codec.createOutputStream(fsOut, compressor); serializer = EventSerializerFactory.getInstance(serializerType, serializerContext, cmpOut);
/**
 * Compresses a file with the given codec and writes the result next to it, using the
 * source path plus the codec's default extension as the destination.
 *
 * @param args {@code args[0]} is the path of the file to compress (on the default
 *             file system of the configuration); {@code args[1]} is the fully
 *             qualified class name of the {@link CompressionCodec} to use
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception                if the codec class cannot be loaded or any I/O fails
 */
public static void main(String... args) throws Exception {
    if (args.length < 2) {
        throw new IllegalArgumentException("Usage: <file> <codec-class-name>");
    }

    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);

    Class<?> codecClass = Class.forName(args[1]);
    CompressionCodec codec = (CompressionCodec)
        ReflectionUtils.newInstance(codecClass, config);

    InputStream is = null;
    OutputStream os = null;
    try {
        is = hdfs.open(new Path(args[0]));
        os = hdfs.create(new Path(args[0] + codec.getDefaultExtension()));
        OutputStream cos = codec.createOutputStream(os);

        // close=true: copyBytes closes both streams once the copy is done, which
        // also lets the codec stream write out any remaining compressed data.
        IOUtils.copyBytes(is, cos, config, true);
    } finally {
        // Covers the failure paths before/while wrapping the streams; a no-op
        // for streams that are null or were already closed by copyBytes.
        IOUtils.closeStream(os);
        IOUtils.closeStream(is);
    }
}
}
/**
 * Decompresses {@code inFile} into {@code outFile}, choosing the codec from the
 * input file's name via {@link CompressionCodecFactory}.
 *
 * @param fs             file system holding both files
 * @param inFile         path of the compressed input file
 * @param outFile        path of the decompressed output file to create
 * @param deletePrevious whether to delete {@code inFile} after a successful copy
 * @throws IOException if no codec matches {@code inFile}, or if reading, writing or
 *                     deleting fails
 */
public static void decompressFile(final FileSystem fs, final String inFile, final String outFile, boolean deletePrevious) throws IOException {
    final Path inPath = new Path(inFile);
    final Path outPath = new Path(outFile);

    final CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
    final CompressionCodec codec = factory.getCodec(inPath);
    if (codec == null) {
        // getCodec returns null when no codec matches the file name; fail
        // before creating the output file instead of hitting a NullPointerException.
        throw new IOException("No compression codec found for " + inFile);
    }

    OutputStream out = null;
    InputStream in = null;
    try {
        out = fs.create(outPath);
        // Assign in two steps so the raw stream is still closed by the finally
        // block if the codec fails to wrap it.
        in = fs.open(inPath);
        in = codec.createInputStream(in);
        IOUtils.copyBytes(in, out, 8192);
    } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }

    // Only reached when the copy succeeded, so the source is never removed after a failure.
    if (deletePrevious) {
        fs.delete(inPath, true);
    }
}
/**
 * Compresses a file from the local file system with the given codec and writes the
 * result to the default file system of the configuration, using the destination path
 * plus the codec's default extension.
 *
 * @param args {@code args[0]} is the fully qualified class name of the
 *             {@link CompressionCodec} to use; {@code args[1]} is the local source
 *             file; {@code args[2]} is the destination path (without codec extension)
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 * @throws Exception                if the codec class cannot be loaded or any I/O fails
 */
public static void main(String... args) throws Exception {
    if (args.length < 3) {
        throw new IllegalArgumentException(
            "Usage: <codec-class-name> <local-source-file> <destination-file>");
    }

    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);

    Class<?> codecClass = Class.forName(args[0]);
    CompressionCodec codec = (CompressionCodec)
        ReflectionUtils.newInstance(codecClass, config);

    InputStream is = null;
    OutputStream os = null;
    try {
        is = FileSystem.getLocal(config).open(new Path(args[1]));
        os = hdfs.create(new Path(args[2] + codec.getDefaultExtension()));
        OutputStream cos = codec.createOutputStream(os);

        // close=true: copyBytes closes both streams once the copy is done, which
        // also lets the codec stream write out any remaining compressed data.
        IOUtils.copyBytes(is, cos, config, true);
    } finally {
        // Covers the failure paths before/while wrapping the streams; a no-op
        // for streams that are null or were already closed by copyBytes.
        IOUtils.closeStream(os);
        IOUtils.closeStream(is);
    }
}
}
/**
 * Creates the task's default work file and returns a {@link DataOutputStream} for
 * writing to it.
 * <p>
 * When output compression is enabled for the job, the configured codec (falling back
 * to {@link DefaultCodec}) is instantiated, its default extension is used for the
 * work file, and the stream is wrapped by the codec. Otherwise the work file has no
 * extension and the stream is written to directly. The file is created without
 * overwriting.
 * </p>
 *
 * @param job the task attempt context supplying configuration and compression settings
 * @return a data output stream over the newly created work file
 * @throws IOException          if the file cannot be created or the codec fails
 * @throws InterruptedException declared for callers; see the enclosing output format
 */
protected DataOutputStream getDataOutputStream(final TaskAttemptContext job) throws IOException, InterruptedException {
    final Configuration conf = job.getConfiguration();

    if (!getCompressOutput(job)) {
        // Uncompressed: no extension, write straight to the file system stream.
        final Path plainFile = super.getDefaultWorkFile(job, "");
        final FileSystem plainFs = plainFile.getFileSystem(conf);
        return new DataOutputStream(plainFs.create(plainFile, false));
    }

    final Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    final CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    final Path compressedFile = super.getDefaultWorkFile(job, codec.getDefaultExtension());
    final FileSystem compressedFs = compressedFile.getFileSystem(conf);
    return new DataOutputStream(codec.createOutputStream(compressedFs.create(compressedFile, false)));
}
/**
 * Decompresses a file with the given codec and writes the decompressed bytes to
 * standard output.
 *
 * @param args {@code args[0]} is the path of the compressed file (on the default
 *             file system of the configuration); {@code args[1]} is the fully
 *             qualified class name of the {@link CompressionCodec} to use
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception                if the codec class cannot be loaded or any I/O fails
 */
public static void main(String... args) throws Exception {
    if (args.length < 2) {
        throw new IllegalArgumentException("Usage: <file> <codec-class-name>");
    }

    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);

    InputStream is = hdfs.open(new Path(args[0]));
    InputStream cis = null;
    try {
        Class<?> codecClass = Class.forName(args[1]);
        CompressionCodec codec = (CompressionCodec)
            ReflectionUtils.newInstance(codecClass, config);

        cis = codec.createInputStream(is);

        // close=false: System.out belongs to the JVM and must not be closed here;
        // the input side is closed explicitly in the finally block instead.
        IOUtils.copyBytes(cis, System.out, config, false);
        System.out.flush();
    } finally {
        IOUtils.closeStream(cis);
        IOUtils.closeStream(is);
    }
}
}
Configuration conf = new Configuration(); CompressionCodecFactory factory = new CompressionCodecFactory(conf); boolean encode = false; for(int i=0; i < args.length; ++i) { encode = false; } else { CompressionCodec codec = factory.getCodec(new Path(args[i])); if (codec == null) { System.out.println("Codec for " + args[i] + " not found."); java.io.InputStream in = null; try { out = codec.createOutputStream( new java.io.FileOutputStream(args[i])); byte[] buffer = new byte[100]; String inFilename = removeSuffix(args[i], codec.getDefaultExtension()); in = new java.io.FileInputStream(inFilename); int len = in.read(buffer); CompressionInputStream in = null; try { in = codec.createInputStream( new java.io.FileInputStream(args[i])); byte[] buffer = new byte[100];
private static List<String> readLines(Path location, Configuration conf) throws Exception { FileSystem fileSystem = FileSystem.get(location.toUri(), conf); CompressionCodecFactory factory = new CompressionCodecFactory(conf); FileStatus[] items = fileSystem.listStatus(location); if (items == null) return new ArrayList<String>(); List<String> results = new ArrayList<String>(); for (FileStatus item : items) { // ignoring files like _SUCCESS if (item.getPath().getName().startsWith("_")) { continue; } CompressionCodec codec = factory.getCodec(item.getPath()); InputStream stream = null; // check if we have a compression codec we need to use if (codec != null) { stream = codec.createInputStream(fileSystem.open(item.getPath())); } else { stream = fileSystem.open(item.getPath()); } StringWriter writer = new StringWriter(); IOUtils.copy(stream, writer, "UTF-8"); String raw = writer.toString(); for (String str : raw.split("\n")) { results.add(str); } } return results; }
CompressionCodec codec = null; Configuration conf = getConfiguration(); final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf); final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString()); final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC; codec = compressionCodecFactory.getCodec(path); } else if (compressionType != CompressionType.NONE) { codec = getCompressionCodec(context, getConfiguration()); final Path qualifiedPath = path.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory()); try { final String outputFilename; final String originalFilename = path.getName(); stream = hdfs.open(path, 16384); stream = codec.createInputStream(stream); outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension()); } else { outputFilename = originalFilename; session.getProvenanceReporter().fetch(flowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS)); session.transfer(flowFile, REL_SUCCESS); } catch (final FileNotFoundException | AccessControlException e) {
/** * Returns an {@link InputStream} to the specified file. * <p> * Note: It is the caller's responsibility to close the returned {@link InputStream}. * </p> * * @param path The path to the file to open. * @return An {@link InputStream} for the specified file. * @throws FileBasedHelperException if there is a problem opening the {@link InputStream} for the specified file. */ @Override public InputStream getFileStream(String path) throws FileBasedHelperException { try { Path p = new Path(path); InputStream in = this.getFileSystem().open(p); // Account for compressed files (e.g. gzip). // https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala CompressionCodecFactory factory = new CompressionCodecFactory(this.getFileSystem().getConf()); CompressionCodec codec = factory.getCodec(p); return (codec == null) ? in : codec.createInputStream(in); } catch (IOException e) { throw new FileBasedHelperException("Cannot open file " + path + " due to " + e.getMessage(), e); } }
/**
 * Opens the given part file for reading, decompressing it when a compression codec
 * matches the file name.
 * <p>
 * The caller is responsible for closing the returned stream.
 * </p>
 *
 * @param fileStatus status of the part file to open
 * @return a stream over the (decompressed) file contents
 * @throws IOException if the file cannot be opened or the codec cannot wrap it
 */
private InputStream openPartFileAsStream(FileStatus fileStatus) throws IOException {
    CompressionCodecFactory compressionFactory = new CompressionCodecFactory(new Configuration());
    FileSystem fs = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    CompressionCodec codec = compressionFactory.getCodec(fileStatus.getPath());

    InputStream raw = fs.open(fileStatus.getPath());
    if (codec == null) {
        return raw;
    }

    boolean wrapped = false;
    try {
        InputStream decompressed = codec.createInputStream(raw);
        wrapped = true;
        return decompressed;
    } finally {
        // If the codec failed to wrap the stream, close it here so the
        // underlying file handle is not leaked.
        if (!wrapped) {
            try {
                raw.close();
            } catch (IOException ignored) {
                // The failure from createInputStream is the one worth reporting.
            }
        }
    }
}