/**
 * Opens an input stream for {@code inputPath}, transparently decompressing when the
 * job has output compression enabled.
 *
 * <p>When compression is off, the file is opened as-is. Otherwise the job's output
 * compressor class (defaulting to gzip) is instantiated and its default extension is
 * appended to the path before opening, so the compressed counterpart of the file is read.
 *
 * @param job        context supplying compression settings and configuration
 * @param inputPath  path of the (uncompressed-named) file to read
 * @param fileSystem filesystem used to open the file
 * @return a plain or decompressing stream over the file's contents
 * @throws IOException if the file cannot be opened
 */
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        // No output compression configured: read the raw file.
        return fileSystem.open(inputPath);
    }
    // Resolve the codec the job writes with and locate the file by its codec extension.
    Class<? extends CompressionCodec> codecClass =
            FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    Path compressedPath = new Path(inputPath + codec.getDefaultExtension());
    return codec.createInputStream(fileSystem.open(compressedPath));
}
public InputStream createDecompressionStream( InputStream downStream, Decompressor decompressor, int downStreamBufferSize) throws IOException { CompressionCodec codec = getCodec(conf); // Set the internal buffer size to read from down stream. if (downStreamBufferSize > 0) { ((Configurable)codec).getConf().setInt("io.file.buffer.size", downStreamBufferSize); } CompressionInputStream cis = codec.createInputStream(downStream, decompressor); BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE); return bis2; }
/**
 * Builds a decompression stream for the LZO codec, optionally buffering the raw input.
 *
 * @param downStream           raw compressed input
 * @param downStreamBufferSize when positive, size of a BufferedInputStream wrapped
 *                             around {@code downStream} before decompression
 * @return a buffered, decompressing input stream
 * @throws IOException if the LZO codec class was never configured
 */
@Override
public synchronized InputStream createDecompressionStream(
        InputStream downStream, Decompressor decompressor, int downStreamBufferSize) throws IOException {
    if (!isSupported()) {
        throw new IOException(
                "LZO codec class not specified. Did you forget to set property "
                        + CONF_LZO_CLASS + "?");
    }
    // Optionally buffer reads from the raw stream before they reach the codec.
    InputStream source = (downStreamBufferSize > 0)
            ? new BufferedInputStream(downStream, downStreamBufferSize)
            : downStream;
    // Pin the codec's internal buffer size to the configured default.
    conf.setInt(IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_KEY, IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_DEFAULT);
    CompressionInputStream cis = codec.createInputStream(source, decompressor);
    return new BufferedInputStream(cis, DATA_IBUF_SIZE);
}
private static List<String> readLines(Path location, Configuration conf) throws Exception { FileSystem fileSystem = HadoopUtil.getWorkingFileSystem(); CompressionCodecFactory factory = new CompressionCodecFactory(conf); FileStatus[] items = fileSystem.listStatus(location); if (items == null) return new ArrayList<String>(); List<String> results = new ArrayList<String>(); for (FileStatus item : items) { // ignoring files like _SUCCESS if (item.getPath().getName().startsWith("_")) { continue; } CompressionCodec codec = factory.getCodec(item.getPath()); InputStream stream = null; // check if we have a compression codec we need to use if (codec != null) { stream = codec.createInputStream(fileSystem.open(item.getPath())); } else { stream = fileSystem.open(item.getPath()); } StringWriter writer = new StringWriter(); IOUtils.copy(stream, writer, "UTF-8"); String raw = writer.toString(); for (String str : StringUtil.split(raw, "\n")) { results.add(str); } } return results; }
/**
 * Opens a reader over the log file named by {@code path}, decompressing through
 * {@code codec} when one is supplied (borrowing a decompressor from the CodecPool).
 *
 * @param path  log file descriptor supplying the filesystem path and start offset
 * @param codec compression codec for the file, or {@code null} for plain files
 * @throws IOException if the file cannot be opened or the codec stream fails
 */
public FlexibleDelimitedFileReader(LogFilePath path, CompressionCodec codec) throws IOException {
    Path fsPath = new Path(path.getLogFilePath());
    FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
    InputStream inputStream = fs.open(fsPath);
    try {
        if (codec == null) {
            this.mReader = new BufferedInputStream(inputStream);
        } else {
            mDecompressor = CodecPool.getDecompressor(codec);
            this.mReader = new BufferedInputStream(
                    codec.createInputStream(inputStream, mDecompressor));
        }
    } catch (IOException | RuntimeException e) {
        // FIX: on failure, return the pooled decompressor and close the raw stream
        // instead of leaking both (the original left them dangling on exception).
        if (mDecompressor != null) {
            CodecPool.returnDecompressor(mDecompressor);
            mDecompressor = null;
        }
        inputStream.close();
        throw e;
    }
    this.mOffset = path.getOffset();
}
/**
 * Opens a reader over the log file named by {@code path}, decompressing through
 * {@code codec} when one is supplied (borrowing a decompressor from the CodecPool).
 *
 * @param path  log file descriptor supplying the filesystem path and start offset
 * @param codec compression codec for the file, or {@code null} for plain files
 * @throws IOException if the file cannot be opened or the codec stream fails
 */
public DelimitedTextFileReader(LogFilePath path, CompressionCodec codec) throws IOException {
    Path fsPath = new Path(path.getLogFilePath());
    FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
    InputStream inputStream = fs.open(fsPath);
    try {
        if (codec == null) {
            this.mReader = new BufferedInputStream(inputStream);
        } else {
            mDecompressor = CodecPool.getDecompressor(codec);
            this.mReader = new BufferedInputStream(
                    codec.createInputStream(inputStream, mDecompressor));
        }
    } catch (IOException | RuntimeException e) {
        // FIX: on failure, return the pooled decompressor and close the raw stream
        // instead of leaking both (the original left them dangling on exception).
        if (mDecompressor != null) {
            CodecPool.returnDecompressor(mDecompressor);
            mDecompressor = null;
        }
        inputStream.close();
        throw e;
    }
    this.mOffset = path.getOffset();
}
/**
 * Decompresses {@code compressed} into {@code uncompressed}, filling the destination
 * slice exactly to its length using this codec's pooled decompressor.
 *
 * @throws RcFileCorruptionException if the compressed stream ends early or fails to decode
 */
@Override
public void decompress(Slice compressed, Slice uncompressed) throws RcFileCorruptionException {
    checkState(!destroyed, "Codec has been destroyed");
    decompressor.reset();
    try (CompressionInputStream input = codec.createInputStream(compressed.getInput(), decompressor)) {
        // Fill the whole destination slice; a short or corrupt stream surfaces below.
        uncompressed.setBytes(0, input, uncompressed.length());
    } catch (IndexOutOfBoundsException | IOException e) {
        throw new RcFileCorruptionException(e, "Compressed stream is truncated");
    }
}
/**
 * Decompresses {@code compressed} into {@code uncompressed}, filling the destination
 * slice exactly to its length.
 *
 * @throws RcFileCorruptionException if the compressed stream ends early or fails to decode
 */
@Override
public void decompress(Slice compressed, Slice uncompressed) throws RcFileCorruptionException {
    try (CompressionInputStream input = codec.createInputStream(compressed.getInput())) {
        // Fill the whole destination slice; a short or corrupt stream surfaces below.
        uncompressed.setBytes(0, input, uncompressed.length());
    } catch (IndexOutOfBoundsException | IOException e) {
        throw new RcFileCorruptionException(e, "Compressed stream is truncated");
    }
}
/** * Returns an {@link InputStream} to the specified file. * <p> * Note: It is the caller's responsibility to close the returned {@link InputStream}. * </p> * * @param path The path to the file to open. * @return An {@link InputStream} for the specified file. * @throws FileBasedHelperException if there is a problem opening the {@link InputStream} for the specified file. */ @Override public InputStream getFileStream(String path) throws FileBasedHelperException { try { Path p = new Path(path); InputStream in = this.getFileSystem().open(p); // Account for compressed files (e.g. gzip). // https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala CompressionCodecFactory factory = new CompressionCodecFactory(this.getFileSystem().getConf()); CompressionCodec codec = factory.getCodec(p); return (codec == null) ? in : codec.createInputStream(in); } catch (IOException e) { throw new FileBasedHelperException("Cannot open file " + path + " due to " + e.getMessage(), e); } }
if (isCompressedInput()) {
    // Wrap the raw file stream in the codec's decompression stream.
    // NOTE(review): log message says "compression code" — probably meant "codec";
    // left unchanged since log text is runtime output.
    LOG.info(format("Input file is compressed. Using compression code %s", codec.getClass().getName()));
    in = codec.createInputStream(fileIn);
} else {
    LOG.info("The input file is not compressed");
/**
 * Decompresses the buffered record bytes and writes the plain bytes to
 * {@code outStream}.
 *
 * <p>The decompression stream is created lazily on first call and reused (via
 * {@code resetState()}) afterwards; {@code rawData} is repointed at the current
 * record's bytes before each copy.
 *
 * @param outStream destination for the uncompressed bytes
 * @throws IOException if decompression or the write fails
 */
@Override
public void writeUncompressedBytes(DataOutputStream outStream) throws IOException {
    if (decompressedStream == null) {
        // First use: build the reusable decompression pipeline over rawData.
        rawData = new DataInputBuffer();
        decompressedStream = codec.createInputStream(rawData);
    } else {
        // Subsequent use: reset codec state for a fresh record.
        decompressedStream.resetState();
    }
    rawData.reset(data, 0, dataSize);
    // Stream the decompressed bytes out in 8 KB chunks.
    final byte[] chunk = new byte[8192];
    for (int n; (n = decompressedStream.read(chunk, 0, 8192)) != -1; ) {
        outStream.write(chunk, 0, n);
    }
}
/**
 * Create an input stream with a codec taken from the global CodecPool.
 *
 * @param codec The codec to use to create the input stream.
 * @param conf The configuration to use if we need to create a new codec.
 * @param in The input stream to wrap.
 * @return The new input stream
 * @throws IOException
 */
static CompressionInputStream createInputStreamWithCodecPool(
    CompressionCodec codec, Configuration conf, InputStream in) throws IOException {
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  CompressionInputStream stream = null;
  try {
    stream = codec.createInputStream(in, decompressor);
  } finally {
    // If stream creation failed (stream still null), hand the borrowed
    // decompressor straight back to the pool; otherwise attach it to the
    // stream so it is returned when the stream is closed.
    if (stream == null) {
      CodecPool.returnDecompressor(decompressor);
    } else {
      stream.setTrackedDecompressor(decompressor);
    }
  }
  return stream;
}
}
CompressionInputStream in = null;
try {
  // Decompress the file named on the command line, reading straight from local disk.
  in = codec.createInputStream(
      new java.io.FileInputStream(args[i]));
  byte[] buffer = new byte[100];
// Decompresses a cell-block stream into a ByteBuffer, borrowing a decompressor
// from the CodecPool for the duration of the copy and always returning it.
private ByteBuffer decompress(CompressionCodec compressor, InputStream cellBlockStream,
    int osInitialSize) throws IOException {
  // GZIPCodec fails w/ NPE if no configuration.
  if (compressor instanceof Configurable) {
    ((Configurable) compressor).setConf(this.conf);
  }
  Decompressor poolDecompressor = CodecPool.getDecompressor(compressor);
  CompressionInputStream cis = compressor.createInputStream(cellBlockStream, poolDecompressor);
  ByteBufferOutputStream bbos;
  try {
    // TODO: This is ugly. The buffer will be resized on us if we guess wrong.
    // TODO: Reuse buffers.
    bbos = new ByteBufferOutputStream(osInitialSize);
    IOUtils.copy(cis, bbos);
    bbos.close();
    return bbos.getByteBuffer();
  } finally {
    // Return the borrowed decompressor to the pool even on failure.
    CodecPool.returnDecompressor(poolDecompressor);
  }
}
}
if (codec != null) {
  // Rewind to the start of the file before handing the stream to the codec.
  i.seek(0);
  return codec.createInputStream(i);
if (codec != null) {
  // Borrow a decompressor from the pool and wrap the value buffer in a
  // decompression stream.
  valDecompressor = CodecPool.getDecompressor(codec);
  deflatFilter = codec.createInputStream(decompressBuffer, valDecompressor);
if (codec != null) {
  // Borrow a decompressor from the pool and wrap the value buffer in a
  // decompression stream.
  valDecompressor = CodecPool.getDecompressor(codec);
  deflatFilter = codec.createInputStream(decompressBuffer, valDecompressor);
// Wrap the raw stream in the codec's decompression stream and strip the codec's
// extension (e.g. ".gz") from the original file name to get the output name.
stream = codec.createInputStream(stream);
outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
} else {
// Read the compressed key block into a temp buffer, point the decompression
// input buffer at exactly those bytes, then layer a decompressing DataInputStream
// over it.
keyTempBuffer.write(in, compressedKeyLen);
keyDecompressBuffer.reset(keyTempBuffer.getData(), compressedKeyLen);
CompressionInputStream deflatFilter = codec.createInputStream(
    keyDecompressBuffer, keyDecompressor);
DataInputStream compressedIn = new DataInputStream(deflatFilter);
// Read the compressed key block into a temp buffer, point the decompression
// input buffer at exactly those bytes, then layer a decompressing DataInputStream
// over it.
keyTempBuffer.write(in, compressedKeyLen);
keyDecompressBuffer.reset(keyTempBuffer.getData(), compressedKeyLen);
CompressionInputStream deflatFilter = codec.createInputStream(
    keyDecompressBuffer, keyDecompressor);
DataInputStream compressedIn = new DataInputStream(deflatFilter);