public FlexibleDelimitedFileReader(LogFilePath path, CompressionCodec codec)
    throws IOException {
  Path fsPath = new Path(path.getLogFilePath());
  FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
  InputStream inputStream = fs.open(fsPath);
  this.mReader = (codec == null)
      ? new BufferedInputStream(inputStream)
      : new BufferedInputStream(
          codec.createInputStream(inputStream,
              mDecompressor = CodecPool.getDecompressor(codec)));
  this.mOffset = path.getOffset();
}
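// A reader that borrows a decompressor must hand it back, or the pool leaks
// one decompressor per file opened. A minimal sketch of the matching close():
// the body is an assumption, not this project's actual code; only the
// mReader/mDecompressor fields come from the constructor above.
public void close() throws IOException {
  this.mReader.close();
  if (mDecompressor != null) {
    CodecPool.returnDecompressor(mDecompressor);
    mDecompressor = null;
  }
}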
/**
 * Create an input stream with a codec taken from the global CodecPool.
 *
 * @param codec The codec to use to create the input stream.
 * @param conf  The configuration to use if we need to create a new codec.
 * @param in    The input stream to wrap.
 * @return The new input stream
 * @throws IOException
 */
static CompressionInputStream createInputStreamWithCodecPool(
    CompressionCodec codec, Configuration conf, InputStream in)
    throws IOException {
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  CompressionInputStream stream = null;
  try {
    stream = codec.createInputStream(in, decompressor);
  } finally {
    if (stream == null) {
      CodecPool.returnDecompressor(decompressor);
    } else {
      stream.setTrackedDecompressor(decompressor);
    }
  }
  return stream;
}
@Override
public void open(String filePath, CompressionCodec codec,
    CompressionType cType) throws IOException {
  Configuration conf = new Configuration();
  Path dstPath = new Path(filePath);
  FileSystem hdfs = dstPath.getFileSystem(conf);
  if (useRawLocalFileSystem) {
    if (hdfs instanceof LocalFileSystem) {
      hdfs = ((LocalFileSystem) hdfs).getRaw();
    }
  }
  boolean appending = false;
  if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
    fsOut = hdfs.append(dstPath);
    appending = true;
  } else {
    fsOut = hdfs.create(dstPath);
  }
  compressor = CodecPool.getCompressor(codec, conf);
  cmpOut = codec.createOutputStream(fsOut, compressor);
  serializer = EventSerializerFactory.getInstance(serializerType,
      serializerContext, cmpOut);
  // ... remainder of method elided ...
}
/**
 * Create an output stream with a codec taken from the global CodecPool.
 *
 * @param codec The codec to use to create the output stream.
 * @param conf  The configuration to use if we need to create a new codec.
 * @param out   The output stream to wrap.
 * @return The new output stream
 * @throws IOException
 */
static CompressionOutputStream createOutputStreamWithCodecPool(
    CompressionCodec codec, Configuration conf, OutputStream out)
    throws IOException {
  Compressor compressor = CodecPool.getCompressor(codec, conf);
  CompressionOutputStream stream = null;
  try {
    stream = codec.createOutputStream(out, compressor);
  } finally {
    if (stream == null) {
      CodecPool.returnCompressor(compressor);
    } else {
      stream.setTrackedCompressor(compressor);
    }
  }
  return stream;
}
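// Hypothetical caller of the helper above (not part of the source): because
// the helper attaches the compressor to the stream via the tracking hook, a
// plain try-with-resources suffices — closing the stream is what returns the
// compressor to the pool.
static void writeCompressed(CompressionCodec codec, Configuration conf,
    OutputStream rawOut, byte[] payload) throws IOException {
  try (CompressionOutputStream out =
      createOutputStreamWithCodecPool(codec, conf, rawOut)) {
    out.write(payload);
    out.finish(); // flush remaining compressed data before close
  }
}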
/**
 * Get a {@link Decompressor} for the given {@link CompressionCodec} from the
 * pool, or a new one.
 *
 * @param codec the <code>CompressionCodec</code> for which to get the
 *              <code>Decompressor</code>
 * @return <code>Decompressor</code> for the given
 *         <code>CompressionCodec</code> from the pool or a new one
 */
public static Decompressor getDecompressor(CompressionCodec codec) {
  Decompressor decompressor = borrow(decompressorPool,
      codec.getDecompressorType());
  if (decompressor == null) {
    decompressor = codec.createDecompressor();
    LOG.info("Got brand-new decompressor ["
        + codec.getDefaultExtension() + "]");
  } else {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Got recycled decompressor");
    }
  }
  if (decompressor != null
      && !decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
    updateLeaseCount(decompressorCounts, decompressor, 1);
  }
  return decompressor;
}
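// The contract implied by the lease counting above: every getDecompressor()
// should be paired with a returnDecompressor(), typically in a finally block
// so a failed read does not strand the instance. A minimal sketch of that
// pattern (the read loop itself is elided):
static void readCompressed(CompressionCodec codec, InputStream raw)
    throws IOException {
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  try {
    InputStream in = codec.createInputStream(raw, decompressor);
    // ... consume 'in' here ...
  } finally {
    CodecPool.returnDecompressor(decompressor);
  }
}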
private InputStream createInputStream(Configuration job, final Path file)
    throws IOException {
  final FileSystem fs = file.getFileSystem(job);
  InputStream in = fs.open(file);
  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null != codec) {
    decompressor = CodecPool.getDecompressor(codec);
    in = codec.createInputStream(in, decompressor);
  }
  return in;
}
Configuration conf = new Configuration();
conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, false);
assertFalse("ZlibFactory is using native libs against request",
    ZlibFactory.isNativeZlibLoaded(conf));
assertTrue("ZlibFactory returned unexpected inflator",
    zlibDecompressor instanceof BuiltInZlibInflater);
CodecPool.returnDecompressor(zlibDecompressor);

Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
    new GZIPOutputStream(new FileOutputStream(f.toString()))));
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
CompressionCodec codec = ccf.getCodec(f);
Decompressor decompressor = CodecPool.getDecompressor(codec);
FileSystem fs = FileSystem.getLocal(conf);
InputStream is = fs.open(f);
is = codec.createInputStream(is, decompressor);
BufferedReader br = new BufferedReader(new InputStreamReader(is));
String line = br.readLine();
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader()
      .getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[]) null);

  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
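// What the double-close test above guards against: returning the same
// decompressor to the pool twice would let two readers borrow the same
// instance concurrently, so ten subsequent getDecompressor() calls would
// yield fewer than ten distinct objects. A common fix, sketched here (not
// necessarily this project's exact code), is to null the field on first
// close so the second close() is a no-op:
@Override
public synchronized void close() throws IOException {
  try {
    if (in != null) {
      in.close();
    }
  } finally {
    if (decompressor != null) {
      CodecPool.returnDecompressor(decompressor);
      decompressor = null; // second close() becomes a no-op
    }
  }
}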
private void verifyCompressedFile(Path f, int expectedNumLines)
    throws IOException {
  Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  FileSystem fs = FileSystem.get(conf);
  InputStream is = fs.open(f);
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  LOG.info("gzip check codec is " + codec);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  if (null == decompressor) {
    LOG.info("Verifying gzip sanity with null decompressor");
  } else {
    LOG.info("Verifying gzip sanity with decompressor: "
        + decompressor.toString());
  }
  is = codec.createInputStream(is, decompressor);
  BufferedReader r = new BufferedReader(new InputStreamReader(is));
  int numLines = 0;
  // ... remainder of method elided ...
@Override
public void init(Path path, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(fs.create(path), compressor);
  } else {
    output = fs.create(path);
  }
  writer = new JsonObjectMapperWriter<T>(output,
      conf.getBoolean("rumen.output.pretty.print", true));
}
@Test
public void testCodecPoolGzipReuse() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    LOG.warn("testCodecPoolGzipReuse skipped: native libs not loaded");
    return;
  }
  GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
  DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
  Compressor c1 = CodecPool.getCompressor(gzc);
  Compressor c2 = CodecPool.getCompressor(dfc);
  CodecPool.returnCompressor(c1);
  CodecPool.returnCompressor(c2);
  assertTrue("Got mismatched ZlibCompressor",
      c2 != CodecPool.getCompressor(gzc));
}
public FlexibleDelimitedFileWriter(LogFilePath path, CompressionCodec codec)
    throws IOException {
  Path fsPath = new Path(path.getLogFilePath());
  FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
  this.mCountingStream = new CountingOutputStream(fs.create(fsPath));
  this.mWriter = (codec == null)
      ? new BufferedOutputStream(this.mCountingStream)
      : new BufferedOutputStream(
          codec.createOutputStream(this.mCountingStream,
              mCompressor = CodecPool.getCompressor(codec)));
}
public static BufferedReader getBufferedReader(File file,
    MapredContext context) throws IOException {
  URI fileuri = file.toURI();
  Path path = new Path(fileuri);
  Configuration conf = context.getJobConf();
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(path);
  if (codec == null) {
    return new BufferedReader(new FileReader(file));
  } else {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    FileInputStream fis = new FileInputStream(file);
    CompressionInputStream cis = codec.createInputStream(fis, decompressor);
    BufferedReader br = new BufferedReaderExt(new InputStreamReader(cis),
        decompressor);
    return br;
  }
}
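// BufferedReaderExt is not shown in the snippet above; the name suggests a
// BufferedReader subclass that owns the pooled decompressor. A sketch of
// what it plausibly does — returning the decompressor exactly once when the
// reader is closed (this class body is an assumption, not the original):
final class BufferedReaderExt extends BufferedReader {
  private Decompressor decompressor;

  BufferedReaderExt(Reader in, Decompressor decompressor) {
    super(in);
    this.decompressor = decompressor;
  }

  @Override
  public void close() throws IOException {
    try {
      super.close();
    } finally {
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor);
        decompressor = null;
      }
    }
  }
}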
public InputStream openFile(Path path) throws IOException {
  CompressionCodec codec = compressionCodecs.getCodec(path);
  FSDataInputStream fileIn = fs.open(path);
  // check if compressed
  if (codec == null) { // uncompressed
    LOG.debug("Reading from an uncompressed file \"" + path + "\"");
    return fileIn;
  } else { // compressed
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    this.openDecompressors.add(decompressor); // to be returned later using close
    if (codec instanceof SplittableCompressionCodec) {
      LOG.debug("Reading from a compressed file \"" + path
          + "\" with splittable compression codec");
      long end = fs.getFileStatus(path).getLen();
      return ((SplittableCompressionCodec) codec).createInputStream(fileIn,
          decompressor, 0, end,
          SplittableCompressionCodec.READ_MODE.CONTINUOUS);
    } else {
      LOG.debug("Reading from a compressed file \"" + path
          + "\" with non-splittable compression codec");
      return codec.createInputStream(fileIn, decompressor);
    }
  }
}
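// The decompressors collected in openDecompressors above must be released
// when the reader shuts down. A minimal sketch of the matching cleanup,
// assuming close() is the designated place; only the openDecompressors
// field comes from the snippet above:
public void close() throws IOException {
  for (Decompressor decompressor : openDecompressors) {
    if (decompressor != null) {
      CodecPool.returnDecompressor(decompressor);
    }
  }
  openDecompressors.clear();
}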
/**
 * Construct an IFile Reader.
 *
 * @param conf   Configuration File
 * @param in     The input stream
 * @param length Length of the data in the stream, including the checksum
 *               bytes.
 * @param codec  codec
 * @throws IOException
 */
public Reader(Configuration conf, FSDataInputStream in, long length,
    CompressionCodec codec) throws IOException {
  checksumIn = new IFileInputStream(in, length);
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
    this.in = codec.createInputStream(checksumIn, decompressor);
  } else {
    this.in = checksumIn;
  }
  this.fileLength = length;

  if (conf != null) {
    bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
  }
}
try {
  Class<? extends CompressionCodec> codecClass = conf.getClassByName(
      codecClassname).asSubclass(CompressionCodec.class);
  this.codec = ReflectionUtils.newInstance(codecClass, conf);
} catch (ClassNotFoundException cnfe) {
  throw new IllegalArgumentException("Unknown codec: " + codecClassname,
      cnfe);
}

valBuffer = new DataInputBuffer();
if (decompress) {
  valDecompressor = CodecPool.getDecompressor(codec);
  valInFilter = codec.createInputStream(valBuffer, valDecompressor);
  valIn = new DataInputStream(valInFilter);
} else {
  valIn = valBuffer;
}

if (blockCompressed) {
  keyLenBuffer = new DataInputBuffer();
  keyBuffer = new DataInputBuffer();
  valLenBuffer = new DataInputBuffer();

  keyLenDecompressor = CodecPool.getDecompressor(codec);
  keyLenInFilter = codec.createInputStream(keyLenBuffer, keyLenDecompressor);
  keyLenIn = new DataInputStream(keyLenInFilter);

  keyDecompressor = CodecPool.getDecompressor(codec);
  keyInFilter = codec.createInputStream(keyBuffer, keyDecompressor);
  keyIn = new DataInputStream(keyInFilter);

  valLenDecompressor = CodecPool.getDecompressor(codec);
  valLenInFilter = codec.createInputStream(valLenBuffer, valLenDecompressor);
  valLenIn = new DataInputStream(valLenInFilter);
}
/**
 * Get a {@link Compressor} for the given {@link CompressionCodec} from the
 * pool, or a new one.
 *
 * @param codec the <code>CompressionCodec</code> for which to get the
 *              <code>Compressor</code>
 * @param conf  the <code>Configuration</code> object used when creating or
 *              reinitializing the compressor
 * @return <code>Compressor</code> for the given
 *         <code>CompressionCodec</code> from the pool or a new one
 */
public static Compressor getCompressor(CompressionCodec codec,
    Configuration conf) {
  Compressor compressor = borrow(compressorPool, codec.getCompressorType());
  if (compressor == null) {
    compressor = codec.createCompressor();
    LOG.info("Got brand-new compressor ["
        + codec.getDefaultExtension() + "]");
  } else {
    compressor.reinit(conf);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Got recycled compressor");
    }
  }
  if (compressor != null
      && !compressor.getClass().isAnnotationPresent(DoNotPool.class)) {
    updateLeaseCount(compressorCounts, compressor, 1);
  }
  return compressor;
}
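// The reinit(conf) branch above is what lets a recycled compressor pick up
// caller-specific settings, e.g. a per-job zlib compression level. A sketch
// under that assumption (ZlibFactory and ZlibCompressor.CompressionLevel are
// the standard Hadoop zlib helpers; the method itself is hypothetical):
static Compressor borrowBestSpeedCompressor(CompressionCodec codec) {
  Configuration conf = new Configuration();
  ZlibFactory.setCompressionLevel(conf,
      ZlibCompressor.CompressionLevel.BEST_SPEED);
  // a recycled instance is reinit()-ed with this conf inside getCompressor
  return CodecPool.getCompressor(codec, conf);
}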
@Override
public void destroy() {
  if (destroyed) {
    return;
  }
  destroyed = true;
  CodecPool.returnDecompressor(decompressor);
}
@Override
public CompressedSliceOutput get() {
  try {
    compressor.reset();
    bufferedOutput.reset();
    CompressionOutputStream compressionStream =
        codec.createOutputStream(bufferedOutput, compressor);
    return new CompressedSliceOutput(compressionStream, bufferedOutput,
        this, () -> CodecPool.returnCompressor(compressor));
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
if (this.codec != null) {
  ReflectionUtils.setConf(this.codec, this.conf);
  this.compressor = CodecPool.getCompressor(this.codec);
  this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
  this.deflateOut = new DataOutputStream(
      new BufferedOutputStream(deflateFilter));
}