/**
 * Creates a {@link CodecFactory} based on the specified codec name and deflate level. If codecName is absent, then
 * a {@link CodecFactory#deflateCodec(int)} is returned. Otherwise the codecName is converted into a
 * {@link CodecFactory} via the {@link CodecFactory#fromString(String)} method.
 *
 * <p>The codec name is lower-cased with {@link java.util.Locale#ROOT} before the lookup, so the result does not
 * depend on the JVM's default locale (under a Turkish locale, for example, {@code "BZIP2".toLowerCase()} yields
 * a dotless {@code ı} and would no longer equal {@code "bzip2"}).
 *
 * @param codecName the name of the codec to use (e.g. deflate, snappy, xz, etc.).
 * @param deflateLevel must be an integer from [0-9], and is only applicable if the codecName is "deflate".
 * @return a {@link CodecFactory}.
 * @throws NumberFormatException if the codec is "deflate" and deflateLevel is present but is not a parsable integer.
 */
public static CodecFactory getCodecFactory(Optional<String> codecName, Optional<String> deflateLevel) {
  // No codec requested: fall back to deflate at the configured default level.
  if (!codecName.isPresent()) {
    return CodecFactory.deflateCodec(ConfigurationKeys.DEFAULT_DEFLATE_LEVEL);
  }

  String name = codecName.get();
  if (name.equalsIgnoreCase(DataFileConstants.DEFLATE_CODEC)) {
    // The level only matters for deflate; use the default when none was supplied.
    if (!deflateLevel.isPresent()) {
      return CodecFactory.deflateCodec(ConfigurationKeys.DEFAULT_DEFLATE_LEVEL);
    }
    return CodecFactory.deflateCodec(Integer.parseInt(deflateLevel.get()));
  }

  // Locale.ROOT keeps the lower-casing locale-independent (fully qualified to avoid a new import).
  return CodecFactory.fromString(name.toLowerCase(java.util.Locale.ROOT));
}
/**
 * Selects the Avro {@link CodecFactory} described by the given configuration map.
 *
 * <p>When the {@code CONF_COMPRESS} flag is not enabled the null codec is returned. Otherwise the codec named by
 * {@code CONF_COMPRESS_CODEC} is used: deflate and xz take their level from {@code CONF_DEFLATE_LEVEL} and
 * {@code CONF_XZ_LEVEL} respectively, and any other name is passed to {@link CodecFactory#fromString(String)}.
 *
 * @param conf configuration key/value pairs
 * @return the codec factory to use for output
 */
private CodecFactory getCompressionCodec(Map<String, String> conf) {
  if (!getBoolean(conf, CONF_COMPRESS, false)) {
    return CodecFactory.nullCodec();
  }

  // Both levels are read up front, regardless of which codec ends up being selected.
  final int deflateLevel = getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL);
  final int xzLevel = getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL);
  final String codecName = conf.get(CONF_COMPRESS_CODEC);

  if (DataFileConstants.DEFLATE_CODEC.equals(codecName)) {
    return CodecFactory.deflateCodec(deflateLevel);
  }
  if (DataFileConstants.XZ_CODEC.equals(codecName)) {
    return CodecFactory.xzCodec(xzLevel);
  }
  // Any other value (including a missing entry) is handed to fromString unchanged.
  return CodecFactory.fromString(codecName);
}
return HadoopCodecFactory.fromHadoopString(compressionCodec); } else { return CodecFactory.deflateCodec(deflateLevel); return CodecFactory.deflateCodec(deflateLevel); } else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) { return CodecFactory.xzCodec(xzLevel);
/**
 * Creates a Hive record writer that writes {@link GenericRecord}s to an Avro data file at {@code path}.
 *
 * <p>The schema is resolved from the job configuration and table properties. When {@code isCompressed} is set,
 * the codec named by {@code OUTPUT_CODEC} (deflate if unset) is applied; deflate uses the level stored under
 * {@code DEFLATE_LEVEL_KEY}. {@code valueClass} and {@code progressable} are not used by this method.
 *
 * @throws IOException if the schema cannot be determined or the output file cannot be created
 */
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf,
    Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties,
    Progressable progressable) throws IOException {
  final Schema avroSchema;
  try {
    avroSchema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
  } catch (AvroSerdeException e) {
    // Surface SerDe schema failures through the checked exception this method declares.
    throw new IOException(e);
  }

  DataFileWriter<GenericRecord> fileWriter =
      new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(avroSchema));

  if (isCompressed) {
    int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
    if (codecName.equals(DEFLATE_CODEC)) {
      fileWriter.setCodec(CodecFactory.deflateCodec(level));
    } else {
      fileWriter.setCodec(CodecFactory.fromString(codecName));
    }
  }

  fileWriter.create(avroSchema, path.getFileSystem(jobConf).create(path));
  return new AvroGenericRecordWriter(fileWriter);
}
/**
 * Configures compression through the Hadoop codec class name (DeflateCodec) with deflate level -1 and a
 * custom sync interval, then delegates to {@code testGetRecordWriter} expecting {@code deflateCodec(-1)}.
 */
@Test
public void testWithDeflateCodeWithHadoopConfig() throws IOException {
  final Configuration hadoopConf = new Configuration();
  hadoopConf.setBoolean("mapred.output.compress", true);
  hadoopConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DeflateCodec");
  hadoopConf.setInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY, -1);
  hadoopConf.setInt(SYNC_INTERVAL_KEY, TEST_SYNC_INTERVAL);

  final CodecFactory expectedCodec = CodecFactory.deflateCodec(-1);
  testGetRecordWriter(hadoopConf, expectedCodec, TEST_SYNC_INTERVAL);
}
/**
 * Enables output compression with deflate level 3 and no explicit codec, then delegates to
 * {@code testGetRecordWriter} expecting {@code deflateCodec(3)} and the default sync interval.
 */
@Test
public void testWithDeflateCodec() throws IOException {
  final Configuration hadoopConf = new Configuration();
  hadoopConf.setBoolean("mapred.output.compress", true);
  hadoopConf.setInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY, 3);

  final CodecFactory expectedCodec = CodecFactory.deflateCodec(3);
  testGetRecordWriter(hadoopConf, expectedCodec, DataFileConstants.DEFAULT_SYNC_INTERVAL);
}
return factory; } else { return CodecFactory.deflateCodec(deflateLevel); factory = CodecFactory.deflateCodec(deflateLevel); } else if ( codecName.equals(XZ_CODEC)) { factory = CodecFactory.xzCodec(xzLevel);
/**
 * Resolves the {@link CodecFactory} selected on the command line.
 *
 * <p>The codec name is taken from the {@code codec} option when it was given with an argument, otherwise from
 * {@code defaultCodec}. Deflate and xz are built with the value of the {@code level} option; every other name
 * is resolved through {@link CodecFactory#fromString(String)}.
 *
 * @param opts parsed command-line options
 * @param codec option carrying the codec name
 * @param level option carrying the compression level (used for deflate and xz only)
 * @param defaultCodec codec name to use when {@code codec} was not supplied
 * @return the codec factory for the resolved codec name
 */
static CodecFactory codecFactory(OptionSet opts, OptionSpec<String> codec, OptionSpec<Integer> level,
    String defaultCodec) {
  String codecName = opts.hasArgument(codec) ? codec.value(opts) : defaultCodec;
  if (codecName.equals(DEFLATE_CODEC)) {
    return CodecFactory.deflateCodec(level.value(opts));
  } else if (codecName.equals(DataFileConstants.XZ_CODEC)) {
    return CodecFactory.xzCodec(level.value(opts));
  } else {
    // Use the resolved name, not codec.value(opts): when the option is absent the latter ignores
    // defaultCodec, which made this branch disagree with the two checks above.
    return CodecFactory.fromString(codecName);
  }
}
}
/**
 * Creates a record writer that appends the pre-encoded buffer of each {@link TetherData} to an Avro data file.
 *
 * <p>The file is created at the task output path for {@code name} plus {@code AvroOutputFormat.EXT}. When output
 * compression is enabled for the job, deflate is used at the level stored under
 * {@code AvroOutputFormat.DEFLATE_LEVEL_KEY}. The {@code FileSystem} and {@code Progressable} arguments are unused.
 */
@SuppressWarnings("unchecked")
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
    Progressable prog) throws IOException {
  Schema outputSchema = AvroJob.getOutputSchema(job);

  final DataFileWriter avroWriter = new DataFileWriter(new GenericDatumWriter());

  if (FileOutputFormat.getCompressOutput(job)) {
    int deflateLevel = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL);
    CodecFactory deflate = CodecFactory.deflateCodec(deflateLevel);
    avroWriter.setCodec(deflate);
  }

  Path outputPath = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
  avroWriter.create(outputSchema, outputPath.getFileSystem(job).create(outputPath));

  return new RecordWriter<TetherData, NullWritable>() {
    // Data arrives already encoded, so it is appended as raw bytes.
    public void write(TetherData datum, NullWritable unusedValue) throws IOException {
      avroWriter.appendEncoded(datum.buffer());
    }

    public void close(Reporter reporter) throws IOException {
      avroWriter.close();
    }
  };
}
/**
 * Supplies the parameter sets for this parameterized test: one single-element row per codec, starting with
 * {@code null} (no codec) followed by deflate, null, snappy, xz and zstandard variants.
 */
@Parameters
public static List<Object[]> codecs() {
  CodecFactory[] codecsUnderTest = {
      null,
      CodecFactory.deflateCodec(0),
      CodecFactory.deflateCodec(1),
      CodecFactory.deflateCodec(9),
      CodecFactory.nullCodec(),
      CodecFactory.snappyCodec(),
      CodecFactory.xzCodec(0),
      CodecFactory.xzCodec(1),
      CodecFactory.xzCodec(6),
      CodecFactory.zstandardCodec()
  };

  List<Object[]> parameterRows = new ArrayList<>();
  for (CodecFactory codec : codecsUnderTest) {
    parameterRows.add(new Object[] { codec });
  }
  return parameterRows;
}
/**
 * Supplies the parameter sets for this parameterized test. Each row holds two codecs (either may be
 * {@code null}) followed by a boolean flag; the meaning of the three columns is defined by the test
 * constructor, which is not shown here.
 */
@Parameters
public static List<Object[]> codecs() {
  Object[][] table = {
      { null, null, false },
      { null, null, true },
      { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), false },
      { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), true },
      { CodecFactory.deflateCodec(3), CodecFactory.nullCodec(), false },
      { CodecFactory.nullCodec(), CodecFactory.deflateCodec(6), false },
      { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), false },
      { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), true },
      { CodecFactory.xzCodec(2), CodecFactory.nullCodec(), false },
      { CodecFactory.nullCodec(), CodecFactory.xzCodec(2), false }
  };

  List<Object[]> parameterRows = new ArrayList<>();
  for (Object[] row : table) {
    parameterRows.add(row);
  }
  return parameterRows;
}
/**
 * Maps a {@code CodecType} name to the matching Avro {@link CodecFactory}.
 *
 * <p>{@code NONE} and any constant not listed below yield the null codec.
 *
 * @param property name of a {@code CodecType} constant; resolved with {@code CodecType.valueOf}
 * @return the codec factory for that type
 */
private CodecFactory getCodecFactory(String property) {
  final CodecType codec = CodecType.valueOf(property);

  if (codec == CodecType.BZIP2) {
    return CodecFactory.bzip2Codec();
  }
  if (codec == CodecType.DEFLATE) {
    return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
  }
  if (codec == CodecType.LZO) {
    // NOTE(review): LZO is served by the xz codec here — confirm this substitution is intended.
    return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
  }
  if (codec == CodecType.SNAPPY) {
    return CodecFactory.snappyCodec();
  }
  // NONE and anything else: null codec.
  return CodecFactory.nullCodec();
}
/**
 * Maps a {@code CodecType} name to the matching Avro {@link CodecFactory}.
 *
 * <p>{@code SNAPPY} and any constant not listed below yield the snappy codec.
 *
 * @param property name of a {@code CodecType} constant; resolved with {@code CodecType.valueOf}
 * @return the codec factory for that type
 */
protected CodecFactory getCodecFactory(String property) {
  final CodecFactory selected;
  switch (CodecType.valueOf(property)) {
    case BZIP2:
      selected = CodecFactory.bzip2Codec();
      break;
    case DEFLATE:
      selected = CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
      break;
    case NONE:
      selected = CodecFactory.nullCodec();
      break;
    case LZO:
      // NOTE(review): LZO is served by the xz codec here — confirm this substitution is intended.
      selected = CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
      break;
    case SNAPPY:
    default:
      selected = CodecFactory.snappyCodec();
      break;
  }
  return selected;
}
/**
 * Maps a {@code CodecType} name to the matching Avro {@link CodecFactory}.
 *
 * <p>{@code NONE} and any constant not listed below yield the null codec.
 *
 * @param property name of a {@code CodecType} constant; resolved with {@code CodecType.valueOf}
 * @return the codec factory for that type
 */
public static CodecFactory getCodecFactory(String property) {
  final CodecType requested = CodecType.valueOf(property);

  CodecFactory factory;
  if (requested == CodecType.BZIP2) {
    factory = CodecFactory.bzip2Codec();
  } else if (requested == CodecType.DEFLATE) {
    factory = CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
  } else if (requested == CodecType.LZO) {
    // NOTE(review): LZO is served by the xz codec here — confirm this substitution is intended.
    factory = CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
  } else if (requested == CodecType.SNAPPY) {
    factory = CodecFactory.snappyCodec();
  } else {
    // NONE and anything else: null codec.
    factory = CodecFactory.nullCodec();
  }
  return factory;
}
/**
 * Writes an empty {@link SortedKeyValueFile} configured with {@code CodecFactory.deflateCodec(9)} and checks
 * that the data file's {@code avro.codec} metadata entry reads {@code "deflate"}.
 */
@Test
public void testDeflateClassCodec() throws IOException {
  Configuration conf = new Configuration();
  Path filePath = new Path(mTempDir.getRoot().getPath(), "myfile");

  Schema keySchema = Schema.create(Schema.Type.STRING);
  Schema valueSchema = Schema.create(Schema.Type.STRING);
  Schema recordSchema = AvroKeyValue.getSchema(keySchema, valueSchema);
  DatumReader<GenericRecord> datumReader = SpecificData.get().createDatumReader(recordSchema);

  LOG.debug("Using CodecFactory.deflateCodec() for a SortedKeyValueFile...");
  SortedKeyValueFile.Writer.Options writerOptions = new SortedKeyValueFile.Writer.Options();
  writerOptions = writerOptions.withKeySchema(keySchema);
  writerOptions = writerOptions.withValueSchema(valueSchema);
  writerOptions = writerOptions.withConfiguration(conf);
  writerOptions = writerOptions.withPath(filePath);
  writerOptions = writerOptions.withCodec(CodecFactory.deflateCodec(9));

  // No records are appended; creating and closing the writer is enough to produce the file header.
  SortedKeyValueFile.Writer<CharSequence, CharSequence> writer = new SortedKeyValueFile.Writer<>(writerOptions);
  writer.close();

  Path dataFile = new Path(filePath, SortedKeyValueFile.DATA_FILENAME);
  DataFileReader<GenericRecord> reader = new DataFileReader<>(new FsInput(dataFile, conf), datumReader);
  assertEquals("deflate", reader.getMetaString("avro.codec"));
  reader.close();
}
@Test public void testWriteAndRead() throws IOException { Schema schema = Schema.create(Type.STRING); // Write it DataFileWriter<Utf8> w = new DataFileWriter<>(new GenericDatumWriter<>(schema)); w.setCodec(CodecFactory.deflateCodec(6)); ByteArrayOutputStream baos = new ByteArrayOutputStream(); w.create(schema, baos); w.append(new Utf8("hello world")); w.append(new Utf8("hello moon")); w.sync(); w.append(new Utf8("bye bye world")); w.append(new Utf8("bye bye moon")); w.close(); // Read it DataFileStream<Utf8> r = new DataFileStream<>( new ByteArrayInputStream(baos.toByteArray()), new GenericDatumReader<>(schema)); assertEquals("hello world", r.next().toString()); assertEquals("hello moon", r.next().toString()); assertEquals("bye bye world", r.next().toString()); assertEquals("bye bye moon", r.next().toString()); assertFalse(r.hasNext()); } }
/**
 * Translates the configured {@code avroCodec()} string into a {@link CodecFactory}.
 *
 * <p>{@code "snappy"} selects snappy. A value starting with {@code "deflate"} selects deflate, with the level
 * parsed from what remains after removing the text {@code "deflate"}.
 *
 * @return the codec factory for the configured codec
 * @throws IllegalArgumentException if the value is neither {@code "snappy"} nor starts with {@code "deflate"}
 */
public CodecFactory getCodecFactory() {
  final String configured = avroCodec();

  if (configured.equals("snappy")) {
    return CodecFactory.snappyCodec();
  }
  if (!configured.startsWith("deflate")) {
    throw new IllegalArgumentException("Invalid avroCodec " + configured);
  }

  // Whatever is left once "deflate" is stripped out is taken as the compression level.
  final String levelText = configured.replace("deflate", "");
  return CodecFactory.deflateCodec(Integer.valueOf(levelText));
}
/**
 * Opens an Avro data file writer for objects of {@code _class}, using a reflection-derived schema and
 * deflate level 9.
 *
 * @param file target file
 * @param _class class whose reflected schema describes the records
 * @param append when {@code true} and {@code file} already exists, records are appended to it;
 *               otherwise the file is created with the reflected schema
 * @throws IOException if the file cannot be opened for appending or created
 */
public ReflectAvroFileWriter(File file, Class<T> _class, boolean append) throws IOException {
  schema = ReflectData.get().getSchema(_class);

  writer = new DataFileWriter<T>(new ReflectDatumWriter<T>(_class))
      .setCodec(CodecFactory.deflateCodec(9));

  final boolean appendToExisting = append && file.exists();
  writer = appendToExisting
      ? writer.appendTo(file)
      : writer.create(schema, file);
}
/**
 * For every deflate level 0 through 9, clones a {@link SerializableAvroCodecFactory} wrapping that codec and
 * checks the clone's codec has the same string form as a freshly built deflate codec of that level.
 */
@Test
public void testDeflateCodecSerDeWithLevels() throws Exception {
  for (int level = 0; level <= 9; level++) {
    SerializableAvroCodecFactory original =
        new SerializableAvroCodecFactory(CodecFactory.deflateCodec(level));
    SerializableAvroCodecFactory roundTripped = SerializableUtils.clone(original);

    assertEquals(CodecFactory.deflateCodec(level).toString(), roundTripped.getCodec().toString());
  }
}
/**
 * Builds an {@code AvroIO.Write} configured with {@code CodecFactory.deflateCodec(9)} and checks that the
 * codec held by the write's inner transform, after being cloned via {@link SerializableUtils#clone}, has the
 * same string form as a freshly built deflate level 9 codec.
 */
@Test
@SuppressWarnings("unchecked")
public void testWriteWithSerDeCustomDeflateCodec() throws Exception {
  AvroIO.Write<String> write = AvroIO.write(String.class)
      .to("/tmp/foo/baz")
      .withCodec(CodecFactory.deflateCodec(9));

  String expectedCodec = CodecFactory.deflateCodec(9).toString();
  String clonedCodec = SerializableUtils.clone(write.inner.getCodec()).getCodec().toString();

  assertEquals(expectedCodec, clonedCodec);
}