/**
 * Opens the Avro-backed Parquet writer at the configured (qualified) path.
 *
 * <p>Uses the configured compression codec when compression is enabled,
 * otherwise writes uncompressed.
 *
 * @throws IOException if the underlying Parquet writer cannot be created
 */
@Override
public void open() throws IOException {
  // Compression is opt-in: fall back to UNCOMPRESSED unless enabled.
  CompressionCodecName codec =
      enableCompression ? getCompressionCodecName() : CompressionCodecName.UNCOMPRESSED;
  avroParquetWriter = new AvroParquetWriter<E>(
      fileSystem.makeQualified(path),
      schema,
      codec,
      DEFAULT_ROW_GROUP_SIZE,
      ParquetWriter.DEFAULT_PAGE_SIZE,
      ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
      conf);
}
/**
 * Rotates the write-ahead log: closes the current Parquet writer, renames the
 * finished file from the ".chukwa" suffix to ".done", then opens a fresh,
 * uniquely named ".chukwa" file in the output directory.
 *
 * @throws WriterException if the replacement Parquet writer cannot be created
 */
private void rotate() throws WriterException {
  if (parquetWriter != null) {
    try {
      parquetWriter.close();
      // Drop the 7-character ".chukwa" suffix and mark the file complete.
      String newFileName =
          previousFileName.substring(0, previousFileName.length() - 7);
      fs.rename(previousPath, new Path(newFileName + ".done"));
    } catch (IOException e) {
      // Fix: log the cause instead of swallowing it; rotation still proceeds
      // so a close/rename failure does not block new writes.
      LOG.warn("Fail to close Chukwa write ahead log.", e);
    }
  }
  startTime = System.currentTimeMillis();
  calendar.setTimeInMillis(startTime);
  // Unique name: timestamp + host + JVM-unique UID, with separator
  // characters stripped so the result is filesystem-safe.
  String newName = new java.text.SimpleDateFormat("yyyyMMddHHmmssSSS")
      .format(calendar.getTime());
  newName += localHostAddr + new java.rmi.server.UID().toString();
  newName = newName.replace("-", "");
  newName = newName.replace(":", "");
  newName = newName.replace(".", "");
  newName = outputDir + "/" + newName.trim() + ".chukwa";
  LOG.info("writing: " + newName);
  Path path = new Path(newName);
  try {
    parquetWriter = new AvroParquetWriter<GenericRecord>(path, avroSchema,
        CompressionCodecName.SNAPPY, blockSize, pageSize);
    previousPath = path;
    previousFileName = newName;
  } catch (IOException e) {
    throw new WriterException(e);
  }
}
/**
 * Builds a Parquet {@link RecordWriter} for sink records: each record's value
 * is converted from the Connect data model to Avro and appended to an
 * {@link AvroParquetWriter} at the given file name.
 *
 * @param conf Hadoop configuration handed to the Parquet writer
 * @param fileName destination path of the Parquet file
 * @param record sample record whose value schema seeds the Avro schema
 * @param avroData converter between Connect and Avro representations
 * @return a writer that appends sink records to the Parquet file
 * @throws IOException if the Parquet writer cannot be created
 */
@Override
public RecordWriter<SinkRecord> getRecordWriter(
    Configuration conf, final String fileName,
    SinkRecord record, final AvroData avroData) throws IOException {
  // Derive the Avro schema once from the sample record's value schema.
  final Schema parquetSchema = avroData.fromConnectSchema(record.valueSchema());
  // 256 MiB row groups, 64 KiB pages, Snappy compression, dictionary on.
  final int rowGroupBytes = 256 * 1024 * 1024;
  final int pageBytes = 64 * 1024;
  final ParquetWriter<GenericRecord> parquetWriter = new AvroParquetWriter<>(
      new Path(fileName), parquetSchema, CompressionCodecName.SNAPPY,
      rowGroupBytes, pageBytes, true, conf);
  return new RecordWriter<SinkRecord>() {
    @Override
    public void write(SinkRecord sinkRecord) throws IOException {
      Object avroValue =
          avroData.fromConnectData(sinkRecord.valueSchema(), sinkRecord.value());
      parquetWriter.write((GenericRecord) avroValue);
    }

    @Override
    public void close() throws IOException {
      parquetWriter.close();
    }
  };
}
}
// Fragment of a larger (not fully visible) rotation routine: record the new
// file name, reset the "chunks written since last rotate" flag, and open a
// fresh Snappy-compressed Avro Parquet writer at the new output path.
// NOTE(review): parquetWriter, avroSchema, blockSize and pageSize are fields
// declared outside this view — confirm against the enclosing class.
currentFileName = newName; chunksWrittenThisRotate = false; parquetWriter = new AvroParquetWriter<GenericRecord>(newOutputPath, avroSchema, CompressionCodecName.SNAPPY, blockSize, pageSize);
/**
 * Create a data file that gets exported to the db.
 *
 * <p>Writes {@code numRecords} rows — "id", "msg", plus any generated extra
 * columns — to a randomly named ".parquet" file under the table path, using
 * Snappy compression.
 *
 * @param numRecords how many records to write to the file.
 * @param extraCols generators for additional columns appended to each record
 * @throws IOException if the file cannot be created or written
 */
protected void createParquetFile(int numRecords,
    ColumnGenerator... extraCols) throws IOException {
  Schema schema = buildSchema(extraCols);
  String fileName = UUID.randomUUID().toString() + ".parquet";
  Path filePath = new Path(getTablePath(), fileName);
  // Fix: parameterize the writer (the original used a raw type, triggering
  // unchecked warnings); try-with-resources still closes it on any exit path.
  try (AvroParquetWriter<GenericRecord> parquetWriter =
      new AvroParquetWriter<GenericRecord>(
          filePath, schema, SNAPPY, DEFAULT_BLOCK_SIZE, DEFAULT_PAGE_SIZE)) {
    for (int i = 0; i < numRecords; i++) {
      GenericRecord record = new GenericData.Record(schema);
      record.put("id", i);
      record.put("msg", getMsgPrefix() + i);
      addExtraColumns(record, i, extraCols);
      parquetWriter.write(record);
    }
  }
}