@Override
public void append(E entity) throws IOException {
  avroParquetWriter.write(entity);
}
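The snippet above delegates to an already-constructed writer field. A minimal sketch of how such a field could be initialized with the parquet-avro builder API, assuming entities are plain Java objects mapped via Avro reflection; the outputPath and entityClass names are illustrative placeholders, not part of the original code:

import org.apache.avro.Schema;
import org.apache.avro.reflect.ReflectData;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

// Hypothetical setup: derive an Avro schema for the entity class by
// reflection and open a Parquet writer at an assumed output path.
Schema schema = ReflectData.get().getSchema(entityClass);
ParquetWriter<E> avroParquetWriter = AvroParquetWriter.<E>builder(new Path(outputPath))
    .withSchema(schema)
    .withDataModel(ReflectData.get())
    .withCompressionCodec(CompressionCodecName.SNAPPY)
    .build();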
@Override
public CommitStatus add(List<Chunk> chunks) throws WriterException {
  long elapsedTime = 0;
  CommitStatus rv = ChukwaWriter.COMMIT_OK;
  for (Chunk chunk : chunks) {
    try {
      GenericRecord record = new GenericData.Record(avroSchema);
      record.put("dataType", chunk.getDataType());
      record.put("data", ByteBuffer.wrap(chunk.getData()));
      record.put("tags", chunk.getTags());
      record.put("seqId", chunk.getSeqID());
      record.put("source", chunk.getSource());
      record.put("stream", chunk.getStreamName());
      parquetWriter.write(record);
      elapsedTime = System.currentTimeMillis() - startTime;
      if (elapsedTime > rotateInterval) {
        rotate();
      }
    } catch (IOException e) {
      LOG.warn("Failed to store data to HDFS.");
      LOG.warn(ExceptionUtil.getStackTrace(e));
    }
  }
  if (next != null) {
    rv = next.add(chunks); // pass data through to the next writer in the pipeline
  }
  return rv;
}
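The method above assumes an avroSchema whose fields match the record puts. A schema along these lines would satisfy that contract; it is a sketch inferred from the fields written here, and the actual Chukwa schema definition may differ in record name, types, or defaults:

import org.apache.avro.Schema;

// Illustrative schema matching the fields written above (dataType, data,
// tags, seqId, source, stream); parsed once when the writer is initialized.
Schema avroSchema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Chunk\",\"fields\":["
    + "{\"name\":\"dataType\",\"type\":\"string\"},"
    + "{\"name\":\"data\",\"type\":\"bytes\"},"
    + "{\"name\":\"tags\",\"type\":\"string\"},"
    + "{\"name\":\"seqId\",\"type\":\"long\"},"
    + "{\"name\":\"source\",\"type\":\"string\"},"
    + "{\"name\":\"stream\",\"type\":\"string\"}]}");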
/**
 * Create a data file that gets exported to the db.
 * @param numRecords how many records to write to the file.
 */
protected void createParquetFile(int numRecords,
    ColumnGenerator... extraCols) throws IOException {
  Schema schema = buildSchema(extraCols);
  String fileName = UUID.randomUUID().toString() + ".parquet";
  Path filePath = new Path(getTablePath(), fileName);
  try (AvroParquetWriter<GenericRecord> parquetWriter = new AvroParquetWriter<GenericRecord>(
      filePath, schema, SNAPPY, DEFAULT_BLOCK_SIZE, DEFAULT_PAGE_SIZE)) {
    for (int i = 0; i < numRecords; i++) {
      GenericRecord record = new GenericData.Record(schema);
      record.put("id", i);
      record.put("msg", getMsgPrefix() + i);
      addExtraColumns(record, i, extraCols);
      parquetWriter.write(record);
    }
  }
}
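Recent parquet-avro releases deprecate the multi-argument AvroParquetWriter constructors in favor of a builder. A builder-based sketch of the same writer, reusing the filePath, schema, and size constants from the helper above:

import org.apache.avro.generic.GenericRecord;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

// Builder-based equivalent of the constructor call above; the row group and
// page sizes mirror the DEFAULT_BLOCK_SIZE / DEFAULT_PAGE_SIZE constants.
try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
        .<GenericRecord>builder(filePath)
        .withSchema(schema)
        .withCompressionCodec(CompressionCodecName.SNAPPY)
        .withRowGroupSize(DEFAULT_BLOCK_SIZE)
        .withPageSize(DEFAULT_PAGE_SIZE)
        .build()) {
  // write GenericRecords exactly as in createParquetFile(...)
}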
record.put("source", chunk.getSource()); record.put("stream", chunk.getStreamName()); parquetWriter.write(record);