@Override
public void finish() throws IOException {
  parquetWriter.close();
}
@Override
public void addElement(T datum) throws IOException {
  parquetWriter.write(datum);
}
public MapredParquetInputFormat() {
  this(new ParquetInputFormat<ArrayWritable>(DataWritableReadSupport.class));
}
protected static void writeRepeateListData(ParquetWriter<Group> writer, int elementNum, boolean isNull)
    throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int listMaxSize = 4;
  for (int i = 0; i < elementNum; i++) {
    Group group = f.newGroup();
    if (!isNull) {
      for (int j = 0; j < listMaxSize; j++) {
        group.append("list_int32_field_for_repeat_test", j);
      }
    }
    writer.write(group);
  }
  writer.close();
}
protected static ParquetWriter<Group> initWriterFromFile() throws IOException {
  GroupWriteSupport.setSchema(schema, conf);
  return new ParquetWriter<>(
      file,
      new GroupWriteSupport(),
      GZIP,
      1024 * 1024,
      1024,
      1024 * 1024,
      true,
      false,
      PARQUET_1_0,
      conf);
}
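// A minimal usage sketch, not part of the original snippets: the writer returned by initWriterFromFile()
// is handed to a write helper such as writeRepeateListData above, which also closes it when it finishes.
// The method name writeRepeateListFile and the element count of 1024 are hypothetical, for illustration only.
protected static void writeRepeateListFile() throws IOException {
  ParquetWriter<Group> writer = initWriterFromFile();
  writeRepeateListData(writer, 1024, false);
}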
@Override
public KeyValue next() throws IOException {
  GenericRecord record = reader.read();
  if (record != null) {
    return new KeyValue(offset++, serializeAvroRecord(writer, record));
  }
  return null;
}
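// serializeAvroRecord is not shown in these snippets. A minimal sketch, assuming it binary-encodes the
// GenericRecord with a supplied Avro DatumWriter; the signature and parameter names here are assumptions
// for illustration, not the original implementation.
private static byte[] serializeAvroRecord(org.apache.avro.io.DatumWriter<GenericRecord> datumWriter,
    GenericRecord record) throws IOException {
  java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
  // Reuse-friendly binary encoder; passing null creates a fresh encoder over the stream.
  org.apache.avro.io.BinaryEncoder encoder = org.apache.avro.io.EncoderFactory.get().binaryEncoder(out, null);
  datumWriter.write(record, encoder);
  encoder.flush();
  return out.toByteArray();
}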
@Override
public void close() throws IOException {
  reader.close();
}
private ParquetMetadata readFooterFromFile(final Path file, final FileSystem fs, final FileStatus stat,
    MetadataFilter filter) throws IOException {
  // Adapt the Hadoop FileSystem/FileStatus pair to Parquet's InputFile interface so the footer
  // can be read with the given metadata filter.
  InputFile inputFile = new InputFile() {
    @Override
    public SeekableInputStream newStream() throws IOException {
      return HadoopStreams.wrap(fs.open(file));
    }

    @Override
    public long getLength() throws IOException {
      return stat.getLen();
    }
  };
  return ParquetFileReader.readFooter(inputFile, filter);
}
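// A minimal usage sketch (assumed caller, not part of the original snippets): read a file's footer with no
// metadata filtering. ParquetMetadataConverter.NO_FILTER is the standard "read everything" filter; path and
// fs stand in for an existing Path and FileSystem.
ParquetMetadata footer = readFooterFromFile(path, fs, fs.getFileStatus(path), ParquetMetadataConverter.NO_FILTER);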
@Override
public void close() throws IOException {
  // Unregister the file from the LLAP cache-aware filesystem before closing the underlying reader.
  if (cacheFsPath != null) {
    LlapCacheAwareFs.unregisterFile(cacheFsPath);
  }
  if (reader != null) {
    reader.close();
  }
}
protected static FileSplit getFileSplit(Job vectorJob) throws IOException, InterruptedException {
  ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);
  InputSplit split = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0);
  FileSplit fsplit = new FileSplit(file, 0L, split.getLength(), split.getLocations());
  return fsplit;
}
public MapredParquetOutputFormat() {
  realOutputFormat = new ParquetOutputFormat<ParquetHiveRecord>(new DataWritableWriteSupport());
}
@Override
public long getLength() throws IOException {
  return writer.getDataSize();
}
@Override
public void checkOutputSpecs(final FileSystem ignored, final JobConf job) throws IOException {
  realOutputFormat.checkOutputSpecs(ShimLoader.getHadoopShims().getHCatShim().createJobContext(job, null));
}
protected static void writeRepeateMapData(ParquetWriter<Group> writer, int elementNum, boolean isNull)
    throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int mapMaxSize = 4;
  for (int i = 0; i < elementNum; i++) {
    Group group = f.newGroup();
    if (!isNull) {
      for (int j = 0; j < mapMaxSize; j++) {
        group.addGroup("map_int32_for_repeat_test").append("key", j).append("value", j);
      }
    }
    writer.write(group);
  }
  writer.close();
}
@Override
public void close() throws IOException {
  writer.close();
}
@Override
public void write(final Record record) throws IOException {
  final GenericRecord genericRecord = AvroTypeUtil.createAvroRecord(record, avroSchema);
  parquetWriter.write(genericRecord);
}
@Override
public void close() throws IOException {
  reader.close();
}
@Override
public void close() throws IOException {
  parquetWriter.close();
}
@Override
public void close() throws IOException {
  parquetReader.close();
}
@Override
public void close() throws IOException {
  reader.close();
}