@Override
public void finish() throws IOException {
  parquetWriter.close();
}
@Override
public void addElement(T datum) throws IOException {
  parquetWriter.write(datum);
}
public MapredParquetInputFormat() {
  this(new ParquetInputFormat<ArrayWritable>(DataWritableReadSupport.class));
}
protected static void writeRepeateListData(ParquetWriter<Group> writer, int elementNum, boolean isNull)
    throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int listMaxSize = 4;
  for (int i = 0; i < elementNum; i++) {
    Group group = f.newGroup();
    if (!isNull) {
      for (int j = 0; j < listMaxSize; j++) {
        group.append("list_int32_field_for_repeat_test", j);
      }
    }
    writer.write(group);
  }
  writer.close();
}
protected static ParquetWriter<Group> initWriterFromFile() throws IOException {
  GroupWriteSupport.setSchema(schema, conf);
  return new ParquetWriter<>(
      file,
      new GroupWriteSupport(),
      GZIP,
      1024 * 1024,
      1024,
      1024 * 1024,
      true,
      false,
      PARQUET_1_0,
      conf);
}
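// A minimal usage sketch, not part of the original snippets: the writer returned by initWriterFromFile()
// is handed to a write helper such as writeRepeateListData above, which also closes it when it finishes.
// The method name writeRepeateListFile and the element count of 1024 are hypothetical, for illustration only.
protected static void writeRepeateListFile() throws IOException {
  ParquetWriter<Group> writer = initWriterFromFile();
  writeRepeateListData(writer, 1024, false);
}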
@Override
public KeyValue next() throws IOException {
  GenericRecord record = reader.read();
  if (record != null) {
    return new KeyValue(offset++, serializeAvroRecord(writer, record));
  }
  return null;
}
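// serializeAvroRecord is not shown in these snippets. A minimal sketch, assuming it binary-encodes the
// GenericRecord with a supplied Avro DatumWriter; the signature and parameter names here are assumptions
// for illustration, not the original implementation.
private static byte[] serializeAvroRecord(org.apache.avro.io.DatumWriter<GenericRecord> datumWriter,
    GenericRecord record) throws IOException {
  java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
  // Reuse-friendly binary encoder; passing null creates a fresh encoder over the stream.
  org.apache.avro.io.BinaryEncoder encoder = org.apache.avro.io.EncoderFactory.get().binaryEncoder(out, null);
  datumWriter.write(record, encoder);
  encoder.flush();
  return out.toByteArray();
}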
@Override
public void close() throws IOException {
  reader.close();
}
private ParquetMetadata readFooterFromFile(final Path file, final FileSystem fs, final FileStatus stat,
    MetadataFilter filter) throws IOException {
  // Adapt the Hadoop FileSystem/FileStatus pair to Parquet's InputFile interface so the footer
  // can be read with the given metadata filter.
  InputFile inputFile = new InputFile() {
    @Override
    public SeekableInputStream newStream() throws IOException {
      return HadoopStreams.wrap(fs.open(file));
    }

    @Override
    public long getLength() throws IOException {
      return stat.getLen();
    }
  };
  return ParquetFileReader.readFooter(inputFile, filter);
}
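// A minimal usage sketch (assumed caller, not part of the original snippets): read a file's footer with no
// metadata filtering. ParquetMetadataConverter.NO_FILTER is the standard "read everything" filter; path and
// fs stand in for an existing Path and FileSystem.
ParquetMetadata footer = readFooterFromFile(path, fs, fs.getFileStatus(path), ParquetMetadataConverter.NO_FILTER);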
@Override
public void close() throws IOException {
  // Unregister the file from the LLAP cache-aware filesystem before closing the underlying reader.
  if (cacheFsPath != null) {
    LlapCacheAwareFs.unregisterFile(cacheFsPath);
  }
  if (reader != null) {
    reader.close();
  }
}
protected static FileSplit getFileSplit(Job vectorJob) throws IOException, InterruptedException {
  ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);
  InputSplit split = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0);
  FileSplit fsplit = new FileSplit(file, 0L, split.getLength(), split.getLocations());
  return fsplit;
}
public MapredParquetOutputFormat() {
  realOutputFormat = new ParquetOutputFormat<ParquetHiveRecord>(new DataWritableWriteSupport());
}
@Override
public long getLength() throws IOException {
  return writer.getDataSize();
}
@Override
public void checkOutputSpecs(final FileSystem ignored, final JobConf job) throws IOException {
  realOutputFormat.checkOutputSpecs(ShimLoader.getHadoopShims().getHCatShim().createJobContext(job, null));
}
protected static void writeRepeateMapData(ParquetWriter<Group> writer, int elementNum, boolean isNull)
    throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int mapMaxSize = 4;
  for (int i = 0; i < elementNum; i++) {
    Group group = f.newGroup();
    if (!isNull) {
      for (int j = 0; j < mapMaxSize; j++) {
        group.addGroup("map_int32_for_repeat_test").append("key", j).append("value", j);
      }
    }
    writer.write(group);
  }
  writer.close();
}
@Override
public void close() throws IOException {
  writer.close();
}
@Override
public void write(final Record record) throws IOException {
  final GenericRecord genericRecord = AvroTypeUtil.createAvroRecord(record, avroSchema);
  parquetWriter.write(genericRecord);
}
@Override
public void close() throws IOException {
  reader.close();
}
@Override
public void close() throws IOException {
  parquetWriter.close();
}
@Override
public void close() throws IOException {
  parquetReader.close();
}
@Override
public void close() throws IOException {
  reader.close();
}