public Builder copy(ParquetReadOptions options) {
  useSignedStringMinMax(options.useSignedStringMinMax);
  useStatsFilter(options.useStatsFilter);
  useDictionaryFilter(options.useDictionaryFilter);
  useRecordFilter(options.useRecordFilter);
  withRecordFilter(options.recordFilter);
  withMetadataFilter(options.metadataFilter);
  withCodecFactory(options.codecFactory);
  withAllocator(options.allocator);
  for (Map.Entry<String, String> keyValue : options.properties.entrySet()) {
    set(keyValue.getKey(), keyValue.getValue());
  }
  return this;
}
private void initializeInternalReader(ParquetInputSplit split, Configuration configuration)
    throws IOException {
  Path path = split.getPath();
  long[] rowGroupOffsets = split.getRowGroupOffsets();

  // if task.side.metadata is set, rowGroupOffsets is null
  ParquetReadOptions.Builder optionsBuilder = HadoopReadOptions.builder(configuration);
  if (rowGroupOffsets != null) {
    optionsBuilder.withOffsets(rowGroupOffsets);
  } else {
    optionsBuilder.withRange(split.getStart(), split.getEnd());
  }

  // open a reader with the metadata filter
  ParquetFileReader reader = ParquetFileReader.open(
      HadoopInputFile.fromPath(path, configuration), optionsBuilder.build());

  if (rowGroupOffsets != null) {
    // verify a row group was found for each offset
    List<BlockMetaData> blocks = reader.getFooter().getBlocks();
    if (blocks.size() != rowGroupOffsets.length) {
      throw new IllegalStateException(
          "All of the offsets in the split should be found in the file."
          + " expected: " + Arrays.toString(rowGroupOffsets)
          + " found: " + blocks);
    }
  }

  if (!reader.getRowGroups().isEmpty()) {
    checkDeltaByteArrayProblem(
        reader.getFooter().getFileMetaData(), configuration,
        reader.getRowGroups().get(0));
  }

  internalReader.initialize(reader, configuration);
}
/**
 * Reads the metadata block in the footer of the file.
 * @param file an {@link InputFile} to read
 * @param filter the filter to apply to row groups
 * @return the metadata blocks in the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static final ParquetMetadata readFooter(InputFile file, MetadataFilter filter)
    throws IOException {
  ParquetReadOptions options;
  if (file instanceof HadoopInputFile) {
    options = HadoopReadOptions.builder(((HadoopInputFile) file).getConfiguration())
        .withMetadataFilter(filter).build();
  } else {
    options = ParquetReadOptions.builder().withMetadataFilter(filter).build();
  }

  try (SeekableInputStream in = file.newStream()) {
    return readFooter(file, options, in);
  }
}
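// A minimal sketch of the non-deprecated replacement for readFooter: open the
// file through ParquetFileReader.open(InputFile, ParquetReadOptions) and take
// the footer from the reader. The helper name readFooterViaOpen is
// hypothetical; open() and getFooter() are the calls used elsewhere here.
private static ParquetMetadata readFooterViaOpen(InputFile file, ParquetReadOptions options)
    throws IOException {
  try (ParquetFileReader reader = ParquetFileReader.open(file, options)) {
    return reader.getFooter();
  }
}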
public ParquetReader<T> build() throws IOException {
  ParquetReadOptions options = optionsBuilder.build();

  if (path != null) {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus stat = fs.getFileStatus(path);

    if (stat.isFile()) {
      return new ParquetReader<>(
          Collections.singletonList((InputFile) HadoopInputFile.fromStatus(stat, conf)),
          options,
          getReadSupport());
    } else {
      List<InputFile> files = new ArrayList<>();
      for (FileStatus fileStatus : fs.listStatus(path, HiddenFileFilter.INSTANCE)) {
        files.add(HadoopInputFile.fromStatus(fileStatus, conf));
      }
      return new ParquetReader<T>(files, options, getReadSupport());
    }

  } else {
    return new ParquetReader<>(Collections.singletonList(file), options, getReadSupport());
  }
}
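// Usage sketch for the Builder methods below; the helper and its arguments are
// hypothetical, but ParquetReader.builder(readSupport, path) is the existing
// factory for this Builder. The boolean setters toggle the corresponding
// row-group filters in the underlying ParquetReadOptions.
static <T> ParquetReader<T> openWithFilters(ReadSupport<T> readSupport, Path path)
    throws IOException {
  return ParquetReader.builder(readSupport, path)
      .useStatsFilter(true)      // filter row groups using column statistics
      .useDictionaryFilter(true) // filter row groups using dictionary pages
      .build();
}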
public Builder<T> withFileRange(long start, long end) {
  optionsBuilder.withRange(start, end);
  return this;
}

public Builder<T> useSignedStringMinMax() {
  optionsBuilder.useSignedStringMinMax();
  return this;
}

public Builder<T> useSignedStringMinMax(boolean useSignedStringMinMax) {
  optionsBuilder.useSignedStringMinMax(useSignedStringMinMax);
  return this;
}

public Builder<T> useStatsFilter(boolean useStatsFilter) {
  optionsBuilder.useStatsFilter(useStatsFilter);
  return this;
}

public Builder<T> useDictionaryFilter() {
  optionsBuilder.useDictionaryFilter();
  return this;
}
/**
 * @param conf a configuration
 * @param file a file path to open
 * @param filter a metadata filter
 * @return a parquet file reader
 * @throws IOException if there is an error while opening the file
 * @deprecated will be removed in 2.0.0; use {@link #open(InputFile,ParquetReadOptions)}
 */
@Deprecated
public static ParquetFileReader open(Configuration conf, Path file, MetadataFilter filter)
    throws IOException {
  return open(HadoopInputFile.fromPath(file, conf),
      HadoopReadOptions.builder(conf).withMetadataFilter(filter).build());
}
public Builder<T> useStatsFilter() {
  optionsBuilder.useStatsFilter();
  return this;
}

public Builder<T> set(String key, String value) {
  optionsBuilder.set(key, value);
  return this;
}

public Builder<T> useRecordFilter() {
  optionsBuilder.useRecordFilter();
  return this;
}

public Builder<T> useDictionaryFilter(boolean useDictionaryFilter) {
  optionsBuilder.useDictionaryFilter(useDictionaryFilter);
  return this;
}

public static Builder builder() {
  return new Builder();
}
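// Sketch of using the plain builder() above for an InputFile that is not a
// HadoopInputFile, mirroring the else branch of readFooter. The helper name
// is hypothetical; NO_FILTER comes from ParquetMetadataConverter, and the
// setters are the ones the reader Builder delegates to in this change.
static ParquetReadOptions defaultOptionsExample() {
  return ParquetReadOptions.builder()
      .useStatsFilter(true)
      .withMetadataFilter(ParquetMetadataConverter.NO_FILTER)
      .build();
}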