/**
 * Builds the {@code ParquetReader} from the accumulated builder state.
 *
 * <p>If a {@code path} was configured, it is resolved against its Hadoop
 * {@code FileSystem}: a regular file is read on its own, while a directory is
 * expanded to every non-hidden file inside it (via
 * {@code HiddenFileFilter.INSTANCE}). Otherwise the explicitly supplied
 * {@code InputFile} is used.
 *
 * @return an open {@link ParquetReader} over the resolved input file(s)
 * @throws IOException if the file system or file status cannot be accessed
 */
public ParquetReader<T> build() throws IOException {
  // Freeze the builder's read options once; shared by every branch below.
  ParquetReadOptions options = optionsBuilder.build();
  if (path != null) {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus stat = fs.getFileStatus(path);
    if (stat.isFile()) {
      // Cast forces singletonList to be List<InputFile>, not List<HadoopInputFile>.
      return new ParquetReader<>(
          Collections.singletonList((InputFile) HadoopInputFile.fromStatus(stat, conf)),
          options,
          getReadSupport());
    } else {
      // Directory: collect every visible (non-hidden) child file.
      List<InputFile> files = new ArrayList<>();
      for (FileStatus fileStatus : fs.listStatus(path, HiddenFileFilter.INSTANCE)) {
        files.add(HadoopInputFile.fromStatus(fileStatus, conf));
      }
      // Use the diamond operator, consistent with the other branches.
      return new ParquetReader<>(files, options, getReadSupport());
    }
  } else {
    // No path configured: fall back to the InputFile given to the builder.
    return new ParquetReader<>(Collections.singletonList(file), options, getReadSupport());
  }
}
}
/**
 * Builds the {@code ParquetReader} from the accumulated builder state.
 *
 * <p>If a {@code path} was configured, a regular file is read directly and a
 * directory is expanded to its non-hidden children; otherwise the explicitly
 * supplied {@code InputFile} is used.
 *
 * @return an open {@link ParquetReader} over the resolved input file(s)
 * @throws IOException if the file system or file status cannot be accessed
 */
public ParquetReader<T> build() throws IOException {
  // Freeze the builder's read options once; shared by every branch below.
  ParquetReadOptions options = optionsBuilder.build();
  if (path != null) {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus stat = fs.getFileStatus(path);
    if (stat.isFile()) {
      // Cast forces singletonList to be List<InputFile>, not List<HadoopInputFile>.
      return new ParquetReader<>(
          Collections.singletonList((InputFile) HadoopInputFile.fromStatus(stat, conf)),
          options,
          getReadSupport());
    } else {
      // Directory: collect every visible (non-hidden) child file.
      List<InputFile> files = new ArrayList<>();
      for (FileStatus fileStatus : fs.listStatus(path, HiddenFileFilter.INSTANCE)) {
        files.add(HadoopInputFile.fromStatus(fileStatus, conf));
      }
      return new ParquetReader<T>(files, options, getReadSupport());
    }
  } else {
    // No path configured: fall back to the InputFile given to the builder.
    return new ParquetReader<>(Collections.singletonList(file), options, getReadSupport());
  }
}
}
/**
 * Opens a reader for a single Hadoop path with a record filter.
 *
 * <p>Delegates to the list-based constructor, wrapping {@code file} as a
 * single {@link HadoopInputFile} and building Hadoop-backed read options
 * carrying the (required, non-null) record filter.
 *
 * @param conf a Hadoop configuration
 * @param file path of the parquet file to read
 * @param readSupport the materialization support for records
 * @param filter a record filter; must not be null
 * @throws IOException if the file cannot be opened
 */
private ParquetReader(Configuration conf,
                      Path file,
                      ReadSupport<T> readSupport,
                      FilterCompat.Filter filter) throws IOException {
  // Cast keeps singletonList typed as List<InputFile> for the target constructor.
  this(Collections.singletonList((InputFile) HadoopInputFile.fromPath(file, conf)),
      HadoopReadOptions.builder(conf)
          .withRecordFilter(checkNotNull(filter, "filter"))
          .build(),
      readSupport);
}
// Freeze the accumulated builder settings into an immutable ParquetReadOptions.
// NOTE(review): lone statement fragment — presumably excerpted from a build()
// method like the ones above; verify against the enclosing context.
ParquetReadOptions options = optionsBuilder.build();
private void initializeInternalReader(ParquetInputSplit split, Configuration configuration) throws IOException { Path path = split.getPath(); long[] rowGroupOffsets = split.getRowGroupOffsets(); // if task.side.metadata is set, rowGroupOffsets is null ParquetReadOptions.Builder optionsBuilder = HadoopReadOptions.builder(configuration); if (rowGroupOffsets != null) { optionsBuilder.withOffsets(rowGroupOffsets); } else { optionsBuilder.withRange(split.getStart(), split.getEnd()); } // open a reader with the metadata filter ParquetFileReader reader = ParquetFileReader.open( HadoopInputFile.fromPath(path, configuration), optionsBuilder.build()); if (rowGroupOffsets != null) { // verify a row group was found for each offset List<BlockMetaData> blocks = reader.getFooter().getBlocks(); if (blocks.size() != rowGroupOffsets.length) { throw new IllegalStateException( "All of the offsets in the split should be found in the file." + " expected: " + Arrays.toString(rowGroupOffsets) + " found: " + blocks); } } if (!reader.getRowGroups().isEmpty()) { checkDeltaByteArrayProblem( reader.getFooter().getFileMetaData(), configuration, reader.getRowGroups().get(0)); } internalReader.initialize(reader, configuration); }
/**
 * Opens a {@link ParquetFileReader} for the given split and hands it to the
 * internal reader.
 *
 * <p>When the split carries explicit row-group offsets they are passed as a
 * metadata filter and validated against the footer; otherwise the split's
 * byte range is used (task-side metadata).
 *
 * @param split the input split describing the file region to read
 * @param configuration the Hadoop configuration used to open the file
 * @throws IOException if the file cannot be opened or initialized
 */
private void initializeInternalReader(ParquetInputSplit split, Configuration configuration)
    throws IOException {
  Path path = split.getPath();
  long[] rowGroupOffsets = split.getRowGroupOffsets();

  // if task.side.metadata is set, rowGroupOffsets is null
  ParquetReadOptions.Builder optionsBuilder = HadoopReadOptions.builder(configuration);
  if (rowGroupOffsets != null) {
    optionsBuilder.withOffsets(rowGroupOffsets);
  } else {
    optionsBuilder.withRange(split.getStart(), split.getEnd());
  }

  // open a reader with the metadata filter
  ParquetFileReader reader = ParquetFileReader.open(
      HadoopInputFile.fromPath(path, configuration), optionsBuilder.build());

  if (rowGroupOffsets != null) {
    // verify a row group was found for each offset
    List<BlockMetaData> blocks = reader.getFooter().getBlocks();
    if (blocks.size() != rowGroupOffsets.length) {
      throw new IllegalStateException(
          "All of the offsets in the split should be found in the file."
          + " expected: " + Arrays.toString(rowGroupOffsets)
          + " found: " + blocks);
    }
  }

  // NOTE(review): only the first row group is checked here — presumably the
  // delta-byte-array problem is file-wide, not per-group; confirm with the helper.
  if (!reader.getRowGroups().isEmpty()) {
    checkDeltaByteArrayProblem(
        reader.getFooter().getFileMetaData(), configuration,
        reader.getRowGroups().get(0));
  }

  internalReader.initialize(reader, configuration);
}
/**
 * Reads the metadata block in the footer of the given file.
 *
 * <p>If the file is a {@link HadoopInputFile}, its configuration is used to
 * build Hadoop-backed read options; otherwise default options are used.
 *
 * @param file an {@link InputFile} to read
 * @param filter the filter to apply to row groups
 * @return the parquet metadata read from the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated will be removed in 2.0.0;
 * use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static final ParquetMetadata readFooter(InputFile file, MetadataFilter filter)
    throws IOException {
  final ParquetReadOptions options;
  if (file instanceof HadoopInputFile) {
    HadoopInputFile hadoopFile = (HadoopInputFile) file;
    options = HadoopReadOptions.builder(hadoopFile.getConfiguration())
        .withMetadataFilter(filter)
        .build();
  } else {
    options = ParquetReadOptions.builder()
        .withMetadataFilter(filter)
        .build();
  }

  try (SeekableInputStream in = file.newStream()) {
    return readFooter(file, options, in);
  }
}
/**
 * Reads the metadata block in the footer of the file.
 *
 * <p>Hadoop-backed files contribute their own configuration to the read
 * options; any other {@link InputFile} gets default options. The stream is
 * opened and closed within this call.
 *
 * @param file an {@link InputFile} to read
 * @param filter the filter to apply to row groups
 * @return the parquet metadata read from the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated will be removed in 2.0.0;
 * use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static final ParquetMetadata readFooter(InputFile file, MetadataFilter filter)
    throws IOException {
  ParquetReadOptions options;
  if (file instanceof HadoopInputFile) {
    // Preserve the Hadoop configuration carried by the input file.
    options = HadoopReadOptions.builder(((HadoopInputFile) file).getConfiguration())
        .withMetadataFilter(filter).build();
  } else {
    options = ParquetReadOptions.builder().withMetadataFilter(filter).build();
  }

  // try-with-resources guarantees the stream is closed after the footer is read.
  try (SeekableInputStream in = file.newStream()) {
    return readFooter(file, options, in);
  }
}
/**
 * Opens a parquet file reader for a Hadoop path with a metadata filter.
 *
 * @param conf a configuration
 * @param file a file path to open
 * @param filter a metadata filter
 * @return a parquet file reader
 * @throws IOException if there is an error while opening the file
 * @deprecated will be removed in 2.0.0; use {@link #open(InputFile,ParquetReadOptions)}
 */
@Deprecated
public static ParquetFileReader open(Configuration conf, Path file, MetadataFilter filter)
    throws IOException {
  InputFile inputFile = HadoopInputFile.fromPath(file, conf);
  ParquetReadOptions options = HadoopReadOptions.builder(conf)
      .withMetadataFilter(filter)
      .build();
  return open(inputFile, options);
}
/**
 * Opens a parquet file reader for a Hadoop path with a metadata filter.
 *
 * @param conf a configuration
 * @param file a file path to open
 * @param filter a metadata filter
 * @return a parquet file reader
 * @throws IOException if there is an error while opening the file
 * @deprecated will be removed in 2.0.0; use {@link #open(InputFile,ParquetReadOptions)}
 */
@Deprecated
public static ParquetFileReader open(Configuration conf, Path file, MetadataFilter filter)
    throws IOException {
  // Adapt the legacy (conf, path, filter) signature to the InputFile-based API.
  return open(HadoopInputFile.fromPath(file, conf),
      HadoopReadOptions.builder(conf).withMetadataFilter(filter).build());
}
/**
 * Opens a reader for a single Hadoop path with a record filter.
 *
 * <p>Delegates to the list-based constructor, wrapping {@code file} as a
 * single {@link HadoopInputFile} and building Hadoop-backed read options
 * carrying the (required, non-null) record filter.
 *
 * @param conf a Hadoop configuration
 * @param file path of the parquet file to read
 * @param readSupport the materialization support for records
 * @param filter a record filter; must not be null
 * @throws IOException if the file cannot be opened
 */
private ParquetReader(Configuration conf,
                      Path file,
                      ReadSupport<T> readSupport,
                      FilterCompat.Filter filter) throws IOException {
  // Cast keeps singletonList typed as List<InputFile> for the target constructor.
  this(Collections.singletonList((InputFile) HadoopInputFile.fromPath(file, conf)),
      HadoopReadOptions.builder(conf)
          .withRecordFilter(checkNotNull(filter, "filter"))
          .build(),
      readSupport);
}
/**
 * Open an {@link InputFile file} with default read options.
 *
 * @param file an input file
 * @return an open ParquetFileReader
 * @throws IOException if there is an error while opening the file
 */
public static ParquetFileReader open(InputFile file) throws IOException {
  ParquetReadOptions defaultOptions = ParquetReadOptions.builder().build();
  return new ParquetFileReader(file, defaultOptions);
}
/**
 * Opens a parquet file reader for a Hadoop path with a metadata filter.
 *
 * @param conf the Hadoop Configuration
 * @param file Path to a parquet file
 * @param filter a {@link MetadataFilter} for selecting row groups
 * @throws IOException if the file can not be opened
 * @deprecated will be removed in 2.0.0; use the {@code InputFile}-based constructor.
 */
@Deprecated
public ParquetFileReader(Configuration conf, Path file, MetadataFilter filter)
    throws IOException {
  // Adapt the legacy (conf, path, filter) signature to the InputFile-based constructor.
  this(HadoopInputFile.fromPath(file, conf),
      HadoopReadOptions.builder(conf).withMetadataFilter(filter).build());
}
/**
 * Converts this configuration into a {@link ParquetReadOptions}, carrying
 * over the signed-string min/max setting.
 *
 * @return read options reflecting this configuration
 */
public ParquetReadOptions toReadOptions() {
  ParquetReadOptions.Builder optionsBuilder = ParquetReadOptions.builder();
  optionsBuilder.useSignedStringMinMax(enableStringsSignedMinMax);
  return optionsBuilder.build();
}
/**
 * Opens a parquet file reader for a Hadoop path with a metadata filter.
 *
 * @param conf the Hadoop Configuration
 * @param file Path to a parquet file
 * @param filter a {@link MetadataFilter} for selecting row groups
 * @throws IOException if the file can not be opened
 * @deprecated will be removed in 2.0.0; use the {@code InputFile}-based constructor.
 */
@Deprecated
public ParquetFileReader(Configuration conf, Path file, MetadataFilter filter)
    throws IOException {
  // Adapt the legacy (conf, path, filter) signature to the InputFile-based constructor.
  this(HadoopInputFile.fromPath(file, conf),
      HadoopReadOptions.builder(conf).withMetadataFilter(filter).build());
}
/**
 * Open an {@link InputFile file} with default read options.
 *
 * @param file an input file
 * @return an open ParquetFileReader
 * @throws IOException if there is an error while opening the file
 */
public static ParquetFileReader open(InputFile file) throws IOException {
  // Default ParquetReadOptions: no metadata filter, no record filter.
  return new ParquetFileReader(file, ParquetReadOptions.builder().build());
}