+ rowsReturned + " out of " + totalRowCount); List<ColumnDescriptor> columns = requestedSchema.getColumns(); List<Type> types = requestedSchema.getFields(); columnReaders = new VectorizedColumnReader[columns.size()]; columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(colsToInclude.get(i)), types.get(i), pages, requestedSchema.getColumns(), skipTimestampConversion, 0); for (int i = 0; i < types.size(); ++i) { columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(i), types.get(i), pages, requestedSchema.getColumns(), skipTimestampConversion, 0);
private Path wrapPathForCache(Path path, Object fileKey, JobConf configuration,
    List<BlockMetaData> blocks, String tag) throws IOException {
  if (fileKey == null || cache == null) {
    return path;
  }
  HashSet<ColumnPath> includedCols = new HashSet<>();
  for (ColumnDescriptor col : requestedSchema.getColumns()) {
    includedCols.add(ColumnPath.get(col.getPath()));
  }
  // We could make some assumptions given how the reader currently does the work (consecutive
  // chunks, etc.; blocks and columns stored in offset order in the lists), but we won't -
  // just save all the chunk boundaries and lengths for now.
  TreeMap<Long, Long> chunkIndex = new TreeMap<>();
  for (BlockMetaData block : blocks) {
    for (ColumnChunkMetaData mc : block.getColumns()) {
      if (!includedCols.contains(mc.getPath())) continue;
      chunkIndex.put(mc.getStartingPos(), mc.getStartingPos() + mc.getTotalSize());
    }
  }
  // Register the cache-aware path so that the Parquet reader goes through it.
  configuration.set("fs." + LlapCacheAwareFs.SCHEME + ".impl",
      LlapCacheAwareFs.class.getCanonicalName());
  path = LlapCacheAwareFs.registerFile(cache, path, fileKey, chunkIndex, configuration, tag);
  this.cacheFsPath = path;
  return path;
}
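// Aside: a minimal sketch of why a TreeMap suits the chunk index built above - the
// cache-aware stream must answer "which column chunk covers byte offset X", and a sorted
// map answers that in O(log n) via floorEntry. ChunkIndexDemo and its containsOffset
// helper are hypothetical illustrations, not part of LlapCacheAwareFs.
import java.util.Map;
import java.util.TreeMap;

final class ChunkIndexDemo {
  /** Returns true if some chunk spanning [start, end) in the index covers the offset. */
  static boolean containsOffset(TreeMap<Long, Long> chunkIndex, long offset) {
    // floorEntry finds the chunk with the greatest starting position <= offset.
    Map.Entry<Long, Long> chunk = chunkIndex.floorEntry(offset);
    return chunk != null && offset < chunk.getValue();
  }

  public static void main(String[] args) {
    TreeMap<Long, Long> chunkIndex = new TreeMap<>();
    chunkIndex.put(100L, 250L);  // chunk occupying bytes [100, 250)
    chunkIndex.put(400L, 900L);  // chunk occupying bytes [400, 900)
    System.out.println(containsOffset(chunkIndex, 120L)); // true
    System.out.println(containsOffset(chunkIndex, 300L)); // false: gap between chunks
  }
}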
"Failed to find related Parquet column descriptor with type " + type); if (fileSchema.getColumns().contains(descriptors.get(0))) { return new VectorizedPrimitiveColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type, typeInfo);
this.reader = new ParquetFileReader(
    configuration, footer.getFileMetaData(), path, blocks, requestedSchema.getColumns());
private void checkEndOfRowGroup() throws IOException {
  if (rowsReturned != totalCountLoadedSoFar) return;
  PageReadStore pages = reader.readNextRowGroup();
  if (pages == null) {
    throw new IOException("expecting more rows but reached last block. Read "
        + rowsReturned + " out of " + totalRowCount);
  }
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  columnReaders = new VectorizedColumnReader[columns.size()];
  for (int i = 0; i < columns.size(); ++i) {
    if (missingColumns[i]) continue;
    columnReaders[i] = new VectorizedColumnReader(columns.get(i),
        pages.getPageReader(columns.get(i)));
  }
  totalCountLoadedSoFar += pages.getRowCount();
}
private void checkEndOfRowGroup() throws IOException {
  if (rowsReturned != totalCountLoadedSoFar) return;
  PageReadStore pages = reader.readNextRowGroup();
  if (pages == null) {
    throw new IOException("expecting more rows but reached last block. Read "
        + rowsReturned + " out of " + totalRowCount);
  }
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  List<Type> types = requestedSchema.asGroupType().getFields();
  columnReaders = new VectorizedColumnReader[columns.size()];
  for (int i = 0; i < columns.size(); ++i) {
    if (missingColumns[i]) continue;
    columnReaders[i] = new VectorizedColumnReader(columns.get(i),
        types.get(i).getOriginalType(), pages.getPageReader(columns.get(i)), convertTz);
  }
  totalCountLoadedSoFar += pages.getRowCount();
}
private void initializeInternal() throws IOException, UnsupportedOperationException {
  // Check that the requested schema is supported.
  missingColumns = new boolean[requestedSchema.getFieldCount()];
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  List<String[]> paths = requestedSchema.getPaths();
  for (int i = 0; i < requestedSchema.getFieldCount(); ++i) {
    Type t = requestedSchema.getFields().get(i);
    if (!t.isPrimitive() || t.isRepetition(Type.Repetition.REPEATED)) {
      throw new UnsupportedOperationException("Complex types not supported.");
    }
    String[] colPath = paths.get(i);
    if (fileSchema.containsPath(colPath)) {
      ColumnDescriptor fd = fileSchema.getColumnDescription(colPath);
      if (!fd.equals(columns.get(i))) {
        throw new UnsupportedOperationException("Schema evolution not supported.");
      }
      missingColumns[i] = false;
    } else {
      if (columns.get(i).getMaxDefinitionLevel() == 0) {
        // Column is missing in data but the required data is non-nullable. This file is invalid.
        throw new IOException("Required column is missing in data file. Col: "
            + Arrays.toString(colPath));
      }
      missingColumns[i] = true;
    }
  }
}
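// Aside: the maxDefinitionLevel == 0 test above is what makes a missing column fatal - a
// max definition level of 0 means the column is required at every nesting level, so there
// is no way to fill it with nulls. A small self-contained demonstration of this Parquet
// schema property (DefinitionLevelDemo is a hypothetical class written for illustration):
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class DefinitionLevelDemo {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message demo { required int32 id; optional binary name; }");
    // Required column: no definition level is needed, so the max is 0.
    System.out.println(
        schema.getColumnDescription(new String[] {"id"}).getMaxDefinitionLevel());   // 0
    // Optional column: one definition level distinguishes null from present, so the max is 1.
    System.out.println(
        schema.getColumnDescription(new String[] {"name"}).getMaxDefinitionLevel()); // 1
  }
}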
this.reader = new ParquetFileReader(
    config, footer.getFileMetaData(), file, blocks, requestedSchema.getColumns());
for (BlockMetaData block : blocks) {
  this.totalRowCount += block.getRowCount();
}
this.sparkSchema = StructType$.MODULE$.fromString(sparkRequestedSchemaString);
this.reader = new ParquetFileReader(
    configuration, footer.getFileMetaData(), file, blocks, requestedSchema.getColumns());
for (BlockMetaData block : blocks) {
  this.totalRowCount += block.getRowCount();
}
/**
 * Writes the column chunks in the corresponding row group.
 * @param writer the parquet file writer
 * @throws IOException if the file cannot be created
 */
public void flushToFileWriter(ParquetFileWriter writer) throws IOException {
  for (ColumnDescriptor path : schema.getColumns()) {
    ColumnChunkPageWriter pageWriter = writers.get(path);
    pageWriter.writeToFileWriter(writer);
  }
}
public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException {
  this.converter = new ParquetMetadataConverter(options);
  this.file = file;
  this.f = file.newStream();
  this.options = options;
  this.footer = readFooter(file, options, f, converter);
  this.fileMetaData = footer.getFileMetaData();
  this.blocks = filterRowGroups(footer.getBlocks());
  // Index every leaf column by its dotted path so column chunks can be matched to
  // descriptors later.
  for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
    paths.put(ColumnPath.get(col.getPath()), col);
  }
}
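// Aside: a minimal sketch of how the InputFile/ParquetReadOptions constructor above might
// be exercised with parquet-mr's Hadoop helpers. The file path is hypothetical; any Parquet
// file works. HadoopInputFile and HadoopReadOptions are real parquet-mr classes.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.HadoopReadOptions;
import org.apache.parquet.ParquetReadOptions;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class ReadRowGroupsDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    HadoopInputFile inputFile =
        HadoopInputFile.fromPath(new Path("/tmp/example.parquet"), conf);
    ParquetReadOptions options = HadoopReadOptions.builder(conf).build();
    try (ParquetFileReader reader = new ParquetFileReader(inputFile, options)) {
      PageReadStore pages;
      // Iterate row groups the same way the vectorized readers above do.
      while ((pages = reader.readNextRowGroup()) != null) {
        System.out.println("row group with " + pages.getRowCount() + " rows");
      }
    }
  }
}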