public static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) { if (messageType.containsField(columnName)) { return messageType.getType(columnName); } // parquet is case-sensitive, but hive is not. all hive columns get converted to lowercase // check for direct match above but if no match found, try case-insensitive match for (parquet.schema.Type type : messageType.getFields()) { if (type.getName().equalsIgnoreCase(columnName)) { return type; } } return null; }
/**
 * Finds the Parquet type for a Hive column, either by (case-insensitive) name
 * or by the column's ordinal position in the file schema.
 *
 * @param column the Hive column handle to resolve
 * @param messageType the Parquet file schema
 * @param useParquetColumnNames when {@code true}, match by name; otherwise by Hive column index
 * @return the resolved Parquet type, or {@code null} when the column cannot be found
 */
public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames) {
    if (useParquetColumnNames) {
        return getParquetTypeByName(column.getName(), messageType);
    }
    int index = column.getHiveColumnIndex();
    // Out-of-range index means the file predates this column; report it as absent.
    return index < messageType.getFieldCount() ? messageType.getType(index) : null;
}
}
.toArray(String[]::new); ColumnPath columnPath = ColumnPath.get(path); PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(); ColumnChunkMetaData column = ColumnChunkMetaData.get( columnPath,
/**
 * Returns the type of the field addressed by {@code path}, resolving the path
 * starting from this group (field index 0 of the lookup).
 *
 * @param path field names leading from this group down to the requested field
 * @return the type found at the given path
 */
public Type getType(String ... path) { return getType(path, 0); }
/**
 * Returns the type of the field addressed by {@code path}, resolving the path
 * starting from this group (field index 0 of the lookup).
 *
 * @param path field names leading from this group down to the requested field
 * @return the type found at the given path
 */
public Type getType(String ... path) { return getType(path, 0); }
private static List<Mapping> computeMappingByPosition( DataModelDescriptor target, MessageType source) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format( "Mapping columns by their position: model={0}", //$NON-NLS-1$ target.getDataModelClass().getName())); } List<ColumnDescriptor> sources = source.getColumns(); List<? extends PropertyDescriptor> targets = target.getPropertyDescriptors(); List<Mapping> mappings = new ArrayList<>(); int limit = Math.min(sources.size(), targets.size()); for (int i = 0; i < limit; i++) { ColumnDescriptor s = sources.get(i); Type sType = source.getType(s.getPath()); PropertyDescriptor t = targets.get(i); mappings.add(new Mapping(s, sType, t)); } for (int i = limit, n = sources.size(); i < n; i++) { ColumnDescriptor s = sources.get(i); Type sType = source.getType(s.getPath()); mappings.add(new Mapping(s, sType, null)); } for (int i = limit, n = targets.size(); i < n; i++) { mappings.add(new Mapping(null, null, targets.get(i))); } return mappings; }
private static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) { if (messageType.containsField(columnName)) { return messageType.getType(columnName); } // parquet is case-sensitive, but hive is not. all hive columns get converted to lowercase // check for direct match above but if no match found, try case-insensitive match for (Type type : messageType.getFields()) { if (type.getName().equalsIgnoreCase(columnName)) { return type; } } return null; } }
public List<ColumnDescriptor> getColumns() { List<String[]> paths = this.getPaths(0); List<ColumnDescriptor> columns = new ArrayList<ColumnDescriptor>(paths.size()); for (String[] path : paths) { // TODO: optimize this columns.add(new ColumnDescriptor(path, getType(path).asPrimitiveType().getPrimitiveTypeName(), getMaxRepetitionLevel(path), getMaxDefinitionLevel(path))); } return columns; }
/**
 * Indexes every leaf column of the schema by its path so that predicate column
 * references can later be checked against both the declared primitive type and
 * the original (logical) type, when one is present.
 *
 * @param schema the file schema whose columns are indexed
 */
private SchemaCompatibilityValidator(MessageType schema) {
    for (ColumnDescriptor descriptor : schema.getColumns()) {
        ColumnPath path = ColumnPath.get(descriptor.getPath());
        columnsAccordingToSchema.put(path, descriptor);
        // Only record the original type when the column actually declares one.
        OriginalType originalType = schema.getType(descriptor.getPath()).getOriginalType();
        if (originalType != null) {
            originalTypes.put(path, originalType);
        }
    }
}
/** * Searchs column names by index on a given Parquet file schema, and returns its corresponded * Parquet schema types. * * @param schema Message schema where to search for column names. * @param colNames List of column names. * @param colIndexes List of column indexes. * @return A MessageType object of the column names found. */ private static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) { List<Type> schemaTypes = new ArrayList<Type>(); for (Integer i : colIndexes) { if (i < colNames.size()) { if (i < schema.getFieldCount()) { schemaTypes.add(schema.getType(i)); } else { //prefixing with '_mask_' to ensure no conflict with named //columns in the file schema schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named("_mask_" + colNames.get(i))); } } } return new MessageType(schema.getName(), schemaTypes); }
private static List<Mapping> computeMappingByName( DataModelDescriptor target, MessageType source) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format( "Mapping columns by their name: model={0}", //$NON-NLS-1$ target.getDataModelClass().getName())); } Set<PropertyDescriptor> rest = new LinkedHashSet<>(target.getPropertyDescriptors()); List<Mapping> mappings = new ArrayList<>(); for (ColumnDescriptor s : source.getColumns()) { String name = s.getPath()[0]; Type sType = source.getType(s.getPath()); PropertyDescriptor t = target.findPropertyDescriptor(name); if (t != null) { mappings.add(new Mapping(s, sType, t)); rest.remove(t); } else { mappings.add(new Mapping(s, sType, null)); } } for (PropertyDescriptor t : rest) { mappings.add(new Mapping(null, null, t)); } return mappings; }
public List<ColumnDescriptor> getColumns() { List<String[]> paths = this.getPaths(0); List<ColumnDescriptor> columns = new ArrayList<ColumnDescriptor>(paths.size()); for (String[] path : paths) { // TODO: optimize this PrimitiveType primitiveType = getType(path).asPrimitiveType(); columns.add(new ColumnDescriptor( path, primitiveType.getPrimitiveTypeName(), primitiveType.getTypeLength(), getMaxRepetitionLevel(path), getMaxDefinitionLevel(path))); } return columns; }
/**
 * Intersects a Parquet file schema with the requested Cascading fields,
 * keeping only the top-level fields whose names were requested. The result is
 * exposed as both the matched {@code sourceFields} and a pruned
 * {@code requestedSchema} for reading.
 *
 * @param fileSchema the full schema of the Parquet file
 * @param requestedFields the fields the caller asked for; {@code UNKNOWN} means all
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
    if (requestedFields == Fields.UNKNOWN) {
        requestedFields = Fields.ALL;
    }
    Fields matchedFields = Fields.NONE;
    List<Type> keptTypes = new ArrayList<Type>();
    for (int index = 0, count = fileSchema.getFieldCount(); index < count; index++) {
        Type fieldType = fileSchema.getType(index);
        Fields fieldName = new Fields(fieldType.getName());
        if (requestedFields.contains(fieldName)) {
            matchedFields = matchedFields.append(fieldName);
            keptTypes.add(fieldType);
        }
    }
    this.sourceFields = matchedFields;
    this.requestedSchema = new MessageType(fileSchema.getName(), keptTypes);
}
/**
 * Finds the Parquet type for a Hive column, either by (case-insensitive) name
 * or by the column's ordinal position in the file schema.
 *
 * @param column the Hive column handle to resolve
 * @param messageType the Parquet file schema
 * @param useParquetColumnNames when {@code true}, match by name; otherwise by Hive column index
 * @return the resolved Parquet type, or {@code null} when the column cannot be found
 */
public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames) {
    if (useParquetColumnNames) {
        return getParquetTypeByName(column.getName(), messageType);
    }
    int fieldIndex = column.getHiveColumnIndex();
    if (fieldIndex >= messageType.getFieldCount()) {
        // The file predates this column; report it as absent.
        return null;
    }
    return messageType.getType(fieldIndex);
}
/**
 * Describes the leaf column addressed by {@code path}: its primitive type and
 * its maximum repetition and definition levels within this schema.
 *
 * @param path field names from the root down to the leaf column
 * @return the descriptor for that leaf column
 */
public ColumnDescriptor getColumnDescription(String[] path) {
    int maxRepetitionLevel = getMaxRepetitionLevel(path);
    int maxDefinitionLevel = getMaxDefinitionLevel(path);
    PrimitiveTypeName leafTypeName = getType(path).asPrimitiveType().getPrimitiveTypeName();
    return new ColumnDescriptor(path, leafTypeName, maxRepetitionLevel, maxDefinitionLevel);
}
/**
 * Describes the leaf column addressed by {@code path}: its primitive type,
 * fixed type length, and its maximum repetition and definition levels within
 * this schema.
 *
 * @param path field names from the root down to the leaf column
 * @return the descriptor for that leaf column
 */
public ColumnDescriptor getColumnDescription(String[] path) {
    int maxRepetitionLevel = getMaxRepetitionLevel(path);
    int maxDefinitionLevel = getMaxDefinitionLevel(path);
    PrimitiveType leafType = getType(path).asPrimitiveType();
    return new ColumnDescriptor(
            path,
            leafType.getPrimitiveTypeName(),
            leafType.getTypeLength(),
            maxRepetitionLevel,
            maxDefinitionLevel);
}
ColumnChunkMetaData column = ColumnChunkMetaData.get( columnPath, messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(), CompressionCodecName.fromParquet(metaData.codec), readEncodings(metaData.encodings), readStats(metaData.statistics, messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName()), metaData.data_page_offset, metaData.dictionary_page_offset,
ColumnChunkMetaData column = ColumnChunkMetaData.get( path, messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName(), CompressionCodecName.fromParquet(metaData.codec), fromFormatEncodings(metaData.encodings), fromParquetStatistics(metaData.statistics, messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName()), metaData.data_page_offset, metaData.dictionary_page_offset,
@Test
public void testNestedTypes() {
    // Round-trip the Dremel paper schema through its string form before probing it.
    MessageType schema = MessageTypeParser.parseMessageType(Paper.schema.toString());

    // Nested lookup by path segments resolves the Links.Backward leaf.
    Type backward = schema.getType("Links", "Backward");
    assertEquals(PrimitiveTypeName.INT64, backward.asPrimitiveType().getPrimitiveTypeName());

    // Max repetition levels at increasing nesting depth.
    assertEquals(0, schema.getMaxRepetitionLevel("DocId"));
    assertEquals(1, schema.getMaxRepetitionLevel("Name"));
    assertEquals(2, schema.getMaxRepetitionLevel("Name", "Language"));

    // Max definition levels at increasing nesting depth.
    assertEquals(0, schema.getMaxDefinitionLevel("DocId"));
    assertEquals(1, schema.getMaxDefinitionLevel("Links"));
    assertEquals(2, schema.getMaxDefinitionLevel("Links", "Backward"));
}
}