/**
 * Translate the search argument to the filter predicate parquet uses. It includes
 * only the columns from the passed schema.
 * @return a filter predicate translated from the search argument; null is returned
 *         if the conversion fails.
 */
public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) {
  Set<String> columns = null;
  if (schema != null) {
    columns = new HashSet<String>();
    for (Type field : schema.getFields()) {
      columns.add(field.getName());
    }
  }
  try {
    return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema);
  } catch (Exception e) {
    return null;
  }
}
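A minimal usage sketch of the helper above. The schema text is illustrative, and the SearchArgument `sarg` is assumed to have been built elsewhere (for example with Hive's SearchArgumentFactory), so treat the setup as hypothetical:

// Hypothetical caller: push down a Hive SearchArgument if translation succeeds.
static FilterPredicate pushDownOrNull(SearchArgument sarg) {
  // Illustrative projected schema; only these columns may appear in the predicate.
  MessageType projected = MessageTypeParser.parseMessageType(
      "message hive_schema { optional int32 id; optional binary name (UTF8); }");
  FilterPredicate predicate = toFilterPredicate(sarg, projected);
  // null means the sarg could not be translated; callers fall back to a plain scan.
  return predicate;
}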
List<Type> types = requestedSchema.getFields();
columnReaders = new VectorizedColumnReader[columns.size()];
for (Type type : fullSchema.getFields()) {
  if (tsField.equals(type.getName()) || metricsFields.contains(type.getName())
public static void testConversion(
    final String columnNamesStr,
    final String columnsTypeStr,
    final String actualSchema) throws Exception {
  final List<String> columnNames = createHiveColumnsFrom(columnNamesStr);
  final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr);
  final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
  final MessageType expectedMT = MessageTypeParser.parseMessageType(actualSchema);
  assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + actualSchema,
      expectedMT, messageTypeFound);

  // The original types must be checked manually, as PrimitiveType.equals does not compare them.
  List<Type> expectedFields = expectedMT.getFields();
  List<Type> actualFields = messageTypeFound.getFields();
  for (int i = 0, n = expectedFields.size(); i < n; ++i) {
    OriginalType exp = expectedFields.get(i).getOriginalType();
    OriginalType act = actualFields.get(i).getOriginalType();
    assertEquals("Original types of the field do not match", exp, act);
  }
}
@Test
public void testMapOriginalType() throws Exception {
  final String hiveColumnTypes = "map<string,string>";
  final String hiveColumnNames = "mapCol";
  final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
  final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
  final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);

  // The message type has a single optional field named mapCol whose original type is MAP.
  assertEquals(1, messageTypeFound.getFieldCount());
  org.apache.parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
  assertEquals("mapCol", topLevel.getName());
  assertEquals(OriginalType.MAP, topLevel.getOriginalType());
  assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());

  assertEquals(1, topLevel.asGroupType().getFieldCount());
  org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0);
  // mapCol contains one repeated field named "map" whose original type is MAP_KEY_VALUE.
  assertEquals("map", secondLevel.getName());
  assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType());
  assertEquals(Repetition.REPEATED, secondLevel.getRepetition());
}
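For reference, the structure asserted above corresponds roughly to the following Parquet schema text. This is a sketch: the key/value annotations are assumptions based on map<string,string> and Parquet's legacy MAP_KEY_VALUE layout, which writes a required key and an optional value:

message hive_schema {
  optional group mapCol (MAP) {
    repeated group map (MAP_KEY_VALUE) {
      required binary key (UTF8);
      optional binary value (UTF8);
    }
  }
}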
private void initializeInternal() throws IOException, UnsupportedOperationException {
  // Check that the requested schema is supported.
  missingColumns = new boolean[requestedSchema.getFieldCount()];
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  List<String[]> paths = requestedSchema.getPaths();
  for (int i = 0; i < requestedSchema.getFieldCount(); ++i) {
    Type t = requestedSchema.getFields().get(i);
    if (!t.isPrimitive() || t.isRepetition(Type.Repetition.REPEATED)) {
      throw new UnsupportedOperationException("Complex types not supported.");
    }

    String[] colPath = paths.get(i);
    if (fileSchema.containsPath(colPath)) {
      ColumnDescriptor fd = fileSchema.getColumnDescription(colPath);
      if (!fd.equals(columns.get(i))) {
        throw new UnsupportedOperationException("Schema evolution not supported.");
      }
      missingColumns[i] = false;
    } else {
      if (columns.get(i).getMaxDefinitionLevel() == 0) {
        // Column is missing in data but the required data is non-nullable. This file is invalid.
        throw new IOException("Required column is missing in data file. Col: "
            + Arrays.toString(colPath));
      }
      missingColumns[i] = true;
    }
  }
}
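The getMaxDefinitionLevel() == 0 test works because only a column that is required at every level of its path has a max definition level of 0; any optional ancestor raises it. A minimal sketch of that relationship against the Parquet schema API directly (schema text is illustrative):

static void definitionLevelDemo() {
  MessageType s = MessageTypeParser.parseMessageType(
      "message m { required int32 id; optional binary name (UTF8); }");
  // A required column can never be null, so its max definition level is 0.
  int idLevel = s.getColumnDescription(new String[] {"id"}).getMaxDefinitionLevel();     // 0
  // An optional column may be null, so its max definition level is 1.
  int nameLevel = s.getColumnDescription(new String[] {"name"}).getMaxDefinitionLevel(); // 1
}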
/**
 * Converts a Parquet schema to a Tajo schema.
 *
 * @param parquetSchema The Parquet schema to convert.
 * @return The resulting Tajo schema.
 */
public Schema convert(MessageType parquetSchema) {
  return convertFields(parquetSchema.getFields());
}
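A minimal usage sketch, assuming this method lives on Tajo's Parquet-to-Tajo schema converter (instantiated here as `converter`, a hypothetical variable; construction details omitted). The schema text is illustrative:

MessageType parquetSchema = MessageTypeParser.parseMessageType(
    "message table1 { required int32 id; optional binary name (UTF8); }");
// Produces one Tajo column per top-level Parquet field.
Schema tajoSchema = converter.convert(parquetSchema);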
public static List<IParquetInputField> buildInputFields( MessageType schema ) {
  List<IParquetInputField> inputFields = new ArrayList<>();
  for ( Type type : schema.getFields() ) {
    if ( type.isPrimitive() ) {
      inputFields.add( convertField( type ) );
    }
  }
  return inputFields;
}
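A small sketch of the filtering behaviour: because only type.isPrimitive() fields are converted, a nested group at the top level is silently skipped (schema text is illustrative):

MessageType schema = MessageTypeParser.parseMessageType(
    "message m {"
        + " required int32 id;"
        + " optional binary name (UTF8);"
        + " optional group address { optional binary city (UTF8); }"
        + " }");
// Produces input fields for id and name only; the address group is ignored.
List<IParquetInputField> fields = buildInputFields( schema );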
public Schema convert(MessageType parquetSchema) {
  return convertFields(parquetSchema.getName(), parquetSchema.getFields());
}
public static int getFieldIndex(MessageType fileSchema, String name) {
  try {
    return fileSchema.getFieldIndex(name.toLowerCase(Locale.ENGLISH));
  } catch (InvalidRecordException e) {
    for (org.apache.parquet.schema.Type type : fileSchema.getFields()) {
      if (type.getName().equalsIgnoreCase(name)) {
        return fileSchema.getFieldIndex(type.getName());
      }
    }
    return -1;
  }
}
public static org.apache.parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) {
  if (messageType.containsField(columnName)) {
    return messageType.getType(columnName);
  }
  // Parquet is case-sensitive, but Hive is not: all Hive columns get converted to lowercase.
  // The direct match above covers the common case; if it finds nothing, fall back to a
  // case-insensitive match.
  for (org.apache.parquet.schema.Type type : messageType.getFields()) {
    if (type.getName().equalsIgnoreCase(columnName)) {
      return type;
    }
  }
  return null;
}
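A short sketch of the fallback behaviour (schema text and column names are illustrative):

MessageType schema = MessageTypeParser.parseMessageType(
    "message m { optional int64 EventTime; }");
// Hive hands over the lower-cased name; containsField fails, so the loop matches case-insensitively.
getParquetTypeByName("eventtime", schema); // returns the EventTime field
getParquetTypeByName("missing", schema);   // returns null: no direct or case-insensitive match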
/**
 * Maps an Arrow schema to a Parquet schema.
 * For now this does not validate primitive type compatibility.
 * @param arrowSchema an Arrow schema
 * @param parquetSchema a Parquet message type
 * @return the mapping between the two
 */
public SchemaMapping map(Schema arrowSchema, MessageType parquetSchema) {
  List<TypeMapping> children = map(arrowSchema.getFields(), parquetSchema.getFields());
  return new SchemaMapping(arrowSchema, parquetSchema, children);
}
private Map<Integer, Converter> buildFieldToConverter(final MessageType schema) {
  final Map<Integer, Converter> fieldToConverter = new HashMap<>(fieldCount);
  int i = 0;
  for (final Type field : schema.getFields()) {
    if (field.isPrimitive()) {
      fieldToConverter.put(i, new PrimitiveConverter(parquetColumnToObject,
          field.asPrimitiveType().getPrimitiveTypeName().javaType.getSimpleName(),
          new String[]{field.getName()}, field.getOriginalType()));
    } else {
      fieldToConverter.put(i, new BypassGroupConverter(parquetColumnToObject,
          field.asGroupType(), new String[]{field.getName()}));
    }
    i++;
  }
  return fieldToConverter;
}
public static MessageType addFallbackIds(MessageType fileSchema) {
  MessageTypeBuilder builder = org.apache.parquet.schema.Types.buildMessage();
  int ordinal = 1; // ids are assigned starting at 1
  for (Type type : fileSchema.getFields()) {
    builder.addField(type.withId(ordinal));
    ordinal += 1;
  }
  return builder.named(fileSchema.getName());
}
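A minimal sketch of the resulting ids (schema text is illustrative):

MessageType fileSchema = MessageTypeParser.parseMessageType(
    "message m { required int32 a; optional binary b (UTF8); }");
MessageType withIds = addFallbackIds(fileSchema);
// Ids follow field order, starting at 1.
int firstId = withIds.getFields().get(0).getId().intValue();  // 1
int secondId = withIds.getFields().get(1).getId().intValue(); // 2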
/**
 * Creates an Arrow schema from a Parquet one and returns the mapping.
 * @param parquetSchema the provided Parquet schema
 * @return the mapping between the two
 */
public SchemaMapping fromParquet(MessageType parquetSchema) {
  List<Type> fields = parquetSchema.getFields();
  List<TypeMapping> mappings = fromParquet(fields);
  List<Field> arrowFields = fields(mappings);
  return new SchemaMapping(new Schema(arrowFields), parquetSchema, mappings);
}
private void verifyParquetSchema() {
  ParquetReader reader = new ParquetReader(tableDirPath);
  MessageType parquetSchema = reader.readParquetSchema();
  String[] types = configuration.getTypes();
  for (int i = 0; i < types.length; i++) {
    String type = types[i];
    if (isNumericSqlType(type)) {
      OriginalType parquetFieldType = parquetSchema.getFields().get(i).getOriginalType();
      assertEquals(OriginalType.DECIMAL, parquetFieldType);
    }
  }
}