/**
 * Hive's ParquetRecordReader implementation causes partition columns that are absent from the
 * original parquet file to also appear in the projected schema. Hive expects the record reader
 * to return each row in its entirety, with un-projected columns carrying null values. Since the
 * writer schema is used for this, it must be extended to include the partition columns as well.
 *
 * <p>NOTE(review): {@code toLowerCase()} here is locale-sensitive (no explicit {@code Locale});
 * presumably field names are ASCII — confirm against the rest of the codebase.
 *
 * @param schema             schema to be extended
 * @param partitioningFields names of the partition columns that must be present
 * @return a schema containing all original fields plus a nullable field (lowercase name) for
 *         every partition column that was not already present at the first level
 */
private static Schema addPartitionFields(Schema schema, List<String> partitioningFields) {
  // Lowercased names of the schema's existing first-level fields, for case-insensitive lookup.
  final Set<String> presentFieldNames = schema.getFields().stream()
      .map(field -> field.name().toLowerCase())
      .collect(Collectors.toSet());

  // Partition columns (lowercased) that the schema does not yet contain.
  final List<String> missingPartitionFields = partitioningFields.stream()
      .map(String::toLowerCase)
      .filter(name -> !presentFieldNames.contains(name))
      .collect(Collectors.toList());

  // Append each missing partition column as a nullable field.
  return HoodieAvroUtils.appendNullSchemaFields(schema, missingPartitionFields);
}