public static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) { if (messageType.containsField(columnName)) { return messageType.getType(columnName); } // parquet is case-sensitive, but hive is not. all hive columns get converted to lowercase // check for direct match above but if no match found, try case-insensitive match for (parquet.schema.Type type : messageType.getFields()) { if (type.getName().equalsIgnoreCase(columnName)) { return type; } } return null; }
/**
 * Resolves the index of a field in {@code fileSchema} by name.
 *
 * <p>The lower-cased name (English locale) is tried first. If that lookup
 * throws {@link InvalidRecordException}, the fields are scanned in order and
 * the index of the first field whose name matches ignoring case is returned.
 *
 * @param fileSchema the schema to search
 * @param name the field name to resolve
 * @return the field index, or {@code -1} when no field matches
 */
public static int getFieldIndex(MessageType fileSchema, String name) {
    try {
        String lowerCased = name.toLowerCase(Locale.ENGLISH);
        return fileSchema.getFieldIndex(lowerCased);
    } catch (InvalidRecordException ignored) {
        // The lower-cased lookup failed; retry with a case-insensitive scan.
        for (parquet.schema.Type field : fileSchema.getFields()) {
            String fieldName = field.getName();
            if (fieldName.equalsIgnoreCase(name)) {
                return fileSchema.getFieldIndex(fieldName);
            }
        }
        return -1;
    }
}
/**
 * Builds a decimal {@link Type} from the precision and scale carried by the
 * given metadata.
 *
 * @param decimalMetadata source of the precision and scale
 * @return the result of {@code DecimalType.createDecimalType(precision, scale)}
 */
private static Type createDecimalType(DecimalMetadata decimalMetadata) {
    return DecimalType.createDecimalType(
            decimalMetadata.getPrecision(),
            decimalMetadata.getScale());
}
/**
 * Appends a primitive value to the field at {@code fieldIndex}.
 *
 * <p>A field whose type is not {@code REPEATED} may hold at most one value.
 *
 * @param fieldIndex index of the target field in the schema
 * @param value the value to append
 * @throws IllegalStateException if the field is not repeated and already
 *         holds a value
 */
public void add(int fieldIndex, Primitive value) {
    Type fieldType = this.schema.getType(fieldIndex);
    List<Object> values = this.data[fieldIndex];

    // Repeated fields accept any number of values; others only the first.
    if (fieldType.isRepetition(REPEATED) || values.isEmpty()) {
        values.add(value);
        return;
    }

    throw new IllegalStateException(
            "field " + fieldIndex + " (" + fieldType.getName()
                    + ") can not have more than one value: " + values);
}
/**
 * Creates a new {@code ParquetGroup} for the group type at {@code fieldIndex},
 * appends it to that field's values, and returns it.
 *
 * @param fieldIndex index of the group-typed field in the schema
 * @return the newly created group, already added to the field's values
 */
public Group addGroup(int fieldIndex) {
    ParquetGroup child =
            new ParquetGroup(this.schema.getType(fieldIndex).asGroupType());
    this.data[fieldIndex].add(child);
    return child;
}
/**
 * Creates a descriptor that combines the path and repetition/definition
 * levels of {@code descriptor} with the type name and type length of
 * {@code primitiveType}.
 *
 * @param descriptor supplies the column path and the max repetition and
 *        definition levels
 * @param primitiveType supplies the primitive type name and type length; it
 *        is also retained on this instance
 */
public RichColumnDescriptor(
        ColumnDescriptor descriptor,
        PrimitiveType primitiveType) {
    super(
            descriptor.getPath(),
            primitiveType.getPrimitiveTypeName(),
            primitiveType.getTypeLength(),
            descriptor.getMaxRepetitionLevel(),
            descriptor.getMaxDefinitionLevel());
    // Any repetition other than OPTIONAL is treated as required.
    this.required = primitiveType.getRepetition() != OPTIONAL;
    this.primitiveType = primitiveType;
}
/**
 * Wraps a repeated nested type in a {@link GroupType} carrying the given
 * repetition, name and original type.
 *
 * @param repetition repetition of the wrapping group
 * @param alias name of the wrapping group
 * @param originalType original-type annotation of the wrapping group
 * @param nested the wrapped type; must be {@code REPEATED}
 * @return the wrapping group
 * @throws IllegalArgumentException if {@code nested} is not repeated
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
    if (nested.isRepetition(Repetition.REPEATED)) {
        return new GroupType(repetition, alias, originalType, nested);
    }
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
/**
 * Builds a {@link MessageType} named {@code "hive_schema"} whose fields are
 * produced by {@code convertTypes} from the given column names and types.
 *
 * @param columnNames the column names
 * @param columnTypes the column type infos, passed alongside
 *        {@code columnNames} to {@code convertTypes}
 * @return the resulting message type
 */
public static MessageType convert(
        final List<String> columnNames,
        final List<TypeInfo> columnTypes) {
    return new MessageType(
            "hive_schema",
            convertTypes(columnNames, columnTypes));
}
/**
 * Wraps {@code elementType} in a {@link GroupType} with the given name,
 * original type and repetition.
 *
 * @param name name of the wrapping group
 * @param originalType original-type annotation of the wrapping group
 * @param elementType the type placed inside the wrapping group
 * @param repetition repetition of the wrapping group
 * @return the wrapping group
 */
private static GroupType listWrapper(
        final String name,
        final OriginalType originalType,
        final Type elementType,
        final Repetition repetition) {
    return new GroupType(repetition, name, originalType, elementType);
}
} // closes an enclosing scope whose opening is outside this excerpt
/**
 * Resolves the Parquet type for a Hive column, either by name or by position.
 *
 * @param column the Hive column to resolve
 * @param messageType the Parquet message schema to search in
 * @param useParquetColumnNames when {@code true}, delegate to
 *        {@code getParquetTypeByName} using the column's name; otherwise use
 *        the column's Hive index as the field position
 * @return the resolved type; {@code null} when resolving by position and the
 *         Hive column index is not below the schema's field count (by-name
 *         results come straight from {@code getParquetTypeByName})
 */
public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames) {
    if (useParquetColumnNames) {
        return getParquetTypeByName(column.getName(), messageType);
    }

    // Positional lookup: the schema may have fewer fields than the table.
    if (column.getHiveColumnIndex() >= messageType.getFieldCount()) {
        return null;
    }
    return messageType.getType(column.getHiveColumnIndex());
}
} // closes an enclosing scope whose opening is outside this excerpt
/**
 * Builds a {@link MessageType} from a flat list of schema elements.
 *
 * <p>The first element is taken as the root: its child count is handed to
 * {@code readTypeSchema} together with the iterator over the remaining
 * elements, and its name becomes the name of the resulting message type.
 *
 * @param schema the schema elements, root first
 * @return the message type named after the root element
 */
private static MessageType readParquetSchema(List<SchemaElement> schema) {
    Iterator<SchemaElement> remaining = schema.iterator();
    SchemaElement root = remaining.next();

    Types.MessageTypeBuilder messageBuilder = Types.buildMessage();
    readTypeSchema(messageBuilder, remaining, root.getNum_children());

    return messageBuilder.named(root.name);
}
/**
 * Appends a {@link Group} to the field identified by a String key.
 *
 * @param key the key resolved to a field index through {@code getIndex}
 * @param object the group to append to that field's values
 */
private void addGroup(String key, Group object) {
    final int index = getIndex(key);
    // The return value is deliberately unused.
    // NOTE(review): presumably called so that a non-group field fails here — confirm.
    this.schema.getType(index).asGroupType();
    this.data[index].add(object);
}
} // closes an enclosing scope whose opening is outside this excerpt
/**
 * Creates a {@link GroupType} around a nested type that must be repeated.
 *
 * @param repetition repetition of the wrapping group
 * @param alias name of the wrapping group
 * @param originalType original-type annotation of the wrapping group
 * @param nested the wrapped type; must be {@code REPEATED}
 * @return the wrapping group
 * @throws IllegalArgumentException if {@code nested} is not repeated
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
    final boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
    if (nestedIsRepeated) {
        return new GroupType(repetition, alias, originalType, nested);
    }
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
/**
 * Converts column names and type infos into a {@link MessageType} named
 * {@code "hive_schema"}; the fields come from {@code convertTypes}.
 *
 * @param columnNames the column names
 * @param columnTypes the column type infos, passed alongside
 *        {@code columnNames} to {@code convertTypes}
 * @return the resulting message type
 */
public static MessageType convert(
        final List<String> columnNames,
        final List<TypeInfo> columnTypes) {
    return new MessageType(
            "hive_schema",
            convertTypes(columnNames, columnTypes));
}
/**
 * Wraps {@code groupType} in an {@code OPTIONAL} {@link GroupType} with the
 * given name and original type.
 *
 * @param name name of the wrapping group
 * @param originalType original-type annotation of the wrapping group
 * @param groupType the group placed inside the wrapper
 * @return the wrapping group, always with {@code Repetition.OPTIONAL}
 */
private static GroupType listWrapper(
        final String name,
        final OriginalType originalType,
        final GroupType groupType) {
    return new GroupType(Repetition.OPTIONAL, name, originalType, groupType);
}
} // closes an enclosing scope whose opening is outside this excerpt
/**
 * Produces the {@link MessageType} for a set of columns. The message is named
 * {@code "hive_schema"} and its fields are whatever {@code convertTypes}
 * returns for the given names and types.
 *
 * @param columnNames the column names
 * @param columnTypes the column type infos, passed alongside
 *        {@code columnNames} to {@code convertTypes}
 * @return the resulting message type
 */
public static MessageType convert(
        final List<String> columnNames,
        final List<TypeInfo> columnTypes) {
    return new MessageType(
            "hive_schema",
            convertTypes(columnNames, columnTypes));
}
/**
 * Builds a {@link GroupType} that contains {@code elementType}, using the
 * supplied name, original type and repetition for the group itself.
 *
 * @param name name of the wrapping group
 * @param originalType original-type annotation of the wrapping group
 * @param elementType the type placed inside the wrapping group
 * @param repetition repetition of the wrapping group
 * @return the wrapping group
 */
private static GroupType listWrapper(
        final String name,
        final OriginalType originalType,
        final Type elementType,
        final Repetition repetition) {
    return new GroupType(repetition, name, originalType, elementType);
}
} // closes an enclosing scope whose opening is outside this excerpt
/**
 * Converts a struct type info into an {@code OPTIONAL} {@link GroupType}.
 *
 * <p>The struct's field names and field type infos are passed to
 * {@code convertTypes} to obtain the group's members.
 *
 * @param name name of the resulting group
 * @param typeInfo the struct whose fields become the group's members
 * @return the group, always with {@code Repetition.OPTIONAL}
 */
private static GroupType convertStructType(final String name, final StructTypeInfo typeInfo) {
    final List<String> fieldNames = typeInfo.getAllStructFieldNames();
    final List<TypeInfo> fieldTypes = typeInfo.getAllStructFieldTypeInfos();
    return new GroupType(
            Repetition.OPTIONAL,
            name,
            convertTypes(fieldNames, fieldTypes));
}
/**
 * Converts a struct type info into a {@link GroupType} with the requested
 * repetition.
 *
 * <p>The struct's field names and field type infos are passed to
 * {@code convertTypes} to obtain the group's members.
 *
 * @param name name of the resulting group
 * @param typeInfo the struct whose fields become the group's members
 * @param repetition repetition of the resulting group
 * @return the group
 */
private static GroupType convertStructType(
        final String name,
        final StructTypeInfo typeInfo,
        final Repetition repetition) {
    final List<String> fieldNames = typeInfo.getAllStructFieldNames();
    final List<TypeInfo> fieldTypes = typeInfo.getAllStructFieldTypeInfos();
    return new GroupType(repetition, name, convertTypes(fieldNames, fieldTypes));
}
/**
 * Builds the {@link GroupType} for a struct, applying the given repetition.
 *
 * <p>Members are obtained by handing the struct's field names and field type
 * infos to {@code convertTypes}.
 *
 * @param name name of the resulting group
 * @param typeInfo the struct whose fields become the group's members
 * @param repetition repetition of the resulting group
 * @return the group
 */
private static GroupType convertStructType(
        final String name,
        final StructTypeInfo typeInfo,
        final Repetition repetition) {
    final List<String> memberNames = typeInfo.getAllStructFieldNames();
    final List<TypeInfo> memberTypes = typeInfo.getAllStructFieldTypeInfos();
    return new GroupType(repetition, name, convertTypes(memberNames, memberTypes));
}