Refine search
// Excerpt of a list-writing routine: opens a group on the record consumer and brackets
// field 0 (the first child of `type`, viewed as a group) with startField/endField.
// The element inspector, element type and element name are looked up between those two
// calls but are not consumed in the visible statements.
// NOTE(review): no matching endGroup() is visible here -- presumably it follows outside
// this excerpt; confirm against the full method.
GroupType repeatedType = type.getType(0).asGroupType(); recordConsumer.startGroup(); recordConsumer.startField(repeatedType.getName(), 0); ObjectInspector elementInspector = inspector.getListElementObjectInspector(); Type elementType = repeatedType.getType(0); String elementName = elementType.getName(); recordConsumer.endField(repeatedType.getName(), 0);
public ParquetGroup(GroupType schema) { this.schema = schema; this.data = new List[schema.getFields().size()]; for (int i = 0; i < schema.getFieldCount(); ++i) { this.data[i] = new ArrayList(); } }
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType) { //support projection only on key of a map if (valueType == null) { return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType)); } else { if (!valueType.getName().equals("value")) { throw new RuntimeException(valueType.getName() + " should be value"); } return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType, valueType)); } }
/**
 * Appends a binary value to the field at {@code fieldIndex}.
 *
 * <p>The value is wrapped according to the primitive type of the field: {@code BINARY}
 * fields receive a {@code BinaryValue}, {@code INT96} fields an {@code Int96Value}.
 *
 * @param fieldIndex index of the target field within this group's type
 * @param value      the binary value to append
 * @throws UnsupportedOperationException if the field's primitive type is neither
 *         {@code BINARY} nor {@code INT96}
 */
public void add(int fieldIndex, Binary value) {
  switch (this.getType().getType(fieldIndex).asPrimitiveType().getPrimitiveTypeName()) {
    case BINARY:
      this.add(fieldIndex, new BinaryValue(value));
      break;
    case INT96:
      this.add(fieldIndex, new Int96Value(value));
      break;
    default:
      // Name the offending *field*: this.getType() is the enclosing group type, so
      // converting it to a primitive type would fail before this exception was built.
      throw new UnsupportedOperationException(
          this.getType().getType(fieldIndex).asPrimitiveType().getName()
              + " not supported for Binary");
  }
}
/**
 * Emits every non-null field of a group to the RecordConsumer.
 *
 * <p>Fields are visited in schema order; a field whose value is {@code null} is not
 * written at all. Nothing is written when {@code value} itself is {@code null}.
 *
 * @param value     The list of values contained in the group.
 * @param inspector The object inspector used to get the correct value type.
 * @param type      Type that contains information about the group schema.
 */
private void writeGroupFields(final Object value, final StructObjectInspector inspector, final GroupType type) {
  if (value == null) {
    return;
  }

  final List<? extends StructField> structFields = inspector.getAllStructFieldRefs();
  final List<Object> structValues = inspector.getStructFieldsDataAsList(value);

  for (int index = 0; index < type.getFieldCount(); index++) {
    final Type schemaField = type.getType(index);
    final String name = schemaField.getName();
    final Object current = structValues.get(index);
    if (current == null) {
      // Null fields are simply omitted from the record.
      continue;
    }

    final ObjectInspector currentInspector = structFields.get(index).getFieldObjectInspector();
    recordConsumer.startField(name, index);
    writeValue(current, currentInspector, schemaField);
    recordConsumer.endField(name, index);
  }
}
OriginalType originalType = parquetGroupType.getOriginalType(); if (originalType != null) { switch (originalType) { case LIST: if (parquetGroupType.getFieldCount() != 1) { throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); Type elementType = parquetGroupType.getType(0); return createHiveArray(elementType, parquetGroupType.getName()); case MAP: if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0) GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) || !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) || mapKeyValType.getFieldCount() != 2) { throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); Type keyType = mapKeyValType.getType(0); "Map key type must be binary (UTF8): " + keyType); Type valueType = mapKeyValType.getType(1); return createHiveMap(convertField(keyType), convertField(valueType)); case ENUM: return createHiveStruct(parquetGroupType.getFields());
/**
 * Verifies that every field of the given group type is contained in this group.
 *
 * <p>Each field of {@code subType} is looked up by name in this group and checked via
 * {@code checkContains} on the matching field.
 *
 * @param subType the type expected to be a sub-schema of this group
 * @throws InvalidRecordException if {@code subType} is a primitive type
 */
void checkGroupContains(Type subType) {
  if (subType.isPrimitive()) {
    throw new InvalidRecordException(subType + " found: expected " + this);
  }
  for (Type expectedField : subType.asGroupType().getFields()) {
    final Type ownField = this.getType(expectedField.getName());
    ownField.checkContains(expectedField);
  }
}
private FieldSchema getComplexFieldSchema(String fieldName, Type parquetType) throws FrontendException { GroupType parquetGroupType = parquetType.asGroupType(); OriginalType originalType = parquetGroupType.getOriginalType(); if (originalType != null) { switch(originalType) { case MAP: if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) { throw new SchemaConversionException("Invalid map type " + parquetGroupType); GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); if (!mapKeyValType.isRepetition(Repetition.REPEATED) || !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) || mapKeyValType.getFieldCount()!=2) { throw new SchemaConversionException("Invalid map type " + parquetGroupType); Type valueType = mapKeyValType.getType(1); Schema s = convertField(valueType); s.getField(0).alias = null; return new FieldSchema(fieldName, s, DataType.MAP); case LIST: if (parquetGroupType.getFieldCount()!= 1 || parquetGroupType.getType(0).isPrimitive()) { throw new SchemaConversionException("Invalid list type " + parquetGroupType ); GroupType tupleType = parquetGroupType.getType(0).asGroupType(); if (!tupleType.isRepetition(Repetition.REPEATED)) { throw new SchemaConversionException("Invalid list type " + parquetGroupType); Schema tupleSchema = new Schema(new FieldSchema(tupleType.getName(), convertFields(tupleType.getFields()), DataType.TUPLE));
private boolean hasMissingRequiredFieldInGroupType(GroupType requested, GroupType fullSchema) { for (Type field : fullSchema.getFields()) { if (requested.containsField(field.getName())) { Type requestedType = requested.getType(field.getName()); // if a field is in requested schema and the type of it is a group type, then do recursive check if (!field.isPrimitive()) { if (hasMissingRequiredFieldInGroupType(requestedType.asGroupType(), field.asGroupType())) { return true; } else { continue;// check next field } } } else { if (field.getRepetition() == Type.Repetition.REQUIRED) { return true; // if a field is missing in requested schema and it's required } else { continue; // the missing field is not required, then continue checking next field } } } return false; }
if (entryType.getOriginalType() != null) { checkArgument( entryType.getOriginalType() == MAP_KEY_VALUE, "Expected MAP column '%s' field to be type %s, but is %s", columnName, GroupType entryGroupType = entryType.asGroupType(); checkArgument( entryGroupType.getFieldCount() == 2, "Expected MAP column '%s' entry to have two fields, but has %s fields", columnName, entryGroupType.getFieldCount()); checkArgument( entryGroupType.getFieldName(0).equals("key"), "Expected MAP column '%s' entry field 0 to be named 'key', but is named %s", columnName, entryGroupType.getFieldName(0)); checkArgument( entryGroupType.getFieldName(1).equals("value"), "Expected MAP column '%s' entry field 1 to be named 'value', but is named %s", columnName, entryGroupType.getFieldName(1)); checkArgument( entryGroupType.getType(0).isPrimitive(), "Expected MAP column '%s' entry field 0 to be primitive, but is %s", columnName, entryGroupType.getType(0)); keyConverter = createConverter(prestoType.getTypeParameters().get(0), columnName + ".key", entryGroupType.getFields().get(0), 0);
/**
 * Checks whether {@code path}, starting at position {@code index}, leads to a primitive
 * field inside {@code group}.
 *
 * @param group the group to search in
 * @param path  the field names making up the path
 * @param index position in {@code path} of the segment to resolve within {@code group}
 * @return {@code true} only if the remaining segments resolve, one nesting level each,
 *         to a primitive field reached exactly at the last segment
 */
private boolean contains(GroupType group, String[] path, int index) {
  if (index == path.length) {
    // Path exhausted without reaching a primitive field.
    return false;
  }
  final String segment = path[index];
  if (!group.containsField(segment)) {
    return false;
  }
  final Type child = group.getType(segment);
  return child.isPrimitive()
      ? index + 1 == path.length
      : contains(child.asGroupType(), path, index + 1);
}
private void visitChildren(GroupColumnIO newIO, GroupType groupType, GroupType requestedGroupType) { GroupColumnIO oldIO = current; current = newIO; for (Type type : groupType.getFields()) { // if the file schema does not contain the field it will just stay null if (requestedGroupType.containsField(type.getName())) { currentRequestedIndex = requestedGroupType.getFieldIndex(type.getName()); currentRequestedType = requestedGroupType.getType(currentRequestedIndex); if (currentRequestedType.getRepetition().isMoreRestrictiveThan(type.getRepetition())) { incompatibleSchema(type, currentRequestedType); } type.accept(this); } } current = oldIO; }
/**
 * Appends a primitive value to the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the target field in this group's schema
 * @param value      the value to append
 * @throws IllegalStateException if the field is not repeated and already holds a value
 */
public void add(int fieldIndex, Primitive value) {
  final Type fieldType = this.schema.getType(fieldIndex);
  final List<Object> values = this.data[fieldIndex];
  final boolean singleValued = !fieldType.isRepetition(REPEATED);
  if (singleValued && !values.isEmpty()) {
    throw new IllegalStateException(
        "field " + fieldIndex + " (" + fieldType.getName()
            + ") can not have more than one value: " + values);
  }
  values.add(value);
}
/**
 * Builds the group type for a list column.
 *
 * <p>A primitive {@code nestedType} is placed inside a repeated group named
 * {@code "bag"}; a group {@code nestedType} is re-created as a repeated group with the
 * same name and fields. The repeated group is then handed to {@code listWrapper}
 * together with {@code LIST}.
 *
 * @param repetition repetition passed through to {@code listWrapper}
 * @param alias      name passed through to {@code listWrapper}
 * @param nestedType type of the list elements
 * @return the group type produced by {@code listWrapper}
 */
public static GroupType listType(Repetition repetition, String alias, Type nestedType) {
  if (nestedType.isPrimitive()) {
    return listWrapper(
        repetition, alias, LIST, new GroupType(Repetition.REPEATED, "bag", nestedType));
  }
  final GroupType tuple = nestedType.asGroupType();
  return listWrapper(
      repetition,
      alias,
      LIST,
      new GroupType(Repetition.REPEATED, tuple.getName(), tuple.getFields()));
}
}
/**
 * Creates a new, empty nested group for the field at {@code fieldIndex}, appends it to
 * that field's values and returns it.
 *
 * @param fieldIndex index of a group-typed field in this group's schema
 * @return the newly created nested group
 */
public Group addGroup(int fieldIndex) {
  final GroupType nestedSchema = this.schema.getType(fieldIndex).asGroupType();
  final ParquetGroup nested = new ParquetGroup(nestedSchema);
  this.data[fieldIndex].add(nested);
  return nested;
}
List<Type> newFields = new ArrayList<Type>(); for (Type type : this.getFields()) { Type merged; if (toMerge.containsField(type.getName())) { Type fieldToMerge = toMerge.getType(type.getName()); if (fieldToMerge.getRepetition().isMoreRestrictiveThan(type.getRepetition())) { throw new IncompatibleSchemaModificationException("repetition constraint is more restrictive: can not merge type " + fieldToMerge + " into " + type); for (Type type : toMerge.getFields()) { if (!this.containsField(type.getName())) { newFields.add(type);
/**
 * {@inheritDoc}
 */
public void endGroup() {
  // Forward first, then validate against the group type that is still on top of the stack.
  delegate.endGroup();
  final int declaredFieldCount = types.peek().asGroupType().getFieldCount();
  validateMissingFields(declaredFieldCount);
  // Only after validation is the group's bookkeeping discarded.
  types.pop();
  previousField.pop();
}
/**
 * {@inheritDoc}
 *
 * <p>A group type never equals a primitive type; two group types are equal when their
 * repetition, name and field lists are all equal.
 */
@Override
protected boolean typeEquals(Type other) {
  if (other.isPrimitive()) {
    return false;
  }
  final GroupType that = other.asGroupType();
  if (getRepetition() != that.getRepetition()) {
    return false;
  }
  if (!getName().equals(that.getName())) {
    return false;
  }
  return getFields().equals(that.getFields());
}
/**
 * Merges another group type into this one.
 *
 * <p>The result takes its repetition from {@code toMerge}, its name from this group and
 * its fields from {@code mergeFields}.
 *
 * @param toMerge the type to merge into this group; must not be primitive
 * @param strict  NOTE(review): not used in this method body -- confirm whether
 *                {@code mergeFields} is meant to receive it
 * @return a new group type combining both schemas
 * @throws IncompatibleSchemaModificationException if {@code toMerge} is a primitive type
 */
@Override
protected Type union(Type toMerge, boolean strict) {
  if (!toMerge.isPrimitive()) {
    return new GroupType(
        toMerge.getRepetition(),
        getName(),
        mergeFields(toMerge.asGroupType()));
  }
  throw new IncompatibleSchemaModificationException(
      "can not merge primitive type " + toMerge + " into group type " + this);
}
public ParquetListConverter(Type prestoType, String columnName, GroupType listType, int fieldIndex) { checkArgument( listType.getFieldCount() == 1, "Expected LIST column '%s' to only have one field, but has %s fields", columnName, listType.getFieldCount()); checkArgument(ARRAY.equals(prestoType.getTypeSignature().getBase())); this.arrayType = prestoType; this.fieldIndex = fieldIndex; // The Parquet specification requires that the element value of a // LIST type be wrapped in an inner repeated group, like so: // // optional group listField (LIST) { // repeated group list { // optional int element // } // } // // However, some parquet libraries don't follow this spec. The // compatibility rules used here are specified in the Parquet // documentation at http://git.io/vOpNz. parquet.schema.Type elementType = listType.getType(0); if (isElementType(elementType, listType.getName())) { elementConverter = createConverter(prestoType.getTypeParameters().get(0), columnName + ".element", elementType, 0); } else { elementConverter = new ParquetListEntryConverter(prestoType.getTypeParameters().get(0), columnName, elementType.asGroupType()); } }