public GroupDataWriter(StructObjectInspector inspector, GroupType groupType) { this.inspector = inspector; structFields = this.inspector.getAllStructFieldRefs(); structWriters = new DataWriter[structFields.size()]; for (int i = 0; i < structFields.size(); i++) { StructField field = structFields.get(i); structWriters[i] = createWriter(field.getFieldObjectInspector(), groupType.getType(i)); } }
public ListDataWriter(ListObjectInspector inspector, GroupType groupType) { this.inspector = inspector; // Get the internal array structure GroupType repeatedType = groupType.getType(0).asGroupType(); this.repeatedGroupName = repeatedType.getName(); Type elementType = repeatedType.getType(0); this.elementName = elementType.getName(); ObjectInspector elementInspector = this.inspector.getListElementObjectInspector(); this.elementWriter = createWriter(elementInspector, elementType); }
public MapDataWriter(MapObjectInspector inspector, GroupType groupType) { this.inspector = inspector; // Get the internal map structure (MAP_KEY_VALUE) GroupType repeatedType = groupType.getType(0).asGroupType(); this.repeatedGroupName = repeatedType.getName(); // Get key element information Type keyType = repeatedType.getType(0); ObjectInspector keyInspector = this.inspector.getMapKeyObjectInspector(); this.keyName = keyType.getName(); this.keyWriter = createWriter(keyInspector, keyType); // Get value element information Type valuetype = repeatedType.getType(1); ObjectInspector valueInspector = this.inspector.getMapValueObjectInspector(); this.valueName = valuetype.getName(); this.valueWriter = createWriter(valueInspector, valuetype); }
public ElementConverter(GroupType repeatedType, HiveGroupConverter parent, TypeInfo hiveTypeInfo) { setMetadata(parent.getMetadata()); this.parent = parent; this.elementConverter = getConverterFromDescription( repeatedType.getType(0), 0, this, hiveTypeInfo); }
public KeyValueConverter(GroupType keyValueType, HiveGroupConverter parent, TypeInfo hiveTypeInfo) { setMetadata(parent.getMetadata()); this.parent = parent; this.keyConverter = getConverterFromDescription( keyValueType.getType(0), 0, this, hiveTypeInfo == null ? null : ((MapTypeInfo) hiveTypeInfo).getMapKeyTypeInfo()); this.valueConverter = getConverterFromDescription( keyValueType.getType(1), 1, this, hiveTypeInfo == null ? null : ((MapTypeInfo) hiveTypeInfo).getMapValueTypeInfo()); }
/**
 * Writes the contents of a group to standard output.
 *
 * <p>Each primitive value is printed on its own line as {@code <field name> <value>}.
 * Nested groups are printed recursively. An empty line is printed after every group,
 * including nested ones.
 *
 * @param g the group to print
 */
private static void printGroup(Group g) {
  final int totalFields = g.getType().getFieldCount();
  for (int fieldPos = 0; fieldPos < totalFields; fieldPos++) {
    final int occurrences = g.getFieldRepetitionCount(fieldPos);
    final Type currentType = g.getType().getType(fieldPos);
    final String label = currentType.getName();

    for (int occurrence = 0; occurrence < occurrences; occurrence++) {
      if (!currentType.isPrimitive()) {
        // Nested group: descend and print its fields.
        printGroup(g.getGroup(fieldPos, occurrence));
        continue;
      }
      System.out.println(label + " " + g.getValueToString(fieldPos, occurrence));
    }
  }
  System.out.println();
}
TypeInfo elemType = ((ListTypeInfo) colType).getListElementTypeInfo(); if (elemType.getCategory() == ObjectInspector.Category.STRUCT) { Type subFieldType = fieldType.asGroupType().getType(0); if (!subFieldType.isPrimitive()) { String subFieldName = subFieldType.getName(); if (name.equals(ParquetHiveSerDe.ARRAY) || name.equals(ParquetHiveSerDe.LIST)) { subFieldType = new GroupType(Repetition.REPEATED, subFieldName, getProjectedType(elemType, subFieldType.asGroupType().getType(0))); } else { subFieldType = getProjectedType(elemType, subFieldType);
/**
 * Creates a converter for a collection (map or list) group and selects the inner
 * converter from the collection's type at position 0.
 *
 * @param collectionType the collection group
 * @param parent         parent of this converter; its metadata is copied onto this converter
 * @param index          index stored on this converter
 * @param isMap          {@code true} to build a key/value inner converter
 * @param hiveTypeInfo   type info; passed as-is for maps, and through
 *                       {@code extractListCompatibleType} for lists
 */
private HiveCollectionConverter(GroupType collectionType, ConverterParent parent,
                                int index, boolean isMap, TypeInfo hiveTypeInfo) {
  setMetadata(parent.getMetadata());
  this.collectionType = collectionType;
  this.parent = parent;
  this.index = index;

  final Type firstChild = collectionType.getType(0);

  if (isMap) {
    this.innerConverter = new KeyValueConverter(firstChild.asGroupType(), this, hiveTypeInfo);
    return;
  }

  if (isElementType(firstChild, collectionType.getName())) {
    // The first child is itself the element: convert it directly.
    this.innerConverter = getConverterFromDescription(
        firstChild, 0, this, extractListCompatibleType(hiveTypeInfo));
    return;
  }

  // Otherwise the first child is a group that wraps the element.
  this.innerConverter = new ElementConverter(
      firstChild.asGroupType(), this, extractListCompatibleType(hiveTypeInfo));
}
/**
 * Creates a converter for a repeated group whose type at position 0 is the element.
 * Each converted value is stored in this converter's {@code element} field.
 *
 * @param repeatedType  group whose type at position 0 is converted
 * @param elementSchema element schema; its non-null form (per
 *                      {@code AvroSchemaConverter.getNonNull}) is used for the conversion
 * @param model         data model forwarded to the element converter factory
 */
public ElementConverter(GroupType repeatedType, Schema elementSchema, GenericData model) {
  final Type innerType = repeatedType.getType(0);
  final Schema innerSchema = AvroSchemaConverter.getNonNull(elementSchema);

  // Receives each converted value and records it on the enclosing converter.
  final ParentValueContainer elementSink = new ParentValueContainer() {
    @Override
    @SuppressWarnings("unchecked")
    public void add(Object value) {
      ElementConverter.this.element = value;
    }
  };

  this.elementConverter = newConverter(innerSchema, innerType, model, elementSink);
}
/**
 * {@inheritDoc}
 */
public void startGroup() {
  // -1 marks that no field of the newly entered group has been seen yet.
  previousField.push(-1);

  // The type being entered is the child of the current group at the current field.
  final Type enteredType = types.peek().asGroupType().getType(fields.peek());
  types.push(enteredType);

  delegate.startGroup();
}
/**
 * Appends a value to the list of values held for a field.
 *
 * @param fieldIndex index of the field in the schema and in the data array
 * @param value      value to append
 * @throws IllegalStateException if the field is not REPEATED and already holds a value
 */
private void add(int fieldIndex, Primitive value) {
  final Type fieldType = schema.getType(fieldIndex);
  final List<Object> values = data[fieldIndex];

  final boolean singleValued = !fieldType.isRepetition(Type.Repetition.REPEATED);
  if (singleValued && !values.isEmpty()) {
    throw new IllegalStateException("field "+fieldIndex+" (" + fieldType.getName() + ") can not have more than one value: " + values);
  }

  values.add(value);
}
/**
 * Computes the maximum repetition level along {@code path}, starting at this type.
 *
 * <p>This type contributes 1 when it is REPEATED and 0 otherwise. Unless {@code depth}
 * has reached the end of the path, the level of the child named {@code path[depth]}
 * is added.
 *
 * @param path  field names to follow
 * @param depth index into {@code path} of the next child to descend into
 * @return the accumulated repetition level from this type downwards
 */
@Override
protected int getMaxRepetitionLevel(String[] path, int depth) {
  int level = 0;
  if (isRepetition(Repetition.REPEATED)) {
    level = 1;
  }
  if (depth != path.length) {
    level += getType(path[depth]).getMaxRepetitionLevel(path, depth + 1);
  }
  return level;
}
/**
 * Checks that no REQUIRED field was skipped between the previously seen field of the
 * current group and the field at {@code index}.
 *
 * @param index field index up to which (exclusive) skipped fields are checked
 * @throws InvalidRecordException if a skipped field is REQUIRED
 */
private void validateMissingFields(int index) {
  int candidate = previousField.peek() + 1;
  while (candidate < index) {
    final Type skipped = types.peek().asGroupType().getType(candidate);
    if (skipped.isRepetition(Repetition.REQUIRED)) {
      throw new InvalidRecordException("required field is missing " + skipped);
    }
    candidate++;
  }
}
/**
 * Computes the maximum definition level along {@code path}, starting at this type.
 *
 * <p>This type contributes 1 when it is not REQUIRED and 0 otherwise. Unless
 * {@code depth} has reached the end of the path, the level of the child named
 * {@code path[depth]} is added.
 *
 * @param path  field names to follow
 * @param depth index into {@code path} of the next child to descend into
 * @return the accumulated definition level from this type downwards
 */
@Override
protected int getMaxDefinitionLevel(String[] path, int depth) {
  int level = 1;
  if (isRepetition(Repetition.REQUIRED)) {
    level = 0;
  }
  if (depth != path.length) {
    level += getType(path[depth]).getMaxDefinitionLevel(path, depth + 1);
  }
  return level;
}
public TupleConverter(GroupType parquetSchema) { int schemaSize = parquetSchema.getFieldCount(); this.converters = new Converter[schemaSize]; for (int i = 0; i < schemaSize; i++) { Type type = parquetSchema.getType(i); converters[i] = newConverter(type, i); } }
/**
 * Creates a converter for a map group.
 *
 * @param parent    container stored as this converter's parent
 * @param mapType   map group; its type at position 0 is read as the repeated key/value group
 * @param mapSchema schema forwarded to the key/value converter
 * @param model     data model forwarded to the key/value converter
 */
public MapConverter(ParentValueContainer parent, GroupType mapType,
                    Schema mapSchema, GenericData model) {
  this.parent = parent;
  this.keyValueConverter =
      new MapKeyValueConverter(mapType.getType(0).asGroupType(), mapSchema, model);
}
/**
 * Builds a record writer for a struct by wrapping each field writer with
 * {@code option}, using the field's maximum definition level.
 *
 * @param struct       the struct group whose fields are written
 * @param fieldWriters writers for the struct's fields, matched to fields by position
 * @return a {@code RecordWriter} over the wrapped field writers
 */
@Override
public ParquetValueWriter<?> struct(GroupType struct, List<ParquetValueWriter<?>> fieldWriters) {
  final int fieldCount = struct.getFields().size();
  List<ParquetValueWriter<?>> wrapped = Lists.newArrayListWithExpectedSize(fieldWriters.size());

  for (int pos = 0; pos < fieldCount; pos++) {
    Type field = struct.getType(pos);
    int definitionLevel = type.getMaxDefinitionLevel(path(field.getName()));
    ParquetValueWriter<?> fieldWriter = fieldWriters.get(pos);
    wrapped.add(option(field, definitionLevel, fieldWriter));
  }

  return new RecordWriter(wrapped);
}
/**
 * Assembles the writer for a struct: every field writer is passed through
 * {@code option} together with its field type and that field's maximum definition level.
 *
 * @param struct       the struct group being written
 * @param fieldWriters per-field writers, in the same order as the struct's fields
 * @return a {@code RecordWriter} built from the wrapped writers
 */
@Override
public ParquetValueWriter<?> struct(GroupType struct, List<ParquetValueWriter<?>> fieldWriters) {
  final int total = struct.getFields().size();
  List<ParquetValueWriter<?>> result = Lists.newArrayListWithExpectedSize(fieldWriters.size());

  int idx = 0;
  while (idx < total) {
    Type current = struct.getType(idx);
    int maxDefinition = type.getMaxDefinitionLevel(path(current.getName()));
    result.add(option(current, maxDefinition, fieldWriters.get(idx)));
    idx += 1;
  }

  return new RecordWriter(result);
}
/**
 * Follows {@code path} from {@code type}, starting at index {@code depth}, until a
 * primitive type is reached, and returns that primitive's original type.
 *
 * @param type  type to start from
 * @param path  field names to descend through
 * @param depth index into {@code path} of the first name to follow
 * @return the original type of the primitive reached
 */
private OriginalType getOriginalType(Type type, String[] path, int depth) {
  Type current = type;
  int level = depth;

  // Descend one path segment per iteration until a primitive is reached.
  while (!current.isPrimitive()) {
    current = ((GroupType) current).getType(path[level]);
    level++;
  }

  return current.getOriginalType();
}
/**
 * Builds a reader for a list.
 *
 * <p>The repeated group is the first field of {@code array}. Its definition and
 * repetition levels are the maximum levels at the current path, each minus one. The
 * element reader is wrapped with {@code option}, using the element's maximum definition
 * level minus one.
 *
 * @param expectedList  expected list type (not used by this method)
 * @param array         list group whose first field is the repeated group
 * @param elementReader reader for the list elements
 * @return an {@code ArrayReader} over the wrapped element reader
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  final GroupType repeatedGroup = array.getFields().get(0).asGroupType();
  final String[] pathToRepeated = currentPath();

  final int repeatedDefinition = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  final int repeatedRepetition = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  final Type element = repeatedGroup.getType(0);
  final int elementDefinition = type.getMaxDefinitionLevel(path(element.getName())) - 1;

  return new ArrayReader<>(
      repeatedDefinition, repeatedRepetition, option(element, elementDefinition, elementReader));
}