/**
 * Creates a converter for a struct field nested inside another group.
 *
 * @param selectedGroupType   the projected (requested) schema for this struct
 * @param parent              the converter that owns this one
 * @param index               this struct's field position within the parent
 * @param containingGroupType the full group type containing the selected fields
 * @param hiveTypeInfo        Hive type information for this struct; may be null
 */
public HiveStructConverter(final GroupType selectedGroupType, final ConverterParent parent, final int index, final GroupType containingGroupType, TypeInfo hiveTypeInfo) {
  this.totalFieldCount = containingGroupType.getFieldCount();
  this.parent = parent;
  this.index = index;
  init(selectedGroupType, parent, index, containingGroupType, hiveTypeInfo);
}
/**
 * Creates the top-level (root) struct converter for a record.
 *
 * Reuses a single Writable array sized to the full table schema so that one
 * array serves every row.
 *
 * @param requestedSchema the projected schema actually being read
 * @param tableSchema     the complete table schema
 * @param metadata        reader metadata to attach to this converter
 * @param hiveTypeInfo    Hive type information for the row; may be null
 */
public HiveStructConverter(final GroupType requestedSchema, final GroupType tableSchema, Map<String, String> metadata, TypeInfo hiveTypeInfo) {
  setMetadata(metadata);
  this.reuseWritableArray = true;
  this.writables = new Writable[tableSchema.getFieldCount()];
  // Root converter: no parent and index 0.
  this.parent = null;
  this.index = 0;
  this.totalFieldCount = tableSchema.getFieldCount();
  init(requestedSchema, null, 0, tableSchema, hiveTypeInfo);
}
private static boolean isElementType(Type repeatedType, String parentName) { if (repeatedType.isPrimitive() || (repeatedType.asGroupType().getFieldCount() != 1)) { return true; } else if (repeatedType.getName().equals("array")) { return true; // existing avro data } else if (repeatedType.getName().equals(parentName + "_tuple")) { return true; // existing thrift data } // false for the following cases: // * name is "list", which matches the spec // * name is "bag", which indicates existing hive or pig data // * ambiguous case, which should be assumed is 3-level according to spec return false; } }
/**
 * Checks whether a Parquet type is a valid LIST logical type: a non-primitive
 * group annotated with LIST whose single child field is REPEATED.
 */
private static boolean isLogicalListType(Type listType) {
  if (listType.isPrimitive()) {
    return false;
  }
  final GroupType group = listType.asGroupType();
  // OriginalType.LIST.equals(x) is false for null, so no separate null check
  // is needed here.
  return OriginalType.LIST.equals(listType.getOriginalType())
      && group.getFieldCount() == 1
      && group.getFields().get(0).isRepetition(Type.Repetition.REPEATED);
}
/**
 * Checks whether a Parquet type is a valid MAP logical type: a non-repeated
 * group annotated with MAP or MAP_KEY_VALUE whose single child is a repeated
 * group of exactly two fields — a primitive "key" followed by a "value".
 */
private static boolean isLogicalMapType(Type groupType) {
  OriginalType ot = groupType.getOriginalType();
  if (groupType.isPrimitive() || ot == null || groupType.isRepetition(Type.Repetition.REPEATED)) {
    return false;
  }
  // Use the cached annotation rather than re-calling getOriginalType();
  // OriginalType is an enum, so identity comparison is correct and null-safe.
  if (ot != OriginalType.MAP && ot != OriginalType.MAP_KEY_VALUE) {
    return false;
  }
  GroupType myMapType = groupType.asGroupType();
  if (myMapType.getFieldCount() != 1 || myMapType.getFields().get(0).isPrimitive()) {
    return false;
  }
  GroupType mapItemType = myMapType.getFields().get(0).asGroupType();
  return mapItemType.isRepetition(Type.Repetition.REPEATED)
      && mapItemType.getFieldCount() == 2
      && mapItemType.getFields().get(0).getName().equalsIgnoreCase("key")
      && mapItemType.getFields().get(0).isPrimitive()
      && mapItemType.getFields().get(1).getName().equalsIgnoreCase("value");
}
/**
 * Returns true when the given object is a repeated, non-primitive group with
 * exactly one field that is primitive — i.e. a wrapper around a single list
 * element value.
 */
static boolean isWrappedListPrimitive(Object o) {
  if (!(o instanceof Group)) {
    return false;
  }
  final GroupType type = ((Group) o).getType();
  return type.isRepetition(Type.Repetition.REPEATED)
      && !type.isPrimitive()
      && type.asGroupType().getFieldCount() == 1
      && type.getFields().get(0).isPrimitive();
}
while (groupType.getFieldCount() < 2) { if (nestGroup > MAP_DEFINITION_LEVEL_MAX) { throw new RuntimeException(
/**
 * Recursively prints the given group (one row of a Parquet file) to stdout:
 * each primitive value is printed as "name value" on its own line, nested
 * groups are descended into, and a trailing blank line terminates the group.
 *
 * @param g the group to print
 */
private static void printGroup(Group g) {
  final GroupType schema = g.getType();
  final int fieldCount = schema.getFieldCount();
  for (int field = 0; field < fieldCount; field++) {
    final Type fieldType = schema.getType(field);
    final String fieldName = fieldType.getName();
    final int repetitions = g.getFieldRepetitionCount(field);
    for (int idx = 0; idx < repetitions; idx++) {
      if (fieldType.isPrimitive()) {
        System.out.println(fieldName + " " + g.getValueToString(field, idx));
      } else {
        printGroup(g.getGroup(field, idx));
      }
    }
  }
  System.out.println();
}
/**
 * Builds one child converter per selected field, mapping each requested field
 * to its index in the containing group type.
 *
 * @throws IllegalStateException if a requested field is not present in the
 *                               containing group type
 */
private void init(final GroupType selectedGroupType, final ConverterParent parent, final int index, final GroupType containingGroupType, TypeInfo hiveTypeInfo) {
  if (parent != null) {
    setMetadata(parent.getMetadata());
  }
  final int fieldCount = selectedGroupType.getFieldCount();
  converters = new Converter[fieldCount];
  this.repeatedConverters = new ArrayList<Repeated>();
  // Struct-typed columns carry per-field Hive names/types used for
  // case-insensitive field lookup.
  if (hiveTypeInfo != null && hiveTypeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
    this.hiveFieldNames = ((StructTypeInfo) hiveTypeInfo).getAllStructFieldNames();
    this.hiveFieldTypeInfos = ((StructTypeInfo) hiveTypeInfo).getAllStructFieldTypeInfos();
  }
  final List<Type> selectedFields = selectedGroupType.getFields();
  for (int pos = 0; pos < fieldCount; pos++) {
    final Type requestedField = selectedFields.get(pos);
    if (!isSubType(containingGroupType, requestedField)) {
      throw new IllegalStateException("Group type [" + containingGroupType + "] does not contain requested field: " + requestedField);
    }
    final int fieldIndex = containingGroupType.getFieldIndex(requestedField.getName());
    final TypeInfo fieldTypeInfo = getFieldTypeIgnoreCase(hiveTypeInfo, requestedField.getName(), fieldIndex);
    converters[pos] = getFieldConverter(requestedField, fieldIndex, fieldTypeInfo);
  }
}
@Test public void testMapOriginalType() throws Exception { final String hiveColumnTypes = "map<string,string>"; final String hiveColumnNames = "mapCol"; final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames); final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes); final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes); // this messageType only has one optional field, whose name is mapCol, original Type is MAP assertEquals(1, messageTypeFound.getFieldCount()); org.apache.parquet.schema.Type topLevel = messageTypeFound.getFields().get(0); assertEquals("mapCol",topLevel.getName()); assertEquals(OriginalType.MAP, topLevel.getOriginalType()); assertEquals(Repetition.OPTIONAL, topLevel.getRepetition()); assertEquals(1, topLevel.asGroupType().getFieldCount()); org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0); //there is one repeated field for mapCol, the field name is "map" and its original Type is MAP_KEY_VALUE; assertEquals("map", secondLevel.getName()); assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType()); assertEquals(Repetition.REPEATED, secondLevel.getRepetition()); } }
@SuppressWarnings("unchecked") public SimpleGroup(GroupType schema) { this.schema = schema; this.data = new List[schema.getFields().size()]; for (int i = 0; i < schema.getFieldCount(); i++) { this.data[i] = new ArrayList<Object>(); } }
/**
 * Records the group's element (with its child count set) and then appends the
 * converted form of each child field to the result list.
 */
private void visitChildren(final List<SchemaElement> result, GroupType groupType, SchemaElement element) {
  element.setNum_children(groupType.getFieldCount());
  result.add(element);
  for (final org.apache.parquet.schema.Type child : groupType.getFields()) {
    addToList(result, child);
  }
}
} });
/**
 * Adds the element for this group (its num_children set from the group's
 * field count), then converts and adds every child field in order.
 */
private void visitChildren(final List<SchemaElement> result, GroupType groupType, SchemaElement element) {
  element.setNum_children(groupType.getFieldCount());
  result.add(element);
  final List<org.apache.parquet.schema.Type> children = groupType.getFields();
  for (final org.apache.parquet.schema.Type child : children) {
    addToList(result, child);
  }
}
} });
/**
 * Writes each Gaffer property stored from {@code startIndex} onward. A field
 * name of the form "col_part" maps to property "col"; the write helper
 * returns the index just past the fields it consumed, which advances the
 * loop (a single property may span several consecutive fields).
 *
 * @throws SerialisationException if a property cannot be written
 */
private void writeProperties(final Properties properties, final GroupType type, final int startIndex) throws SerialisationException {
  int i = startIndex;
  while (i < type.getFieldCount()) {
    final String fieldName = type.getFieldName(i);
    // Column name is the prefix before the first underscore, if any.
    final int underscore = fieldName.indexOf('_');
    final String columnName = underscore >= 0 ? fieldName.substring(0, underscore) : fieldName;
    i = writeGafferObject(columnName, properties.get(columnName), type, i);
  }
}
public TupleConverter(GroupType parquetSchema) { int schemaSize = parquetSchema.getFieldCount(); this.converters = new Converter[schemaSize]; for (int i = 0; i < schemaSize; i++) { Type type = parquetSchema.getType(i); converters[i] = newConverter(type, i); } }
/**
 * {@inheritDoc}
 *
 * Forwards to the delegate, then runs missing-field validation against the
 * field count of the current group type before popping the field tracker.
 */
public void endMessage() {
  delegate.endMessage();
  final int fieldCount = types.peek().asGroupType().getFieldCount();
  validateMissingFields(fieldCount);
  previousField.pop();
}
public ParquetTupleConverter(GroupType parquetSchema) { int schemaSize = parquetSchema.getFieldCount(); this.converters = new Converter[schemaSize]; for (int i = 0; i < schemaSize; i++) { Type type = parquetSchema.getType(i); converters[i] = newConverter(type, i); } }
public ParquetTupleConverter(GroupType parquetSchema) { int schemaSize = parquetSchema.getFieldCount(); this.converters = new Converter[schemaSize]; for (int i = 0; i < schemaSize; i++) { Type type = parquetSchema.getType(i); converters[i] = newConverter(type, i); } }
/**
 * Validates a group type: a group must have at least one field, and each
 * child field is visited recursively.
 *
 * @throws InvalidSchemaException if the group has no fields
 */
@Override
public void visit(GroupType groupType) {
  if (groupType.getFieldCount() <= 0) {
    throw new InvalidSchemaException(
        "Cannot write a schema with an empty group: " + groupType);
  }
  for (final Type field : groupType.getFields()) {
    field.accept(this);
  }
}
/**
 * Visits every field of the Parquet group, pairing each with the expected
 * Iceberg field looked up by field id (or null when there is no id or no
 * struct), and collects the visitor results in field order.
 */
private static <T> List<T> visitFields(Types.StructType struct, GroupType group, TypeWithSchemaVisitor<T> visitor) {
  final List<T> results = Lists.newArrayListWithExpectedSize(group.getFieldCount());
  for (final Type field : group.getFields()) {
    final int id = (field.getId() == null) ? -1 : field.getId().intValue();
    final Types.NestedField expected = (struct != null && id >= 0) ? struct.field(id) : null;
    results.add(visitField(expected, field, visitor));
  }
  return results;
}