/**
 * Checks whether a Parquet type is a valid 'list' type: a group (not a
 * primitive) annotated with LIST whose single child field is REPEATED.
 */
private static boolean isLogicalListType(Type listType) {
  if (listType.isPrimitive()) {
    return false;
  }
  final OriginalType annotation = listType.getOriginalType();
  if (annotation == null || !annotation.equals(OriginalType.LIST)) {
    return false;
  }
  final GroupType listGroup = listType.asGroupType();
  return listGroup.getFieldCount() == 1
      && listGroup.getFields().get(0).isRepetition(Type.Repetition.REPEATED);
}
public ListDataWriter(ListObjectInspector inspector, GroupType groupType) { this.inspector = inspector; // Get the internal array structure GroupType repeatedType = groupType.getType(0).asGroupType(); this.repeatedGroupName = repeatedType.getName(); Type elementType = repeatedType.getType(0); this.elementName = elementType.getName(); ObjectInspector elementInspector = this.inspector.getListElementObjectInspector(); this.elementWriter = createWriter(elementInspector, elementType); }
/**
 * Returns the minimum value representable by the given Parquet primitive
 * type when read as the named Hive type.
 *
 * @param type the Parquet primitive type backing the column
 * @param typeName the Hive type name (serdeConstants constant)
 * @param defaultValue returned when typeName has no narrower minimum
 * @return 0 for unsigned annotations, the Java type minimum for
 *         int/smallint/tinyint, otherwise defaultValue
 */
private static long getMinValue(final PrimitiveType type, String typeName, long defaultValue) {
  // Hoisted: the original fetched the annotation four times.
  final OriginalType originalType = type.getOriginalType();
  if (OriginalType.UINT_8 == originalType || OriginalType.UINT_16 == originalType
      || OriginalType.UINT_32 == originalType || OriginalType.UINT_64 == originalType) {
    // Unsigned types can never go below zero.
    return 0;
  }
  switch (typeName) {
    case serdeConstants.INT_TYPE_NAME:
      return Integer.MIN_VALUE;
    case serdeConstants.SMALLINT_TYPE_NAME:
      return Short.MIN_VALUE;
    case serdeConstants.TINYINT_TYPE_NAME:
      return Byte.MIN_VALUE;
    default:
      return defaultValue;
  }
}
/**
 * Checks whether a Parquet type is a valid 'map' type: a non-repeated group
 * annotated MAP or MAP_KEY_VALUE containing exactly one repeated group with
 * a primitive "key" field and a "value" field.
 */
private static boolean isLogicalMapType(Type groupType) {
  OriginalType ot = groupType.getOriginalType();
  if (groupType.isPrimitive() || ot == null
      || groupType.isRepetition(Type.Repetition.REPEATED)) {
    return false;
  }
  // Reuse the already-fetched annotation (the original re-read it twice);
  // enum constants are singletons, so identity comparison is safe.
  if (ot == OriginalType.MAP || ot == OriginalType.MAP_KEY_VALUE) {
    GroupType myMapType = groupType.asGroupType();
    if (myMapType.getFieldCount() != 1 || myMapType.getFields().get(0).isPrimitive()) {
      return false;
    }
    GroupType mapItemType = myMapType.getFields().get(0).asGroupType();
    return mapItemType.isRepetition(Type.Repetition.REPEATED)
        && mapItemType.getFieldCount() == 2
        && mapItemType.getFields().get(0).getName().equalsIgnoreCase("key")
        && mapItemType.getFields().get(0).isPrimitive()
        && mapItemType.getFields().get(1).getName().equalsIgnoreCase("value");
  }
  return false;
}
private static boolean isElementType(Type repeatedType, String parentName) { if (repeatedType.isPrimitive() || (repeatedType.asGroupType().getFieldCount() != 1)) { return true; } else if (repeatedType.getName().equals("array")) { return true; // existing avro data } else if (repeatedType.getName().equals(parentName + "_tuple")) { return true; // existing thrift data } // false for the following cases: // * name is "list", which matches the spec // * name is "bag", which indicates existing hive or pig data // * ambiguous case, which should be assumed is 3-level according to spec return false; } }
/**
 * Resolves the primitive element type of a (possibly wrapped) list column.
 * A group type is expected to be a single-field wrapper whose inner group
 * holds the primitive; anything wider is an unsupported nested type.
 */
private PrimitiveType getElementType(Type type) {
  if (type.isPrimitive()) {
    return type.asPrimitiveType();
  }
  final GroupType outer = type.asGroupType();
  if (outer.getFields().size() > 1) {
    throw new RuntimeException(
        "Current Parquet Vectorization reader doesn't support nested type");
  }
  // Descend through the single wrapper group to the underlying primitive.
  return outer.getFields().get(0).asGroupType().getFields().get(0).asPrimitiveType();
}
/**
 * Returns true when the object is a repeated Parquet group wrapping exactly
 * one primitive field, i.e. a wrapped-primitive list element.
 */
static boolean isWrappedListPrimitive(Object o) {
  if (!(o instanceof Group)) {
    return false;
  }
  final Type wrapped = ((Group) o).getType();
  return wrapped.isRepetition(Type.Repetition.REPEATED)
      && !wrapped.isPrimitive()
      && wrapped.asGroupType().getFieldCount() == 1
      && wrapped.getFields().get(0).isPrimitive();
}
/**
 * Dispatches converter creation to the primitive or group overload based on
 * the Parquet type's kind. Returns null when the type is absent.
 */
protected static Converter getConverterFromDescription(Type type, int index,
    ConverterParent parent, TypeInfo hiveTypeInfo) {
  if (type == null) {
    return null;
  }
  return type.isPrimitive()
      ? getConverterFromDescription(type.asPrimitiveType(), index, parent, hiveTypeInfo)
      : getConverterFromDescription(type.asGroupType(), index, parent, hiveTypeInfo);
}
/**
 * Converts a repeated field into a list of primitives or groups.
 */
private static List<Object> convertRepeatedFieldToList(Group g, int fieldIndex,
    boolean binaryAsString) {
  final Type fieldType = g.getType().getFields().get(fieldIndex);
  assert fieldType.getRepetition().equals(Type.Repetition.REPEATED);

  final int count = g.getFieldRepetitionCount(fieldIndex);
  // The field's kind is fixed, so test it once outside the loop.
  final boolean primitive = fieldType.isPrimitive();
  final List<Object> values = new ArrayList<>();
  for (int i = 0; i < count; i++) {
    values.add(primitive
        ? convertPrimitiveField(g, fieldIndex, i, binaryAsString)
        : g.getGroup(fieldIndex, i));
  }
  return values;
}
/**
 * Searches for a field name within a Parquet GroupType, ignoring string case.
 * GroupType#getType(String fieldName) is case sensitive, so we use this method.
 *
 * @param groupType Group of field types where to search for fieldName
 * @param fieldName The field we are searching for
 * @return The Type object of the field found; null otherwise.
 */
private static Type getFieldTypeIgnoreCase(GroupType groupType, String fieldName) {
  for (Type candidate : groupType.getFields()) {
    if (candidate.getName().equalsIgnoreCase(fieldName)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Searches column names by name on a given Parquet message schema, and returns its projected
 * Parquet schema types.
 *
 * @param schema Message type schema where to search for column names.
 * @param colNames List of column names.
 * @param colTypes List of column types.
 * @return A MessageType object of projected columns.
 */
public static MessageType getSchemaByName(MessageType schema, List<String> colNames,
    List<TypeInfo> colTypes) {
  final List<Type> projected = getProjectedGroupFields(schema, colNames, colTypes);
  return Types.buildMessage()
      .addFields(projected.toArray(new Type[0]))
      .named(schema.getName());
}
/**
 * Checks that the underlying Parquet primitive type carries decimal
 * metadata and therefore can be read as a Hive Decimal.
 *
 * @param type the Parquet type backing the column
 * @throws UnsupportedOperationException if the type has no decimal metadata
 */
protected void decimalTypeCheck(Type type) {
  DecimalMetadata decimalMetadata = type.asPrimitiveType().getDecimalMetadata();
  if (decimalMetadata == null) {
    // Fixed the garbled original wording ("cannot be able to converted").
    throw new UnsupportedOperationException("The underlying Parquet type cannot be "
        + "converted to Hive Decimal type: " + type);
  }
}
/**
 * Builds a group type that mirrors the original group's repetition and name
 * but contains only the given projected child types.
 *
 * @return the projected GroupType, or null when there is nothing to project
 */
private static GroupType buildProjectedGroupType(GroupType originalType, List<Type> types) {
  if (types != null && !types.isEmpty()) {
    return new GroupType(originalType.getRepetition(), originalType.getName(), types);
  }
  return null;
}
/**
 * Creates a nested struct converter positioned at the given field index of
 * its containing group, then delegates field setup to init.
 */
public HiveStructConverter(final GroupType selectedGroupType, final ConverterParent parent,
    final int index, final GroupType containingGroupType, TypeInfo hiveTypeInfo) {
  this.totalFieldCount = containingGroupType.getFieldCount();
  this.parent = parent;
  this.index = index;
  init(selectedGroupType, parent, index, containingGroupType, hiveTypeInfo);
}
@Override public int length(final Object o) { if (o instanceof List) { return ((List) o).size(); } else if (o instanceof Group) { // both lists and maps are 'Group' type, but we should only have a group here in a map context Group g = (Group) o; return g.getType().getFields().size(); } else { return 0; } }
/**
 * Converts Hive column names and types into a Parquet message schema rooted
 * at "hive_schema".
 */
public static MessageType convert(final List<String> columnNames,
    final List<TypeInfo> columnTypes) {
  return new MessageType("hive_schema", convertTypes(columnNames, columnTypes));
}
/**
 * Parses the given schema string and writes the record through a
 * DataWritableWriter into the mock record consumer.
 */
private void writeParquetRecord(String schema, ParquetHiveRecord record) throws SerDeException {
  final MessageType parsedSchema = MessageTypeParser.parseMessageType(schema);
  new DataWritableWriter(mockRecordConsumer, parsedSchema).write(record);
}
/**
 * Serializes the Parquet schema into the configuration under the
 * PARQUET_HIVE_SCHEMA key so readers/writers can reconstruct it later.
 */
public static void setSchema(final MessageType schema,
    final Configuration configuration) {
  final String serialized = schema.toString();
  configuration.set(PARQUET_HIVE_SCHEMA, serialized);
}
public MapDataWriter(MapObjectInspector inspector, GroupType groupType) { this.inspector = inspector; // Get the internal map structure (MAP_KEY_VALUE) GroupType repeatedType = groupType.getType(0).asGroupType(); this.repeatedGroupName = repeatedType.getName(); // Get key element information Type keyType = repeatedType.getType(0); ObjectInspector keyInspector = this.inspector.getMapKeyObjectInspector(); this.keyName = keyType.getName(); this.keyWriter = createWriter(keyInspector, keyType); // Get value element information Type valuetype = repeatedType.getType(1); ObjectInspector valueInspector = this.inspector.getMapValueObjectInspector(); this.valueName = valuetype.getName(); this.valueWriter = createWriter(valueInspector, valuetype); }
public HiveStructConverter(final GroupType requestedSchema, final GroupType tableSchema, Map<String, String> metadata, TypeInfo hiveTypeInfo) { setMetadata(metadata); this.reuseWritableArray = true; this.writables = new Writable[tableSchema.getFieldCount()]; this.parent = null; this.index = 0; this.totalFieldCount = tableSchema.getFieldCount(); init(requestedSchema, null, 0, tableSchema, hiveTypeInfo); }