public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<HiveColumnHandle> effectivePredicate) { if (effectivePredicate.isNone()) { return TupleDomain.none(); } ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder(); for (Entry<HiveColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) { HiveColumnHandle columnHandle = entry.getKey(); // skip looking up predicates for complex types as Parquet only stores stats for primitives if (!columnHandle.getHiveType().getCategory().equals(PRIMITIVE)) { continue; } RichColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName())); if (descriptor != null) { predicate.put(descriptor, entry.getValue()); } } return TupleDomain.withColumnDomains(predicate.build()); }
private boolean canCoerceForStruct(HiveType fromHiveType, HiveType toHiveType) { if (!fromHiveType.getCategory().equals(Category.STRUCT) || !toHiveType.getCategory().equals(Category.STRUCT)) { return false; } List<String> fromFieldNames = ((StructTypeInfo) fromHiveType.getTypeInfo()).getAllStructFieldNames(); List<String> toFieldNames = ((StructTypeInfo) toHiveType.getTypeInfo()).getAllStructFieldNames(); List<HiveType> fromFieldTypes = extractStructFieldTypes(fromHiveType); List<HiveType> toFieldTypes = extractStructFieldTypes(toHiveType); // Rule: // * Fields may be added or dropped from the end. // * For all other field indices, the corresponding fields must have // the same name, and the type must be coercible. for (int i = 0; i < min(fromFieldTypes.size(), toFieldTypes.size()); i++) { if (!fromFieldNames.get(i).equals(toFieldNames.get(i))) { return false; } if (!fromFieldTypes.get(i).equals(toFieldTypes.get(i)) && !canCoerce(fromFieldTypes.get(i), toFieldTypes.get(i))) { return false; } } return true; } }
private void prepareInSet(DeferredObject[] arguments) throws HiveException { constantInSet = new HashSet<Object>(); if (compareOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) { for (int i = 1; i < arguments.length; ++i) { constantInSet.add(((PrimitiveObjectInspector) compareOI) .getPrimitiveJavaObject(conversionHelper .convertIfNecessary(arguments[i].get(), argumentOIs[i]))); } } else { for (int i = 1; i < arguments.length; ++i) { constantInSet.add(((ConstantObjectInspector) argumentOIs[i]).getWritableConstantValue()); } } }
/**
 * Validates that the single argument is a list whose element type is not a union,
 * and resolves the standard list object inspector used for the return value.
 *
 * NOTE(review): the error message names SORT_ARRAY — confirm this UDF is (or wraps)
 * sort_array; the message may have been copied from it.
 *
 * @param arguments exactly one argument; must be a LIST of non-UNION elements
 * @return a standard list object inspector over the resolved element inspector
 * @throws UDFArgumentException if the argument count or type is invalid
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
  returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
  checkArgsSize(arguments, 1, 1);
  switch(arguments[0].getCategory()) {
  case LIST:
    // Intentional fall-through: a list with a non-UNION element type breaks out
    // (accepted); a list of unions falls through into default and is rejected.
    if(!((ListObjectInspector)(arguments[0])).getListElementObjectInspector()
        .getCategory().equals(ObjectInspector.Category.UNION)) {
      break;
    }
  default:
    throw new UDFArgumentTypeException(0, "Argument 1"
        + " of function SORT_ARRAY must be " + serdeConstants.LIST_TYPE_NAME
        + ", and element type should be either primitive, list, struct, or map, "
        + "but " + arguments[0].getTypeName() + " was found.");
  }
  ObjectInspector elementObjectInspector =
      ((ListObjectInspector)(arguments[0])).getListElementObjectInspector();
  argumentOIs = arguments;
  converters = new Converter[arguments.length];
  // Resolve a common return inspector for the element type and build its converter.
  ObjectInspector returnOI = returnOIResolver.get(elementObjectInspector);
  converters[0] = ObjectInspectorConverters.getConverter(elementObjectInspector, returnOI);
  return ObjectInspectorFactory.getStandardListObjectInspector(returnOI);
}
/**
 * Wires up one converter per selected Parquet field, matching each field against the
 * containing group schema and (when available) the corresponding Hive struct field type.
 *
 * NOTE(review): the {@code index} parameter is not used in this body — confirm whether
 * callers rely on it elsewhere or it is vestigial.
 *
 * @param selectedGroupType the projected Parquet group whose fields need converters
 * @param parent parent converter, if any; its metadata is propagated
 * @param index position of this converter in its parent (unused here)
 * @param containingGroupType the full Parquet group the selected fields must belong to
 * @param hiveTypeInfo Hive type of this group; field names/types are extracted when it is a struct
 * @throws IllegalStateException if a selected field is absent from the containing group
 */
private void init(final GroupType selectedGroupType, final ConverterParent parent, final int index, final GroupType containingGroupType, TypeInfo hiveTypeInfo) {
  // Propagate metadata down the converter chain when a parent exists.
  if (parent != null) {
    setMetadata(parent.getMetadata());
  }
  final int selectedFieldCount = selectedGroupType.getFieldCount();
  converters = new Converter[selectedFieldCount];
  this.repeatedConverters = new ArrayList<Repeated>();

  // Only struct-typed Hive columns carry per-field names/types to resolve against.
  if (hiveTypeInfo != null && hiveTypeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
    this.hiveFieldNames = ((StructTypeInfo) hiveTypeInfo).getAllStructFieldNames();
    this.hiveFieldTypeInfos = ((StructTypeInfo) hiveTypeInfo).getAllStructFieldTypeInfos();
  }

  List<Type> selectedFields = selectedGroupType.getFields();
  for (int i = 0; i < selectedFieldCount; i++) {
    Type subtype = selectedFields.get(i);
    if (isSubType(containingGroupType, subtype)) {
      // Resolve the field's index in the full schema, then its Hive type, then its converter.
      int fieldIndex = containingGroupType.getFieldIndex(subtype.getName());
      TypeInfo _hiveTypeInfo = getFieldTypeIgnoreCase(hiveTypeInfo, subtype.getName(), fieldIndex);
      converters[i] = getFieldConverter(subtype, fieldIndex, _hiveTypeInfo);
    } else {
      throw new IllegalStateException("Group type [" + containingGroupType +
          "] does not contain requested field: " + subtype);
    }
  }
}
private TypeInfo getFieldTypeIgnoreCase(TypeInfo hiveTypeInfo, String fieldName, int fieldIndex) { if (hiveTypeInfo == null) { return null; } else if (hiveTypeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) { return getStructFieldTypeInfo(fieldName, fieldIndex); } else if (hiveTypeInfo.getCategory().equals(ObjectInspector.Category.MAP)) { //This cover the case where hive table may have map<key, value> but the data file is // of type array<struct<value1, value2>> //Using index in place of type name. if (fieldIndex == 0) { return ((MapTypeInfo) hiveTypeInfo).getMapKeyTypeInfo(); } else if (fieldIndex == 1) { return ((MapTypeInfo) hiveTypeInfo).getMapValueTypeInfo(); } else {//Other fields are skipped for this case return null; } } throw new RuntimeException("Unknown hive type info " + hiveTypeInfo + " when searching for field " + fieldName); }
/**
 * Validates the single bitmap argument of EWAH_BITMAP_EMPTY and prepares inspectors.
 * The argument must be a list (the serialized bitmap representation); the function
 * returns a writable boolean.
 *
 * @param arguments exactly one argument, which must be a LIST
 * @return the writable boolean object inspector for the result
 * @throws UDFArgumentException on a wrong argument count or type
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length != 1) {
    throw new UDFArgumentLengthException(
        "The function EWAH_BITMAP_EMPTY(b) takes exactly 1 argument");
  }
  // Reject anything that is not a list up front.
  if (!arguments[0].getCategory().equals(Category.LIST)) {
    throw new UDFArgumentTypeException(0, "\""
        + Category.LIST.toString().toLowerCase()
        + "\" is expected at function EWAH_BITMAP_EMPTY, but \""
        + arguments[0].getTypeName() + "\" is found");
  }
  bitmapOI = (ListObjectInspector) arguments[0];
  boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
  return boolOI;
}
/**
 * Serializes a single field: complex types and binary are rendered as a JSON string
 * and written with the string inspector; all other primitives defer to the parent.
 *
 * @param out output byte stream to append to
 * @param obj the field value
 * @param objInspector inspector describing the field's type
 * @param serdeParams separators, null sequence, and escaping configuration
 * @throws SerDeException if writing the JSON rendering fails
 */
@Override
protected void serializeField(ByteStream.Output out, Object obj, ObjectInspector objInspector, LazySerDeParameters serdeParams) throws SerDeException {
  if (!objInspector.getCategory().equals(Category.PRIMITIVE) || (objInspector.getTypeName().equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME))) {
    //do this for all complex types and binary
    try {
      // Flatten the value to its JSON string form, then serialize that string
      // at nesting level 1 with the configured separators/escaping.
      serialize(out, SerDeUtils.getJSONString(obj, objInspector, serdeParams.getNullSequence().toString()),
          PrimitiveObjectInspectorFactory.javaStringObjectInspector,
          serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
          serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams.getNeedsEscape());
    } catch (IOException e) {
      throw new SerDeException(e);
    }
  } else {
    //primitives except binary
    super.serializeField(out, obj, objInspector, serdeParams);
  }
}
// closing brace of the enclosing class
}
/**
 * Builds the constant IN-set from arguments 1..n-1 (argument 0 is the value under test).
 * Primitive constants are converted to the comparison type and stored as Java objects;
 * non-primitive constants are stored as their writable constant values.
 *
 * @param arguments deferred UDF arguments; index 0 is skipped
 * @throws HiveException if evaluating a deferred argument fails
 */
private void prepareInSet(DeferredObject[] arguments) throws HiveException {
  constantInSet = new HashSet<Object>();
  if (compareOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
    for (int i = 1; i < arguments.length; ++i) {
      // Convert each constant to the comparison type before extracting its Java value.
      constantInSet.add(((PrimitiveObjectInspector) compareOI)
          .getPrimitiveJavaObject(conversionHelper
              .convertIfNecessary(arguments[i].get(), argumentOIs[i])));
    }
  } else {
    // Non-primitive comparisons keep each constant's writable value as-is.
    for (int i = 1; i < arguments.length; ++i) {
      constantInSet.add(((ConstantObjectInspector) argumentOIs[i]).getWritableConstantValue());
    }
  }
}
/**
 * Resolves the Hive type for a named field inside {@code hiveTypeInfo}.
 * Structs are resolved by name/index; for maps, field position 0 means the key type
 * and position 1 the value type, with any further positions skipped.
 *
 * @param hiveTypeInfo the enclosing Hive type, or null when unknown
 * @param fieldName the field being searched for
 * @param fieldIndex the field's position in the file schema
 * @return the resolved field type, or null when it cannot or should not be resolved
 * @throws RuntimeException for categories other than STRUCT and MAP
 */
private TypeInfo getFieldTypeIgnoreCase(TypeInfo hiveTypeInfo, String fieldName, int fieldIndex) {
  if (hiveTypeInfo == null) {
    return null;
  } else if (hiveTypeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
    return getStructFieldTypeInfo(fieldName, fieldIndex);
  } else if (hiveTypeInfo.getCategory().equals(ObjectInspector.Category.MAP)) {
    //This cover the case where hive table may have map<key, value> but the data file is
    // of type array<struct<value1, value2>>
    //Using index in place of type name.
    if (fieldIndex == 0) {
      return ((MapTypeInfo) hiveTypeInfo).getMapKeyTypeInfo();
    } else if (fieldIndex == 1) {
      return ((MapTypeInfo) hiveTypeInfo).getMapValueTypeInfo();
    } else {//Other fields are skipped for this case
      return null;
    }
  }
  throw new RuntimeException("Unknown hive type info " + hiveTypeInfo + " when searching for field " + fieldName);
}
/**
 * Serializes the HBase row key field to bytes. A NULL key is rejected outright.
 * When the field's actual type is complex but the column mapping declares it primitive,
 * the value is rendered as a JSON string and serialized with string escaping;
 * otherwise the value is serialized directly, in binary or UTF8 form depending on
 * the mapping's storage option.
 *
 * NOTE(review): the mismatch branch fires only when the inspector is NON-primitive
 * while the mapping IS primitive — confirm this asymmetry is the intended contract.
 *
 * @param keyValue the row key value; must not be null
 * @param keyField struct field describing the key
 * @param keyMapping column mapping carrying category and binary-storage options
 * @return the serialized key bytes
 * @throws IOException if the key is null or serialization fails
 */
byte[] serializeKeyField(Object keyValue, StructField keyField, ColumnMapping keyMapping) throws IOException {
  if (keyValue == null) {
    throw new IOException("HBase row key cannot be NULL");
  }
  ObjectInspector keyFieldOI = keyField.getFieldObjectInspector();

  if (!keyFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE) &&
      keyMapping.isCategory(ObjectInspector.Category.PRIMITIVE)) {
    // we always serialize the String type using the escaped algorithm for LazyString
    return serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI),
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
  }

  // use the serialization option switch to write primitive values as either a variable
  // length UTF8 string or a fixed width bytes if serializing in binary format
  boolean writeBinary = keyMapping.binaryStorage.get(0);
  return serialize(keyValue, keyFieldOI, 1, writeBinary);
}
/**
 * Computes the effective byte length of a columnar field. A zero-length span is
 * reported as -1 (absent). A one-byte string field holding the invalid-UTF marker
 * byte is reported as length 0 (empty string). Everything else keeps its length.
 *
 * @param objectInspector inspector describing the field's type
 * @param cachedByteArrayRef backing byte buffer
 * @param start offset of the field within the buffer
 * @param length raw length of the field
 * @return -1 for absent, 0 for the empty-string marker, otherwise {@code length}
 */
@Override
protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByteArrayRef, int start, int length) {
  if (length == 0) {
    return -1;
  }
  if (objectInspector.getCategory().equals(Category.PRIMITIVE)) {
    PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) objectInspector;
    boolean isString = primitiveInspector.getPrimitiveCategory().equals(PrimitiveCategory.STRING);
    // A single marker byte encodes the empty string for STRING columns.
    if (isString && length == 1
        && cachedByteArrayRef.getData()[start] == LazyBinaryColumnarSerDe.INVALID_UTF__SINGLE_BYTE[0]) {
      return 0;
    }
  }
  return length;
}
/**
 * Checks that an inspector matches the category indicated as a parameter.
 * @param inspector The object inspector to check
 * @param category The category to match
 * @throws IllegalArgumentException if inspector does not match the category
 */
private void checkInspectorCategory(ObjectInspector inspector, ObjectInspector.Category category) {
  ObjectInspector.Category actual = inspector.getCategory();
  if (actual.equals(category)) {
    return;
  }
  throw new IllegalArgumentException("Invalid data type: expected " + category
      + " type, but found: " + actual);
}
/**
 * Serializes a row for Parquet writing. Only struct-shaped rows are accepted; the
 * object and its inspector are handed off inside the reusable row wrapper, and the
 * field count plus last-operation status are recorded for bookkeeping.
 *
 * @param obj the row object to serialize
 * @param objInspector inspector for the row; must be a struct
 * @return the reusable writable row wrapper
 * @throws SerDeException when the inspector is not a struct
 */
@Override
public Writable serialize(final Object obj, final ObjectInspector objInspector) throws SerDeException {
  if (!objInspector.getCategory().equals(Category.STRUCT)) {
    throw new SerDeException("Cannot serialize " + objInspector.getCategory() + ". Can only serialize a struct");
  }
  final StructObjectInspector structInspector = (StructObjectInspector) objInspector;
  serializedSize = structInspector.getAllStructFieldRefs().size();
  status = LAST_OPERATION.SERIALIZE;
  parquetRow.value = obj;
  parquetRow.inspector = structInspector;
  return parquetRow;
}
/**
 * Checks that an inspector matches the category indicated as a parameter.
 * @param inspector The object inspector to check
 * @param category The category to match
 * @throws IllegalArgumentException if inspector does not match the category
 */
private void checkInspectorCategory(ObjectInspector inspector, ObjectInspector.Category category) {
  if (!inspector.getCategory().equals(category)) {
    String message = "Invalid data type: expected " + category
        + " type, but found: " + inspector.getCategory();
    throw new IllegalArgumentException(message);
  }
}
/**
 * Reports whether a Hive type's category is one this component can handle:
 * primitive, map, list, struct, or union.
 *
 * @param ti the type to check
 * @return true when the category is supported
 */
private boolean supportedCategories(TypeInfo ti) {
  switch (ti.getCategory()) {
    case PRIMITIVE:
    case MAP:
    case LIST:
    case STRUCT:
    case UNION:
      return true;
    default:
      return false;
  }
}
/**
 * Serializes a row for Parquet writing. Only struct-shaped rows are accepted; the
 * object and its inspector are stashed in the reusable row wrapper, and the field
 * count plus last-operation status are recorded for bookkeeping.
 *
 * @param obj the row object to serialize
 * @param objInspector inspector for the row; must be a struct
 * @return the reusable writable row wrapper
 * @throws SerDeException when the inspector is not a struct
 */
@Override
public Writable serialize(final Object obj, final ObjectInspector objInspector) throws SerDeException {
  if (!objInspector.getCategory().equals(Category.STRUCT)) {
    throw new SerDeException("Cannot serialize " + objInspector.getCategory() + ". Can only serialize a struct");
  }
  serializedSize = ((StructObjectInspector)objInspector).getAllStructFieldRefs().size();
  status = LAST_OPERATION.SERIALIZE;
  // The wrapper carries the raw object plus its inspector to the Parquet writer.
  parquetRow.value = obj;
  parquetRow.inspector= (StructObjectInspector)objInspector;
  return parquetRow;
}
/**
 * Checks that an inspector matches the category indicated as a parameter.
 *
 * @param inspector The object inspector to check
 * @param category The category to match
 * @throws IllegalArgumentException if inspector does not match the category
 */
private void checkInspectorCategory(ObjectInspector inspector, ObjectInspector.Category category) {
  boolean matches = inspector.getCategory().equals(category);
  if (!matches) {
    throw new IllegalArgumentException(
        "Invalid data type: expected " + category + " type, but found: " + inspector.getCategory());
  }
}
/**
 * Decides whether one map type can be coerced into another: both must be maps,
 * and both the key types and the value types must be equal or coercible.
 *
 * @return true when the map types are compatible
 */
private boolean canCoerceForMap(HiveType fromHiveType, HiveType toHiveType) {
  if (!fromHiveType.getCategory().equals(Category.MAP) || !toHiveType.getCategory().equals(Category.MAP)) {
    return false;
  }

  // Cast once per side, then lift key/value type names back into HiveType.
  MapTypeInfo fromMapInfo = (MapTypeInfo) fromHiveType.getTypeInfo();
  MapTypeInfo toMapInfo = (MapTypeInfo) toHiveType.getTypeInfo();
  HiveType fromKeyType = HiveType.valueOf(fromMapInfo.getMapKeyTypeInfo().getTypeName());
  HiveType toKeyType = HiveType.valueOf(toMapInfo.getMapKeyTypeInfo().getTypeName());
  HiveType fromValueType = HiveType.valueOf(fromMapInfo.getMapValueTypeInfo().getTypeName());
  HiveType toValueType = HiveType.valueOf(toMapInfo.getMapValueTypeInfo().getTypeName());

  // Value compatibility is only evaluated when the keys already line up.
  if (!fromKeyType.equals(toKeyType) && !canCoerce(fromKeyType, toKeyType)) {
    return false;
  }
  return fromValueType.equals(toValueType) || canCoerce(fromValueType, toValueType);
}
/**
 * Decides whether one list type can be coerced into another: both must be lists,
 * and their element types must be equal or coercible.
 *
 * @return true when the list types are compatible
 */
private boolean canCoerceForList(HiveType fromHiveType, HiveType toHiveType) {
  if (!fromHiveType.getCategory().equals(Category.LIST) || !toHiveType.getCategory().equals(Category.LIST)) {
    return false;
  }

  // Cast once per side, then lift element type names back into HiveType.
  ListTypeInfo fromListInfo = (ListTypeInfo) fromHiveType.getTypeInfo();
  ListTypeInfo toListInfo = (ListTypeInfo) toHiveType.getTypeInfo();
  HiveType fromElementType = HiveType.valueOf(fromListInfo.getListElementTypeInfo().getTypeName());
  HiveType toElementType = HiveType.valueOf(toListInfo.getListElementTypeInfo().getTypeName());

  return fromElementType.equals(toElementType) || canCoerce(fromElementType, toElementType);
}