/**
 * Tells whether a parquet type has the shape of a logical 'list'.
 *
 * <p>The type qualifies only when all of the following hold: it is not
 * primitive, its original type is {@code LIST}, it has exactly one field,
 * and that field has {@code REPEATED} repetition.
 *
 * @param listType the parquet type to inspect
 * @return {@code true} if every condition above is satisfied
 */
private static boolean isLogicalListType(Type listType) {
  if (listType.isPrimitive()) {
    return false;
  }

  // An absent annotation (null) can never be a LIST.
  OriginalType annotation = listType.getOriginalType();
  if (annotation == null || !annotation.equals(OriginalType.LIST)) {
    return false;
  }

  // The list group must wrap exactly one child ...
  if (listType.asGroupType().getFieldCount() != 1) {
    return false;
  }

  // ... and that child must be the repeated one.
  return listType.asGroupType().getFields().get(0).isRepetition(Type.Repetition.REPEATED);
}
/**
 * Tells whether a parquet type has the shape of a logical 'map'.
 *
 * <p>The type qualifies only when all of the following hold:
 * <ul>
 *   <li>it is not primitive, carries an original type, and is not itself
 *       {@code REPEATED};</li>
 *   <li>its original type is {@code MAP} or {@code MAP_KEY_VALUE};</li>
 *   <li>it has exactly one field, and that field is a group;</li>
 *   <li>that inner group is {@code REPEATED} and has exactly two fields;</li>
 *   <li>the first inner field is primitive and named "key", the second is
 *       named "value" (both names compared case-insensitively).</li>
 * </ul>
 *
 * @param groupType the parquet type to inspect
 * @return {@code true} if every condition above is satisfied
 */
private static boolean isLogicalMapType(Type groupType) {
  OriginalType annotation = groupType.getOriginalType();
  if (groupType.isPrimitive()
      || annotation == null
      || groupType.isRepetition(Type.Repetition.REPEATED)) {
    return false;
  }

  boolean annotatedAsMap =
      annotation.equals(OriginalType.MAP) || annotation.equals(OriginalType.MAP_KEY_VALUE);
  if (!annotatedAsMap) {
    return false;
  }

  // The outer map group must wrap a single, non-primitive child.
  GroupType outer = groupType.asGroupType();
  if (outer.getFieldCount() != 1) {
    return false;
  }
  Type onlyChild = outer.getFields().get(0);
  if (onlyChild.isPrimitive()) {
    return false;
  }

  // The child is the repeated key/value entry group with exactly two fields.
  GroupType entry = onlyChild.asGroupType();
  if (!entry.isRepetition(Type.Repetition.REPEATED) || entry.getFieldCount() != 2) {
    return false;
  }

  Type keyField = entry.getFields().get(0);
  return keyField.getName().equalsIgnoreCase("key")
      && keyField.isPrimitive()
      && entry.getFields().get(1).getName().equalsIgnoreCase("value");
}
OriginalType originalType = type.getOriginalType(); if (originalType != null && originalType.equals(OriginalType.LIST)) { checkInspectorCategory(inspector, ObjectInspector.Category.LIST); return new ListDataWriter((ListObjectInspector)inspector, groupType); } else if (originalType != null && originalType.equals(OriginalType.MAP)) { checkInspectorCategory(inspector, ObjectInspector.Category.MAP); return new MapDataWriter((MapObjectInspector)inspector, groupType);
/**
 * Builds a first-pass guess of the Vec type for each column of a parquet schema.
 *
 * <p>The result has one entry per path reported by {@code messageType.getPaths()}.
 * Entry {@code i} is derived from {@code messageType.getType(i)}, which is
 * asserted to be primitive:
 * <ul>
 *   <li>INT32, BOOLEAN, FLOAT, DOUBLE -> {@code Vec.T_NUM}</li>
 *   <li>INT96 -> {@code Vec.T_TIME}</li>
 *   <li>INT64 -> {@code Vec.T_TIME} when annotated TIMESTAMP_MILLIS,
 *       otherwise {@code Vec.T_NUM}</li>
 *   <li>anything else -> {@code Vec.T_BAD}</li>
 * </ul>
 *
 * @param messageType the parquet schema to inspect
 * @return the guessed Vec type code for each column
 */
private static byte[] roughGuessTypes(MessageType messageType) {
  byte[] guessed = new byte[messageType.getPaths().size()];
  for (int col = 0; col < guessed.length; col++) {
    Type field = messageType.getType(col);
    assert field.isPrimitive();

    byte guess;
    switch (field.asPrimitiveType().getPrimitiveTypeName()) {
      case INT96:
        guess = Vec.T_TIME;
        break;
      case INT64:
        // A 64-bit integer is a time column only when annotated as millisecond timestamp.
        if (OriginalType.TIMESTAMP_MILLIS.equals(field.getOriginalType())) {
          guess = Vec.T_TIME;
        } else {
          guess = Vec.T_NUM;
        }
        break;
      case INT32:
      case BOOLEAN:
      case FLOAT:
      case DOUBLE:
        guess = Vec.T_NUM;
        break;
      default:
        guess = Vec.T_BAD;
    }
    guessed[col] = guess;
  }
  return guessed;
}
/**
 * Selects the converter used to read one parquet column into the writer.
 *
 * <p>For {@code Vec.T_NUM} a {@code DecimalConverter} (using the scale from the
 * column's decimal metadata) is returned when the column is annotated DECIMAL,
 * otherwise a {@code NumberConverter}.
 *
 * <p>For {@code Vec.T_BAD}, {@code T_CAT}, {@code T_STR}, {@code T_UUID} and
 * {@code T_TIME} a {@code TimestampConverter} is returned when the column is
 * annotated TIMESTAMP_MILLIS or its primitive type is INT96; otherwise a
 * {@code StringConverter}, with dictionary support enabled only for UTF8 or
 * ENUM annotated columns.
 *
 * @param colIdx index of the column being converted
 * @param vecType target Vec type code of the column
 * @param parquetType parquet primitive type of the column
 * @return the converter for this column
 * @throws UnsupportedOperationException if {@code vecType} is none of the codes above
 */
private PrimitiveConverter newConverter(int colIdx, byte vecType, PrimitiveType parquetType) {
  if (vecType == Vec.T_NUM) {
    if (OriginalType.DECIMAL.equals(parquetType.getOriginalType())) {
      int scale = parquetType.getDecimalMetadata().getScale();
      return new DecimalConverter(colIdx, scale, _writer);
    }
    return new NumberConverter(colIdx, _writer);
  }

  boolean textOrTime = vecType == Vec.T_BAD
      || vecType == Vec.T_CAT
      || vecType == Vec.T_STR
      || vecType == Vec.T_UUID
      || vecType == Vec.T_TIME;
  if (!textOrTime) {
    throw new UnsupportedOperationException("Unsupported type " + vecType);
  }

  // Timestamps are recognised either by annotation or by the INT96 physical type.
  boolean isTimestamp = OriginalType.TIMESTAMP_MILLIS.equals(parquetType.getOriginalType())
      || parquetType.getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96);
  if (isTimestamp) {
    return new TimestampConverter(colIdx, _writer);
  }

  OriginalType annotation = parquetType.getOriginalType();
  boolean dictSupport = annotation == OriginalType.UTF8 || annotation == OriginalType.ENUM;
  return new StringConverter(_writer, colIdx, dictSupport);
}
OriginalType originalType = type.getOriginalType(); if (originalType != null && originalType.equals(OriginalType.LIST)) { checkInspectorCategory(inspector, ObjectInspector.Category.LIST); if (singleLevelArray) { else if (originalType != null && (originalType.equals(OriginalType.MAP) || originalType.equals(OriginalType.MAP_KEY_VALUE))) { checkInspectorCategory(inspector, ObjectInspector.Category.MAP); writeMap(value, (MapObjectInspector) inspector, groupType);
OriginalType originalType = type.getOriginalType(); if (originalType != null && originalType.equals(OriginalType.LIST)) { checkInspectorCategory(inspector, ObjectInspector.Category.LIST); if (singleLevelArray) { else if (originalType != null && (originalType.equals(OriginalType.MAP) || originalType.equals(OriginalType.MAP_KEY_VALUE))) { checkInspectorCategory(inspector, ObjectInspector.Category.MAP); writeMap(value, (MapObjectInspector) inspector, groupType);
for (ColumnTypeMetadata_v2 columnTypeMetadata : ((ParquetTableMetadata_v2) parquetTableMetadata).columnTypeInfo.values()) { if (OriginalType.DATE.equals(columnTypeMetadata.originalType)) { names = columnTypeMetadata.name; if (OriginalType.DATE.equals(originalType) && columnMetadata.hasSingleValue(rowCount) && (Integer) columnMetadata.getMaxValue() > ParquetReaderUtility.DATE_CORRUPTION_THRESHOLD) { int newMinMax = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) columnMetadata.getMaxValue());
OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ? getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null; if (!originalType.equals(newOriginalType)) { if (newOriginalType != null) { LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.",
if (!keyType.isPrimitive() || !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveTypeName.BINARY) || !keyType.getOriginalType().equals(OriginalType.UTF8)) { throw new IllegalArgumentException("Map key type must be binary (UTF8): " + keyType);