/**
 * Resolves the ColumnIO that represents the element of an array/list column.
 *
 * Walks down through non-repeated group wrappers until the repeated level is
 * reached, then distinguishes the standard 3-level list encoding (where the
 * repeated group wraps a single element field) from the legacy 2-level
 * encoding (where the repeated field itself is the element).
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO) {
  ColumnIO current = columnIO;
  // Descend through optional/required group wrappers to the repeated level.
  while (current instanceof GroupColumnIO && !current.getType().isRepetition(REPEATED)) {
    current = ((GroupColumnIO) current).getChild(0);
  }
  /* Standard 3-level structure — middle level is a repeated group with one field:
   *   optional group my_list (LIST) {
   *     repeated group element {
   *       required binary str (UTF8);
   *     };
   *   }
   * The names "array" and "<parent>_tuple" mark legacy encodings and are excluded.
   */
  boolean isThreeLevel = current instanceof GroupColumnIO
      && current.getType().getOriginalType() == null
      && ((GroupColumnIO) current).getChildrenCount() == 1
      && !current.getName().equals("array")
      && !current.getName().equals(current.getParent().getName() + "_tuple");
  if (isThreeLevel) {
    return ((GroupColumnIO) current).getChild(0);
  }
  /* Backward-compatible 2-level arrays — the repeated field is not a group:
   *   optional group my_list (LIST) {
   *     repeated int32 element;
   *   }
   */
  return current;
}
// Logical-type annotation of this field (e.g. UTF8, LIST, MAP); null when the
// type carries no annotation. NOTE(review): fragment of a larger method not
// visible in this chunk.
OriginalType originalType = type.getOriginalType();
/**
 * Builds a human-readable description of a mapping's source type: the
 * logical (original) type when present, otherwise the physical type.
 */
private String getSourceTypeDescription(Mapping mapping) {
  OriginalType logicalType = mapping.sourceType.getOriginalType();
  return logicalType != null
      ? String.valueOf(logicalType)
      : String.valueOf(mapping.source.getType());
}
/**
 * Indexes the message schema's columns for later compatibility checks:
 * records every column's descriptor, and its OriginalType when one is
 * declared, keyed by the column's path.
 */
private SchemaCompatibilityValidator(MessageType schema) {
  for (ColumnDescriptor descriptor : schema.getColumns()) {
    ColumnPath path = ColumnPath.get(descriptor.getPath());
    columnsAccordingToSchema.put(path, descriptor);
    // Only annotated columns get an entry in the original-type index.
    OriginalType originalType = schema.getType(descriptor.getPath()).getOriginalType();
    if (originalType != null) {
      originalTypes.put(path, originalType);
    }
  }
}
/**
 * Projects a Parquet bag type down to the fields requested by the Pig field
 * schema. A bag must wrap exactly one nested type; when that nested type is a
 * primitive, MAP, or LIST, the Pig schema carries an extra synthetic tuple
 * level that is skipped before recursing into filter().
 * NOTE(review): the trailing extra brace closes an enclosing class whose
 * declaration is outside this chunk.
 */
private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException { if (DEBUG) LOG.debug("filtering BAG schema:\n" + bagType + "\nwith:\n " + bagFieldSchema); if (bagType.getFieldCount() != 1) { throw new RuntimeException("not unwrapping the right type, this should be a Bag: " + bagType); } Type nested = bagType.getType(0); FieldSchema innerField = bagFieldSchema.schema.getField(0); if (nested.isPrimitive() || nested.getOriginalType() == OriginalType.MAP || nested.getOriginalType() == OriginalType.LIST) { // Bags always contain tuples => we skip the extra tuple that was inserted in that case. innerField = innerField.schema.getField(0); } return bagType.withNewFields(filter(nested, innerField)); } }
/**
 * Creates a converter for a group-typed (nested) Parquet column based on the
 * Presto type it maps to: ARRAY, MAP, or ROW.
 *
 * @throws IllegalArgumentException when the Presto type has no group mapping
 */
private static GroupedConverter createGroupConverter(Type prestoType, String columnName, parquet.schema.Type parquetType, int fieldIndex) {
  GroupType groupType = parquetType.asGroupType();
  String baseType = prestoType.getTypeSignature().getBase();
  switch (baseType) {
    case MAP:
      return new ParquetMapConverter(prestoType, columnName, groupType, fieldIndex);
    case ROW:
      return new ParquetStructConverter(prestoType, columnName, groupType, fieldIndex);
    case ARRAY:
      return new ParquetListConverter(prestoType, columnName, groupType, fieldIndex);
    default:
      // Report the Parquet logical type in the error for easier diagnosis.
      throw new IllegalArgumentException(
          "Column " + columnName + " type " + parquetType.getOriginalType() + " not supported");
  }
}
/**
 * Checks whether a Parquet source type can be mapped onto the given target
 * property. The types are compatible when their logical (original) types
 * match, or when both are primitives with the same physical primitive type;
 * anything else is incompatible.
 *
 * Fixes: replaced non-idiomatic {@code == false} with {@code !}, collapsed
 * the redundant nested null check, and hoisted the repeated
 * {@code getOriginalType()} call into a local.
 */
private boolean isCompatible(Type sourceType, PropertyDescriptor target) {
  ParquetValueDriver driver = ParquetValueDrivers.of(
      target.getTypeInfo(), target.getValueClass());
  Type targetType = driver.getType(target.getFieldName());
  // Matching logical annotations (e.g. both UTF8) are compatible outright.
  OriginalType sourceOriginal = sourceType.getOriginalType();
  if (sourceOriginal != null && sourceOriginal == targetType.getOriginalType()) {
    return true;
  }
  if (sourceType.isPrimitive()) {
    if (!targetType.isPrimitive()) {
      return false;
    }
    // Fall back to comparing the physical primitive representations.
    return sourceType.asPrimitiveType().getPrimitiveTypeName()
        == targetType.asPrimitiveType().getPrimitiveTypeName();
  }
  // Non-primitive source with no matching logical type: incompatible.
  return false;
}
// NOTE(review): fragment of a larger method — captures the field's physical
// primitive type and logical annotation, then dispatches through a
// per-primitive-type converter to produce a Column. The anonymous converter
// body continues past this chunk.
final PrimitiveTypeName parquetPrimitiveTypeName = fieldType.asPrimitiveType().getPrimitiveTypeName(); final OriginalType originalType = fieldType.getOriginalType(); return parquetPrimitiveTypeName.convert( new PrimitiveType.PrimitiveTypeNameConverter<Column, RuntimeException>() {
/**
 * Converter for a Pig bag backed by a single-field Parquet group.
 *
 * Pig bags always contain tuples, so when the nested Parquet type is a
 * primitive, MAP, or LIST the incoming value is wrapped in a fresh tuple
 * before being buffered; when it is already a tuple-shaped group the value is
 * buffered as-is. The matching Pig field schema is unwrapped accordingly
 * (one extra schema level is skipped in the wrapping case).
 *
 * @throws IllegalArgumentException when the group has more than one field
 * @throws FrontendException propagated from Pig schema field lookup
 */
BagConverter(GroupType parquetSchema, FieldSchema pigSchema, ParentValueContainer parent, boolean numbersDefaultToZero, boolean columnIndexAccess) throws FrontendException { this.parent = parent; if (parquetSchema.getFieldCount() != 1) { throw new IllegalArgumentException("bags have only one field. " + parquetSchema + " size = " + parquetSchema.getFieldCount()); } Type nestedType = parquetSchema.getType(0); ParentValueContainer childsParent; FieldSchema pigField; if (nestedType.isPrimitive() || nestedType.getOriginalType() == OriginalType.MAP || nestedType.getOriginalType() == OriginalType.LIST) { // Pig bags always contain tuples // In that case we need to wrap the value in an extra tuple childsParent = new ParentValueContainer() { @Override void add(Object value) { buffer.add(TF.newTuple(value)); }}; pigField = pigSchema.schema.getField(0).schema.getField(0); } else { childsParent = new ParentValueContainer() { @Override void add(Object value) { buffer.add((Tuple)value); }}; pigField = pigSchema.schema.getField(0); } child = newConverter(pigField, nestedType, childsParent, numbersDefaultToZero, columnIndexAccess); }
// NOTE(review): fragment of a larger method — captures the Parquet field's
// physical primitive type and logical annotation, then dispatches through a
// per-primitive-type converter to build a Pig FieldSchema. The anonymous
// converter body continues past this chunk.
final PrimitiveTypeName parquetPrimitiveTypeName = parquetType.asPrimitiveType().getPrimitiveTypeName(); final OriginalType originalType = parquetType.getOriginalType(); return parquetPrimitiveTypeName.convert( new PrimitiveTypeNameConverter<Schema.FieldSchema, FrontendException>() {
// NOTE(review): fragment of a larger method — truncated mid-throw. Reads the
// value's physical/logical types (DECIMAL values additionally need their
// precision/scale metadata), and rejects any map whose key is not a
// BINARY primitive annotated UTF8.
final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName = parquetType.asPrimitiveType() .getPrimitiveTypeName(); final OriginalType originalType = parquetType.getOriginalType(); if (originalType == OriginalType.DECIMAL) { final DecimalMetadata decimalMetadata = parquetType.asPrimitiveType().getDecimalMetadata(); if (!keyType.isPrimitive() || !keyType.asPrimitiveType().getPrimitiveTypeName() .equals(PrimitiveType.PrimitiveTypeName.BINARY) || !keyType.getOriginalType().equals(OriginalType.UTF8)) { throw new UnsupportedOperationException( "Map key type must be binary (UTF8): " + keyType);
/**
 * Writes a single field value to the Parquet RecordConsumer, dispatching on
 * the Parquet type: primitives go straight to writePrimitive; group types
 * annotated LIST or MAP go to their dedicated writers; any other group is
 * written as a struct. The inspector's category is validated before each
 * dispatch.
 *
 * @param value The writable object that contains the value.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 */
private void writeValue(final Object value, final ObjectInspector inspector, final Type type) {
  if (type.isPrimitive()) {
    checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
    writePrimitive(value, (PrimitiveObjectInspector) inspector);
    return;
  }
  final GroupType groupType = type.asGroupType();
  // Enum identity comparison is null-safe: null never equals a constant.
  final OriginalType originalType = type.getOriginalType();
  if (originalType == OriginalType.LIST) {
    checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
    writeArray(value, (ListObjectInspector) inspector, groupType);
  } else if (originalType == OriginalType.MAP) {
    checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
    writeMap(value, (MapObjectInspector) inspector, groupType);
  } else {
    checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
    writeGroup(value, (StructObjectInspector) inspector, groupType);
  }
}
/**
 * Builds a converter for a schema field. Primitive fields annotated UTF8 get
 * a string converter and every other primitive a generic one; group fields
 * annotated MAP or LIST get their dedicated converters, and any other group
 * becomes a nested record converter.
 */
private Converter createConverter(Type field) {
  final OriginalType logicalType = field.getOriginalType();
  final String fieldName = field.getName();
  if (field.isPrimitive()) {
    // Only UTF8 needs special handling; all other annotations (or none)
    // are treated as plain primitives.
    if (logicalType == OriginalType.UTF8) {
      return new StringConverter(fieldName);
    }
    return new SimplePrimitiveConverter(fieldName);
  }
  GroupType groupType = field.asGroupType();
  if (logicalType == OriginalType.MAP) {
    return new SimpleMapRecordConverter(groupType, fieldName, this);
  }
  if (logicalType == OriginalType.LIST) {
    return new SimpleListRecordConverter(groupType, fieldName, this);
  }
  return new SimpleRecordConverter(groupType, fieldName, this);
}
// NOTE(review): fragment of a larger switch over Pig DataType values. The
// boolean argument flags whether the Parquet field carries a UTF8 annotation.
return new FieldStringConverter(parent, type.getOriginalType() == OriginalType.UTF8); case DataType.BYTEARRAY: return new FieldByteArrayConverter(parent);
// Logical-type annotation of this field; null when the type carries none.
// NOTE(review): fragment of a larger method not visible in this chunk.
OriginalType originalType = type.getOriginalType();