/**
 * Tests whether {@code o} is a {@link Group} whose type is a repeated, non-primitive group
 * holding exactly one field, and that single field is primitive.
 *
 * @param o candidate object; anything that is not a {@code Group} yields {@code false}
 * @return {@code true} only when every condition above holds
 */
static boolean isWrappedListPrimitive(Object o) {
  if (!(o instanceof Group)) {
    return false;
  }
  final Group group = (Group) o;

  // The checks run in the same order as a short-circuited conjunction would.
  if (!group.getType().isRepetition(Type.Repetition.REPEATED)) {
    return false;
  }
  if (group.getType().isPrimitive()) {
    return false;
  }
  if (group.getType().asGroupType().getFieldCount() != 1) {
    return false;
  }
  return group.getType().getFields().get(0).isPrimitive();
}
/**
 * Checks whether a parquet type is a valid 'map' type.
 *
 * <p>The type qualifies when it is a non-primitive, non-repeated group annotated with
 * {@code MAP} or {@code MAP_KEY_VALUE}, containing exactly one non-primitive child that is
 * repeated and has exactly two fields: a primitive field named "key" followed by a field
 * named "value" (names compared case-insensitively).
 *
 * @param groupType the parquet type to inspect
 * @return {@code true} if the type matches the structure described above
 */
private static boolean isLogicalMapType(Type groupType) {
  final OriginalType annotation = groupType.getOriginalType();
  if (groupType.isPrimitive() || annotation == null || groupType.isRepetition(Type.Repetition.REPEATED)) {
    return false;
  }
  if (annotation != OriginalType.MAP && annotation != OriginalType.MAP_KEY_VALUE) {
    return false;
  }

  final GroupType mapGroup = groupType.asGroupType();
  if (mapGroup.getFieldCount() != 1) {
    return false;
  }
  final Type onlyChild = mapGroup.getFields().get(0);
  if (onlyChild.isPrimitive()) {
    return false;
  }

  final GroupType entryGroup = onlyChild.asGroupType();
  if (!entryGroup.isRepetition(Type.Repetition.REPEATED) || entryGroup.getFieldCount() != 2) {
    return false;
  }

  // Exactly two fields are present at this point, so both lookups are in range.
  final Type keyField = entryGroup.getFields().get(0);
  final Type valueField = entryGroup.getFields().get(1);
  return keyField.getName().equalsIgnoreCase("key")
      && keyField.isPrimitive()
      && valueField.getName().equalsIgnoreCase("value");
}
/**
 * Adds this type's repetition contribution to the one reported by the type reached through
 * the remaining elements of {@code path}.
 *
 * @param path field names to follow
 * @param depth index into {@code path} of the next name to resolve
 * @return 1 if this type is REPEATED (otherwise 0), plus the result of the child named
 *         {@code path[depth]} unless {@code depth} equals {@code path.length}
 */
@Override
protected int getMaxRepetitionLevel(String[] path, int depth) {
  int level = 0;
  if (isRepetition(Repetition.REPEATED)) {
    level = 1;
  }
  if (depth != path.length) {
    // More path elements remain: delegate to the named child one level deeper.
    level += getType(path[depth]).getMaxRepetitionLevel(path, depth + 1);
  }
  return level;
}
/**
 * Adds this type's definition contribution to the one reported by the type reached through
 * the remaining elements of {@code path}.
 *
 * @param path field names to follow
 * @param depth index into {@code path} of the next name to resolve
 * @return 1 if this type is not REQUIRED (otherwise 0), plus the result of the child named
 *         {@code path[depth]} unless {@code depth} equals {@code path.length}
 */
@Override
protected int getMaxDefinitionLevel(String[] path, int depth) {
  int level = 1;
  if (isRepetition(Repetition.REQUIRED)) {
    level = 0;
  }
  if (depth != path.length) {
    // More path elements remain: delegate to the named child one level deeper.
    level += getType(path[depth]).getMaxDefinitionLevel(path, depth + 1);
  }
  return level;
}
/**
 * Computes the definition level accumulated from this type down the given path.
 *
 * @param path field names to follow
 * @param depth index into {@code path} of the next name to resolve
 * @return this type's own contribution (0 when REQUIRED, 1 otherwise) if {@code depth}
 *         equals {@code path.length}; otherwise that contribution plus the value returned by
 *         the child named {@code path[depth]}
 */
@Override
protected int getMaxDefinitionLevel(String[] path, int depth) {
  final boolean required = isRepetition(Repetition.REQUIRED);
  final int ownContribution = required ? 0 : 1;
  return depth == path.length
      ? ownContribution
      : ownContribution + getType(path[depth]).getMaxDefinitionLevel(path, depth + 1);
}
/**
 * Computes the repetition level accumulated from this type down the given path.
 *
 * @param path field names to follow
 * @param depth index into {@code path} of the next name to resolve
 * @return this type's own contribution (1 when REPEATED, 0 otherwise) if {@code depth}
 *         equals {@code path.length}; otherwise that contribution plus the value returned by
 *         the child named {@code path[depth]}
 */
@Override
protected int getMaxRepetitionLevel(String[] path, int depth) {
  final boolean repeated = isRepetition(Repetition.REPEATED);
  final int ownContribution = repeated ? 1 : 0;
  return depth == path.length
      ? ownContribution
      : ownContribution + getType(path[depth]).getMaxRepetitionLevel(path, depth + 1);
}
switch (annotation) { case LIST: Preconditions.checkArgument(!group.isRepetition(REPEATED), "Invalid list: top-level group is repeated: " + group); Preconditions.checkArgument(group.getFieldCount() == 1, Preconditions.checkArgument(repeatedElement.isRepetition(REPEATED), "Invalid list: inner group is not repeated"); Preconditions.checkArgument(repeatedElement.getFieldCount() <= 1, Preconditions.checkArgument(!group.isRepetition(REPEATED), "Invalid map: top-level group is repeated: " + group); Preconditions.checkArgument(group.getFieldCount() == 1, Preconditions.checkArgument(repeatedKeyValue.isRepetition(REPEATED), "Invalid map: inner group is not repeated"); Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2,
switch (annotation) { case LIST: Preconditions.checkArgument(!group.isRepetition(REPEATED), "Invalid list: top-level group is repeated: " + group); Preconditions.checkArgument(group.getFieldCount() == 1, Preconditions.checkArgument(repeatedElement.isRepetition(REPEATED), "Invalid list: inner group is not repeated"); Preconditions.checkArgument(repeatedElement.getFieldCount() <= 1, Preconditions.checkArgument(!group.isRepetition(REPEATED), "Invalid map: top-level group is repeated: " + group); Preconditions.checkArgument(group.getFieldCount() == 1, Preconditions.checkArgument(repeatedKeyValue.isRepetition(REPEATED), "Invalid map: inner group is not repeated"); Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2,
/**
 * Builds the mapping for a parquet group. A group without an original type becomes a struct
 * mapping over its fields; a group annotated {@code LIST} becomes a list mapping; any other
 * annotation is rejected.
 *
 * @param type parquet types
 * @param name overrides parquet.getName()
 * @return the mapping
 * @throws UnsupportedOperationException if the group carries an annotation other than LIST
 */
private TypeMapping fromParquetGroup(GroupType type, String name) {
  final OriginalType annotation = type.getOriginalType();

  if (annotation == null) {
    // Un-annotated group: map each field, then wrap the results in a struct field.
    final List<TypeMapping> childMappings = fromParquet(type.getFields());
    final boolean optional = type.isRepetition(OPTIONAL);
    final Field structField = new Field(name, optional, new Struct_(), fields(childMappings));
    return new StructTypeMapping(structField, type, childMappings);
  }

  switch (annotation) {
    case LIST: {
      final List3Levels levels = new List3Levels(type);
      final TypeMapping elementMapping =
          fromParquet(levels.getElement(), null, levels.getElement().getRepetition());
      final boolean optional = type.isRepetition(OPTIONAL);
      final Field listField =
          new Field(name, optional, new ArrowType.List(), asList(elementMapping.getArrowField()));
      return new ListTypeMapping(listField, levels, elementMapping);
    }
    default:
      throw new UnsupportedOperationException("Unsupported type " + type);
  }
}
Converter defaultGroupConverter(OutputMutator mutator, GroupType groupType, final String nameForChild, Collection<SchemaPath> c, List<Field> arrowSchema) { if (groupType.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(groupType)) { return new LogicalListL1Converter( nameForChild, mutator, getWriterProvider(), groupType, c, options, arrowSchema, schemaHelper ); } final StructWriter struct; if (groupType.isRepetition(REPEATED)) { if (arrowSchema != null) { //TODO assert this should never occur at this level // only parquet writer that writes arrowSchema doesn't write repeated fields except // as part of a LOGICAL LIST, thus this scenario (repeated + arrow schema present) can // only happen in LogicalList converter arrowSchema = handleRepeatedField(arrowSchema, groupType); } struct = list(nameForChild).struct(); } else { struct = getWriterProvider().struct(nameForChild); } return new StructGroupConverter(mutator, struct, groupType, c, options, arrowSchema, schemaHelper); }
if (!mapKeyValType.isRepetition(REPEATED) || mapKeyValType.getFieldCount()!=2) { throw new UnsupportedOperationException("Invalid map type " + parquetGroupType);