/** * check if a parquet type is a valid 'list' type */ private static boolean isLogicalListType(Type listType) { return !listType.isPrimitive() && listType.getOriginalType() != null && listType.getOriginalType().equals(OriginalType.LIST) && listType.asGroupType().getFieldCount() == 1 && listType.asGroupType().getFields().get(0).isRepetition(Type.Repetition.REPEATED); }
/** * check if a parquet type is a valid 'map' type */ private static boolean isLogicalMapType(Type groupType) { OriginalType ot = groupType.getOriginalType(); if (groupType.isPrimitive() || ot == null || groupType.isRepetition(Type.Repetition.REPEATED)) { return false; } if (groupType.getOriginalType().equals(OriginalType.MAP) || groupType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE)) { GroupType myMapType = groupType.asGroupType(); if (myMapType.getFieldCount() != 1 || myMapType.getFields().get(0).isPrimitive()) { return false; } GroupType mapItemType = myMapType.getFields().get(0).asGroupType(); return mapItemType.isRepetition(Type.Repetition.REPEATED) && mapItemType.getFieldCount() == 2 && mapItemType.getFields().get(0).getName().equalsIgnoreCase("key") && mapItemType.getFields().get(0).isPrimitive() && mapItemType.getFields().get(1).getName().equalsIgnoreCase("value"); } return false; }
private Converter getFieldConverter(Type type, int fieldIndex, TypeInfo hiveTypeInfo) { Converter converter; if (type.isRepetition(Type.Repetition.REPEATED)) { if (type.isPrimitive()) { converter = new Repeated.RepeatedPrimitiveConverter( type.asPrimitiveType(), this, fieldIndex, hiveTypeInfo); } else { converter = new Repeated.RepeatedGroupConverter( type.asGroupType(), this, fieldIndex, hiveTypeInfo == null ? null : ((ListTypeInfo) hiveTypeInfo) .getListElementTypeInfo()); } repeatedConverters.add((Repeated) converter); } else { converter = getConverterFromDescription(type, fieldIndex, this, hiveTypeInfo); } return converter; }
private boolean isSubType( final GroupType groupType, final Type subtype) { if (subtype.isPrimitive() || subtype.isRepetition(Type.Repetition.REPEATED)) { return groupType.getFields().contains(subtype); } else { for (Type g : groupType.getFields()) { if (!g.isPrimitive() && g.getName().equals(subtype.getName())) { // check all elements are contained in g boolean containsAll = false; for (Type subSubType : subtype.asGroupType().getFields()) { containsAll = isSubType(g.asGroupType(), subSubType); if (!containsAll) { break; } } if (containsAll) { return containsAll; } } } return false; } }
if (fieldType.isRepetition(Type.Repetition.REPEATED)) { return convertRepeatedFieldToList(g, fieldIndex, binaryAsString);
private void initializeInternal() throws IOException, UnsupportedOperationException { // Check that the requested schema is supported. missingColumns = new boolean[requestedSchema.getFieldCount()]; List<ColumnDescriptor> columns = requestedSchema.getColumns(); List<String[]> paths = requestedSchema.getPaths(); for (int i = 0; i < requestedSchema.getFieldCount(); ++i) { Type t = requestedSchema.getFields().get(i); if (!t.isPrimitive() || t.isRepetition(Type.Repetition.REPEATED)) { throw new UnsupportedOperationException("Complex types not supported."); } String[] colPath = paths.get(i); if (fileSchema.containsPath(colPath)) { ColumnDescriptor fd = fileSchema.getColumnDescription(colPath); if (!fd.equals(columns.get(i))) { throw new UnsupportedOperationException("Schema evolution not supported."); } missingColumns[i] = false; } else { if (columns.get(i).getMaxDefinitionLevel() == 0) { // Column is missing in data but the required data is non-nullable. This file is invalid. throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(colPath)); } missingColumns[i] = true; } } }
private void initializeInternal() throws IOException, UnsupportedOperationException { // Check that the requested schema is supported. missingColumns = new boolean[requestedSchema.getFieldCount()]; List<ColumnDescriptor> columns = requestedSchema.getColumns(); List<String[]> paths = requestedSchema.getPaths(); for (int i = 0; i < requestedSchema.getFieldCount(); ++i) { Type t = requestedSchema.getFields().get(i); if (!t.isPrimitive() || t.isRepetition(Type.Repetition.REPEATED)) { throw new UnsupportedOperationException("Complex types not supported."); } String[] colPath = paths.get(i); if (fileSchema.containsPath(colPath)) { ColumnDescriptor fd = fileSchema.getColumnDescription(colPath); if (!fd.equals(columns.get(i))) { throw new UnsupportedOperationException("Schema evolution not supported."); } missingColumns[i] = false; } else { if (columns.get(i).getMaxDefinitionLevel() == 0) { // Column is missing in data but the required data is non-nullable. This file is invalid. throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(colPath)); } missingColumns[i] = true; } } }
private void initializeInternal() throws IOException, UnsupportedOperationException { /** * Check that the requested schema is supported. */ missingColumns = new boolean[requestedSchema.getFieldCount()]; List<ColumnDescriptor> columns = requestedSchema.getColumns(); List<String[]> paths = requestedSchema.getPaths(); for (int i = 0; i < requestedSchema.getFieldCount(); ++i) { Type t = requestedSchema.getFields().get(i); if (!t.isPrimitive() || t.isRepetition(Type.Repetition.REPEATED)) { throw new UnsupportedOperationException("Complex types not supported."); } String[] colPath = paths.get(i); if (fileSchema.containsPath(colPath)) { ColumnDescriptor fd = fileSchema.getColumnDescription(colPath); if (!fd.equals(columns.get(i))) { throw new UnsupportedOperationException("Schema evolution not supported."); } missingColumns[i] = false; } else { if (columns.get(i).getMaxDefinitionLevel() == 0) { // Column is missing in data but the required data is non-nullable. This file is invalid. throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(colPath)); } missingColumns[i] = true; } } }
public static <T> ParquetValueWriter<T> option(Type type, int definitionLevel, ParquetValueWriter<T> writer) { if (type.isRepetition(Type.Repetition.OPTIONAL)) { return new OptionWriter<>(definitionLevel, writer); } return writer; }
public static <T> ParquetValueReader<T> option(Type type, int definitionLevel, ParquetValueReader<T> reader) { if (type.isRepetition(Type.Repetition.OPTIONAL)) { return new OptionReader<>(definitionLevel, reader); } return reader; }
private void add(int fieldIndex, Primitive value) { Type type = schema.getType(fieldIndex); List<Object> list = data[fieldIndex]; if (!type.isRepetition(Type.Repetition.REPEATED) && !list.isEmpty()) { throw new IllegalStateException("field "+fieldIndex+" (" + type.getName() + ") can not have more than one value: " + list); } list.add(value); }
private void add(int fieldIndex, Primitive value) { Type type = schema.getType(fieldIndex); List<Object> list = data[fieldIndex]; if (!type.isRepetition(Type.Repetition.REPEATED) && !list.isEmpty()) { throw new IllegalStateException("field "+fieldIndex+" (" + type.getName() + ") can not have more than one value: " + list); } list.add(value); }
private void validateMissingFields(int index) { for (int i = previousField.peek() + 1; i < index; i++) { Type type = types.peek().asGroupType().getType(i); if (type.isRepetition(Repetition.REQUIRED)) { throw new InvalidRecordException("required field is missing " + type); } } }
private void validateMissingFields(int index) { for (int i = previousField.peek() + 1; i < index; i++) { Type type = types.peek().asGroupType().getType(i); if (type.isRepetition(Repetition.REQUIRED)) { throw new InvalidRecordException("required field is missing " + type); } } }
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) { if (!nested.isRepetition(Repetition.REPEATED)) { throw new IllegalArgumentException("Nested type should be repeated: " + nested); } return new GroupType(repetition, alias, originalType, nested); }
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) { if (!nested.isRepetition(Repetition.REPEATED)) { throw new IllegalArgumentException("Nested type should be repeated: " + nested); } return new GroupType(repetition, alias, originalType, nested); }
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) { if (!nested.isRepetition(Repetition.REPEATED)) { throw new IllegalArgumentException("Nested type should be repeated: " + nested); } return new GroupType(repetition, alias, originalType, nested); }
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) { if (!nested.isRepetition(Repetition.REPEATED)) { throw new IllegalArgumentException("Nested type should be repeated: " + nested); } return new GroupType(repetition, alias, originalType, nested); }
ColumnIO getParent(int r) { if (getRepetitionLevel() == r && getType().isRepetition(Repetition.REPEATED)) { return this; } else if (getParent()!=null && getParent().getDefinitionLevel()>=r) { return getParent().getParent(r); } else { throw new InvalidRecordException("no parent("+r+") for "+Arrays.toString(this.getFieldPath())); } }
ColumnIO getParent(int r) { if (getRepetitionLevel() == r && getType().isRepetition(Repetition.REPEATED)) { return this; } else if (getParent()!=null && getParent().getDefinitionLevel()>=r) { return getParent().getParent(r); } else { throw new InvalidRecordException("no parent("+r+") for "+Arrays.toString(this.getFieldPath())); } }