/**
 * Appends {@code value} to the values held for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field, used both for the schema lookup and the data slot
 * @param value the value to append
 * @throws IllegalStateException if the field is not REPEATED and already holds a value
 */
public void add(int fieldIndex, Primitive value) {
  final Type fieldType = this.schema.getType(fieldIndex);
  final List<Object> values = this.data[fieldIndex];
  final boolean singleValued = !fieldType.isRepetition(REPEATED);
  // A non-repeated field may hold at most one value.
  if (singleValued && !values.isEmpty()) {
    throw new IllegalStateException(
        "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + values);
  }
  values.add(value);
}
/**
 * Wraps a repeated field in a new group carrying the given repetition, name and original type.
 *
 * @param repetition repetition of the wrapping group
 * @param alias name of the wrapping group
 * @param originalType original type annotation passed to the wrapping group
 * @param nested the field to wrap; must be REPEATED
 * @return the new wrapping group
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
  final boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
  if (nestedIsRepeated) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
/**
 * Wraps a repeated field in a new group carrying the given repetition, name and original type.
 *
 * @param repetition repetition of the wrapping group
 * @param alias name of the wrapping group
 * @param originalType original type annotation passed to the wrapping group
 * @param nested the field to wrap; must be REPEATED
 * @return the new wrapping group
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
  final boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
  if (nestedIsRepeated) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
/**
 * Locates the column that represents the elements of an array column.
 *
 * <p>First descends through first children until a REPEATED column (or a non-group column)
 * is reached. If that column is a repeated group of the standard 3-level layout, its single
 * child is returned:
 * <pre>
 * optional group my_list (LIST) {
 *   repeated group element {
 *     required binary str (UTF8);
 *   };
 * }
 * </pre>
 * Otherwise the reached column itself is returned, which covers the backward-compatible
 * 2-level layout where the repeated field is not a group:
 * <pre>
 * optional group my_list (LIST) {
 *   repeated int32 element;
 * }
 * </pre>
 *
 * @param columnIO the array column to inspect
 * @return the column holding the array elements
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO) {
  ColumnIO current = columnIO;
  // Walk down through non-repeated groups, always following the first child.
  while (current instanceof GroupColumnIO && !current.getType().isRepetition(REPEATED)) {
    current = ((GroupColumnIO) current).getChild(0);
  }

  if (!(current instanceof GroupColumnIO)) {
    return current;
  }
  GroupColumnIO group = (GroupColumnIO) current;

  // Only an un-annotated group with exactly one field can be the 3-level middle layer.
  if (current.getType().getOriginalType() != null || group.getChildrenCount() != 1) {
    return current;
  }

  // NOTE(review): "array" and "<parent>_tuple" look like legacy 2-level naming conventions
  // where the repeated group is itself the element - confirm against the Parquet LIST rules.
  if (current.getName().equals("array")
      || current.getName().equals(current.getParent().getName() + "_tuple")) {
    return current;
  }

  return group.getChild(0);
}
/**
 * Wraps a repeated field in a group, so that an empty list can be told apart from a null
 * one when the wrapper is optional.
 *
 * @param repetition repetition of the wrapping group
 * @param alias name of the field
 * @param originalType original type annotation passed to the wrapping group
 * @param nested the nested repeated field
 * @return a group type containing {@code nested}
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
  final boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
  if (nestedIsRepeated) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
/**
 * Appends {@code value} to the values held for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field, used both for the schema lookup and the data slot
 * @param value the value to append
 * @throws IllegalStateException if the field is not REPEATED and already holds a value
 */
private void add(int fieldIndex, Primitive value) {
  Type fieldType = schema.getType(fieldIndex);
  List<Object> existing = data[fieldIndex];
  // Appending is allowed for repeated fields, or for any field that has no value yet.
  if (fieldType.isRepetition(Type.Repetition.REPEATED) || existing.isEmpty()) {
    existing.add(value);
    return;
  }
  throw new IllegalStateException(
      "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + existing);
}
/**
 * Appends {@code value} to the values held for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field, used both for the schema lookup and the data slot
 * @param value the value to append
 * @throws IllegalStateException if the field is not REPEATED and already holds a value
 */
public void add(int fieldIndex, Primitive value) {
  final Type fieldType = this.schema.getType(fieldIndex);
  final List<Object> values = this.data[fieldIndex];
  final boolean singleValued = !fieldType.isRepetition(REPEATED);
  // A non-repeated field may hold at most one value.
  if (singleValued && !values.isEmpty()) {
    throw new IllegalStateException(
        "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + values);
  }
  values.add(value);
}
/**
 * Converts a list of Parquet fields into a {@code Schema} with one column per field.
 *
 * @param parquetFields the Parquet fields to convert, in order
 * @return a schema built from the converted columns
 * @throws RuntimeException if any field is REPEATED
 */
private Schema convertFields(List<Type> parquetFields) {
  List<Column> converted = new ArrayList<Column>();
  for (Type fieldType : parquetFields) {
    // Repeated fields are rejected here rather than converted.
    if (fieldType.isRepetition(Type.Repetition.REPEATED)) {
      throw new RuntimeException("REPEATED not supported outside LIST or"
          + " MAP. Type: " + fieldType);
    }
    converted.add(convertField(fieldType));
  }
  return new Schema(converted.toArray(new Column[converted.size()]));
}
/**
 * Checks the fields between the previously seen field and {@code index} (both exclusive)
 * and fails if any of them is REQUIRED.
 *
 * @param index index of the field about to be processed
 * @throws InvalidRecordException if a skipped field is REQUIRED
 */
private void validateMissingFields(int index) {
  int skipped = previousField.peek() + 1;
  while (skipped < index) {
    Type skippedType = types.peek().asGroupType().getType(skipped);
    if (skippedType.isRepetition(Repetition.REQUIRED)) {
      throw new InvalidRecordException("required field is missing " + skippedType);
    }
    skipped++;
  }
}
@Override void setLevels(int r, int d, String[] fieldPath, int[] indexFieldPath, List<ColumnIO> repetition, List<ColumnIO> path) { super.setLevels(r, d, fieldPath, indexFieldPath, repetition, path); for (ColumnIO child : this.children) { String[] newFieldPath = Arrays.copyOf(fieldPath, fieldPath.length + 1); int[] newIndexFieldPath = Arrays.copyOf(indexFieldPath, indexFieldPath.length + 1); newFieldPath[fieldPath.length] = child.getType().getName(); newIndexFieldPath[indexFieldPath.length] = child.getIndex(); List<ColumnIO> newRepetition; if (child.getType().isRepetition(REPEATED)) { newRepetition = new ArrayList<ColumnIO>(repetition); newRepetition.add(child); } else { newRepetition = repetition; } List<ColumnIO> newPath = new ArrayList<ColumnIO>(path); newPath.add(child); child.setLevels( // the type repetition level increases whenever there's a possible repetition child.getType().isRepetition(REPEATED) ? r + 1 : r, // the type definition level increases whenever a field can be missing (not required) !child.getType().isRepetition(REQUIRED) ? d + 1 : d, newFieldPath, newIndexFieldPath, newRepetition, newPath ); } }
/**
 * Builds the Hive table schema equivalent to a Parquet schema.
 *
 * <p>Each top-level Parquet field contributes one entry: the key is the field name passed
 * through {@code hiveCompatibleFieldName}, the value is the Hive type string. REPEATED
 * top-level fields are rendered via {@code createHiveArray}, all others via
 * {@code convertField}.
 *
 * @param messageType the Parquet schema
 * @return map from Hive-compatible field name to Hive type string
 * @throws IOException declared by the signature; presumably raised by the conversion helpers
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
  Map<String, String> hiveSchema = Maps.newLinkedHashMap();
  for (Type field : messageType.getFields()) {
    StringBuilder hiveType = new StringBuilder();
    String fieldName = field.getName();
    boolean repeated = field.isRepetition(Type.Repetition.REPEATED);
    if (!repeated) {
      hiveType.append(convertField(field));
    } else {
      hiveType.append(createHiveArray(field, ""));
    }
    hiveSchema.put(hiveCompatibleFieldName(fieldName, false), hiveType.toString());
  }
  return hiveSchema;
}
/**
 * Wraps a repeated field in a new group carrying the given repetition, name and original type.
 *
 * @param repetition repetition of the wrapping group
 * @param alias name of the wrapping group
 * @param originalType original type annotation passed to the wrapping group
 * @param nested the field to wrap; must be REPEATED
 * @return the new wrapping group
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
  final boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
  if (nestedIsRepeated) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
/**
 * Wraps a repeated field in a new group carrying the given repetition, name and original type.
 *
 * @param repetition repetition of the wrapping group
 * @param alias name of the wrapping group
 * @param originalType original type annotation passed to the wrapping group
 * @param nested the field to wrap; must be REPEATED
 * @return the new wrapping group
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
  final boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
  if (nestedIsRepeated) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
/**
 * Converts a list of Parquet fields into a {@code Schema} with one field schema per input.
 *
 * <p>A REPEATED Parquet field is wrapped: its field schema becomes the only member of an
 * inner schema, which is added as an unnamed {@code DataType.BAG} field.
 *
 * @param parquetFields the Parquet fields to convert, in order
 * @return the schema built from the converted fields
 * @throws SchemaConversionException if converting a field raises a {@code FrontendException}
 */
private Schema convertFields(List<Type> parquetFields) {
  List<FieldSchema> converted = new ArrayList<Schema.FieldSchema>();
  for (Type parquetType : parquetFields) {
    try {
      FieldSchema inner = getFieldSchema(parquetType);
      FieldSchema toAdd;
      if (!parquetType.isRepetition(Repetition.REPEATED)) {
        toAdd = inner;
      } else {
        Schema bagSchema = new Schema(Arrays.asList(inner));
        toAdd = new FieldSchema(null, bagSchema, DataType.BAG);
      }
      converted.add(toAdd);
    } catch (FrontendException fe) {
      throw new SchemaConversionException("can't convert "+ parquetType, fe);
    }
  }
  return new Schema(converted);
}
/**
 * Creates the converter for one field of this group.
 *
 * <p>Non-repeated fields are delegated to {@code getConverterFromDescription}. REPEATED
 * fields get a {@code Repeated} converter (primitive or group flavour) which is also
 * recorded in {@code repeatedConverters}.
 *
 * @param type the Parquet type of the field
 * @param fieldIndex index of the field, passed through to the created converter
 * @return the converter for the field
 */
private Converter getFieldConverter(Type type, int fieldIndex) {
  if (!type.isRepetition(Type.Repetition.REPEATED)) {
    return getConverterFromDescription(type, fieldIndex, this);
  }

  Converter repeatedConverter;
  if (type.isPrimitive()) {
    repeatedConverter = new Repeated.RepeatedPrimitiveConverter(
        type.asPrimitiveType(), this, fieldIndex);
  } else {
    repeatedConverter = new Repeated.RepeatedGroupConverter(
        type.asGroupType(), this, fieldIndex);
  }
  repeatedConverters.add((Repeated) repeatedConverter);
  return repeatedConverter;
}
/**
 * Finds the REPEATED column at repetition level {@code r}, starting from this column and
 * walking up through its parents.
 *
 * @param r the repetition level to look for
 * @return this column if it is REPEATED at level {@code r}, otherwise the parent's answer
 * @throws InvalidRecordException if there is no parent, or the parent's definition level
 *     is below {@code r}
 */
ColumnIO getParent(int r) {
  if (getRepetitionLevel() == r && getType().isRepetition(Repetition.REPEATED)) {
    return this;
  }
  // Stop climbing when there is no parent or the parent's definition level is below r.
  if (getParent() == null || getParent().getDefinitionLevel() < r) {
    throw new InvalidRecordException("no parent("+r+") for "+Arrays.toString(this.getFieldPath()));
  }
  return getParent().getParent(r);
}
private void writeRecordFields(GroupType schema, Schema tajoSchema, Tuple tuple) { List<Type> fields = schema.getFields(); // Parquet ignores Tajo NULL_TYPE columns, so the index may differ. int index = 0; for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) { Column column = tajoSchema.getColumn(tajoIndex); if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) { continue; } Datum datum = tuple.get(tajoIndex); Type fieldType = fields.get(index); if (!tuple.isNull(tajoIndex)) { recordConsumer.startField(fieldType.getName(), index); writeValue(fieldType, column, datum); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + column.getSimpleName()); } ++index; } }
int i = 0; for (Type field : parquetSchema.getFields()) { if (field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL)) { PrimitiveType primitiveType = field.asPrimitiveType(); switch (primitiveType.getPrimitiveTypeName()) {
if (!elementType.isRepetition(Type.Repetition.REPEATED)) { throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);