return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) .named(name); return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name); return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name); return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name); return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name); return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name); serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) .named(name); return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name); return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name); return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name); return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name); return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name); serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) .named(name); return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name); return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name); return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name); return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name); return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name); serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) .named(name); return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name); return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name); return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name); return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name); return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name); return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name); return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
.optional(PrimitiveType.PrimitiveTypeName.INT32).named("element") .named("list").named("int_list").named("ArrayOfInts"); .as(parquet.schema.OriginalType.LIST).repeatedGroup().requiredGroup() .as(OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("list") .named("element").named("list").named("int_list_list").named("ArrayOfArrayOfInts"); .repeated(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("int_list") .named("ArrayOfInts"); .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") .required(PrimitiveType.PrimitiveTypeName.INT32).named("num").named("element") .named("tuple_list").named("ArrayOfTuples"); .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str").named("array") .named("one_tuple_list").named("ArrayOfOneTuples"); .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") .named("one_tuple_list_tuple").named("one_tuple_list").named("ArrayOfOneTuples2"); .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") .named("one_tuple_list").named("one_tuple_list").named("ArrayOfOneTuples3"); .as(parquet.schema.OriginalType.LIST).repeatedGroup().as(OriginalType.MAP) .repeatedGroup().as(OriginalType.MAP_KEY_VALUE) .required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8) .named("string_key").required(PrimitiveType.PrimitiveTypeName.INT32) .named("int_value").named("key_value").named("array").named("map_list")
if (typeInfo.getCategory().equals(Category.PRIMITIVE)) { if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) { return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) .named(name); } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) || typeInfo.equals(TypeInfoFactory.shortTypeInfo) || typeInfo.equals(TypeInfoFactory.byteTypeInfo)) { return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name); } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) { return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name); } else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) { return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name); } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) { return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name); } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) { return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name); } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) { return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name); } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) { return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name); } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) { throw new UnsupportedOperationException("Void type not implemented"); } else if (typeInfo.getTypeName().toLowerCase().startsWith( serdeConstants.CHAR_TYPE_NAME)) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8) .named(name); } else if (typeInfo.getTypeName().toLowerCase().startsWith( serdeConstants.VARCHAR_TYPE_NAME)) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
// Append an optional BINARY primitive field named after colName to schemaTypes.
schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named(colName));
throw new IllegalArgumentException("expecting (length) for field of type fixed_len_byte_array"); childBuilder.length(Integer.parseInt(st.nextToken())); check(st.nextToken(), ")", "type length ended by )", st); if (t.equalsIgnoreCase("(")) { originalType = OriginalType.valueOf(st.nextToken()); childBuilder.as(originalType); if (OriginalType.DECIMAL == originalType) { t = st.nextToken(); childBuilder.precision(Integer.parseInt(st.nextToken())); t = st.nextToken(); if (t.equalsIgnoreCase(",")) { childBuilder.scale(Integer.parseInt(st.nextToken())); t = st.nextToken(); childBuilder.id(Integer.parseInt(st.nextToken())); t = st.nextToken(); childBuilder.named(name); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("problem reading type: type = " + type + ", name = " + name + ", original type = " + originalType, e);
Types.PrimitiveBuilder<?> primitiveBuilder = builder.primitive(getTypeName(element.type), Repetition.valueOf(element.repetition_type.name())); if (element.isSetType_length()) { primitiveBuilder.length(element.type_length); primitiveBuilder.precision(element.precision); primitiveBuilder.scale(element.scale);
/** * Searchs column names by index on a given Parquet file schema, and returns its corresponded * Parquet schema types. * * @param schema Message schema where to search for column names. * @param colNames List of column names. * @param colIndexes List of column indexes. * @return A MessageType object of the column names found. */ private static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) { List<Type> schemaTypes = new ArrayList<Type>(); for (Integer i : colIndexes) { if (i < colNames.size()) { if (i < schema.getFieldCount()) { schemaTypes.add(schema.getType(i)); } else { //prefixing with '_mask_' to ensure no conflict with named //columns in the file schema schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named("_mask_" + colNames.get(i))); } } } return new MessageType(schema.getName(), schemaTypes); }
/**
 * Builds the Parquet type for this column: a BINARY primitive annotated as UTF8 and named
 * after the column name of the JSON schema.
 *
 * <p>A repeated converter always yields a repeated field; otherwise the repetition follows
 * {@code jsonSchema.optionalOrRequired()}.
 *
 * @return the Parquet type describing this column
 * @throws RuntimeException if the schema reports a repetition other than OPTIONAL or REQUIRED
 */
@Override
protected Type buildSchema() {
  final String fieldName = this.jsonSchema.getColumnName();

  if (!this.repeated) {
    // The schema is only asked for optional/required when the field is not repeated.
    switch (this.jsonSchema.optionalOrRequired()) {
      case REQUIRED:
        return Types.required(BINARY).as(UTF8).named(fieldName);
      case OPTIONAL:
        return Types.optional(BINARY).as(UTF8).named(fieldName);
      default:
        throw new RuntimeException("Unsupported Repetition type");
    }
  }

  return Types.repeated(BINARY).as(UTF8).named(fieldName);
}
}
@Override public Type getType(String name) { int byteLength = getByteLength(precision); PrimitiveTypeName typeName = USE_BINARY ? PrimitiveTypeName.BINARY : PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY; PrimitiveBuilder<PrimitiveType> builder = Types.optional(typeName).as(OriginalType.DECIMAL); // NOTE: return types of PrimitiveBuilder.{length,precision,scale} are unstable try { BUILDER_LENGTH_METHOD.invoke(builder, byteLength); BUILDER_PRECISION_METHOD.invoke(builder, precision); BUILDER_SCALE_METHOD.invoke(builder, scale); } catch (ReflectiveOperationException e) { throw new IllegalArgumentException("error occurred while resolving decimal type", e); } return builder.named(name); }
/**
 * Merges {@code toMerge} into this primitive type.
 *
 * <p>The result is a primitive with this type's primitive type name and name, and the
 * repetition of {@code toMerge}. When this type is a FIXED_LEN_BYTE_ARRAY, its length is
 * carried over to the result.
 *
 * @param toMerge the type to merge into this one
 * @param strict whether the primitive type name of {@code toMerge} must equal this one's
 * @return the merged primitive type
 * @throws IncompatibleSchemaModificationException if {@code toMerge} is not primitive, or if
 *         {@code strict} is set and the primitive type names differ
 */
@Override
protected Type union(Type toMerge, boolean strict) {
  boolean compatible = toMerge.isPrimitive();
  if (compatible && strict) {
    // Strict mode additionally requires matching primitive type names.
    compatible = primitive.equals(toMerge.asPrimitiveType().getPrimitiveTypeName());
  }
  if (!compatible) {
    throw new IncompatibleSchemaModificationException("can not merge type " + toMerge + " into " + this);
  }

  Types.PrimitiveBuilder<PrimitiveType> merged =
      Types.primitive(primitive, toMerge.getRepetition());
  if (primitive == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    merged.length(length);
  }
  return merged.named(getName());
}
}
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) .named(name); return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name); return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name); return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name); return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name); return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name); serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) .named(name); return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name); return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name); return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name); return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name); return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name); return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name); return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name); return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
/**
 * Builds the Parquet type for this column: a BINARY primitive annotated as UTF8 and named
 * after the column name of the JSON schema.
 *
 * <p>A repeated converter always yields a repeated field; otherwise the repetition follows
 * {@code jsonSchema.optionalOrRequired()}.
 *
 * @return the Parquet type describing this column
 * @throws RuntimeException if the schema reports a repetition other than OPTIONAL or REQUIRED
 */
@Override
protected Type buildSchema() {
  final String fieldName = this.jsonSchema.getColumnName();

  if (!this.repeated) {
    // The schema is only asked for optional/required when the field is not repeated.
    switch (this.jsonSchema.optionalOrRequired()) {
      case REQUIRED:
        return Types.required(BINARY).as(UTF8).named(fieldName);
      case OPTIONAL:
        return Types.optional(BINARY).as(UTF8).named(fieldName);
      default:
        throw new RuntimeException("Unsupported Repetition type");
    }
  }

  return Types.repeated(BINARY).as(UTF8).named(fieldName);
}
}
Types.PrimitiveBuilder<?> primitiveBuilder = builder.primitive(getTypeName(element.type), Repetition.valueOf(element.repetition_type.name())); if (element.isSetType_length()) { primitiveBuilder.length(element.type_length); primitiveBuilder.precision(element.precision); primitiveBuilder.scale(element.scale);