@Override protected Type buildSchema() { String columnName = this.jsonSchema.getColumnName(); if (this.repeated) { return Types.repeated(BINARY).as(UTF8).named(columnName); } switch (this.jsonSchema.optionalOrRequired()) { case OPTIONAL: return Types.optional(BINARY).as(UTF8).named(columnName); case REQUIRED: return Types.required(BINARY).as(UTF8).named(columnName); default: throw new RuntimeException("Unsupported Repetition type"); } } }
return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
@Override public Type getType(String name) { int byteLength = getByteLength(precision); PrimitiveTypeName typeName = USE_BINARY ? PrimitiveTypeName.BINARY : PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY; PrimitiveBuilder<PrimitiveType> builder = Types.optional(typeName).as(OriginalType.DECIMAL); // NOTE: return types of PrimitiveBuilder.{length,precision,scale} are unstable try { BUILDER_LENGTH_METHOD.invoke(builder, byteLength); BUILDER_PRECISION_METHOD.invoke(builder, precision); BUILDER_SCALE_METHOD.invoke(builder, scale); } catch (ReflectiveOperationException e) { throw new IllegalArgumentException("error occurred while resolving decimal type", e); } return builder.named(name); }
@Override public Type getType(String name) { return Types.optional(PrimitiveTypeName.BINARY) .as(OriginalType.UTF8) .named(name); }
/** * Searchs column names by index on a given Parquet file schema, and returns its corresponded * Parquet schema types. * * @param schema Message schema where to search for column names. * @param colNames List of column names. * @param colIndexes List of column indexes. * @return A MessageType object of the column names found. */ private static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) { List<Type> schemaTypes = new ArrayList<Type>(); for (Integer i : colIndexes) { if (i < colNames.size()) { if (i < schema.getFieldCount()) { schemaTypes.add(schema.getType(i)); } else { //prefixing with '_mask_' to ensure no conflict with named //columns in the file schema schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named("_mask_" + colNames.get(i))); } } } return new MessageType(schema.getName(), schemaTypes); }
schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named(colName));
@Override protected Type buildSchema() { String columnName = this.jsonSchema.getColumnName(); if (this.repeated) { return Types.repeated(BINARY).as(UTF8).named(columnName); } switch (this.jsonSchema.optionalOrRequired()) { case OPTIONAL: return Types.optional(BINARY).as(UTF8).named(columnName); case REQUIRED: return Types.required(BINARY).as(UTF8).named(columnName); default: throw new RuntimeException("Unsupported Repetition type"); } } }
} else if (typeInfo.getTypeName().toLowerCase().startsWith( serdeConstants.CHAR_TYPE_NAME)) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8) .named(name); } else if (typeInfo.getTypeName().toLowerCase().startsWith( serdeConstants.VARCHAR_TYPE_NAME)) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8) .named(name); } else if (typeInfo instanceof DecimalTypeInfo) { int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL). scale(scale).precision(prec).named(name); } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; if (repetition == Repetition.OPTIONAL) { return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);