/**
 * Converts a Spark {@link DataType} to an Iceberg {@link Type}, assigning fresh field ids.
 * <p>
 * Spark types that map ambiguously to Iceberg (several Iceberg types share one Spark
 * representation) are converted to a default Iceberg type. To resolve ids and ambiguous
 * types against an existing schema instead, use {@link #convert(Schema, StructType)}.
 *
 * @param sparkType a Spark DataType
 * @return the equivalent Iceberg Type
 * @throws IllegalArgumentException if the type cannot be converted
 */
public static Type convert(DataType sparkType) {
  // a fresh visitor assigns new ids as it walks the Spark type
  SparkTypeToType visitor = new SparkTypeToType();
  return visit(sparkType, visitor);
}
/**
 * Returns the Spark {@link StructType} for this reader, converting it from the
 * Iceberg schema on first access and caching the result.
 */
private StructType lazyType() {
  if (type != null) {
    return type;
  }
  // first call: derive the Spark type from the (possibly pruned) Iceberg schema
  this.type = convert(lazySchema());
  return type;
}
/**
 * Builds an identity partition spec from varargs partition column names by
 * delegating to the collection-based overload.
 */
private static PartitionSpec identitySpec(Schema schema, String... partitionNames) {
  List<String> nameList = Lists.newArrayList(partitionNames);
  return identitySpec(schema, nameList);
}
/**
 * Converts the given Iceberg filter expressions to Spark expressions against
 * {@code SCHEMA} and pushes them down to the reader.
 */
private void pushFilters(DataSourceReader reader, com.netflix.iceberg.expressions.Expression... filters) {
  Expression[] converted = new Expression[filters.length];
  for (int idx = 0; idx < converted.length; idx++) {
    converted[idx] = SparkExpressions.convert(filters[idx], SCHEMA);
  }
  pushFilters(reader, converted);
}
/** * Convert a Spark {@link StructType struct} to a {@link Schema} based on the given schema. * <p> * This conversion does not assign new ids; it uses ids from the base schema. * <p> * Data types, field order, and nullability will match the spark type. This conversion may return * a schema that is not compatible with base schema. * * @param baseSchema a Schema on which conversion is based * @param sparkType a Spark StructType * @return the equivalent Schema * @throws IllegalArgumentException if the type cannot be converted or there are missing ids */ public static Schema convert(Schema baseSchema, StructType sparkType) { // convert to a type with fresh ids Types.StructType struct = visit(sparkType, new SparkTypeToType(sparkType)).asStructType(); // reassign ids to match the base schema Schema schema = TypeUtil.reassignIds(new Schema(struct.fields()), baseSchema); // fix types that can't be represented in Spark (UUID and Fixed) return FixupTypes.fixup(schema, baseSchema); }
// Translates an Iceberg is-null predicate into the equivalent Spark expression
// by resolving the bound reference to a Spark Column.
@Override
public <T> Expression isNull(BoundReference<T> ref) {
  return column(ref).isNull().expr();
}
/**
 * Converts an Iceberg {@link Type} to the equivalent Spark {@link DataType}.
 *
 * @param type an Iceberg Type
 * @return the equivalent Spark type
 * @throws IllegalArgumentException if the type cannot be converted to Spark
 */
public static DataType convert(Type type) {
  TypeToSparkType visitor = new TypeToSparkType();
  return visit(type, visitor);
}
/**
 * Converts a list of Spark literal expressions to their Java values.
 *
 * @param values Spark expressions expected to all be {@link Literal}s
 * @return the converted values, or null if any expression is not a literal
 */
private static List<Object> convertLiterals(List<Expression> values) {
  List<Object> result = Lists.newArrayListWithExpectedSize(values.size());
  for (Expression expr : values) {
    if (!(expr instanceof Literal)) {
      // a non-literal makes the whole list unconvertible
      return null;
    }
    result.add(valueFromSpark((Literal) expr));
  }
  return result;
}
/**
 * Returns the Iceberg schema to read, computing it on first access: the table
 * schema pruned to the requested projection, or the full table schema when no
 * projection was requested.
 */
private Schema lazySchema() {
  if (schema == null) {
    this.schema = requestedSchema != null
        ? prune(table.schema(), requestedSchema)
        : table.schema();
  }
  return schema;
}
// Translates an Iceberg not-null predicate into the equivalent Spark expression
// by resolving the bound reference to a Spark Column.
@Override
public <T> Expression notNull(BoundReference<T> ref) {
  return column(ref).isNotNull().expr();
}
/**
 * Converts an Iceberg {@link Schema} to the equivalent Spark {@link StructType}.
 *
 * @param schema an Iceberg Schema
 * @return the equivalent Spark StructType
 * @throws IllegalArgumentException if the type cannot be converted to Spark
 */
public static StructType convert(Schema schema) {
  DataType converted = visit(schema, new TypeToSparkType());
  // a schema always converts to a struct at the top level
  return (StructType) converted;
}
/**
 * Builds an identity partition spec from the columns flagged as partition
 * columns, delegating to the name-based overload.
 */
private static PartitionSpec identitySpec(Schema schema, Collection<Column> columns) {
  List<String> partitionNames = Lists.newArrayList();
  for (Column col : columns) {
    if (!col.isPartition()) {
      continue; // only partition columns contribute to the spec
    }
    partitionNames.add(col.name());
  }
  return identitySpec(schema, partitionNames);
}
// Translates an Iceberg greater-than predicate into the equivalent Spark
// expression: column(ref) > literal value.
@Override
public <T> Expression gt(BoundReference<T> ref, com.netflix.iceberg.expressions.Literal<T> lit) {
  return column(ref).gt(lit.value()).expr();
}
// Translates an Iceberg less-than-or-equal predicate into the equivalent Spark
// expression: column(ref) <= literal value (Spark's leq).
@Override
public <T> Expression ltEq(BoundReference<T> ref, com.netflix.iceberg.expressions.Literal<T> lit) {
  return column(ref).leq(lit.value()).expr();
}
// Translates an Iceberg equality predicate into the equivalent Spark
// expression: column(ref) === literal value.
@Override
public <T> Expression eq(BoundReference<T> ref, com.netflix.iceberg.expressions.Literal<T> lit) {
  return column(ref).equalTo(lit.value()).expr();
}
// Translates an Iceberg less-than predicate into the equivalent Spark
// expression: column(ref) < literal value.
@Override
public <T> Expression lt(BoundReference<T> ref, com.netflix.iceberg.expressions.Literal<T> lit) {
  return column(ref).lt(lit.value()).expr();
}
// Translates an Iceberg greater-than-or-equal predicate into the equivalent
// Spark expression: column(ref) >= literal value (Spark's geq).
@Override
public <T> Expression gtEq(BoundReference<T> ref, com.netflix.iceberg.expressions.Literal<T> lit) {
  return column(ref).geq(lit.value()).expr();
}
// Translates an Iceberg not-equal predicate into the equivalent Spark
// expression: column(ref) =!= literal value.
@Override
public <T> Expression notEq(BoundReference<T> ref, com.netflix.iceberg.expressions.Literal<T> lit) {
  return column(ref).notEqual(lit.value()).expr();
}