/**
 * Reports whether {@code tupleDomain} holds a domain entry keyed by {@code columnHandle}.
 *
 * @param tupleDomain the tuple domain whose column domains are inspected
 * @param columnHandle the column to look for
 * @return {@code true} only when the domains map is present and contains the column as a key
 */
private static boolean tupleDomainReferencesColumnHandle(
        TupleDomain<ColumnHandle> tupleDomain,
        ColumnHandle columnHandle)
{
    // An absent domains map filters to an absent Optional, which yields false.
    return tupleDomain.getDomains()
            .filter(columnDomains -> columnDomains.containsKey(columnHandle))
            .isPresent();
}
}
/**
 * Reports whether {@code parquetTupleDomain} carries a domain for {@code columnDescriptor}.
 *
 * <p>The tuple domain must have its domains map present; this is enforced with {@code verify}.
 *
 * @param columnDescriptor the Parquet column to look up
 * @param parquetTupleDomain the tuple domain keyed by Parquet column descriptors
 * @return {@code true} when the column descriptor is one of the keys of the domains map
 */
private static boolean isColumnPredicate(ColumnDescriptor columnDescriptor, TupleDomain<ColumnDescriptor> parquetTupleDomain)
{
    verify(parquetTupleDomain.getDomains().isPresent(), "parquetTupleDomain is empty");
    return parquetTupleDomain.getDomains().get().containsKey(columnDescriptor);
}
/**
 * Finds the domain attached to the first column for which {@code isPathColumnHandle} is true.
 *
 * @param effectivePredicate the predicate to search
 * @return the matching domain, or an empty Optional when the domains map is absent
 *         or no key satisfies {@code isPathColumnHandle}
 */
private static Optional<Domain> getPathDomain(TupleDomain<HiveColumnHandle> effectivePredicate)
{
    if (!effectivePredicate.getDomains().isPresent()) {
        return Optional.empty();
    }

    for (Map.Entry<HiveColumnHandle, Domain> columnDomain : effectivePredicate.getDomains().get().entrySet()) {
        if (isPathColumnHandle(columnDomain.getKey())) {
            // Only the first match in iteration order is considered.
            return Optional.ofNullable(columnDomain.getValue());
        }
    }
    return Optional.empty();
}
/**
 * Builds a query {@code Document} by merging the predicate built for every column domain.
 *
 * @param tupleDomain the constraint; every key is cast to {@code MongoColumnHandle}
 * @return a document holding all per-column predicates; empty when the domains map is absent
 */
@VisibleForTesting
static Document buildQuery(TupleDomain<ColumnHandle> tupleDomain)
{
    Document query = new Document();
    if (!tupleDomain.getDomains().isPresent()) {
        return query;
    }

    for (Map.Entry<ColumnHandle, Domain> columnDomain : tupleDomain.getDomains().get().entrySet()) {
        MongoColumnHandle mongoColumn = (MongoColumnHandle) columnDomain.getKey();
        query.putAll(buildPredicate(mongoColumn, columnDomain.getValue()));
    }
    return query;
}
/**
 * Computes the column domains from {@code predicates} that are not part of
 * {@code clusteringPushDownResult}.
 *
 * <p>An entry is dropped only when both its key and its value equal a pushed-down entry.
 * The domains of {@code predicates} are read with an unchecked {@code get()}.
 *
 * @return a tuple domain built from the remaining column domains
 */
public TupleDomain<ColumnHandle> getUnenforcedConstraints()
{
    Map<ColumnHandle, Domain> enforced = clusteringPushDownResult.getDomains();
    // Work on a copy so the predicate's own map is left untouched.
    Map<ColumnHandle, Domain> remaining = new HashMap<>(predicates.getDomains().get());

    boolean nothingToRemove = remaining.isEmpty() || enforced.isEmpty();
    if (!nothingToRemove) {
        remaining.entrySet().removeAll(enforced.entrySet());
    }
    return TupleDomain.withColumnDomains(remaining);
}
private static Type createVarcharType(TupleDomain<ColumnDescriptor> effectivePredicate, RichColumnDescriptor column) { // We look at the effectivePredicate domain here, because it matches the Hive column type // more accurately than the type available in the RichColumnDescriptor. // For example, a Hive column of type varchar(length) is encoded as a Parquet BINARY, but // when that is converted to a Presto Type the length information wasn't retained. Optional<Map<ColumnDescriptor, Domain>> predicateDomains = effectivePredicate.getDomains(); if (predicateDomains.isPresent()) { Domain domain = predicateDomains.get().get(column); if (domain != null) { return domain.getType(); } } return VarcharType.VARCHAR; }
/**
 * Looks up the domain of the column whose ordinal position equals {@code timestampOrdinalPosition}.
 *
 * <p>Matching domains are collected into a set, so equal domains collapse into one;
 * {@code Iterables.getOnlyElement} is then applied to that set.
 *
 * @param timestampOrdinalPosition ordinal position of the column of interest, if known
 * @param predicate the predicate to search
 * @return the matching domain, or an empty Optional when the domains map is absent,
 *         the ordinal position is absent, or no column matches
 */
private static Optional<Domain> getDomain(OptionalInt timestampOrdinalPosition, TupleDomain<LocalFileColumnHandle> predicate)
{
    Optional<Map<LocalFileColumnHandle, Domain>> domains = predicate.getDomains();
    if (!domains.isPresent() || !timestampOrdinalPosition.isPresent()) {
        return Optional.empty();
    }

    int ordinalPosition = timestampOrdinalPosition.getAsInt();
    Set<Domain> matches = domains.get().entrySet().stream()
            .filter(columnDomain -> columnDomain.getKey().getOrdinalPosition() == ordinalPosition)
            .map(Map.Entry::getValue)
            .collect(toSet());
    if (matches.isEmpty()) {
        return Optional.empty();
    }
    return Optional.ofNullable(Iterables.getOnlyElement(matches));
}
/**
 * Extract all column constraints that require exactly one value or only null in their respective Domains.
 * Returns an empty Optional if the Domain is none.
 *
 * @param tupleDomain the tuple domain to inspect
 * @param <T> the column key type
 * @return a map from each column whose domain satisfies {@code isNullableSingleValue()} to a
 *         {@code NullableValue} built from that domain's type and nullable single value
 */
public static <T> Optional<Map<T, NullableValue>> extractFixedValues(TupleDomain<T> tupleDomain)
{
    if (!tupleDomain.getDomains().isPresent()) {
        return Optional.empty();
    }

    Map<T, NullableValue> fixedValues = tupleDomain.getDomains().get()
            .entrySet().stream()
            .filter(columnDomain -> columnDomain.getValue().isNullableSingleValue())
            .collect(toMap(
                    Map.Entry::getKey,
                    columnDomain -> new NullableValue(columnDomain.getValue().getType(), columnDomain.getValue().getNullableSingleValue())));
    return Optional.of(fixedValues);
}
/**
 * Converts a tuple domain into a single expression.
 *
 * <p>Each column domain is translated with the two-argument {@code toPredicate} overload,
 * visiting symbols in order of their name, and the results are merged with
 * {@code ExpressionUtils.combineConjuncts}.
 *
 * @param tupleDomain the tuple domain to translate
 * @return {@code FALSE_LITERAL} when the tuple domain is none, otherwise the combined expression
 */
public Expression toPredicate(TupleDomain<Symbol> tupleDomain)
{
    if (tupleDomain.isNone()) {
        return FALSE_LITERAL;
    }

    return tupleDomain.getDomains().get()
            .entrySet().stream()
            // Sorting by symbol name keeps the conjunct order independent of map iteration order.
            .sorted(comparing(symbolDomain -> symbolDomain.getKey().getName()))
            .map(symbolDomain -> toPredicate(symbolDomain.getValue(), symbolDomain.getKey().toSymbolReference()))
            .collect(collectingAndThen(toImmutableList(), ExpressionUtils::combineConjuncts));
}
/**
 * Converts a tuple domain into its Thrift representation.
 *
 * <p>Each key is cast to {@code ThriftColumnHandle} and replaced by its column name;
 * each domain is converted with {@code fromDomain}.
 *
 * @param tupleDomain the tuple domain to convert
 * @return a {@code PrestoThriftTupleDomain} wrapping the converted map, or wrapping
 *         {@code null} when the domains map is absent
 */
public static PrestoThriftTupleDomain tupleDomainToThriftTupleDomain(TupleDomain<ColumnHandle> tupleDomain)
{
    return tupleDomain.getDomains()
            .map(columnDomains -> new PrestoThriftTupleDomain(columnDomains
                    .entrySet().stream()
                    .collect(toImmutableMap(
                            columnDomain -> ((ThriftColumnHandle) columnDomain.getKey()).getColumnName(),
                            columnDomain -> fromDomain(columnDomain.getValue())))))
            .orElseGet(() -> new PrestoThriftTupleDomain(null));
}
}
/**
 * Builds one predicate string per column that has a supported type and a domain in {@code tupleDomain}.
 *
 * @param columns the candidate columns, visited in list order
 * @param tupleDomain the constraint supplying per-column domains
 * @return the predicate strings; empty when the domains map is absent or no column qualifies
 */
private List<String> toConjuncts(List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> tupleDomain)
{
    Builder<String> conjuncts = builder();
    for (HiveColumnHandle column : columns) {
        // The type is resolved for every column, before any other check.
        Type type = column.getHiveType().getType(typeManager);
        if (!tupleDomain.getDomains().isPresent() || !isSupported(type)) {
            continue;
        }

        Domain domain = tupleDomain.getDomains().get().get(column);
        if (domain == null) {
            continue;
        }
        conjuncts.add(toPredicate(domain, type, column.getHiveColumnIndex()));
    }
    return conjuncts.build();
}
/**
 * Prints the simplified domain of {@code column}, if the constraint has one.
 *
 * <p>The constraint must not be none; this is enforced with {@code checkArgument}.
 * Nothing is printed when the constraint is all or has no entry for the column.
 *
 * @param indent indentation passed through to {@code print}
 * @param column the column whose domain is printed
 * @param constraint the constraint to read the domain from
 */
private void printConstraint(int indent, ColumnHandle column, TupleDomain<ColumnHandle> constraint)
{
    checkArgument(!constraint.isNone());
    Map<ColumnHandle, Domain> columnDomains = constraint.getDomains().get();
    if (constraint.isAll() || !columnDomains.containsKey(column)) {
        return;
    }
    print(indent, ":: %s", formatDomain(columnDomains.get(column).simplify()));
}
/**
 * Extracts a string equality filter for the column at {@code index}.
 *
 * @param constraint the constraint keyed by column index
 * @param index the column index to look up
 * @return the UTF-8 string of the domain's single value when that value is a {@code Slice};
 *         an empty Optional when the constraint is none, the column has no domain,
 *         the domain is not a single value, or the value is not a {@code Slice}
 */
public static Optional<String> stringFilter(TupleDomain<Integer> constraint, int index)
{
    if (constraint.isNone()) {
        return Optional.empty();
    }

    Domain domain = constraint.getDomains().get().get(index);
    if ((domain != null) && domain.isSingleValue()) {
        Object value = domain.getSingleValue();
        if (value instanceof Slice) {
            return Optional.of(((Slice) value).toStringUtf8());
        }
    }
    return Optional.empty();
}
/**
 * Builds one predicate string per column that has an accepted type and a domain in {@code tupleDomain}.
 *
 * <p>The domains map is read with an unchecked {@code get()} as soon as a column with an
 * accepted type is encountered.
 *
 * @param columns the candidate columns, visited in list order
 * @param tupleDomain the constraint supplying per-column domains
 * @param accumulator passed through to {@code toPredicate} for every generated predicate
 * @return the predicate strings, in column order
 */
private List<String> toConjuncts(List<JdbcColumnHandle> columns, TupleDomain<ColumnHandle> tupleDomain, List<TypeAndValue> accumulator)
{
    ImmutableList.Builder<String> conjuncts = ImmutableList.builder();
    for (JdbcColumnHandle column : columns) {
        Type type = column.getColumnType();
        if (!isAcceptedType(type)) {
            continue;
        }

        Domain domain = tupleDomain.getDomains().get().get(column);
        if (domain == null) {
            continue;
        }
        conjuncts.add(toPredicate(column.getColumnName(), domain, type, accumulator));
    }
    return conjuncts.build();
}
/**
 * Converts every column domain of {@code constraint} into a {@code ColumnConstraint}.
 *
 * <p>The constraint must not be none; this is enforced with {@code checkArgument}.
 * Each domain is simplified before being handed to {@code parseDomain}.
 *
 * @param tableHandle the table used to resolve column metadata
 * @param constraint the constraint whose column domains are converted
 * @return one {@code ColumnConstraint} per column domain, carrying the column's name and type signature
 */
private Set<ColumnConstraint> parseConstraints(TableHandle tableHandle, TupleDomain<ColumnHandle> constraint)
{
    checkArgument(!constraint.isNone());

    ImmutableSet.Builder<ColumnConstraint> result = ImmutableSet.builder();
    for (Map.Entry<ColumnHandle, Domain> columnDomain : constraint.getDomains().get().entrySet()) {
        ColumnHandle column = columnDomain.getKey();
        Domain domain = columnDomain.getValue();
        ColumnMetadata columnMetadata = metadata.getColumnMetadata(session, tableHandle, column);

        ColumnConstraint columnConstraint = new ColumnConstraint(
                columnMetadata.getName(),
                columnMetadata.getType().getTypeSignature(),
                parseDomain(domain.simplify()));
        result.add(columnConstraint);
    }
    return result.build();
}
/**
 * Checks whether the domain recorded for {@code symbol} consists of exactly the range
 * "less than or equal to {@code upperBound}" (built with the domain's own type).
 *
 * @param tupleDomain the tuple domain to inspect
 * @param symbol the symbol whose domain is compared
 * @param upperBound the inclusive upper bound of the expected range
 * @return {@code true} when the symbol's value set equals that single range; {@code false}
 *         when the tuple domain is none, when it holds no domain for {@code symbol},
 *         or when the value sets differ
 */
private static boolean isEqualRange(TupleDomain<Symbol> tupleDomain, Symbol symbol, long upperBound)
{
    if (tupleDomain.isNone()) {
        return false;
    }

    Domain domain = tupleDomain.getDomains().get().get(symbol);
    if (domain == null) {
        // The map lookup yields null when the symbol has no entry; there is no value set
        // to compare, so report "not equal" instead of dereferencing null.
        return false;
    }

    ValueSet expected = ValueSet.ofRanges(Range.lessThanOrEqual(domain.getType(), upperBound));
    return domain.getValues().equals(expected);
}
/**
 * Produces a copy of {@code effectivePredicate}, keyed by {@code HiveColumnHandle}, in which
 * overly large value sets are replaced by coarser ones.
 *
 * <p>For each column domain:
 * <ul>
 * <li>a ranges-based value set with more than {@code threshold} ranges becomes its span;</li>
 * <li>a discrete value set with more than {@code threshold} values becomes "all" of its type;</li>
 * <li>anything else is kept as is.</li>
 * </ul>
 * The null-allowed flag of every domain is carried over unchanged. When the domains map is
 * absent, the result is built from an empty map.
 *
 * @param effectivePredicate the predicate to compact; every key is cast to {@code HiveColumnHandle}
 * @param threshold the maximum number of ranges or discrete values kept verbatim
 * @return the compacted tuple domain
 */
private static TupleDomain<HiveColumnHandle> toCompactTupleDomain(TupleDomain<ColumnHandle> effectivePredicate, int threshold)
{
    ImmutableMap.Builder<HiveColumnHandle, Domain> compacted = ImmutableMap.builder();
    if (effectivePredicate.getDomains().isPresent()) {
        for (Map.Entry<ColumnHandle, Domain> columnDomain : effectivePredicate.getDomains().get().entrySet()) {
            HiveColumnHandle hiveColumn = (HiveColumnHandle) columnDomain.getKey();
            ValueSet values = columnDomain.getValue().getValues();

            // Empty means "small enough, keep the original value set".
            Optional<ValueSet> replacement = values.getValuesProcessor().<Optional<ValueSet>>transform(
                    ranges -> ranges.getRangeCount() > threshold ? Optional.of(ValueSet.ofRanges(ranges.getSpan())) : Optional.empty(),
                    discreteValues -> discreteValues.getValues().size() > threshold ? Optional.of(ValueSet.all(values.getType())) : Optional.empty(),
                    allOrNone -> Optional.empty());

            compacted.put(hiveColumn, Domain.create(replacement.orElse(values), columnDomain.getValue().isNullAllowed()));
        }
    }
    return TupleDomain.withColumnDomains(compacted.build());
}
/**
 * Checks that {@code getParquetTupleDomain} yields no column domains for a
 * not-null predicate on a struct-typed Hive column.
 */
@Test
public void testParquetTupleDomainStruct()
{
    HiveColumnHandle structColumn = new HiveColumnHandle(
            "my_struct",
            HiveType.valueOf("struct<a:int,b:int>"),
            parseTypeSignature(StandardTypes.ROW),
            0,
            REGULAR,
            Optional.empty());
    RowType structType = RowType.from(ImmutableList.of(new RowType.Field(Optional.of("my_struct"), INTEGER)));
    TupleDomain<HiveColumnHandle> hivePredicate = withColumnDomains(ImmutableMap.of(structColumn, Domain.notNull(structType)));

    // File schema: an optional group "my_struct" with two optional INT32 fields.
    MessageType fileSchema = new MessageType(
            "hive_schema",
            new GroupType(
                    OPTIONAL,
                    "my_struct",
                    new PrimitiveType(OPTIONAL, INT32, "a"),
                    new PrimitiveType(OPTIONAL, INT32, "b")));

    Map<List<String>, RichColumnDescriptor> descriptors = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetPredicate = getParquetTupleDomain(descriptors, hivePredicate);
    assertTrue(parquetPredicate.getDomains().get().isEmpty());
}
/**
 * Checks that {@code getParquetTupleDomain} yields no column domains for a
 * not-null predicate on a Hive column of type array of int.
 */
@Test
public void testParquetTupleDomainPrimitiveArray()
{
    HiveColumnHandle arrayColumn = new HiveColumnHandle(
            "my_array",
            HiveType.valueOf("array<int>"),
            parseTypeSignature(StandardTypes.ARRAY),
            0,
            REGULAR,
            Optional.empty());
    TupleDomain<HiveColumnHandle> hivePredicate = withColumnDomains(ImmutableMap.of(arrayColumn, Domain.notNull(new ArrayType(INTEGER))));

    // File schema: optional group "my_array" > repeated group "bag" > optional INT32 "array_element".
    MessageType fileSchema = new MessageType(
            "hive_schema",
            new GroupType(
                    OPTIONAL,
                    "my_array",
                    new GroupType(
                            REPEATED,
                            "bag",
                            new PrimitiveType(OPTIONAL, INT32, "array_element"))));

    Map<List<String>, RichColumnDescriptor> descriptors = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetPredicate = getParquetTupleDomain(descriptors, hivePredicate);
    assertTrue(parquetPredicate.getDomains().get().isEmpty());
}
/**
 * Checks that {@code getParquetTupleDomain} yields no column domains for a
 * not-null predicate on a Hive column of type array of struct.
 */
@Test
public void testParquetTupleDomainStructArray()
{
    HiveColumnHandle arrayOfStructColumn = new HiveColumnHandle(
            "my_array_struct",
            HiveType.valueOf("array<struct<a:int>>"),
            parseTypeSignature(StandardTypes.ARRAY),
            0,
            REGULAR,
            Optional.empty());
    RowType elementType = RowType.from(ImmutableList.of(new RowType.Field(Optional.of("a"), INTEGER)));
    TupleDomain<HiveColumnHandle> hivePredicate = withColumnDomains(ImmutableMap.of(arrayOfStructColumn, Domain.notNull(new ArrayType(elementType))));

    // File schema: optional group "my_array_struct" > repeated group "bag" >
    // optional group "array_element" > optional INT32 "a".
    MessageType fileSchema = new MessageType(
            "hive_schema",
            new GroupType(
                    OPTIONAL,
                    "my_array_struct",
                    new GroupType(
                            REPEATED,
                            "bag",
                            new GroupType(
                                    OPTIONAL,
                                    "array_element",
                                    new PrimitiveType(OPTIONAL, INT32, "a")))));

    Map<List<String>, RichColumnDescriptor> descriptors = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetPredicate = getParquetTupleDomain(descriptors, hivePredicate);
    assertTrue(parquetPredicate.getDomains().get().isEmpty());
}