/**
 * Returns the range spanning both arguments: it starts at the lesser of the two
 * minimums and ends at the greater of the two maximums.
 *
 * @param first one of the ranges to cover; checked with {@code requireNonNull}
 * @param second the other range to cover; checked with {@code requireNonNull}
 * @return a new {@code DoubleRange} built from the combined bounds
 */
public static DoubleRange union(DoubleRange first, DoubleRange second)
{
    requireNonNull(first, "first is null");
    requireNonNull(second, "second is null");

    double lowerBound = min(first.min, second.min);
    double upperBound = max(first.max, second.max);
    return new DoubleRange(lowerBound, upperBound);
}
/**
 * Builds a {@code DoubleRange} from the given bounds and asserts that
 * {@code getMin()} and {@code getMax()} return exactly those bounds.
 *
 * NOTE(review): the final closing brace on the line below does not belong to this
 * method; it closes an enclosing scope whose opening is outside this excerpt.
 *
 * @param min value passed as the first constructor argument and expected from {@code getMin()}
 * @param max value passed as the second constructor argument and expected from {@code getMax()}
 */
private static void assertRange(double min, double max) { DoubleRange range = new DoubleRange(min, max); assertEquals(range.getMin(), min); assertEquals(range.getMax(), max); } }
/**
 * Checks {@code union} for disjoint ranges (in both argument orders), for identical
 * ranges, and for two half-infinite ranges that meet at zero (in both argument orders).
 */
@Test
public void testUnion()
{
    DoubleRange oneToTwo = new DoubleRange(1, 2);
    DoubleRange fourToFive = new DoubleRange(4, 5);
    DoubleRange oneToFive = new DoubleRange(1, 5);

    // disjoint ranges, then identical ranges, then disjoint ranges with the arguments swapped
    assertEquals(union(oneToTwo, fourToFive), oneToFive);
    assertEquals(union(oneToTwo, new DoubleRange(1, 2)), new DoubleRange(1, 2));
    assertEquals(union(fourToFive, oneToTwo), oneToFive);

    DoubleRange upToZero = new DoubleRange(Double.NEGATIVE_INFINITY, 0);
    DoubleRange fromZero = new DoubleRange(0, Double.POSITIVE_INFINITY);
    DoubleRange unbounded = new DoubleRange(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY);

    // half-infinite ranges touching at zero, in both argument orders
    assertEquals(union(upToZero, fromZero), unbounded);
    assertEquals(union(fromZero, upToZero), unbounded);
}
/**
 * Asserts that {@code actual} matches {@code expected}, choosing the comparison by the
 * runtime type of {@code actual}:
 * <ul>
 * <li>{@code Slice}: both classes must be equal and the UTF-8 contents must be equal;</li>
 * <li>{@code DoubleRange}: the minimums and the maximums are compared recursively;</li>
 * <li>anything else: both values are converted with {@code toDouble} and compared with a
 * delta proportional to the magnitude of the expected value.</li>
 * </ul>
 *
 * @param actual the value under test
 * @param expected the reference value
 * @param comparedValue description of what is being compared, used as the assertion message
 */
private void assertClose(Object actual, Object expected, String comparedValue)
{
    if (actual instanceof Slice) {
        assertEquals(actual.getClass(), expected.getClass(), comparedValue);
        // Pass the description here as well so a content mismatch names the compared value.
        assertEquals(((Slice) actual).toStringUtf8(), ((Slice) expected).toStringUtf8(), comparedValue);
    }
    else if (actual instanceof DoubleRange) {
        DoubleRange actualRange = (DoubleRange) actual;
        DoubleRange expectedRange = (DoubleRange) expected;
        assertClose(actualRange.getMin(), expectedRange.getMin(), comparedValue);
        assertClose(actualRange.getMax(), expectedRange.getMax(), comparedValue);
    }
    else {
        double actualDouble = toDouble(actual);
        double expectedDouble = toDouble(expected);
        // The delta must be non-negative: scaling the raw expected value would give a
        // negative delta for negative expectations, which no difference can satisfy.
        double delta = Math.abs(expectedDouble) * tolerance;
        assertEquals(actualDouble, expectedDouble, delta, comparedValue);
    }
}
/**
 * Converts one column's statistics into a {@code SymbolStatsEstimate}.
 *
 * The nulls fraction and distinct values count are copied from {@code columnStatistics}.
 * The average row size is the column's data size divided by the non-null row count, where
 * the non-null row count is the table row count multiplied by {@code (1.0 - nullsFraction)};
 * when that count is exactly zero the average row size is set to zero instead of dividing.
 * Low and high values are only set when the column statistics carry a range.
 *
 * NOTE(review): the final closing brace on the line below does not belong to this
 * method; it closes an enclosing scope whose opening is outside this excerpt.
 *
 * @param tableStatistics supplies the table row count
 * @param columnStatistics supplies nulls fraction, data size, distinct values count and optional range
 * @return the estimate produced by the builder
 */
private SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics) { double nullsFraction = columnStatistics.getNullsFraction().getValue(); double nonNullRowsCount = tableStatistics.getRowCount().getValue() * (1.0 - nullsFraction); double averageRowSize = nonNullRowsCount == 0 ? 0 : columnStatistics.getDataSize().getValue() / nonNullRowsCount; SymbolStatsEstimate.Builder result = SymbolStatsEstimate.builder(); result.setNullsFraction(nullsFraction); result.setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue()); result.setAverageRowSize(averageRowSize); columnStatistics.getRange().ifPresent(range -> { result.setLowValue(range.getMin()); result.setHighValue(range.getMax()); }); return result.build(); } }
/**
 * Builds a numeric range from optional minimum and maximum column values.
 *
 * @param min optional lower bound value
 * @param max optional upper bound value
 * @param columnType type of the column; also passed to {@code toDouble} for conversion
 * @return empty when the column type is a {@code VarcharType} or when either bound is
 *         absent; otherwise a range of the two bounds converted with {@code toDouble}
 */
private static Optional<DoubleRange> toRange(Optional<Object> min, Optional<Object> max, Type columnType)
{
    // Short-circuit order matters: the type check comes first, then the bounds.
    if (columnType instanceof VarcharType || !min.isPresent() || !max.isPresent()) {
        return Optional.empty();
    }

    double low = toDouble(min.get(), columnType);
    double high = toDouble(max.get(), columnType);
    return Optional.of(new DoubleRange(low, high));
}
/**
 * Asserts that {@code actual} matches {@code expected}, choosing the comparison by the
 * runtime type of {@code actual}:
 * <ul>
 * <li>{@code Slice}: both classes must be equal and the UTF-8 contents must be equal;</li>
 * <li>{@code DoubleRange}: the minimums and the maximums are compared recursively;</li>
 * <li>anything else: both values are converted with {@code toDouble} and compared with a
 * delta proportional to the magnitude of the expected value.</li>
 * </ul>
 *
 * @param actual the value under test
 * @param expected the reference value
 * @param comparedValue description of what is being compared, used as the assertion message
 */
private void assertClose(Object actual, Object expected, String comparedValue)
{
    if (actual instanceof Slice) {
        assertEquals(actual.getClass(), expected.getClass(), comparedValue);
        // Pass the description here as well so a content mismatch names the compared value.
        assertEquals(((Slice) actual).toStringUtf8(), ((Slice) expected).toStringUtf8(), comparedValue);
    }
    else if (actual instanceof DoubleRange) {
        DoubleRange actualRange = (DoubleRange) actual;
        DoubleRange expectedRange = (DoubleRange) expected;
        assertClose(actualRange.getMin(), expectedRange.getMin(), comparedValue);
        assertClose(actualRange.getMax(), expectedRange.getMax(), comparedValue);
    }
    else {
        double actualDouble = toDouble(actual);
        double expectedDouble = toDouble(expected);
        // The delta must be non-negative: scaling the raw expected value would give a
        // negative delta for negative expectations, which no difference can satisfy.
        double delta = Math.abs(expectedDouble) * tolerance;
        assertEquals(actualDouble, expectedDouble, delta, comparedValue);
    }
}
/**
 * Builds a numeric range from optional minimum and maximum column values.
 *
 * @param min optional lower bound value
 * @param max optional upper bound value
 * @param columnType type of the column; also passed to {@code toDouble} for conversion
 * @return empty when the column type is a {@code VarcharType}, a {@code CharType} or equal
 *         to {@code TIME}, or when either bound is absent; otherwise a range of the two
 *         bounds converted with {@code toDouble}
 */
private static Optional<DoubleRange> toRange(Optional<Object> min, Optional<Object> max, Type columnType)
{
    // Short-circuit order matters: the type checks come first, then the bounds.
    if (columnType instanceof VarcharType
            || columnType instanceof CharType
            || columnType.equals(TIME)
            || !min.isPresent()
            || !max.isPresent()) {
        return Optional.empty();
    }

    double low = toDouble(min.get(), columnType);
    double high = toDouble(max.get(), columnType);
    return Optional.of(new DoubleRange(low, high));
}
/**
 * Converts decimal statistics into a range of doubles.
 *
 * @param statistics decimal statistics whose minimum and maximum may each be absent
 * @return a range of the minimum and maximum converted with {@code doubleValue()}, or
 *         empty when either of them is absent
 */
private static Optional<DoubleRange> createDecimalRange(DecimalStatistics statistics)
{
    if (!statistics.getMin().isPresent() || !statistics.getMax().isPresent()) {
        return Optional.empty();
    }

    double low = statistics.getMin().get().doubleValue();
    double high = statistics.getMax().get().doubleValue();
    return Optional.of(new DoubleRange(low, high));
}
/**
 * Creates a range from integer bounds, passing each bound through
 * {@code normalizeIntegerValue} for the given type first.
 *
 * @param type type handed to {@code normalizeIntegerValue}
 * @param min raw lower bound
 * @param max raw upper bound
 * @return a range of the two normalized bounds
 */
private static DoubleRange createIntegerRange(Type type, long min, long max)
{
    double low = normalizeIntegerValue(type, min);
    double high = normalizeIntegerValue(type, max);
    return new DoubleRange(low, high);
}
/**
 * Converts date statistics into a range of doubles.
 *
 * @param statistics date statistics whose minimum and maximum may each be absent
 * @return a range of the minimum and maximum converted with {@code toEpochDay()}, or
 *         empty when either of them is absent
 */
private static Optional<DoubleRange> createDateRange(DateStatistics statistics)
{
    if (!statistics.getMin().isPresent() || !statistics.getMax().isPresent()) {
        return Optional.empty();
    }

    double low = statistics.getMin().get().toEpochDay();
    double high = statistics.getMax().get().toEpochDay();
    return Optional.of(new DoubleRange(low, high));
}
/**
 * Derives the value range of a partitioning column from the partition key values.
 *
 * @param column partitioning column whose key is looked up in every partition
 * @param type column type; checked with {@code isRangeSupported} and used for conversion
 * @param partitions partitions to inspect
 * @return empty when {@code isRangeSupported} rejects the type or when no partition has a
 *         non-null key for the column; otherwise the range from the smallest to the
 *         largest key value after {@code convertPartitionValueToDouble}
 */
@VisibleForTesting
static Optional<DoubleRange> calculateRangeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions)
{
    if (!isRangeSupported(type)) {
        return Optional.empty();
    }

    // Null keys are dropped before conversion.
    List<Double> values = partitions.stream()
            .map(partition -> partition.getKeys().get(column))
            .filter(key -> !key.isNull())
            .map(key -> convertPartitionValueToDouble(type, key.getValue()))
            .collect(toImmutableList());

    if (values.isEmpty()) {
        return Optional.empty();
    }

    // Seed both bounds with the first value; comparing it against itself would change nothing,
    // so the scan starts at the second element.
    double smallest = values.get(0);
    double largest = smallest;
    for (int index = 1; index < values.size(); index++) {
        double candidate = values.get(index);
        largest = candidate > largest ? candidate : largest;
        smallest = candidate < smallest ? candidate : smallest;
    }
    return Optional.of(new DoubleRange(smallest, largest));
}
/**
 * Converts double statistics into a range.
 *
 * @param statistics double statistics whose minimum and maximum may each be absent
 * @return a range of the minimum and maximum, or empty when either is absent or NaN
 */
private static Optional<DoubleRange> createDoubleRange(DoubleStatistics statistics)
{
    if (!statistics.getMin().isPresent() || !statistics.getMax().isPresent()) {
        return Optional.empty();
    }
    if (isNaN(statistics.getMin().getAsDouble()) || isNaN(statistics.getMax().getAsDouble())) {
        return Optional.empty();
    }

    double low = statistics.getMin().getAsDouble();
    double high = statistics.getMax().getAsDouble();
    return Optional.of(new DoubleRange(low, high));
}
/**
 * Creates column statistics that carry only a range; the first and third arguments of
 * {@code createColumnStatistics} are passed as empty.
 *
 * @param min lower bound of the range
 * @param max upper bound of the range
 * @return the statistics returned by {@code createColumnStatistics}
 */
private ColumnStatistics rangeStatistics(double min, double max)
{
    DoubleRange range = new DoubleRange(min, max);
    return createColumnStatistics(Optional.empty(), Optional.of(range), Optional.empty());
}
/**
 * Creates column statistics with a distinct values count and a range; the third argument
 * of {@code createColumnStatistics} is passed as empty.
 *
 * @param distinctValuesCount value passed as the first argument
 * @param min lower bound of the range
 * @param max upper bound of the range
 * @return the statistics returned by {@code createColumnStatistics}
 */
private ColumnStatistics columnStatistics(double distinctValuesCount, double min, double max)
{
    DoubleRange range = new DoubleRange(min, max);
    return createColumnStatistics(Optional.of(distinctValuesCount), Optional.of(range), Optional.empty());
}
assertRange(Double.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY); assertRange(Float.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); assertThatThrownBy(() -> new DoubleRange(Double.NaN, 0)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(0, Double.NaN)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Double.NaN, Double.NaN)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Float.NaN, Float.NaN)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(1, 0)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(0, Double.NEGATIVE_INFINITY)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(0, Float.NEGATIVE_INFINITY)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(-1.0 * Double.MAX_VALUE, Double.NEGATIVE_INFINITY)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(-1.0 * Float.MAX_VALUE, Double.NEGATIVE_INFINITY)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(-1.0 * Double.MAX_VALUE, Float.NEGATIVE_INFINITY)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Double.POSITIVE_INFINITY, Double.MAX_VALUE)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Float.POSITIVE_INFINITY, Double.MAX_VALUE)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Double.POSITIVE_INFINITY, Float.MAX_VALUE)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Float.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new 
DoubleRange(Double.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Double.POSITIVE_INFINITY, 0)).isInstanceOf(IllegalArgumentException.class); assertThatThrownBy(() -> new DoubleRange(Float.POSITIVE_INFINITY, 0)).isInstanceOf(IllegalArgumentException.class);
assertEquals(calculateRange(VARCHAR, ImmutableList.of(integerRange(OptionalLong.empty(), OptionalLong.empty()))), Optional.empty()); assertEquals(calculateRange(VARCHAR, ImmutableList.of(integerRange(1, 2))), Optional.empty()); assertEquals(calculateRange(BIGINT, ImmutableList.of(integerRange(1, 2))), Optional.of(new DoubleRange(1, 2))); assertEquals(calculateRange(BIGINT, ImmutableList.of(integerRange(Long.MIN_VALUE, Long.MAX_VALUE))), Optional.of(new DoubleRange(Long.MIN_VALUE, Long.MAX_VALUE))); assertEquals(calculateRange(INTEGER, ImmutableList.of(integerRange(Long.MIN_VALUE, Long.MAX_VALUE))), Optional.of(new DoubleRange(Integer.MIN_VALUE, Integer.MAX_VALUE))); assertEquals(calculateRange(SMALLINT, ImmutableList.of(integerRange(Long.MIN_VALUE, Long.MAX_VALUE))), Optional.of(new DoubleRange(Short.MIN_VALUE, Short.MAX_VALUE))); assertEquals(calculateRange(TINYINT, ImmutableList.of(integerRange(Long.MIN_VALUE, Long.MAX_VALUE))), Optional.of(new DoubleRange(Byte.MIN_VALUE, Byte.MAX_VALUE))); assertEquals(calculateRange(BIGINT, ImmutableList.of(integerRange(1, 5), integerRange(3, 7))), Optional.of(new DoubleRange(1, 7))); assertEquals(calculateRange(BIGINT, ImmutableList.of(integerRange(OptionalLong.empty(), OptionalLong.empty()), integerRange(3, 7))), Optional.of(new DoubleRange(3, 7))); assertEquals(calculateRange(BIGINT, ImmutableList.of(integerRange(OptionalLong.empty(), OptionalLong.of(8)), integerRange(3, 7))), Optional.of(new DoubleRange(3, 7))); assertEquals(calculateRange(DOUBLE, ImmutableList.of(integerRange(1, 2))), Optional.empty()); assertEquals(calculateRange(REAL, ImmutableList.of(integerRange(1, 2))), Optional.empty()); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(OptionalDouble.empty(), OptionalDouble.empty()))), Optional.empty()); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(0.1, 0.2))), Optional.of(new DoubleRange(0.1, 0.2))); assertEquals(calculateRange(BIGINT, ImmutableList.of(doubleRange(0.1, 0.2))), 
Optional.empty()); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(0.1, 0.2), doubleRange(0.15, 0.25))), Optional.of(new DoubleRange(0.1, 0.25))); assertEquals(calculateRange(REAL, ImmutableList.of(doubleRange(0.1, 0.2), doubleRange(0.15, 0.25))), Optional.of(new DoubleRange(0.1, 0.25))); assertEquals(calculateRange(REAL, ImmutableList.of(doubleRange(OptionalDouble.empty(), OptionalDouble.of(0.2)), doubleRange(0.15, 0.25))), Optional.of(new DoubleRange(0.15, 0.25))); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(NaN, 0.2))), Optional.empty()); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(0.1, NaN))), Optional.empty()); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(NaN, NaN))), Optional.empty()); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY))), Optional.of(new DoubleRange(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY))); assertEquals(calculateRange(REAL, ImmutableList.of(doubleRange(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY))), Optional.of(new DoubleRange(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY))); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY))), Optional.of(new DoubleRange(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY))); assertEquals(calculateRange(DOUBLE, ImmutableList.of(doubleRange(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY), doubleRange(0.1, 0.2))), Optional.of(new DoubleRange(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY))); assertEquals(calculateRange(DATE, ImmutableList.of(doubleRange(0.1, 0.2))), Optional.empty()); assertEquals(calculateRange(DATE, ImmutableList.of(dateRange("1970-01-01", "1970-01-02"))), Optional.of(new DoubleRange(0, 1))); assertEquals(calculateRange(DATE, ImmutableList.of(dateRange(Optional.empty(), Optional.empty()))), Optional.empty()); assertEquals(calculateRange(DATE, 
ImmutableList.of(dateRange(Optional.of("1970-01-01"), Optional.empty()))), Optional.empty());
BIGINT, ImmutableList.of(partition("p1=string1/p2=__HIVE_DEFAULT_PARTITION__"), partition("p1=string1/p2=1"))), Optional.of(new DoubleRange(1, 1))); assertEquals( calculateRangeForPartitioningKey( BIGINT, ImmutableList.of(partition("p1=string1/p2=2"), partition("p1=string1/p2=1"))), Optional.of(new DoubleRange(1, 2))); assertEquals( calculateRangeForPartitioningKey( BIGINT, ImmutableList.of(partition("p1=string1/p2=2"), partition("p1=string1/p2=3"), partition("p1=string1/p2=1"))), Optional.of(new DoubleRange(1, 3)));
@Test public void testNullFraction() { SchemaTableName schemaTableName = new SchemaTableName("sf1", Table.WEB_SITE.getName()); ConnectorTableHandle tableHandle = metadata.getTableHandle(session, schemaTableName); TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue()); Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle); // some null values assertColumnStatistics( tableStatistics.getColumnStatistics().get(columnHandles.get(WebSiteColumn.WEB_REC_END_DATE.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0.5)) .setDistinctValuesCount(Estimate.of(3)) .setRange(new DoubleRange(10819L, 11549L)) .build()); }
/**
 * Feeds a single set of partition statistics, keyed by {@code UNPARTITIONED_ID}, into a
 * {@code MetastoreHiveStatisticsProvider} and checks the resulting table statistics:
 * row count 1000 and, for the one column, range [-100, 100], nulls fraction 0.5 and
 * distinct values count 300.
 */
@Test
public void testGetTableStatisticsUnpartitioned()
{
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(
                    COLUMN,
                    createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();

    // The provider ignores its inputs and always answers with the statistics built above.
    MetastoreHiveStatisticsProvider provider = new MetastoreHiveStatisticsProvider((table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    HiveSessionProperties sessionProperties = new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession connectorSession = new TestingConnectorSession(sessionProperties.getSessionProperties());
    HiveColumnHandle handle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    ColumnStatistics expectedColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(handle, expectedColumn)
            .build();

    TableStatistics actual = provider.getTableStatistics(
            connectorSession,
            TABLE,
            ImmutableMap.of(COLUMN, handle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)));
    assertEquals(actual, expected);
}