private static void testMinStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(minStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(minStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(minStringTruncateToValidRange(value, ORIGINAL), prefix); } else { // truncate in suffix (if at all) assertEquals(minStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, minStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
/**
 * Builds the {@link StringStatistics} a test expects for the given writer version.
 *
 * @param version writer version handed to both truncation helpers
 * @param min untruncated minimum, passed through {@code minStringTruncateToValidRange}
 * @param max untruncated maximum, passed through {@code maxStringTruncateToValidRange}
 * @param sum value forwarded unchanged as the statistics sum
 * @return statistics holding the truncated minimum, truncated maximum, and {@code sum}
 */
private static StringStatistics createExpectedStringStatistics(HiveWriterVersion version, Slice min, Slice max, int sum)
{
    Slice truncatedMin = minStringTruncateToValidRange(min, version);
    Slice truncatedMax = maxStringTruncateToValidRange(max, version);
    return new StringStatistics(truncatedMin, truncatedMax, sum);
}
}
/**
 * Builds the {@link StringStatistics} a test expects for the given writer version.
 *
 * @param version writer version handed to both truncation helpers
 * @param min untruncated minimum, passed through {@code minStringTruncateToValidRange}
 * @param max untruncated maximum, passed through {@code maxStringTruncateToValidRange}
 * @param sum value forwarded unchanged as the statistics sum
 * @return statistics holding the truncated minimum, truncated maximum, and {@code sum}
 */
private static StringStatistics createExpectedStringStatistics(HiveWriterVersion version, Slice min, Slice max, int sum)
{
    Slice truncatedMin = minStringTruncateToValidRange(min, version);
    Slice truncatedMax = maxStringTruncateToValidRange(max, version);
    return new StringStatistics(truncatedMin, truncatedMax, sum);
}
/**
 * Converts DWRF protobuf string statistics into {@link StringStatistics}.
 *
 * <p>Returns {@code null} when the writer version is {@code ORIGINAL} and the statistics are
 * not row-group statistics. Otherwise the maximum and minimum, when present, are truncated
 * with {@code maxStringTruncateToValidRange} / {@code minStringTruncateToValidRange} for the
 * given writer version; an absent bound stays {@code null} and an absent sum becomes 0.
 *
 * @param hiveWriterVersion writer version passed to the truncation helpers
 * @param stringStatistics protobuf statistics to convert
 * @param isRowGroup whether the statistics are row-group statistics
 * @return the converted statistics, or {@code null} as described above
 */
@VisibleForTesting
static StringStatistics toStringStatistics(HiveWriterVersion hiveWriterVersion, DwrfProto.StringStatistics stringStatistics, boolean isRowGroup)
{
    if (!isRowGroup && hiveWriterVersion == ORIGINAL) {
        return null;
    }

    Slice maximum = null;
    if (stringStatistics.hasMaximum()) {
        Slice rawMaximum = byteStringToSlice(stringStatistics.getMaximumBytes());
        maximum = maxStringTruncateToValidRange(rawMaximum, hiveWriterVersion);
    }

    Slice minimum = null;
    if (stringStatistics.hasMinimum()) {
        Slice rawMinimum = byteStringToSlice(stringStatistics.getMinimumBytes());
        minimum = minStringTruncateToValidRange(rawMinimum, hiveWriterVersion);
    }

    long sum = 0;
    if (stringStatistics.hasSum()) {
        sum = stringStatistics.getSum();
    }

    return new StringStatistics(minimum, maximum, sum);
}
// Each assertion compares minStringTruncateToValidRange(value, ORIGINAL) against an expected
// slice: value, minSlice, value, and prefix, in that order.
// NOTE(review): the definitions of value, minSlice, and prefix are outside the visible source;
// these statements may belong to different enclosing scopes — confirm before relying on them together.
assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value); assertEquals(minStringTruncateToValidRange(value, ORIGINAL), minSlice); assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value); assertEquals(minStringTruncateToValidRange(value, ORIGINAL), prefix);
/**
 * Converts ORC protobuf string statistics into {@link StringStatistics}.
 *
 * <p>Returns {@code null} when the writer version is {@code ORIGINAL} and the statistics are
 * not row-group statistics. Otherwise the maximum and minimum, when present, are truncated
 * with {@code maxStringTruncateToValidRange} / {@code minStringTruncateToValidRange} for the
 * given writer version; an absent bound stays {@code null} and an absent sum becomes 0.
 *
 * @param hiveWriterVersion writer version passed to the truncation helpers
 * @param stringStatistics protobuf statistics to convert
 * @param isRowGroup whether the statistics are row-group statistics
 * @return the converted statistics, or {@code null} as described above
 */
static StringStatistics toStringStatistics(HiveWriterVersion hiveWriterVersion, OrcProto.StringStatistics stringStatistics, boolean isRowGroup)
{
    if (!isRowGroup && hiveWriterVersion == ORIGINAL) {
        return null;
    }

    Slice maximum = null;
    if (stringStatistics.hasMaximum()) {
        Slice rawMaximum = byteStringToSlice(stringStatistics.getMaximumBytes());
        maximum = maxStringTruncateToValidRange(rawMaximum, hiveWriterVersion);
    }

    Slice minimum = null;
    if (stringStatistics.hasMinimum()) {
        Slice rawMinimum = byteStringToSlice(stringStatistics.getMinimumBytes());
        minimum = minStringTruncateToValidRange(rawMinimum, hiveWriterVersion);
    }

    long sum = 0;
    if (stringStatistics.hasSum()) {
        sum = stringStatistics.getSum();
    }

    return new StringStatistics(minimum, maximum, sum);
}
// When expected statistics are present, rebuild them with the min and max passed through the
// truncation helpers for HiveWriterVersion.ORC_HIVE_8732; the sum is carried over unchanged.
// NOTE(review): the closing brace of this if block is outside the visible source.
if (expectedStringStatistics != null) { expectedStringStatistics = new StringStatistics( minStringTruncateToValidRange(expectedStringStatistics.getMin(), HiveWriterVersion.ORC_HIVE_8732), maxStringTruncateToValidRange(expectedStringStatistics.getMax(), HiveWriterVersion.ORC_HIVE_8732), expectedStringStatistics.getSum());
private static void testMinStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(minStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(minStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(minStringTruncateToValidRange(value, ORIGINAL), prefix); } else { // truncate in suffix (if at all) assertEquals(minStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, minStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
/**
 * Builds the {@link StringStatistics} a test expects for the given writer version.
 *
 * @param version writer version handed to both truncation helpers
 * @param min untruncated minimum, passed through {@code minStringTruncateToValidRange}
 * @param max untruncated maximum, passed through {@code maxStringTruncateToValidRange}
 * @param sum value forwarded unchanged as the statistics sum
 * @return statistics holding the truncated minimum, truncated maximum, and {@code sum}
 */
private static StringStatistics createExpectedStringStatistics(HiveWriterVersion version, Slice min, Slice max, int sum)
{
    Slice truncatedMin = minStringTruncateToValidRange(min, version);
    Slice truncatedMax = maxStringTruncateToValidRange(max, version);
    return new StringStatistics(truncatedMin, truncatedMax, sum);
}
/**
 * Builds the {@link StringStatistics} a test expects for the given writer version.
 *
 * @param version writer version handed to both truncation helpers
 * @param min untruncated minimum, passed through {@code minStringTruncateToValidRange}
 * @param max untruncated maximum, passed through {@code maxStringTruncateToValidRange}
 * @param sum value forwarded unchanged as the statistics sum
 * @return statistics holding the truncated minimum, truncated maximum, and {@code sum}
 */
private static StringStatistics createExpectedStringStatistics(HiveWriterVersion version, Slice min, Slice max, int sum)
{
    Slice truncatedMin = minStringTruncateToValidRange(min, version);
    Slice truncatedMax = maxStringTruncateToValidRange(max, version);
    return new StringStatistics(truncatedMin, truncatedMax, sum);
}
}
/**
 * Converts DWRF protobuf string statistics into {@link StringStatistics}.
 *
 * <p>Returns {@code null} when the writer version is {@code ORIGINAL} and the statistics are
 * not row-group statistics. Otherwise the maximum and minimum, when present, are truncated
 * with {@code maxStringTruncateToValidRange} / {@code minStringTruncateToValidRange} for the
 * given writer version; an absent bound stays {@code null} and an absent sum becomes 0.
 *
 * @param hiveWriterVersion writer version passed to the truncation helpers
 * @param stringStatistics protobuf statistics to convert
 * @param isRowGroup whether the statistics are row-group statistics
 * @return the converted statistics, or {@code null} as described above
 */
@VisibleForTesting
static StringStatistics toStringStatistics(HiveWriterVersion hiveWriterVersion, DwrfProto.StringStatistics stringStatistics, boolean isRowGroup)
{
    if (!isRowGroup && hiveWriterVersion == ORIGINAL) {
        return null;
    }

    Slice maximum = null;
    if (stringStatistics.hasMaximum()) {
        Slice rawMaximum = byteStringToSlice(stringStatistics.getMaximumBytes());
        maximum = maxStringTruncateToValidRange(rawMaximum, hiveWriterVersion);
    }

    Slice minimum = null;
    if (stringStatistics.hasMinimum()) {
        Slice rawMinimum = byteStringToSlice(stringStatistics.getMinimumBytes());
        minimum = minStringTruncateToValidRange(rawMinimum, hiveWriterVersion);
    }

    long sum = 0;
    if (stringStatistics.hasSum()) {
        sum = stringStatistics.getSum();
    }

    return new StringStatistics(minimum, maximum, sum);
}
// Each assertion compares minStringTruncateToValidRange(value, ORIGINAL) against an expected
// slice: value, minSlice, value, and prefix, in that order.
// NOTE(review): the definitions of value, minSlice, and prefix are outside the visible source;
// these statements may belong to different enclosing scopes — confirm before relying on them together.
assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value); assertEquals(minStringTruncateToValidRange(value, ORIGINAL), minSlice); assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value); assertEquals(minStringTruncateToValidRange(value, ORIGINAL), prefix);
/**
 * Converts ORC protobuf string statistics into {@link StringStatistics}.
 *
 * <p>Returns {@code null} when the writer version is {@code ORIGINAL} and the statistics are
 * not row-group statistics. Otherwise the maximum and minimum, when present, are truncated
 * with {@code maxStringTruncateToValidRange} / {@code minStringTruncateToValidRange} for the
 * given writer version; an absent bound stays {@code null} and an absent sum becomes 0.
 *
 * @param hiveWriterVersion writer version passed to the truncation helpers
 * @param stringStatistics protobuf statistics to convert
 * @param isRowGroup whether the statistics are row-group statistics
 * @return the converted statistics, or {@code null} as described above
 */
static StringStatistics toStringStatistics(HiveWriterVersion hiveWriterVersion, OrcProto.StringStatistics stringStatistics, boolean isRowGroup)
{
    if (!isRowGroup && hiveWriterVersion == ORIGINAL) {
        return null;
    }

    Slice maximum = null;
    if (stringStatistics.hasMaximum()) {
        Slice rawMaximum = byteStringToSlice(stringStatistics.getMaximumBytes());
        maximum = maxStringTruncateToValidRange(rawMaximum, hiveWriterVersion);
    }

    Slice minimum = null;
    if (stringStatistics.hasMinimum()) {
        Slice rawMinimum = byteStringToSlice(stringStatistics.getMinimumBytes());
        minimum = minStringTruncateToValidRange(rawMinimum, hiveWriterVersion);
    }

    long sum = 0;
    if (stringStatistics.hasSum()) {
        sum = stringStatistics.getSum();
    }

    return new StringStatistics(minimum, maximum, sum);
}
// When expected statistics are present, rebuild them with the min and max passed through the
// truncation helpers for HiveWriterVersion.ORC_HIVE_8732; the sum is carried over unchanged.
// NOTE(review): the closing brace of this if block is outside the visible source.
if (expectedStringStatistics != null) { expectedStringStatistics = new StringStatistics( minStringTruncateToValidRange(expectedStringStatistics.getMin(), HiveWriterVersion.ORC_HIVE_8732), maxStringTruncateToValidRange(expectedStringStatistics.getMax(), HiveWriterVersion.ORC_HIVE_8732), expectedStringStatistics.getSum());