public List<ColumnDescriptor> getColumns() { List<String[]> paths = this.getPaths(0); List<ColumnDescriptor> columns = new ArrayList<ColumnDescriptor>(paths.size()); for (String[] path : paths) { // TODO: optimize this PrimitiveType primitiveType = getType(path).asPrimitiveType(); columns.add(new ColumnDescriptor( path, primitiveType, getMaxRepetitionLevel(path), getMaxDefinitionLevel(path))); } return columns; }
public List<ColumnDescriptor> getColumns() { List<String[]> paths = this.getPaths(0); List<ColumnDescriptor> columns = new ArrayList<ColumnDescriptor>(paths.size()); for (String[] path : paths) { // TODO: optimize this PrimitiveType primitiveType = getType(path).asPrimitiveType(); columns.add(new ColumnDescriptor( path, primitiveType, getMaxRepetitionLevel(path), getMaxDefinitionLevel(path))); } return columns; }
/**
 * Forwards all arguments to the superclass implementation, then records a
 * {@link ColumnDescriptor} for this node and an array copy of {@code path}.
 *
 * @param r forwarded unchanged to {@code super.setLevels}
 * @param d forwarded unchanged to {@code super.setLevels}
 * @param fieldPath forwarded to the superclass and used as the descriptor's path
 * @param fieldIndexPath forwarded unchanged to {@code super.setLevels}
 * @param repetition forwarded unchanged to {@code super.setLevels}
 * @param path forwarded to the superclass and copied into the {@code path} field
 */
@Override
void setLevels(
    int r,
    int d,
    String[] fieldPath,
    int[] fieldIndexPath,
    List<ColumnIO> repetition,
    List<ColumnIO> path) {
  super.setLevels(r, d, fieldPath, fieldIndexPath, repetition, path);

  // The descriptor uses the levels reported by this node after the superclass call.
  PrimitiveType primitive = getType().asPrimitiveType();
  int repetitionLevel = getRepetitionLevel();
  int definitionLevel = getDefinitionLevel();
  this.columnDescriptor =
      new ColumnDescriptor(fieldPath, primitive, repetitionLevel, definitionLevel);

  // Copy the list into an array; the parameter shadows the field of the same name.
  ColumnIO[] pathCopy = new ColumnIO[path.size()];
  this.path = path.toArray(pathCopy);
}
/**
 * Forwards all arguments to the superclass implementation, then records a
 * {@link ColumnDescriptor} for this node and an array copy of {@code path}.
 *
 * @param r forwarded unchanged to {@code super.setLevels}
 * @param d forwarded unchanged to {@code super.setLevels}
 * @param fieldPath forwarded to the superclass and used as the descriptor's path
 * @param fieldIndexPath forwarded unchanged to {@code super.setLevels}
 * @param repetition forwarded unchanged to {@code super.setLevels}
 * @param path forwarded to the superclass and copied into the {@code path} field
 */
@Override
void setLevels(
    int r,
    int d,
    String[] fieldPath,
    int[] fieldIndexPath,
    List<ColumnIO> repetition,
    List<ColumnIO> path) {
  super.setLevels(r, d, fieldPath, fieldIndexPath, repetition, path);

  // The descriptor uses the levels reported by this node after the superclass call.
  PrimitiveType primitive = getType().asPrimitiveType();
  int repetitionLevel = getRepetitionLevel();
  int definitionLevel = getDefinitionLevel();
  this.columnDescriptor =
      new ColumnDescriptor(fieldPath, primitive, repetitionLevel, definitionLevel);

  // Copy the list into an array; the parameter shadows the field of the same name.
  ColumnIO[] pathCopy = new ColumnIO[path.size()];
  this.path = path.toArray(pathCopy);
}
/**
 * Creates the {@link ColumnDescriptor} for a single path.
 *
 * @param path the path to describe; passed to {@code getMaxRepetitionLevel},
 *     {@code getMaxDefinitionLevel} and {@code getType}
 * @return a new descriptor combining the path, its type converted with
 *     {@code asPrimitiveType()}, and its maximum repetition and definition levels
 */
public ColumnDescriptor getColumnDescription(String[] path) {
  // Levels are looked up before the type, in that order.
  final int repetitionLevel = getMaxRepetitionLevel(path);
  final int definitionLevel = getMaxDefinitionLevel(path);
  final PrimitiveType primitive = getType(path).asPrimitiveType();
  return new ColumnDescriptor(path, primitive, repetitionLevel, definitionLevel);
}
/**
 * Creates the {@link ColumnDescriptor} for a single path.
 *
 * @param path the path to describe; passed to {@code getMaxRepetitionLevel},
 *     {@code getMaxDefinitionLevel} and {@code getType}
 * @return a new descriptor combining the path, its type converted with
 *     {@code asPrimitiveType()}, and its maximum repetition and definition levels
 */
public ColumnDescriptor getColumnDescription(String[] path) {
  // Levels are looked up before the type, in that order.
  final int repetitionLevel = getMaxRepetitionLevel(path);
  final int definitionLevel = getMaxDefinitionLevel(path);
  final PrimitiveType primitive = getType(path).asPrimitiveType();
  return new ColumnDescriptor(path, primitive, repetitionLevel, definitionLevel);
}
@SuppressWarnings("unchecked") private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException { ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1); DictionaryPage page = dictionaries.readDictionaryPage(col); // the chunk may not be dictionary-encoded if (page == null) { return null; } Dictionary dict = page.getEncoding().initDictionary(col, page); Set dictSet = new HashSet<T>(); for (int i=0; i<=dict.getMaxId(); i++) { switch(meta.getType()) { case BINARY: dictSet.add(dict.decodeToBinary(i)); break; case INT32: dictSet.add(dict.decodeToInt(i)); break; case INT64: dictSet.add(dict.decodeToLong(i)); break; case FLOAT: dictSet.add(dict.decodeToFloat(i)); break; case DOUBLE: dictSet.add(dict.decodeToDouble(i)); break; default: LOG.warn("Unknown dictionary type{}", meta.getType()); } } return (Set<T>) dictSet; }
// An INT32 primitive annotated with the INT_32 original type is expected to map to INTEGER.
@Test
public void testMapInt32ToPrestoInteger()
{
    String[] columnPath = {"int_col"};
    RichColumnDescriptor intColumn = new RichColumnDescriptor(
            new ColumnDescriptor(columnPath, PrimitiveTypeName.INT32, 0, 1),
            new PrimitiveType(OPTIONAL, PrimitiveTypeName.INT32, "int_col", OriginalType.INT_32));

    assertEquals(getPrestoType(TupleDomain.all(), intColumn), INTEGER);
}
@Test public void testMapInt32WithoutOriginalTypeToPrestoInteger() { // int32 primitive should default to Presto integer if original type metadata isn't available PrimitiveType intType = new PrimitiveType(OPTIONAL, PrimitiveTypeName.INT32, "int_col"); ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"int_col"}, PrimitiveTypeName.INT32, 0, 1); RichColumnDescriptor intColumn = new RichColumnDescriptor(columnDescriptor, intType); assertEquals(getPrestoType(TupleDomain.all(), intColumn), INTEGER); }
@Test public void testMapInt32ToPrestoDate() { // int32 primitive with original type of date should map to a Presto date PrimitiveType dateType = new PrimitiveType(OPTIONAL, PrimitiveTypeName.INT32, "date_col", OriginalType.DATE); ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"date_col"}, PrimitiveTypeName.INT32, 0, 1); RichColumnDescriptor dateColumn = new RichColumnDescriptor(columnDescriptor, dateType); assertEquals(getPrestoType(TupleDomain.all(), dateColumn), DATE); } }
@SuppressWarnings("unchecked") private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException { ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1); DictionaryPage page = dictionaries.readDictionaryPage(col);
/**
 * Builds a tuple domain that constrains one column to a single varchar value.
 *
 * @param column supplies the path and type for the predicate's column key
 * @param type varchar type used to create the single-value domain
 * @param value the only value admitted by the domain
 * @return a tuple domain with exactly one column-to-domain entry
 */
private TupleDomain<ColumnDescriptor> getEffectivePredicate(RichColumnDescriptor column, VarcharType type, Slice value)
{
    // The key is a fresh descriptor with both levels set to 0, not the passed-in column itself.
    ColumnDescriptor key = new ColumnDescriptor(column.getPath(), column.getType(), 0, 0);
    Map<ColumnDescriptor, Domain> domains = singletonMap(key, singleValue(type, value));
    return withColumnDomains(domains);
}
// The predicate for an empty-slice value is expected to match a one-entry dictionary page.
@Test
public void testMatchesWithDescriptors()
        throws ParquetCorruptionException
{
    RichColumnDescriptor column = new RichColumnDescriptor(
            new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0),
            new PrimitiveType(OPTIONAL, BINARY, "Test column"));

    TupleDomainParquetPredicate predicate = new TupleDomainParquetPredicate(
            getEffectivePredicate(column, createVarcharType(255), EMPTY_SLICE),
            singletonList(column));

    // Four zero bytes with a dictionary size of 1.
    byte[] dictionaryBytes = {0, 0, 0, 0};
    DictionaryPage page = new DictionaryPage(Slices.wrappedBuffer(dictionaryBytes), 1, PLAIN_DICTIONARY);
    DictionaryDescriptor dictionary = new DictionaryDescriptor(column, Optional.of(page));

    assertTrue(predicate.matches(singletonMap(column, dictionary)));
}
// The predicate for "Test" is expected to match statistics whose min and max are both "Test".
@Test
public void testMatchesWithStatistics()
        throws ParquetCorruptionException
{
    String value = "Test";
    ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0);
    RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));
    TupleDomain<ColumnDescriptor> effectivePredicate = getEffectivePredicate(column, createVarcharType(255), utf8Slice(value));
    TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));

    Statistics stats = getStatsBasedOnType(column.getType());
    stats.setNumNulls(1L);
    // Encode explicitly as UTF-8 so the statistics bytes use the same encoding as the
    // utf8Slice predicate value, independent of the platform default charset.
    stats.setMinMaxFromBytes(
            value.getBytes(java.nio.charset.StandardCharsets.UTF_8),
            value.getBytes(java.nio.charset.StandardCharsets.UTF_8));

    assertTrue(parquetPredicate.matches(2, singletonMap(column, stats), ID, true));
}
// Checks the dictionary file name for plain column names and for column descriptors
// with a single-element and a multi-element path.
@Test
public void testDictionaryFileName() throws Exception {
  // String overload
  assertEquals("_foo.dict", GlobalDictionaryBuilder.dictionaryFileName("foo"));
  assertEquals("_a.b.c.dict", GlobalDictionaryBuilder.dictionaryFileName("a.b.c"));

  // ColumnDescriptor overload
  ColumnDescriptor singleSegment = new ColumnDescriptor(new String[]{"foo"}, INT64, 0, 1);
  assertEquals("_foo.dict", GlobalDictionaryBuilder.dictionaryFileName(singleSegment));

  ColumnDescriptor multiSegment = new ColumnDescriptor(new String[]{"a", "b", "c"}, INT64, 0, 1);
  assertEquals("_a.b.c.dict", GlobalDictionaryBuilder.dictionaryFileName(multiSegment));
}