/**
 * Creates a user-defined predicate node that wraps an already-built predicate instance.
 *
 * @param column the column the predicate applies to (handed to the superclass constructor)
 * @param udpInstance the predicate instance; validated with {@code checkNotNull}
 */
UserDefinedByInstance(Column<T> column, U udpInstance) {
  super(column);
  this.udpInstance = checkNotNull(udpInstance, "udpInstance");

  // Precompute the textual form: lower-cased simple class name, then column path and instance.
  String label = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
  String dottedPath = column.getColumnPath().toDotString();
  this.toString = label + "(" + dottedPath + ", " + udpInstance + ")";
}
protected ColumnFilterPredicate(Column<T> column, T value) { this.column = checkNotNull(column, "column"); // Eq and NotEq allow value to be null, Lt, Gt, LtEq, GtEq however do not, so they guard against // null in their own constructors. this.value = value; String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + value + ")"; }
/**
 * Creates a user-defined predicate node identified by the predicate's class.
 *
 * @param column the column the predicate applies to (handed to the superclass constructor)
 * @param udpClass the predicate class; validated with {@code checkNotNull}
 */
UserDefinedByClass(Column<T> column, Class<U> udpClass) {
  super(column);
  this.udpClass = checkNotNull(udpClass, "udpClass");

  // Precompute the textual form: lower-cased simple class name, then column path and class name.
  String label = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
  String dottedPath = column.getColumnPath().toDotString();
  this.toString = label + "(" + dottedPath + ", " + udpClass.getName() + ")";

  // Instantiate the predicate once right away so that a class which cannot be
  // instantiated is detected at construction time.
  getUserDefinedPredicate();
}
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(GtEq<T> gtEq) { Column<T> filterColumn = gtEq.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never greater than or // equal to a value. for all x, null is never >= x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v >= someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = gtEq.getValue(); // drop if value > max return stats.compareMaxToValue(value) < 0; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(Lt<T> lt) { Column<T> filterColumn = lt.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never less than a // value. for all x, null is never < x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v < someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = lt.getValue(); // drop if value <= min return stats.compareMinToValue(value) >= 0; }
private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(UserDefined<T, U> ud, boolean inverted) { Column<T> filterColumn = ud.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); U udp = ud.getUserDefinedPredicate(); // The column is missing, thus all null. Check if the predicate keeps null. if (meta == null) { if (inverted) { return udp.keep(null); } else { return !udp.keep(null); } } if (hasNonDictionaryPages(meta)) { return BLOCK_MIGHT_MATCH; } try { Set<T> dictSet = expandDictionary(meta); if (dictSet == null) { return BLOCK_MIGHT_MATCH; } for (T entry : dictSet) { boolean keep = udp.keep(entry); if ((keep && !inverted) || (!keep && inverted)) return BLOCK_MIGHT_MATCH; } return BLOCK_CANNOT_MATCH; } catch (IOException e) { LOG.warn("Failed to process dictionary for filter evaluation.", e); } return BLOCK_MIGHT_MATCH; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(Gt<T> gt) { Column<T> filterColumn = gt.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never greater than a // value. for all x, null is never > x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v > someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = gt.getValue(); // drop if value >= max return stats.compareMaxToValue(value) <= 0; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(Lt<T> lt) { Column<T> filterColumn = lt.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never less than a // value. for all x, null is never < x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v < someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = lt.getValue(); // drop if value <= min return stats.compareMinToValue(value) >= 0; }
@Override public <T extends Comparable<T>> Boolean visit(LtEq<T> ltEq) { Column<T> filterColumn = ltEq.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); filterColumn.getColumnPath();
@Override public <T extends Comparable<T>> Boolean visit(Gt<T> gt) { Column<T> filterColumn = gt.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath());
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(Gt<T> gt) { Column<T> filterColumn = gt.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never greater than a // value. for all x, null is never > x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v > someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = gt.getValue(); // drop if value >= max return stats.compareMaxToValue(value) <= 0; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(LtEq<T> ltEq) { Column<T> filterColumn = ltEq.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never less than or // equal to a value. for all x, null is never <= x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v <= someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = ltEq.getValue(); // drop if value < min return stats.compareMinToValue(value) > 0; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(GtEq<T> gtEq) { Column<T> filterColumn = gtEq.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never greater than or // equal to a value. for all x, null is never >= x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v >= someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = gtEq.getValue(); // drop if value > max return stats.compareMaxToValue(value) < 0; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(LtEq<T> ltEq) { Column<T> filterColumn = ltEq.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never less than or // equal to a value. for all x, null is never <= x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v <= someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = ltEq.getValue(); // drop if value < min return stats.compareMinToValue(value) > 0; }
private <T extends Comparable<T>> void validateColumn(Column<T> column) { ColumnPath path = column.getColumnPath(); Class<?> alreadySeen = columnTypesEncountered.get(path); if (alreadySeen != null && !alreadySeen.equals(column.getColumnType())) { throw new IllegalArgumentException("Column: " + path.toDotString() + " was provided with different types in the same predicate." + " Found both: (" + alreadySeen + ", " + column.getColumnType() + ")"); } if (alreadySeen == null) { columnTypesEncountered.put(path, column.getColumnType()); } ColumnDescriptor descriptor = getColumnDescriptor(path); if (descriptor == null) { // the column is missing from the schema. evaluation uses calls // updateNull() a value is missing, so this will be handled correctly. return; } if (descriptor.getMaxRepetitionLevel() > 0) { throw new IllegalArgumentException("FilterPredicates do not currently support repeated columns. " + "Column " + path.toDotString() + " is repeated."); } ValidTypeMap.assertTypeValid(column, descriptor.getType()); }
private <T extends Comparable<T>> void validateColumn(Column<T> column) { ColumnPath path = column.getColumnPath(); Class<?> alreadySeen = columnTypesEncountered.get(path); if (alreadySeen != null && !alreadySeen.equals(column.getColumnType())) { throw new IllegalArgumentException("Column: " + path.toDotString() + " was provided with different types in the same predicate." + " Found both: (" + alreadySeen + ", " + column.getColumnType() + ")"); } if (alreadySeen == null) { columnTypesEncountered.put(path, column.getColumnType()); } ColumnDescriptor descriptor = getColumnDescriptor(path); if (descriptor == null) { // the column is missing from the schema. evaluation uses calls // updateNull() a value is missing, so this will be handled correctly. return; } if (descriptor.getMaxRepetitionLevel() > 0) { throw new IllegalArgumentException("FilterPredicates do not currently support repeated columns. " + "Column " + path.toDotString() + " is repeated."); } ValidTypeMap.assertTypeValid(column, descriptor.getType()); }
/**
 * Applies a column-index based function to one column and turns its result into row ranges.
 *
 * @param column the column the predicate refers to
 * @param func function evaluated against the column's {@code ColumnIndex}
 * @param rangesForMissingColumns result to return when the column's path is not among
 *        the known columns
 * @return {@code rangesForMissingColumns} for an unknown column, {@code allRows()} when
 *         no column index is available, otherwise the ranges built by
 *         {@code RowRanges.create} from the function's result and the offset index
 */
private RowRanges applyPredicate(Column<?> column, Function<ColumnIndex, PrimitiveIterator.OfInt> func, RowRanges rangesForMissingColumns) {
  ColumnPath path = column.getColumnPath();
  if (!columns.contains(path)) {
    return rangesForMissingColumns;
  }

  // The offset index is fetched first, before the column index is looked up.
  OffsetIndex offsetIndex = columnIndexStore.getOffsetIndex(path);
  ColumnIndex columnIndex = columnIndexStore.getColumnIndex(path);
  if (columnIndex != null) {
    return RowRanges.create(rowCount, func.apply(columnIndex), offsetIndex);
  }

  // Without a column index this column cannot narrow anything down.
  LOGGER.info("No column index for column {} is available; Unable to filter on this column", path);
  return allRows();
}
/**
 * Creates a user-defined predicate node that wraps an already-built predicate instance.
 *
 * @param column the column the predicate applies to (handed to the superclass constructor)
 * @param udpInstance the predicate instance; validated with {@code checkNotNull}
 */
UserDefinedByInstance(Column<T> column, U udpInstance) {
  super(column);
  this.udpInstance = checkNotNull(udpInstance, "udpInstance");

  // Precompute the textual form: lower-cased simple class name, then column path and instance.
  String label = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
  String dottedPath = column.getColumnPath().toDotString();
  this.toString = label + "(" + dottedPath + ", " + udpInstance + ")";
}
/**
 * Creates a user-defined predicate node identified by the predicate's class.
 *
 * @param column the column the predicate applies to (handed to the superclass constructor)
 * @param udpClass the predicate class; validated with {@code checkNotNull}
 */
UserDefinedByClass(Column<T> column, Class<U> udpClass) {
  super(column);
  this.udpClass = checkNotNull(udpClass, "udpClass");

  // Precompute the textual form: lower-cased simple class name, then column path and class name.
  String label = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
  String dottedPath = column.getColumnPath().toDotString();
  this.toString = label + "(" + dottedPath + ", " + udpClass.getName() + ")";

  // Instantiate the predicate once right away so that a class which cannot be
  // instantiated is detected at construction time.
  getUserDefinedPredicate();
}
protected ColumnFilterPredicate(Column<T> column, T value) { this.column = checkNotNull(column, "column"); // Eq and NotEq allow value to be null, Lt, Gt, LtEq, GtEq however do not, so they guard against // null in their own constructors. this.value = value; String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + value + ")"; }