/**
 * LE (<=) predicate.
 *
 * <p>Classifies a row group for {@code left <= right} from the min/max statistics of both
 * operands:
 * <ul>
 *   <li>{@code NONE} when the right maximum is strictly below the left minimum;</li>
 *   <li>the result of {@code checkNull} when the left maximum does not exceed the right minimum
 *       (presumably this accounts for nulls in either operand -- confirm against the helper);</li>
 *   <li>{@code SOME} in every other case.</li>
 * </ul>
 */
private static <C extends Comparable<C>> LogicalExpression createLEPredicate(
    LogicalExpression left,
    LogicalExpression right
) {
  return new ParquetComparisonPredicate<C>(left, right, (lhs, rhs) -> {
    // right max < left min: no left value can be <= any right value
    boolean rightEntirelyBelowLeft = rhs.compareMaxToValue(lhs.genericGetMin()) < 0;
    if (rightEntirelyBelowLeft) {
      return RowsMatch.NONE;
    }

    // left max <= right min: every pair of non-null values satisfies the condition
    boolean leftNeverAboveRight = lhs.compareMaxToValue(rhs.genericGetMin()) <= 0;
    if (leftNeverAboveRight) {
      return checkNull(lhs, rhs);
    }

    // ranges overlap, nothing can be concluded
    return RowsMatch.SOME;
  });
}
/**
 * LT (<) predicate.
 *
 * <p>Classifies a row group for {@code left < right} from the min/max statistics of both
 * operands:
 * <ul>
 *   <li>{@code NONE} when the right maximum is at or below the left minimum;</li>
 *   <li>the result of {@code checkNull} when the left maximum is strictly below the right minimum
 *       (presumably this accounts for nulls in either operand -- confirm against the helper);</li>
 *   <li>{@code SOME} in every other case.</li>
 * </ul>
 */
private static <C extends Comparable<C>> LogicalExpression createLTPredicate(
    LogicalExpression left,
    LogicalExpression right
) {
  return new ParquetComparisonPredicate<C>(left, right, (lhs, rhs) -> {
    // right max <= left min: no left value can be strictly below a right value
    boolean rightNeverAboveLeft = rhs.compareMaxToValue(lhs.genericGetMin()) <= 0;
    if (rightNeverAboveLeft) {
      return RowsMatch.NONE;
    }

    // left max < right min: every pair of non-null values satisfies the condition
    boolean leftEntirelyBelowRight = lhs.compareMaxToValue(rhs.genericGetMin()) < 0;
    if (leftEntirelyBelowRight) {
      return checkNull(lhs, rhs);
    }

    // ranges overlap, nothing can be concluded
    return RowsMatch.SOME;
  });
}
/**
 * NE (!=) predicate.
 *
 * <p>Classifies a row group for {@code left != right} from the min/max statistics of both
 * operands:
 * <ul>
 *   <li>when the two value ranges are disjoint, every pair of non-null values differs, so the
 *       result is delegated to {@code checkNull} (presumably this accounts for nulls in either
 *       operand -- confirm against the helper);</li>
 *   <li>{@code NONE} when both ranges collapse to the same single value, i.e. left max equals
 *       right min and right max equals left min;</li>
 *   <li>{@code SOME} in every other case.</li>
 * </ul>
 *
 * <p>The {@code NONE} test deliberately mirrors the "all rows match" test of the EQ predicate.
 * Merely having identical bounds on both sides (for example left in [1, 5] and right in [1, 5])
 * does not prove that the values are equal row by row, so that case must yield {@code SOME}.
 */
private static <C extends Comparable<C>> LogicalExpression createNEPredicate(
    LogicalExpression left,
    LogicalExpression right
) {
  return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
    // compare left max and right min
    int leftToRightComparison = leftStat.compareMaxToValue(rightStat.genericGetMin());
    // compare right max and left min
    int rightToLeftComparison = rightStat.compareMaxToValue(leftStat.genericGetMin());

    // if at least one comparison result is negative, the ranges do not overlap,
    // so no left value can be equal to a right value
    if (leftToRightComparison < 0 || rightToLeftComparison < 0) {
      return checkNull(leftStat, rightStat);
    }

    // if both comparison results are equal to 0, min and max values in each statistics are
    // the same and match each other, so no pair of values can differ;
    // nulls do not change this, since a comparison with null never evaluates to true
    return leftToRightComparison == 0 && rightToLeftComparison == 0
        ? RowsMatch.NONE
        : RowsMatch.SOME;
  });
}
/** * EQ (=) predicate */ private static <C extends Comparable<C>> LogicalExpression createEqualPredicate( LogicalExpression left, LogicalExpression right ) { return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> { // compare left max and right min int leftToRightComparison = leftStat.compareMaxToValue(rightStat.genericGetMin()); // compare right max and left min int rightToLeftComparison = rightStat.compareMaxToValue(leftStat.genericGetMin()); // if both comparison results are equal to 0 and both statistics have no nulls, // it means that min and max values in each statistics are the same and match each other, // return that all rows match the condition if (leftToRightComparison == 0 && rightToLeftComparison == 0 && hasNoNulls(leftStat) && hasNoNulls(rightStat)) { return RowsMatch.ALL; } // if at least one comparison result is negative, it means that none of the rows match the condition return leftToRightComparison < 0 || rightToLeftComparison < 0 ? RowsMatch.NONE : RowsMatch.SOME; }) { @Override public String toString() { return left + " = " + right; } }; }
/**
 * GE (>=) predicate.
 *
 * <p>Classifies a row group for {@code left >= right} from the min/max statistics of both
 * operands:
 * <ul>
 *   <li>{@code NONE} when the left maximum is strictly below the right minimum;</li>
 *   <li>the result of {@code checkNull} when the left minimum is at or above the right maximum
 *       (presumably this accounts for nulls in either operand -- confirm against the helper);</li>
 *   <li>{@code SOME} in every other case.</li>
 * </ul>
 */
private static <C extends Comparable<C>> LogicalExpression createGEPredicate(
    LogicalExpression left,
    LogicalExpression right
) {
  return new ParquetComparisonPredicate<C>(left, right, (lhs, rhs) -> {
    // left max < right min: no left value can reach any right value
    boolean leftEntirelyBelowRight = lhs.compareMaxToValue(rhs.genericGetMin()) < 0;
    if (leftEntirelyBelowRight) {
      return RowsMatch.NONE;
    }

    // left min >= right max: every pair of non-null values satisfies the condition
    boolean leftNeverBelowRight = lhs.compareMinToValue(rhs.genericGetMax()) >= 0;
    if (leftNeverBelowRight) {
      return checkNull(lhs, rhs);
    }

    // ranges overlap, nothing can be concluded
    return RowsMatch.SOME;
  });
}
/**
 * GT (>) predicate.
 *
 * <p>Classifies a row group for {@code left > right} from the min/max statistics of both
 * operands:
 * <ul>
 *   <li>{@code NONE} when the left maximum is at or below the right minimum;</li>
 *   <li>the result of {@code checkNull} when the left minimum is strictly above the right maximum
 *       (presumably this accounts for nulls in either operand -- confirm against the helper);</li>
 *   <li>{@code SOME} in every other case.</li>
 * </ul>
 */
private static <C extends Comparable<C>> LogicalExpression createGTPredicate(
    LogicalExpression left,
    LogicalExpression right
) {
  return new ParquetComparisonPredicate<C>(left, right, (lhs, rhs) -> {
    // left max <= right min: no left value can be strictly above a right value
    boolean leftNeverAboveRight = lhs.compareMaxToValue(rhs.genericGetMin()) <= 0;
    if (leftNeverAboveRight) {
      return RowsMatch.NONE;
    }

    // left min > right max: every pair of non-null values satisfies the condition
    boolean leftEntirelyAboveRight = lhs.compareMinToValue(rhs.genericGetMax()) > 0;
    if (leftEntirelyAboveRight) {
      return checkNull(lhs, rhs);
    }

    // ranges overlap, nothing can be concluded
    return RowsMatch.SOME;
  });
}
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(Gt<T> gt) { Column<T> filterColumn = gt.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never greater than a // value. for all x, null is never > x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v > someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = gt.getValue(); // drop if value >= max return stats.compareMaxToValue(value) <= 0; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(Gt<T> gt) { Column<T> filterColumn = gt.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never greater than a // value. for all x, null is never > x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v > someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = gt.getValue(); // drop if value >= max return stats.compareMaxToValue(value) <= 0; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(GtEq<T> gtEq) { Column<T> filterColumn = gtEq.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never greater than or // equal to a value. for all x, null is never >= x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v >= someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = gtEq.getValue(); // drop if value > max return stats.compareMaxToValue(value) < 0; }
@Override @SuppressWarnings("unchecked") public <T extends Comparable<T>> Boolean visit(GtEq<T> gtEq) { Column<T> filterColumn = gtEq.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column is missing and always null, which is never greater than or // equal to a value. for all x, null is never >= x. return BLOCK_CANNOT_MATCH; } Statistics<T> stats = meta.getStatistics(); if (stats.isEmpty()) { // we have no statistics available, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } if (isAllNulls(meta)) { // we are looking for records where v >= someValue // this chunk is all nulls, so we can drop it return BLOCK_CANNOT_MATCH; } if (!stats.hasNonNullValue()) { // stats does not contain min/max values, we cannot drop any chunks return BLOCK_MIGHT_MATCH; } T value = gtEq.getValue(); // drop if value > max return stats.compareMaxToValue(value) < 0; }
// true only when both the min and the max of stats compare equal to value
// NOTE(review): the enclosing method is not visible here -- confirm what a true result means to the caller
return stats.compareMinToValue(value) == 0 && stats.compareMaxToValue(value) == 0;
// true only when both the min and the max of stats compare equal to value
// NOTE(review): the enclosing method is not visible here -- confirm what a true result means to the caller
return stats.compareMinToValue(value) == 0 && stats.compareMaxToValue(value) == 0;
// true when value falls outside the [min, max] range of stats: min > value or max < value
// NOTE(review): the enclosing method is not visible here -- confirm what a true result means to the caller
return stats.compareMinToValue(value) > 0 || stats.compareMaxToValue(value) < 0;
// true when value falls outside the [min, max] range of stats: min > value or max < value
// NOTE(review): the enclosing method is not visible here -- confirm what a true result means to the caller
return stats.compareMinToValue(value) > 0 || stats.compareMaxToValue(value) < 0;