/**
 * Translates one {@link PredicateLeaf} into a {@link FilterPredicate}.
 *
 * <p>A leaf builder is looked up from the leaf's type together with {@code parquetType}.
 * When {@code isMultiLiteralsOperator} accepts the leaf's operator, the predicate is built
 * from the leaf's literal list; otherwise it is built from the leaf's single literal.
 *
 * @param leaf the predicate leaf to translate
 * @param parquetType the type passed to the builder lookup alongside the leaf's own type
 * @return the predicate produced by the selected leaf builder
 * @throws Exception whatever the lookup or the builder throws; it is logged at error level
 *     and then rethrown unchanged
 */
private static FilterPredicate buildFilterPredicateFromPredicateLeaf(
    PredicateLeaf leaf, Type parquetType) throws Exception {
  final LeafFilterFactory factory = new LeafFilterFactory();
  try {
    final FilterPredicateLeafBuilder leafBuilder =
        factory.getLeafFilterBuilderByType(leaf.getType(), parquetType);

    // Single-literal operators go through buildPredict; everything else uses the literal list.
    if (!isMultiLiteralsOperator(leaf.getOperator())) {
      return leafBuilder.buildPredict(
          leaf.getOperator(), leaf.getLiteral(), leaf.getColumnName());
    }
    return leafBuilder.buildPredicate(
        leaf.getOperator(), leaf.getLiteralList(), leaf.getColumnName());
  } catch (Exception failure) {
    LOG.error("fail to build predicate filter leaf with errors" + failure, failure);
    throw failure;
  }
}
/**
 * Appends the literal(s) carried by {@code leaf} to {@code literals}.
 *
 * <p>An {@code EQUALS} leaf contributes its single literal, an {@code IN} leaf contributes
 * its whole literal list, and any other operator contributes nothing.
 *
 * @param literals the list that receives the literal values
 * @param leaf the predicate leaf whose literals are collected
 * @return the result of the underlying {@code add}/{@code addAll} call, or {@code false}
 *     when the operator is neither {@code EQUALS} nor {@code IN}
 */
private boolean addLiteral(List<Object> literals, PredicateLeaf leaf) {
  final boolean changed;
  switch (leaf.getOperator()) {
    case EQUALS:
      changed = literals.add(leaf.getLiteral());
      break;
    case IN:
      changed = literals.addAll(leaf.getLiteralList());
      break;
    default:
      changed = false;
      break;
  }
  return changed;
}
}
private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, final Object predObj, BloomFilterIO bloomFilter, boolean hasNull) { switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: // null safe equals does not return *_NULL variant. So set hasNull to false return checkInBloomFilter(bloomFilter, predObj, false); case EQUALS: return checkInBloomFilter(bloomFilter, predObj, hasNull); case IN: for (Object arg : predicate.getLiteralList()) { // if atleast one value in IN list exist in bloom filter, qualify the row group/stripe Object predObjItem = getBaseObjectForComparison(predicate.getType(), arg); TruthValue result = checkInBloomFilter(bloomFilter, predObjItem, hasNull); if (result == TruthValue.YES_NO_NULL || result == TruthValue.YES_NO) { return result; } } return hasNull ? TruthValue.NO_NULL : TruthValue.NO; default: return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } }
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, Object max, boolean hasNull, BloomFilter bloomFilter, boolean useUTCTimestamp) { // if we didn't have any values, everything must have been null if (min == null) { if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) { return TruthValue.YES; } else { return TruthValue.NULL; } } else if (min == UNKNOWN_VALUE) { return TruthValue.YES_NO_NULL; } TruthValue result; Object baseObj = predicate.getLiteral(); // Predicate object and stats objects are converted to the type of the predicate object. Object minValue = getBaseObjectForComparison(predicate.getType(), min); Object maxValue = getBaseObjectForComparison(predicate.getType(), max); Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj); result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull); if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) { return evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull, useUTCTimestamp); } else { return result; } }
Set<PredicateLeaf> bucketLeaves = new HashSet<PredicateLeaf>(); for (PredicateLeaf l : leaves) { if (bucketCol.equals(l.getColumnName())) { switch (l.getOperator()) { case EQUALS: case IN:
assertEquals(p.toString(), expected); assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType()); assertEquals(PredicateLeaf.Operator.BETWEEN, leaves.get(0).getOperator()); assertEquals("first_name", leaves.get(0).getColumnName());
for (int i = 0; i < leaves.size(); ++i) { PredicateLeaf pl = leaves.get(i); Integer colId = nameIdMap.get(pl.getColumnName()); String newColName = RecordReaderImpl.encodeTranslatedSargColumn(rootColumn, colId); SearchArgumentFactory.setPredicateLeafColumn(pl, newColName);
/**
 * Creates the evaluator for a less-than style predicate leaf.
 *
 * @param predicateLeaf supplies the column name, the literal and the operator
 * @param category the primitive category used to convert the literal via {@code toComparable}
 * @return a {@code LessThanEvaluator} for the leaf's column, converted literal and operator
 */
// NOTE(review): the suppression is presumably needed because the evaluator is constructed
// as a raw type here — confirm against the evaluator's declaration.
@SuppressWarnings({ "rawtypes", "unchecked" })
private Evaluator<?> lessThanEvaluator(PredicateLeaf predicateLeaf, PrimitiveCategory category) {
  final Comparable<?> bound = toComparable(category, predicateLeaf.getLiteral());
  final String column = predicateLeaf.getColumnName();
  return new LessThanEvaluator(column, bound, predicateLeaf.getOperator());
}
/**
 * Creates the evaluator for an IN predicate leaf.
 *
 * <p>Every entry of the leaf's literal list is converted with {@code toComparable} and the
 * converted values are handed to the evaluator in the original order.
 *
 * @param predicateLeaf supplies the column name and the literal list
 * @param category the primitive category used to convert each literal
 * @return an {@code InEvaluator} for the leaf's column and converted literals
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private Evaluator<?> inEvaluator(PredicateLeaf predicateLeaf, PrimitiveCategory category) {
  final List<?> rawLiterals = predicateLeaf.getLiteralList();
  // Pre-sized to the number of literals, as each one yields exactly one converted value.
  final List<Comparable<?>> converted = new ArrayList<>(rawLiterals.size());
  for (final Object raw : rawLiterals) {
    converted.add(toComparable(category, raw));
  }
  final String column = predicateLeaf.getColumnName();
  return new InEvaluator(column, converted);
}
/**
 * Deserializes a stored expression, converts it to a search argument and checks that the
 * result is the conjunction of two BOOLEAN {@code EQUALS ... true} leaves on {@code b1}
 * and {@code b2}.
 */
@Test
public void TestBooleanSarg() throws Exception {
  // Serialized expression payload; must be kept byte-for-byte.
  final String serialAst =
      "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" +
      "wck5vZGVHZW5lcmljRnVuY0Rlc+MBAQABAgECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5xbC5wbGFuLk" +
      "V4cHJOb2RlQ29sdW1uRGVz4wEBYrEAAAFib29sb3LjAQNvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnNlc" +
      "mRlMi50eXBlaW5mby5QcmltaXRpdmVUeXBlSW5m7wEBYm9vbGVh7gEEb3JnLmFwYWNoZS5oYWRvb3Au" +
      "aGl2ZS5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQEDCQUBAQVvcmcuYXBhY2hlLmhhZG9vcC5" +
      "oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQZvcmcuYXBhY2" +
      "hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAwkBAgEBYrIAAAgBAwkBB29yZy5hcGFjaGUua" +
      "GFkb29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QQW7kAQEGAQAAAQMJ";

  final ConvertAstToSearchArg converter =
      new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst));
  final SearchArgument sarg = converter.buildSearchArgument();

  assertEquals("(and leaf-0 leaf-1)", sarg.getExpression().toString());
  assertEquals(2, sarg.getLeaves().size());

  final PredicateLeaf firstLeaf = sarg.getLeaves().get(0);
  assertEquals(PredicateLeaf.Type.BOOLEAN, firstLeaf.getType());
  assertEquals("(EQUALS b1 true)", firstLeaf.toString());

  final PredicateLeaf secondLeaf = sarg.getLeaves().get(1);
  assertEquals(PredicateLeaf.Type.BOOLEAN, secondLeaf.getType());
  assertEquals("(EQUALS b2 true)", secondLeaf.toString());
}
/**
 * Creates the evaluator for an equality predicate leaf.
 *
 * @param predicateLeaf supplies the column name and the literal
 * @param category the primitive category used to convert the literal via {@code toComparable}
 * @return an {@code EqualsEvaluator} for the leaf's column and converted literal
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private Evaluator<?> equalsEvaluator(PredicateLeaf predicateLeaf, PrimitiveCategory category) {
  final Comparable<?> expected = toComparable(category, predicateLeaf.getLiteral());
  final String column = predicateLeaf.getColumnName();
  return new EqualsEvaluator(column, expected);
}
} catch (NoDynamicValuesException dve) { LOG.debug("Dynamic values are not available here {}", dve.getMessage()); boolean hasNulls = stats.hasNull() || leaf.getOperator() != Operator.NULL_SAFE_EQUALS; truthValues[pred] = hasNulls ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
LOG.debug("Not using predication pushdown on {} because it doesn't " + "include ORC-135. Writer version: {}", predicate.getColumnName(), writerVersion); return TruthValue.YES_NO_NULL; if (predicate.getType() != PredicateLeaf.Type.TIMESTAMP && predicate.getType() != PredicateLeaf.Type.DATE && predicate.getType() != PredicateLeaf.Type.STRING) { return TruthValue.YES_NO_NULL;
@Test public void testTimestampSerialization() throws Exception { // There is a kryo which after serialize/deserialize, // Timestamp becomes Date. We get around this issue in // SearchArgumentImpl.getLiteral. Once kryo fixed the issue // We can simplify SearchArgumentImpl.getLiteral Timestamp now = new Timestamp(new java.util.Date().getTime()); SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() .lessThan("x", PredicateLeaf.Type.TIMESTAMP, now) .end() .build(); String serializedSarg = TestInputOutputFormat.toKryo(sarg); SearchArgument sarg2 = ConvertAstToSearchArg.create(serializedSarg); Field literalField = PredicateLeafImpl.class.getDeclaredField("literal"); literalField.setAccessible(true); assertTrue(literalField.get(sarg2.getLeaves().get(0)) instanceof java.util.Date); Timestamp ts = (Timestamp)sarg2.getLeaves().get(0).getLiteral(); assertEquals(ts, now); }
private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, final Object predObj, BloomFilterIO bloomFilter, boolean hasNull) { switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: // null safe equals does not return *_NULL variant. So set hasNull to false return checkInBloomFilter(bloomFilter, predObj, false); case EQUALS: return checkInBloomFilter(bloomFilter, predObj, hasNull); case IN: for (Object arg : predicate.getLiteralList()) { // if atleast one value in IN list exist in bloom filter, qualify the row group/stripe Object predObjItem = getBaseObjectForComparison(predicate.getType(), arg); TruthValue result = checkInBloomFilter(bloomFilter, predObjItem, hasNull); if (result == TruthValue.YES_NO_NULL || result == TruthValue.YES_NO) { return result; } } return hasNull ? TruthValue.NO_NULL : TruthValue.NO; default: return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } }
/**
 * Appends the converted literal(s) carried by {@code leaf} to {@code literals}.
 *
 * <p>An {@code EQUALS} leaf contributes its single literal and an {@code IN} leaf
 * contributes every entry of its literal list; each value is passed through
 * {@code convertLiteral} first. Other operators contribute nothing.
 *
 * @param literals the list that receives the converted literal values
 * @param leaf the predicate leaf whose literals are collected
 * @return the result of the underlying {@code add}/{@code addAll} call, or {@code false}
 *     when the operator is neither {@code EQUALS} nor {@code IN}
 */
private boolean addLiteral(List<Object> literals, PredicateLeaf leaf) {
  final boolean changed;
  switch (leaf.getOperator()) {
    case EQUALS: {
      final Object converted = convertLiteral(leaf.getLiteral());
      changed = literals.add(converted);
      break;
    }
    case IN: {
      // Convert the whole list first so nothing is appended if a conversion fails.
      final List<Object> convertedAll =
          leaf.getLiteralList().stream()
              .map(l -> convertLiteral(l))
              .collect(Collectors.toList());
      changed = literals.addAll(convertedAll);
      break;
    }
    default:
      changed = false;
      break;
  }
  return changed;
}
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) { return TruthValue.YES; } else { try { Object baseObj = predicate.getLiteral(); Object minValue = getBaseObjectForComparison(predicate.getType(), min); Object maxValue = getBaseObjectForComparison(predicate.getType(), max); Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj); " Exception: " + ExceptionUtils.getStackTrace(e)); if (predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS) || !hasNull) { result = TruthValue.YES_NO; } else {
Set<PredicateLeaf> bucketLeaves = new HashSet<PredicateLeaf>(); for (PredicateLeaf l : leaves) { if (bucketCol.equals(l.getColumnName())) { switch (l.getOperator()) { case EQUALS: case IN:
for (int i = 0; i < leaves.size(); ++i) { PredicateLeaf pl = leaves.get(i); Integer colId = nameIdMap.get(pl.getColumnName()); String newColName = RecordReaderImpl.encodeTranslatedSargColumn(rootColumn, colId); SearchArgumentFactory.setPredicateLeafColumn(pl, newColName);
/**
 * Creates the evaluator for a BETWEEN predicate leaf.
 *
 * <p>The first entry of the leaf's literal list is used as the minimum and the second
 * entry as the maximum; both are converted with {@code toComparable}.
 *
 * @param predicateLeaf supplies the column name and the two-element literal list
 * @param category the primitive category used to convert both bounds
 * @return a {@code BetweenEvaluator} for the leaf's column and converted bounds
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private Evaluator<?> betweenEvaluator(PredicateLeaf predicateLeaf, PrimitiveCategory category) {
  final List<Object> bounds = predicateLeaf.getLiteralList();
  final Comparable<?> lower = toComparable(category, bounds.get(0));
  final Comparable<?> upper = toComparable(category, bounds.get(1));
  final String column = predicateLeaf.getColumnName();
  return new BetweenEvaluator(column, lower, upper);
}