/**
 * Translate the search argument to the filter predicate parquet uses. It includes
 * only the columns from the passed schema.
 * @return a filter predicate translated from search argument. null is returned
 * if failed to convert.
 */
public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) {
  // Collect the column names present in the schema; a null schema means
  // no restriction is applied during translation.
  Set<String> schemaColumns = null;
  if (schema != null) {
    schemaColumns = new HashSet<String>();
    for (Type schemaField : schema.getFields()) {
      schemaColumns.add(schemaField.getName());
    }
  }
  try {
    return translate(sarg.getExpression(), sarg.getLeaves(), schemaColumns, schema);
  } catch (Exception translationFailure) {
    // Conversion failed; signal the caller to skip predicate pushdown.
    return null;
  }
}
private static boolean isStripeSatisfyPredicate( StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) { truthValues[pred] = TruthValue.YES_NO_NULL; } else { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); }
private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<StripeStatistics> stripeStats, int stripeCount, Path filePath, final SchemaEvolution evolution) { if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution); }
/**
 * Evaluates every predicate leaf against the row and accepts it only when
 * the combined search argument is definitely YES.
 */
@Override
public boolean accept(Corc corc) throws IOException {
  int leafCount = evaluators.size();
  TruthValue[] leafTruths = new TruthValue[leafCount];
  for (int i = 0; i < leafCount; i++) {
    leafTruths[i] = evaluators.get(i).evaluate(corc);
  }
  // Anything other than a definite YES (NO, NULL, YES_NO, ...) rejects the row.
  switch (searchArgument.evaluate(leafTruths)) {
    case YES:
      return true;
    default:
      return false;
  }
}
/**
 * Sets the SearchArgument predicate pushdown in the configuration.
 */
public static void setSearchArgument(Configuration conf, SearchArgument searchArgument) {
  if (searchArgument == null) {
    // Nothing to push down.
    return;
  }
  setSearchArgumentKryo(conf, searchArgument.toKryo());
}
/**
 * Builds the parquet filter from the serialized pushdown predicate carried in
 * the job configuration and registers it with ParquetInputFormat.
 *
 * @param conf job configuration holding the serialized filter expression and
 *        the projected column names
 * @return the parquet filter, or null when no usable predicate is present
 */
public FilterCompat.Filter setFilter(final JobConf conf) {
  String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  String columnNamesString = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
  if (serializedPushdown == null || columnNamesString == null
      || serializedPushdown.isEmpty() || columnNamesString.isEmpty()) {
    return null;
  }
  FilterPredicate p =
      SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown))
          .toFilterPredicate();
  if (p != null) {
    // Guard the concatenation so the message is only built when debug
    // logging is actually enabled.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Predicate filter for parquet is " + p.toString());
    }
    ParquetInputFormat.setFilterPredicate(conf, p);
    return FilterCompat.get(p);
  } else {
    if (LOG.isDebugEnabled()) {
      LOG.debug("No predicate filter can be generated for "
          + TableScanDesc.FILTER_EXPR_CONF_STR + " with the value of " + serializedPushdown);
    }
    return null;
  }
}
private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<StripeStatistics> stripeStats, int stripeCount, Path filePath, final SchemaEvolution evolution) { if (sarg == null || stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution); }
result[rowGroup] = sarg.evaluate(leafValues).isNeeded(); hasSelected = hasSelected || result[rowGroup]; hasSkipped = hasSkipped || (!result[rowGroup]);
/**
 * Converts the Pig pushdown expression into a SearchArgument and stores its
 * Kryo serialization in the UDF context properties for the backend to pick up.
 *
 * @param expr the pushdown predicate expression from Pig
 * @throws IOException if the SearchArgument cannot be serialized
 */
@Override
public void setPushdownPredicate(Expression expr) throws IOException {
  SearchArgument sArg = getSearchArgument(expr);
  if (sArg != null) {
    log.info("Pushdown predicate expression is " + expr);
    log.info("Pushdown predicate SearchArgument is:\n" + sArg);
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    try {
      p.setProperty(signature + SearchArgsSuffix, sArg.toKryo());
    } catch (Exception e) {
      // Preserve the original failure as the cause instead of discarding it.
      throw new IOException("Cannot serialize SearchArgument: " + sArg, e);
    }
  }
}
/**
 * Translate the search argument to the filter predicate parquet uses. It includes
 * only the columns from the passed schema.
 * @return a filter predicate translated from search argument. null is returned
 * if failed to convert.
 */
public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) {
  Set<String> columns;
  if (schema == null) {
    // No schema means the translation is not restricted to any column set.
    columns = null;
  } else {
    columns = new HashSet<String>();
    for (Type t : schema.getFields()) {
      columns.add(t.getName());
    }
  }
  try {
    return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema);
  } catch (Exception ignored) {
    // Any translation failure is reported to the caller as null.
    return null;
  }
}
List<PredicateLeaf> leaves = sarg.getLeaves(); for (int i = 0; i < leaves.size(); ++i) { PredicateLeaf pl = leaves.get(i);
private static boolean isStripeSatisfyPredicate( StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { return sarg.evaluate(truthValues).isNeeded();
result[rowGroup] = sarg.evaluate(leafValues).isNeeded(); hasSelected = hasSelected || result[rowGroup]; hasSkipped = hasSkipped || (!result[rowGroup]);
@Test
public void TestFloatSarg() throws Exception {
  // Base64/Kryo serialized AST for an equality predicate on float column "flt".
  String serialAst =
      "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh"
      + "wck5vZGVDb2x1bW5EZXPjAQFmbPQAAAFiaWdvcuMBAm9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2VyZG"
      + "UyLnR5cGVpbmZvLlByaW1pdGl2ZVR5cGVJbmbvAQFmbG9h9AEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2Z"
      + "S5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQECBwQ/jMzNAQRvcmcuYXBhY2hlLmhhZG9vcC5o"
      + "aXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQVvcmcuYXBhY2h"
      + "lLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAgEBYm9vbGVh7g==";
  SearchArgument searchArg =
      new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst))
          .buildSearchArgument();
  // The whole expression reduces to a single leaf.
  assertEquals("leaf-0", searchArg.getExpression().toString());
  assertEquals(1, searchArg.getLeaves().size());
  PredicateLeaf onlyLeaf = searchArg.getLeaves().get(0);
  assertEquals(PredicateLeaf.Type.FLOAT, onlyLeaf.getType());
  // Float literals are widened to double in the leaf representation.
  assertEquals("(EQUALS flt " + ((Number) 1.1f).doubleValue() + ")", onlyLeaf.toString());
}
List<PredicateLeaf> leaves = sarg.getLeaves(); for (int i = 0; i < leaves.size(); ++i) { PredicateLeaf pl = leaves.get(i);
private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); } }
result[rowGroup] = sarg.evaluate(leafValues).isNeeded(); if (LOG.isDebugEnabled()) { LOG.debug("Row group " + (rowIndexStride * rowGroup) + " to " +
@Test
public void TestDateSarg() throws Exception {
  // Base64/Kryo serialized AST for an equality predicate on date column "dt".
  String serialAst =
      "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh"
      + "wck5vZGVDb2x1bW5EZXPjAQFk9AAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT"
      + "IudHlwZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAWRhdOUBA29yZy5hcGFjaGUuaGFkb29wLmhpdmUuc"
      + "WwucGxhbi5FeHByTm9kZUNvbnN0YW50RGVz4wEBAgEBc3RyaW7nAwEyMDE1LTA1LTC1AQRvcmcuYXBh"
      + "Y2hlLmhhZG9vcC5oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUH"
      + "MAQVvcmcuYXBhY2hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAgEBYm9vbGVh7g==";
  SearchArgument searchArg =
      new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst))
          .buildSearchArgument();
  // The whole expression reduces to a single leaf.
  assertEquals("leaf-0", searchArg.getExpression().toString());
  assertEquals(1, searchArg.getLeaves().size());
  PredicateLeaf onlyLeaf = searchArg.getLeaves().get(0);
  assertEquals(PredicateLeaf.Type.DATE, onlyLeaf.getType());
  assertEquals("(EQUALS dt 2015-05-05)", onlyLeaf.toString());
}
public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<OrcProto.Type> types, List<StripeStatistics> stripeStats, int stripeCount) throws FileFormatException { LOG.info("Translated ORC pushdown predicate: " + sarg); assert sarg != null; if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves); TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0); SchemaEvolution evolution = new SchemaEvolution(schema, null); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null, evolution); }
@Test
public void TestBigintSarg() throws Exception {
  // Base64/Kryo serialized AST for an equality predicate on bigint column "bi".
  String serialAst =
      "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh"
      + "wck5vZGVDb2x1bW5EZXPjAQFi6QAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT"
      + "IudHlwZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAWJpZ2lu9AEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2Z"
      + "S5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQECBwnywAEBBG9yZy5hcGFjaGUuaGFkb29wLmhp"
      + "dmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewBAAABgj0BRVFVQcwBBW9yZy5hcGFjaGU"
      + "uaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5QEAAAECAQFib29sZWHu";
  SearchArgument searchArg =
      new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst))
          .buildSearchArgument();
  // The whole expression reduces to a single leaf.
  assertEquals("leaf-0", searchArg.getExpression().toString());
  assertEquals(1, searchArg.getLeaves().size());
  PredicateLeaf onlyLeaf = searchArg.getLeaves().get(0);
  assertEquals(PredicateLeaf.Type.LONG, onlyLeaf.getType());
  assertEquals("(EQUALS bi 12345)", onlyLeaf.toString());
}