private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<StripeStatistics> stripeStats, int stripeCount, Path filePath, final SchemaEvolution evolution) { if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution); }
private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<StripeStatistics> stripeStats, int stripeCount, Path filePath, final SchemaEvolution evolution) { if (sarg == null || stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution); }
List<PredicateLeaf> leaves = sarg.getLeaves(); for (int i = 0; i < leaves.size(); ++i) { PredicateLeaf pl = leaves.get(i);
/** * Translate the search argument to the filter predicate parquet uses. It includes * only the columns from the passed schema. * @return a filter predicate translated from search argument. null is returned * if failed to convert. */ public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) { Set<String> columns = null; if (schema != null) { columns = new HashSet<String>(); for (Type field : schema.getFields()) { columns.add(field.getName()); } } try { return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema); } catch(Exception e) { return null; } }
public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<OrcProto.Type> types, List<StripeStatistics> stripeStats, int stripeCount) throws FileFormatException { LOG.info("Translated ORC pushdown predicate: " + sarg); assert sarg != null; if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves); TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0); SchemaEvolution evolution = new SchemaEvolution(schema, null); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null, evolution); }
/** * Translate the search argument to the filter predicate parquet uses. It includes * only the columns from the passed schema. * @return a filter predicate translated from search argument. null is returned * if failed to convert. */ public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) { Set<String> columns = null; if (schema != null) { columns = new HashSet<String>(); for (Type field : schema.getFields()) { columns.add(field.getName()); } } try { return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema); } catch(Exception e) { return null; } }
public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<OrcProto.Type> types, List<StripeStatistics> stripeStats, int stripeCount) { LOG.info("Translated ORC pushdown predicate: " + sarg); assert sarg != null; if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves); TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0); SchemaEvolution evolution = new SchemaEvolution(schema, null); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null, evolution); }
private static boolean isStripeSatisfyPredicate( StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) { truthValues[pred] = TruthValue.YES_NO_NULL; } else { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); }
@Test public void TestBooleanSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + "wck5vZGVHZW5lcmljRnVuY0Rlc+MBAQABAgECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5xbC5wbGFuLk" + "V4cHJOb2RlQ29sdW1uRGVz4wEBYrEAAAFib29sb3LjAQNvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnNlc" + "mRlMi50eXBlaW5mby5QcmltaXRpdmVUeXBlSW5m7wEBYm9vbGVh7gEEb3JnLmFwYWNoZS5oYWRvb3Au" + "aGl2ZS5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQEDCQUBAQVvcmcuYXBhY2hlLmhhZG9vcC5" + "oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQZvcmcuYXBhY2" + "hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAwkBAgEBYrIAAAgBAwkBB29yZy5hcGFjaGUua" + "GFkb29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QQW7kAQEGAQAAAQMJ"; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("(and leaf-0 leaf-1)", sarg.getExpression().toString()); assertEquals(2, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.BOOLEAN, leaf.getType()); assertEquals("(EQUALS b1 true)", leaf.toString()); leaf = sarg.getLeaves().get(1); assertEquals(PredicateLeaf.Type.BOOLEAN, leaf.getType()); assertEquals("(EQUALS b2 true)", leaf.toString()); }
@Test public void TestFloatSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + "wck5vZGVDb2x1bW5EZXPjAQFmbPQAAAFiaWdvcuMBAm9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2VyZG" + "UyLnR5cGVpbmZvLlByaW1pdGl2ZVR5cGVJbmbvAQFmbG9h9AEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2Z" + "S5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQECBwQ/jMzNAQRvcmcuYXBhY2hlLmhhZG9vcC5o" + "aXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQVvcmcuYXBhY2h" + "lLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAgEBYm9vbGVh7g=="; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.FLOAT, leaf.getType()); assertEquals("(EQUALS flt " + ((Number) 1.1f).doubleValue() + ")", leaf.toString()); }
@Test public void TestDateSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + "wck5vZGVDb2x1bW5EZXPjAQFk9AAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT" + "IudHlwZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAWRhdOUBA29yZy5hcGFjaGUuaGFkb29wLmhpdmUuc" + "WwucGxhbi5FeHByTm9kZUNvbnN0YW50RGVz4wEBAgEBc3RyaW7nAwEyMDE1LTA1LTC1AQRvcmcuYXBh" + "Y2hlLmhhZG9vcC5oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUH" + "MAQVvcmcuYXBhY2hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAgEBYm9vbGVh7g=="; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.DATE, leaf.getType()); assertEquals("(EQUALS dt 2015-05-05)", leaf.toString()); }
@Test public void TestBigintSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + "wck5vZGVDb2x1bW5EZXPjAQFi6QAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT" + "IudHlwZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAWJpZ2lu9AEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2Z" + "S5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQECBwnywAEBBG9yZy5hcGFjaGUuaGFkb29wLmhp" + "dmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewBAAABgj0BRVFVQcwBBW9yZy5hcGFjaGU" + "uaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5QEAAAECAQFib29sZWHu"; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.LONG, leaf.getType()); assertEquals("(EQUALS bi 12345)", leaf.toString()); }
@Test public void TestDecimalSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + "wck5vZGVDb2x1bW5EZXPjAQFkZeMAAAFiaWdvcuMBAm9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2VyZG" + "UyLnR5cGVpbmZvLkRlY2ltYWxUeXBlSW5m7wEUAAFkZWNpbWHsAQNvcmcuYXBhY2hlLmhhZG9vcC5oa" + "XZlLnFsLnBsYW4uRXhwck5vZGVDb25zdGFudERlc+MBAQRvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnNl" + "cmRlMi50eXBlaW5mby5QcmltaXRpdmVUeXBlSW5m7wEBaW70AvYBAQVvcmcuYXBhY2hlLmhhZG9vcC5" + "oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQZvcmcuYXBhY2" + "hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABBAEBYm9vbGVh7g=="; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.DECIMAL, leaf.getType()); assertEquals("(EQUALS dec 123)", leaf.toString()); }
@Test public void TestCharSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + "wck5vZGVDb2x1bW5EZXPjAQFj6AAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT" + "IudHlwZWluZm8uQ2hhclR5cGVJbmbvARQBY2hh8gEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5xbC5wb" + "GFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQEEb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZTIudHlw" + "ZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAXN0cmlu5wMBY2hhciAgICAgoAEFb3JnLmFwYWNoZS5oYWR" + "vb3AuaGl2ZS5xbC51ZGYuZ2VuZXJpYy5HZW5lcmljVURGT1BFcXVh7AEAAAGCPQFFUVVBzAEGb3JnLm" + "FwYWNoZS5oYWRvb3AuaW8uQm9vbGVhbldyaXRhYmzlAQAAAQQBAWJvb2xlYe4="; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals("(EQUALS ch char )", leaf.toString()); }
@Test public void TestVarcharSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + "wck5vZGVDb2x1bW5EZXPjAQF24wAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT" + "IudHlwZWluZm8uVmFyY2hhclR5cGVJbmbvAcgBAXZhcmNoYfIBA29yZy5hcGFjaGUuaGFkb29wLmhpd" + "mUucWwucGxhbi5FeHByTm9kZUNvbnN0YW50RGVz4wEBBG9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2Vy" + "ZGUyLnR5cGVpbmZvLlByaW1pdGl2ZVR5cGVJbmbvAQFzdHJpbucDAXZhcmlhYmzlAQVvcmcuYXBhY2h" + "lLmhhZG9vcC5oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQ" + "ZvcmcuYXBhY2hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABBAEBYm9vbGVh7g=="; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals("(EQUALS vc variable)", leaf.toString()); }
@Test public void TestDoubleSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + "wck5vZGVDb2x1bW5EZXPjAQFkYuwAAAFiaWdvcuMBAm9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2VyZG" + "UyLnR5cGVpbmZvLlByaW1pdGl2ZVR5cGVJbmbvAQFkb3VibOUBA29yZy5hcGFjaGUuaGFkb29wLmhpd" + "mUucWwucGxhbi5FeHByTm9kZUNvbnN0YW50RGVz4wEBAgcKQAGZmZmZmZoBBG9yZy5hcGFjaGUuaGFk" + "b29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewBAAABgj0BRVFVQcwBBW9yZy5" + "hcGFjaGUuaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5QEAAAECAQFib29sZWHu"; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.FLOAT, leaf.getType()); assertEquals("(EQUALS dbl 2.2)", leaf.toString()); } }
@Test public void testTimestampSerialization() throws Exception { // There is a kryo which after serialize/deserialize, // Timestamp becomes Date. We get around this issue in // SearchArgumentImpl.getLiteral. Once kryo fixed the issue // We can simplify SearchArgumentImpl.getLiteral Timestamp now = new Timestamp(new java.util.Date().getTime()); SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() .lessThan("x", PredicateLeaf.Type.TIMESTAMP, now) .end() .build(); String serializedSarg = TestInputOutputFormat.toKryo(sarg); SearchArgument sarg2 = ConvertAstToSearchArg.create(serializedSarg); Field literalField = PredicateLeafImpl.class.getDeclaredField("literal"); literalField.setAccessible(true); assertTrue(literalField.get(sarg2.getLeaves().get(0)) instanceof java.util.Date); Timestamp ts = (Timestamp)sarg2.getLeaves().get(0).getLiteral(); assertEquals(ts, now); }
@Test public void TestTimestampSarg() throws Exception { String serialAst = "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLn" + "BsYW4uRXhwck5vZGVDb2x1bW5EZXPjAQF08wAAAWJpZ29y4wECb3JnLmFwYWNoZS5o" + "YWRvb3AuaGl2ZS5zZXJkZTIudHlwZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAXRpbW" + "VzdGFt8AEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5xbC5wbGFuLkV4cHJOb2RlQ29u" + "c3RhbnREZXPjAQECAQFzdHJpbucDATIwMTUtMDMtMTcgMTI6MzQ6NbYBBG9yZy5hcG" + "FjaGUuaGFkb29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewB" + "AAABgj0BRVFVQcwBBW9yZy5hcGFjaGUuaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5Q" + "EAAAECAQFib29sZWHu"; SearchArgument sarg = new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); PredicateLeaf leaf = sarg.getLeaves().get(0); assertEquals(PredicateLeaf.Type.TIMESTAMP, leaf.getType()); assertEquals("(EQUALS ts 2015-03-17 12:34:56.0)", leaf.toString()); }
private static boolean isStripeSatisfyPredicate( StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) {
assertEquals(null, colNames[4]); SearchArgument arg = options.getSearchArgument(); List<PredicateLeaf> leaves = arg.getLeaves(); assertEquals("cost", leaves.get(0).getColumnName()); assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());