private void pushFilters(final JobConf jobConf, RowSchema rowSchema, ExprNodeGenericFuncDesc filterExpr) { // construct column name list for reference by filter push down Utilities.setColumnNameList(jobConf, rowSchema); // push down filters if (filterExpr == null) { LOG.debug("Not pushing filters because FilterExpr is null"); return; } final String filterText = filterExpr.getExprString(); final String filterExprSerialized = SerializationUtilities.serializeExpression(filterExpr); jobConf.set( TableScanDesc.FILTER_TEXT_CONF_STR, filterText); jobConf.set( TableScanDesc.FILTER_EXPR_CONF_STR, filterExprSerialized); }
private void pushFilters(final JobConf jobConf, RowSchema rowSchema, ExprNodeGenericFuncDesc filterExpr) { // construct column name list for reference by filter push down Utilities.setColumnNameList(jobConf, rowSchema); // push down filters if (filterExpr == null) { LOG.debug("Not pushing filters because FilterExpr is null"); return; } final String filterText = filterExpr.getExprString(); final String filterExprSerialized = SerializationUtilities.serializeExpression(filterExpr); jobConf.set( TableScanDesc.FILTER_TEXT_CONF_STR, filterText); jobConf.set( TableScanDesc.FILTER_EXPR_CONF_STR, filterExprSerialized); }
/**
 * Builds a "userid &lt;op&gt; 100" predicate from the supplied UDF and stores
 * its serialized form under the filter-expression key, mimicking what the
 * planner does for SARG push-down in tests.
 *
 * @param inspector object inspector for the resulting expression
 * @param udf       comparison UDF applied to the two children added here
 * @param childExpr child list that is mutated: a column ref and a constant
 *                  are appended
 */
private void createTestSarg(
    ObjectInspector inspector, GenericUDF udf, List<ExprNodeDesc> childExpr) {
  childExpr.add(new ExprNodeColumnDesc(Long.class, "userid", "T", false));
  childExpr.add(new ExprNodeConstantDesc(100));
  final ExprNodeGenericFuncDesc predicate = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  conf.set("hive.io.filter.expr.serialized",
      SerializationUtilities.serializeExpression(predicate));
}
// Serialize the predicate for transport in the job configuration.
serializedFilterExpr = SerializationUtilities.serializeExpression(filterExpr);
// Serialize the table scan's filter expression.
// NOTE(review): assumes ts.getConf().getFilterExpr() is non-null here -- confirm at the caller.
SerializationUtilities.serializeExpression(ts.getConf().getFilterExpr()));
// Serialize the predicate for transport in the job configuration.
serializedFilterExpr = SerializationUtilities.serializeExpression(filterExpr);
// Serialize the table scan's filter expression.
// NOTE(review): assumes ts.getConf().getFilterExpr() is non-null here -- confirm at the caller.
SerializationUtilities.serializeExpression(ts.getConf().getFilterExpr()));
@Test public void computeOptimizedScanOrAndCombinedFilter() { KafkaScanTrimmer kafkaScanTrimmer = new KafkaScanTrimmer(fullHouse, null); // partition = 0 and 30 <= offset < 35 or partition = 3 and 35 <= offset < 75 or (partition = 0 and offset = 40) ExprNodeGenericFuncDesc part1 = and(Lists.newArrayList(greaterThanEq(Lists.newArrayList(offsetColumn, thirtyLong)), eq(Lists.newArrayList(partitionColumn, zeroInt)), lessThan(Lists.newArrayList(offsetColumn, thirtyFiveLong)))); ExprNodeGenericFuncDesc part2 = and(Lists.newArrayList(greaterThanEq(Lists.newArrayList(offsetColumn, thirtyFiveLong)), eq(Lists.newArrayList(partitionColumn, threeInt)), lessThan(Lists.newArrayList(offsetColumn, seventyFiveLong)))); ExprNodeGenericFuncDesc part3 = and(Lists.newArrayList(eq(Lists.newArrayList(offsetColumn, fortyLong)), eq(Lists.newArrayList(partitionColumn, zeroInt)))); ExprNodeGenericFuncDesc orExpression = or(Lists.newArrayList(part1, part2, part3)); assertNotNull(orExpression); Map actual = kafkaScanTrimmer.computeOptimizedScan(SerializationUtilities .deserializeExpression(SerializationUtilities.serializeExpression(orExpression))); TopicPartition tpZero = new TopicPartition(topic, 0); TopicPartition toThree = new TopicPartition(topic, 3); KafkaInputSplit split1 = new KafkaInputSplit(topic, 0, 30, 41, PATH); KafkaInputSplit split2 = new KafkaInputSplit(topic, 3, 35, 75, PATH); Map expected = ImmutableMap.of(tpZero, split1, toThree, split2); Assert.assertEquals(expected, actual); }
/**
 * Round-trips a timestamp-typed constant expression through serialization
 * and verifies the expression string is preserved.
 */
@Test
public void testSerializeTimestamp() {
  Timestamp ts = Timestamp.ofEpochMilli(1374554702000L, 123456);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(1);
  children.add(new ExprNodeConstantDesc(ts));
  ExprNodeGenericFuncDesc desc = new ExprNodeGenericFuncDesc(
      TypeInfoFactory.timestampTypeInfo, new GenericUDFFromUtcTimestamp(), children);
  // Serialize then deserialize; the textual form must survive unchanged.
  String roundTripped = SerializationUtilities.deserializeExpression(
      SerializationUtilities.serializeExpression(desc)).getExprString();
  assertEquals(desc.getExprString(), roundTripped);
}
// Each of the following filters is expected to be unsatisfiable, so the
// trimmer must produce an empty scan plan. Every filter is round-tripped
// through serialize/deserialize first, matching the production path.
actual = kafkaScanTrimmer.computeOptimizedScan(SerializationUtilities
    .deserializeExpression(SerializationUtilities.serializeExpression(falseFilter)));
Assert.assertTrue(actual.isEmpty());
actual = kafkaScanTrimmer.computeOptimizedScan(SerializationUtilities
    .deserializeExpression(SerializationUtilities.serializeExpression(falseFilter2)));
Assert.assertTrue(actual.isEmpty());
actual = kafkaScanTrimmer.computeOptimizedScan(SerializationUtilities
    .deserializeExpression(SerializationUtilities.serializeExpression(filter3)));
Assert.assertTrue(actual.isEmpty());
actual = kafkaScanTrimmer.computeOptimizedScan(SerializationUtilities
    .deserializeExpression(SerializationUtilities.serializeExpression(filter4)));
Assert.assertTrue(actual.isEmpty());
@Test public void computeOptimizedScanPartitionOrAndCombinedFilter() { KafkaScanTrimmer kafkaScanTrimmer = new KafkaScanTrimmer(fullHouse, null); // partition = 1 or (partition >2 and <= 3) ExprNodeGenericFuncDesc eq = eq(Lists.newArrayList(partitionColumn, ConstantExprBuilder.build(1))); ExprNodeGenericFuncDesc lessEq = lessThanEq(Lists.newArrayList(partitionColumn, ConstantExprBuilder.build(3))); ExprNodeGenericFuncDesc greater = greaterThan(Lists.newArrayList(partitionColumn, ConstantExprBuilder.build(2))); ExprNodeGenericFuncDesc orNode = or(Lists.newArrayList(and(Lists.newArrayList(lessEq, greater)), eq)); Map actual = kafkaScanTrimmer.computeOptimizedScan(SerializationUtilities .deserializeExpression(SerializationUtilities.serializeExpression(orNode))); Map expected = Maps.filterValues(fullHouse, tp -> Objects.requireNonNull(tp).getPartition() == 1 || tp.getPartition() == 3); Assert.assertEquals(expected, actual); assertNotNull(orNode); }
// Repeatedly rebuild the predicate with the current constant, serialize it
// into the filter property, and recompute the splits.
// NOTE(review): 'con' appears unchanged between repetitions in this view --
// presumably reassigned in lines elided from this chunk; confirm in full file.
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
// Same rebuild/serialize/getSplits cycle as above, repeated per constant.
// NOTE(review): 'con' is not visibly reassigned between repetitions here --
// the assignments are presumably in elided lines; confirm in the full file.
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
// Build the predicate and publish its serialized form under the filter key.
children.add(constantDesc);
ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
String searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
// Replace the constant child and re-publish the rebuilt predicate.
// NOTE(review): 'constantDesc' is not visibly reassigned between the add and
// the set in this view -- presumably updated in elided lines; confirm.
children.set(1, constantDesc);
genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
// Build the predicate, serialize it into the filter property, and compute
// splits; the cycle repeats for each (presumably elided) mutation of
// childExpr2. The last repetition reuses the previous 'en2' as-is.
ExprNodeGenericFuncDesc en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
String sargStr = SerializationUtilities.serializeExpression(en2);
conf.set("hive.io.filter.expr.serialized", sargStr);
InputSplit[] splits = in.getSplits(conf, 1);
en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
sargStr = SerializationUtilities.serializeExpression(en2);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
sargStr = SerializationUtilities.serializeExpression(en2);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
sargStr = SerializationUtilities.serializeExpression(en2);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
sargStr = SerializationUtilities.serializeExpression(en2);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
/**
 * Exercises trimming for single binary comparisons on the partition column
 * (=, &lt;, &lt;=) against partition id 2, comparing against a filtered view
 * of the full partition map. Fix: raw {@code Map} types replaced with
 * proper generics.
 */
@Test
public void computeOptimizedScanPartitionBinaryOpFilter() {
  KafkaScanTrimmer kafkaScanTrimmer = new KafkaScanTrimmer(fullHouse, null);
  int partitionId = 2;
  ExprNodeDesc constant = ConstantExprBuilder.build(partitionId);
  final List<ExprNodeDesc> children = Lists.newArrayList(partitionColumn, constant);

  // partition = 2
  ExprNodeGenericFuncDesc node = eq(children);
  assertNotNull(node);
  Map<TopicPartition, KafkaInputSplit> actual = kafkaScanTrimmer.computeOptimizedScan(
      SerializationUtilities.deserializeExpression(
          SerializationUtilities.serializeExpression(node)));
  Map<TopicPartition, KafkaInputSplit> expected = Maps.filterValues(fullHouse,
      tp -> Objects.requireNonNull(tp).getPartition() == partitionId);
  Assert.assertEquals(expected, actual);

  // partition < 2
  ExprNodeGenericFuncDesc lessNode = lessThan(children);
  assertNotNull(lessNode);
  actual = kafkaScanTrimmer.computeOptimizedScan(
      SerializationUtilities.deserializeExpression(
          SerializationUtilities.serializeExpression(lessNode)));
  expected = Maps.filterValues(fullHouse,
      tp -> Objects.requireNonNull(tp).getPartition() < partitionId);
  Assert.assertEquals(expected, actual);

  // partition <= 2
  ExprNodeGenericFuncDesc lessEqNode = lessThanEq(children);
  assertNotNull(lessEqNode);
  actual = kafkaScanTrimmer.computeOptimizedScan(
      SerializationUtilities.deserializeExpression(
          SerializationUtilities.serializeExpression(lessEqNode)));
  expected = Maps.filterValues(fullHouse,
      tp -> Objects.requireNonNull(tp).getPartition() <= partitionId);
  Assert.assertEquals(expected, actual);
}
// Reset cache hit/miss counters, swap in a new constant (5), publish the
// rebuilt serialized predicate, and recompute splits with unknown (-1)
// requested split count.
OrcInputFormatForTest.caches.resetCounts();
childExpr.set(1, new ExprNodeConstantDesc(5));
conf.set("hive.io.filter.expr.serialized", SerializationUtilities.serializeExpression(
    new ExprNodeGenericFuncDesc(inspector, udf, childExpr)));
splits = in.getSplits(conf, -1);