/**
 * Builds the expression {@code col1 > -10} over an Integer column and checks that, in FILTER
 * mode, the vectorization context yields a {@code FilterLongColGreaterLongScalar}.
 *
 * @throws HiveException if the vector expression cannot be created
 */
@Test
public void testFilterWithNegativeScalar() throws HiveException {
  // Operands: the column on the left, the negative constant on the right.
  List<ExprNodeDesc> operands = new ArrayList<ExprNodeDesc>(2);
  operands.add(new ExprNodeColumnDesc(Integer.class, "col1", "table", false));
  // Integer.valueOf replaces the deprecated Integer(int) constructor; the boxed value is the same.
  operands.add(new ExprNodeConstantDesc(Integer.valueOf(-10)));

  ExprNodeGenericFuncDesc greaterThanExpr = new ExprNodeGenericFuncDesc();
  greaterThanExpr.setGenericUDF(new GenericUDFOPGreaterThan());
  greaterThanExpr.setChildren(operands);

  // Column names handed to the vectorization context.
  List<String> columnNames = new ArrayList<String>();
  columnNames.add("col0");
  columnNames.add("col1");
  columnNames.add("col2");
  VectorizationContext context = new VectorizationContext("name", columnNames);

  VectorExpression vectorized =
      context.getVectorExpression(greaterThanExpr, VectorExpressionDescriptor.Mode.FILTER);

  assertTrue(vectorized instanceof FilterLongColGreaterLongScalar);
}
fnName = "isnotnull"; ((ExprNodeGenericFuncDesc) origin).setGenericUDF( FunctionRegistry.getFunctionInfo(fnName).getGenericUDF()); } else {
public static void replaceNullFiltersWithDefaultPartition(ExprNodeDesc origin, String defaultPartitionName) throws SemanticException { // Convert "ptn_col isnull" to "ptn_col = default_partition" and // "ptn_col isnotnull" to "ptn_col <> default_partition" String fnName = null; if (origin instanceof ExprNodeGenericFuncDesc) { if (((ExprNodeGenericFuncDesc) origin).getGenericUDF() instanceof GenericUDFOPNull) { fnName = "="; } else if (((ExprNodeGenericFuncDesc) origin).getGenericUDF() instanceof GenericUDFOPNotNull) { fnName = "<>"; } } // Found an expression for function "isnull" or "isnotnull" if (fnName != null) { List<ExprNodeDesc> children = origin.getChildren(); assert(children.size() == 1); ExprNodeConstantDesc defaultPartition = new ExprNodeConstantDesc(defaultPartitionName); children.add(defaultPartition); ((ExprNodeGenericFuncDesc) origin).setChildren(children); ((ExprNodeGenericFuncDesc) origin).setGenericUDF( FunctionRegistry.getFunctionInfo(fnName).getGenericUDF()); } else { if (origin.getChildren() != null) { for (ExprNodeDesc child : origin.getChildren()) { replaceNullFiltersWithDefaultPartition(child, defaultPartitionName); } } } }
/**
 * Builds {@code col1 > "Alpha"} over a String column and checks that FILTER-mode vectorization
 * produces a {@code FilterStringGroupColGreaterStringScalar}.
 *
 * @throws HiveException if the vector expression cannot be created
 */
@Test
public void testStringFilterExpressions() throws HiveException {
  // Operands: string column on the left, string literal on the right.
  List<ExprNodeDesc> operands = new ArrayList<ExprNodeDesc>(2);
  operands.add(new ExprNodeColumnDesc(String.class, "col1", "table", false));
  operands.add(new ExprNodeConstantDesc("Alpha"));

  ExprNodeGenericFuncDesc greaterThanExpr = new ExprNodeGenericFuncDesc();
  greaterThanExpr.setGenericUDF(new GenericUDFOPGreaterThan());
  greaterThanExpr.setChildren(operands);

  List<String> columnNames = new ArrayList<String>();
  columnNames.add("col0");
  columnNames.add("col1");
  columnNames.add("col2");
  VectorizationContext context = new VectorizationContext("name", columnNames);

  VectorExpression vectorized =
      context.getVectorExpression(greaterThanExpr, VectorExpressionDescriptor.Mode.FILTER);

  assertTrue(vectorized instanceof FilterStringGroupColGreaterStringScalar);
}
private Timestamp getTimestampScalar(ExprNodeDesc expr) throws HiveException { if (expr instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) expr).getGenericUDF() instanceof GenericUDFTimestamp) { return evaluateCastToTimestamp(expr); } if (!(expr instanceof ExprNodeConstantDesc)) { throw new HiveException("Constant timestamp value expected for expression argument. " + "Non-constant argument not supported for vectorization."); } ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr; String constTypeString = constExpr.getTypeString(); if (isStringFamily(constTypeString) || isDatetimeFamily(constTypeString)) { // create expression tree with type cast from string to timestamp ExprNodeGenericFuncDesc expr2 = new ExprNodeGenericFuncDesc(); GenericUDFTimestamp f = new GenericUDFTimestamp(); expr2.setGenericUDF(f); ArrayList<ExprNodeDesc> children = new ArrayList<>(); children.add(expr); expr2.setChildren(children); // initialize and evaluate return evaluateCastToTimestamp(expr2); } throw new HiveException("Udf: unhandled constant type for scalar argument. " + "Expecting string/date/timestamp."); }
/**
 * Builds {@code 20 > a} (scalar on the left, long column on the right) and checks that
 * FILTER-mode vectorization produces exactly {@code FilterLongScalarGreaterLongColumn}.
 *
 * @throws HiveException if the vector expression cannot be created
 */
@Test
public void testFilterScalarCompareColumn() throws HiveException {
  ExprNodeGenericFuncDesc scalarGreaterThanColumn = new ExprNodeGenericFuncDesc();
  scalarGreaterThanColumn.setGenericUDF(new GenericUDFOPGreaterThan());

  // The constant comes first so that the scalar is the left operand.
  List<ExprNodeDesc> operands = new ArrayList<ExprNodeDesc>(2);
  operands.add(new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 20));
  operands.add(new ExprNodeColumnDesc(Long.class, "a", "table", false));
  scalarGreaterThanColumn.setChildren(operands);

  List<String> columnNames = new ArrayList<String>();
  columnNames.add("a");
  VectorizationContext context = new VectorizationContext("name", columnNames);

  VectorExpression vectorized = context.getVectorExpression(
      scalarGreaterThanColumn, VectorExpressionDescriptor.Mode.FILTER);

  assertEquals(FilterLongScalarGreaterLongColumn.class, vectorized.getClass());
}
/**
 * Builds {@code a = <boolean-typed constant>} over a Boolean column and checks that FILTER-mode
 * vectorization produces exactly {@code FilterLongColEqualLongScalar}.
 *
 * @throws HiveException if the vector expression cannot be created
 */
@Test
public void testFilterBooleanColumnCompareBooleanScalar() throws HiveException {
  ExprNodeGenericFuncDesc columnEqualsScalar = new ExprNodeGenericFuncDesc();
  columnEqualsScalar.setGenericUDF(new GenericUDFOPEqual());

  // Column first, then the scalar.
  List<ExprNodeDesc> operands = new ArrayList<ExprNodeDesc>(2);
  operands.add(new ExprNodeColumnDesc(Boolean.class, "a", "table", false));
  // NOTE(review): the constant is declared with booleanTypeInfo but carries the int value 20;
  // kept as in the original - confirm whether this mismatch is intentional.
  operands.add(new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, 20));
  columnEqualsScalar.setChildren(operands);

  List<String> columnNames = new ArrayList<String>();
  columnNames.add("a");
  VectorizationContext context = new VectorizationContext("name", columnNames);

  VectorExpression vectorized = context.getVectorExpression(
      columnEqualsScalar, VectorExpressionDescriptor.Mode.FILTER);

  assertEquals(FilterLongColEqualLongScalar.class, vectorized.getClass());
}
ExprNodeGenericFuncDesc equalExprDesc = new ExprNodeGenericFuncDesc(); equalExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); equalExprDesc.setGenericUDF(udf); List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2); children1.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
private Timestamp getTimestampScalar(ExprNodeDesc expr) throws HiveException { if (expr instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) expr).getGenericUDF() instanceof GenericUDFTimestamp) { return evaluateCastToTimestamp(expr); } if (!(expr instanceof ExprNodeConstantDesc)) { throw new HiveException("Constant timestamp value expected for expression argument. " + "Non-constant argument not supported for vectorization."); } ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr; String constTypeString = constExpr.getTypeString(); if (isStringFamily(constTypeString) || isDatetimeFamily(constTypeString)) { // create expression tree with type cast from string to timestamp ExprNodeGenericFuncDesc expr2 = new ExprNodeGenericFuncDesc(); GenericUDFTimestamp f = new GenericUDFTimestamp(); expr2.setGenericUDF(f); ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); children.add(expr); expr2.setChildren(children); // initialize and evaluate return evaluateCastToTimestamp(expr2); } throw new HiveException("Udf: unhandled constant type for scalar argument. " + "Expecting string/date/timestamp."); }
@Test public void testTimeStampUdfs() throws HiveException { ExprNodeGenericFuncDesc tsFuncExpr = new ExprNodeGenericFuncDesc(); tsFuncExpr.setTypeInfo(TypeInfoFactory.intTypeInfo); ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc( TypeInfoFactory.timestampTypeInfo, "a", "table", false); List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); children.add(colDesc1); List<String> columns = new ArrayList<String>(); columns.add("b"); columns.add("a"); VectorizationContext vc = new VectorizationContext("name", columns); //UDFYear GenericUDFBridge gudfBridge = new GenericUDFBridge("year", false, UDFYear.class.getName()); tsFuncExpr.setGenericUDF(gudfBridge); tsFuncExpr.setChildren(children); VectorExpression ve = vc.getVectorExpression(tsFuncExpr); Assert.assertEquals(VectorUDFYearTimestamp.class, ve.getClass()); //GenericUDFToUnixTimeStamp GenericUDFToUnixTimeStamp gudf = new GenericUDFToUnixTimeStamp(); tsFuncExpr.setGenericUDF(gudf); tsFuncExpr.setTypeInfo(TypeInfoFactory.longTypeInfo); ve = vc.getVectorExpression(tsFuncExpr); Assert.assertEquals(VectorUDFUnixTimeStampTimestamp.class, ve.getClass()); }
/**
 * Builds {@code a = <boolean-typed constant>} over a Boolean column, with a boolean result
 * type, and checks that PROJECTION-mode vectorization produces exactly
 * {@code LongColEqualLongScalar}.
 *
 * @throws HiveException if the vector expression cannot be created
 */
@Test
public void testBooleanColumnCompareBooleanScalar() throws HiveException {
  ExprNodeGenericFuncDesc columnEqualsScalar = new ExprNodeGenericFuncDesc();
  columnEqualsScalar.setGenericUDF(new GenericUDFOPEqual());

  // Column first, then the scalar.
  List<ExprNodeDesc> operands = new ArrayList<ExprNodeDesc>(2);
  operands.add(new ExprNodeColumnDesc(Boolean.class, "a", "table", false));
  // NOTE(review): the constant is declared with booleanTypeInfo but carries the int value 20;
  // kept as in the original - confirm whether this mismatch is intentional.
  operands.add(new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, 20));
  columnEqualsScalar.setChildren(operands);
  columnEqualsScalar.setTypeInfo(TypeInfoFactory.booleanTypeInfo);

  List<String> columnNames = new ArrayList<String>();
  columnNames.add("a");
  VectorizationContext context = new VectorizationContext("name", columnNames);

  VectorExpression vectorized = context.getVectorExpression(
      columnEqualsScalar, VectorExpressionDescriptor.Mode.PROJECTION);

  assertEquals(LongColEqualLongScalar.class, vectorized.getClass());
}
/**
 * Validates a nested expression {@code (col1 > 10) AND <FakeGenericUDF call>} through
 * {@code Vectorizer.validateExprNodeDesc} in FILTER mode and expects the validation to succeed.
 */
@Test
public void testValidateNestedExpressions() {
  // Left branch: col1 > 10
  List<ExprNodeDesc> greaterOperands = new ArrayList<ExprNodeDesc>(2);
  greaterOperands.add(new ExprNodeColumnDesc(Integer.class, "col1", "table", false));
  // Integer.valueOf replaces the deprecated Integer(int) constructor; the boxed value is the same.
  greaterOperands.add(new ExprNodeConstantDesc(Integer.valueOf(10)));

  ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
  greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  greaterExprDesc.setGenericUDF(new GenericUDFOPGreaterThan());
  greaterExprDesc.setChildren(greaterOperands);

  // Right branch: a boolean-typed call to FakeGenericUDF with no children set.
  ExprNodeGenericFuncDesc fakeUdfExpr = new ExprNodeGenericFuncDesc();
  fakeUdfExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  fakeUdfExpr.setGenericUDF(new FakeGenericUDF());

  // Root: AND of the two branches.
  List<ExprNodeDesc> andOperands = new ArrayList<ExprNodeDesc>(2);
  andOperands.add(greaterExprDesc);
  andOperands.add(fakeUdfExpr);

  ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
  andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  andExprDesc.setGenericUDF(new GenericUDFOPAnd());
  andExprDesc.setChildren(andOperands);

  Vectorizer vectorizer = new Vectorizer();
  vectorizer.testSetCurrentBaseWork(new MapWork());

  Assert.assertTrue(vectorizer.validateExprNodeDesc(
      andExprDesc, "test", VectorExpressionDescriptor.Mode.FILTER, false));
}
/**
 * Builds {@code (col1 > 10) IS NOT NULL} and checks the vector expressions chosen for it:
 * {@code SelectColumnIsNotNull} in FILTER mode and {@code IsNotNull} in PROJECTION mode, each
 * with a {@code LongColGreaterLongScalar} as first child expression.
 *
 * @throws HiveException if a vector expression cannot be created
 */
@Test
public void testNotNullExpressions() throws HiveException {
  // Inner expression: col1 > 10
  List<ExprNodeDesc> greaterOperands = new ArrayList<ExprNodeDesc>(2);
  greaterOperands.add(new ExprNodeColumnDesc(Integer.class, "col1", "table", false));
  // Integer.valueOf replaces the deprecated Integer(int) constructor; the boxed value is the same.
  greaterOperands.add(new ExprNodeConstantDesc(Integer.valueOf(10)));

  ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
  greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  greaterExprDesc.setGenericUDF(new GenericUDFOPGreaterThan());
  greaterExprDesc.setChildren(greaterOperands);

  // Outer expression: IS NOT NULL applied to the comparison.
  ExprNodeGenericFuncDesc isNotNullExpr = new ExprNodeGenericFuncDesc();
  isNotNullExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  isNotNullExpr.setGenericUDF(new GenericUDFOPNotNull());
  List<ExprNodeDesc> notNullOperands = new ArrayList<ExprNodeDesc>();
  notNullOperands.add(greaterExprDesc);
  isNotNullExpr.setChildren(notNullOperands);

  List<String> columnNames = new ArrayList<String>();
  columnNames.add("col1");
  columnNames.add("col2");
  VectorizationContext context = new VectorizationContext("name", columnNames);

  // assertEquals takes (expected, actual); the original passed them reversed, which would
  // have produced misleading failure messages.
  VectorExpression vectorized =
      context.getVectorExpression(isNotNullExpr, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(SelectColumnIsNotNull.class, vectorized.getClass());
  assertEquals(LongColGreaterLongScalar.class, vectorized.getChildExpressions()[0].getClass());

  vectorized =
      context.getVectorExpression(isNotNullExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(IsNotNull.class, vectorized.getClass());
  assertEquals(LongColGreaterLongScalar.class, vectorized.getChildExpressions()[0].getClass());
}
ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc(); greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); greaterExprDesc.setGenericUDF(udf); List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2); children1.add(col1Expr); notExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo); GenericUDFOPNot notUdf = new GenericUDFOPNot(); notExpr.setGenericUDF(notUdf); List<ExprNodeDesc> childOfNot = new ArrayList<ExprNodeDesc>(); childOfNot.add(greaterExprDesc);
ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc(); greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); greaterExprDesc.setGenericUDF(udf); List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2); children1.add(col1Expr); ExprNodeGenericFuncDesc lessExprDesc = new ExprNodeGenericFuncDesc(); lessExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); lessExprDesc.setGenericUDF(udf2); List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>(2); children2.add(col2Expr); ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc(); andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); andExprDesc.setGenericUDF(andUdf); List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2); children3.add(greaterExprDesc); ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc(); orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); orExprDesc.setGenericUDF(orUdf); List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2); children4.add(greaterExprDesc);
ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc(); greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); greaterExprDesc.setGenericUDF(udf); List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2); children1.add(col1Expr); isNullExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo); GenericUDFOPNull isNullUdf = new GenericUDFOPNull(); isNullExpr.setGenericUDF(isNullUdf); List<ExprNodeDesc> childOfIsNull = new ArrayList<ExprNodeDesc>(); childOfIsNull.add(greaterExprDesc);
GenericUDF gudf = new GenericUDFOPPlus(); plusDesc.setGenericUDF(gudf); List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); children.add(colDesc1);
GenericUDF inBloomFilterUdf = new GenericUDFInBloomFilter(); inBloomFilterExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo); inBloomFilterExpr.setGenericUDF(inBloomFilterUdf); List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2); children1.add(colExpr);
VectorizationContext vc = new VectorizationContext("name", columns); GenericUDF stringLower = new GenericUDFLower(); stringUnary.setGenericUDF(stringLower); anotherUnary.setChildren(children2); GenericUDFBridge udfbridge = new GenericUDFBridge("ltrim", false, GenericUDFLTrim.class.getName()); anotherUnary.setGenericUDF(udfbridge);
/**
 * Validates a BETWEEN expression whose bounds are dynamic values: the children are, in order,
 * a boolean FALSE constant, a String column, and two {@code ExprNodeDynamicValueDesc} bounds.
 * {@code Vectorizer.validateExprNodeDesc} is expected to accept it in FILTER mode.
 */
@Test
public void testExprNodeBetweenWithDynamicValue() {
  // Children in the order they are handed to GenericUDFBetween below.
  ExprNodeDesc notBetweenFlag =
      new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.FALSE);
  ExprNodeColumnDesc column = new ExprNodeColumnDesc(String.class, "col1", "table", false);
  ExprNodeDesc lowerBound =
      new ExprNodeDynamicValueDesc(new DynamicValue("id1", TypeInfoFactory.stringTypeInfo));
  ExprNodeDesc upperBound =
      new ExprNodeDynamicValueDesc(new DynamicValue("id2", TypeInfoFactory.stringTypeInfo));

  List<ExprNodeDesc> betweenOperands = new ArrayList<ExprNodeDesc>(2);
  betweenOperands.add(notBetweenFlag);
  betweenOperands.add(column);
  betweenOperands.add(lowerBound);
  betweenOperands.add(upperBound);

  ExprNodeGenericFuncDesc betweenExpr = new ExprNodeGenericFuncDesc();
  GenericUDF betweenUdf = new GenericUDFBetween();
  betweenExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  betweenExpr.setGenericUDF(betweenUdf);
  betweenExpr.setChildren(betweenOperands);

  Vectorizer vectorizer = new Vectorizer();
  vectorizer.testSetCurrentBaseWork(new MapWork());

  Assert.assertTrue(vectorizer.validateExprNodeDesc(betweenExpr, "Test", Mode.FILTER, false));
}
}