// Applies HappyPerson.class as the schema for happyPeopleRDD, registers the resulting
// DataFrame as the temp table "happy_people", and starts registering a String -> Integer UDF
// under the name "stringLengthJava". The body of call(String) is cut off in this excerpt.
DataFrame happyPeopleSchemaRDD = sqlCtx.applySchema(happyPeopleRDD, HappyPerson.class); happyPeopleSchemaRDD.registerTempTable("happy_people"); sqlCtx.udf().register("stringLengthJava", new UDF1<String, Integer>() { @Override public Integer call(String str) throws Exception {
/**
 * Registers a user-defined aggregate function (UDAF) with the given SQL context.
 *
 * <p>The UDAF class is instantiated reflectively through its no-argument constructor and the
 * resulting instance is registered under {@code name}.
 *
 * @param name       name under which the UDAF is registered
 * @param udafClass  UDAF implementation class; it needs an accessible no-argument constructor
 * @param sqlContext SQL context whose UDF registry receives the function
 * @throws AnalyticsUDFException if the class cannot be instantiated, including the case where
 *                               its constructor itself throws
 */
public void registerUDAF(String name, Class<? extends UserDefinedAggregateFunction> udafClass,
                         SQLContext sqlContext) throws AnalyticsUDFException {
    try {
        // Class.newInstance() is deprecated because it rethrows whatever the constructor
        // throws, unwrapped and unchecked by the compiler. Going through the Constructor
        // object wraps such failures in InvocationTargetException so they are handled below.
        UserDefinedAggregateFunction udaf = udafClass.getDeclaredConstructor().newInstance();
        sqlContext.udf().register(name, udaf);
    } catch (ReflectiveOperationException e) {
        // InvocationTargetException carries the real failure as its cause and usually has a
        // null message of its own, so report the cause's message when one is available.
        Throwable root = e.getCause() != null ? e.getCause() : e;
        throw new AnalyticsUDFException("Error registering UDAF: " + root.getMessage(), e);
    }
}
// Zero-parameter case: wraps the target method in a UDF0Adaptor (held through the UDF1 type)
// and registers it under methodName, with the return type converted by
// AnalyticsCommonUtils.getDataType.
// NOTE(review): the repeated register/break pairs after the first `break` are unreachable as
// written; they look like the bodies of further cases whose labels were lost - confirm against
// the original file. The closing brace of this case is not part of this excerpt.
case 0: { UDF1 udfAdapter = new UDF0Adaptor(udfClass, methodName, parameterTypes); sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break; sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break; sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break; sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break; sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break; sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break; sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break; sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break; sqlContext.udf().register(methodName, udfAdapter, AnalyticsCommonUtils.getDataType(returnType)); break;
@Test public void testUDAF() { Dataset<Row> df = hc.range(0, 100).union(hc.range(0, 100)).select(col("id").as("value")); UserDefinedAggregateFunction udaf = new MyDoubleSum(); UserDefinedAggregateFunction registeredUDAF = hc.udf().register("mydoublesum", udaf); // Create Columns for the UDAF. For now, callUDF does not take an argument to specific if // we want to use distinct aggregation. Dataset<Row> aggregatedDF = df.groupBy() .agg( udaf.distinct(col("value")), udaf.apply(col("value")), registeredUDAF.apply(col("value")), callUDF("mydoublesum", col("value"))); List<Row> expectedResult = new ArrayList<>(); expectedResult.add(RowFactory.create(4950.0, 9900.0, 9900.0, 9900.0)); checkAnswer( aggregatedDF, expectedResult); } }
@Test public void testUDAF() { Dataset<Row> df = hc.range(0, 100).union(hc.range(0, 100)).select(col("id").as("value")); UserDefinedAggregateFunction udaf = new MyDoubleSum(); UserDefinedAggregateFunction registeredUDAF = hc.udf().register("mydoublesum", udaf); // Create Columns for the UDAF. For now, callUDF does not take an argument to specific if // we want to use distinct aggregation. Dataset<Row> aggregatedDF = df.groupBy() .agg( udaf.distinct(col("value")), udaf.apply(col("value")), registeredUDAF.apply(col("value")), callUDF("mydoublesum", col("value"))); List<Row> expectedResult = new ArrayList<>(); expectedResult.add(RowFactory.create(4950.0, 9900.0, 9900.0, 9900.0)); checkAnswer( aggregatedDF, expectedResult); } }