DataFrame happyPeopleSchemaRDD = sqlCtx.applySchema(happyPeopleRDD, HappyPerson.class); happyPeopleSchemaRDD.registerTempTable("happy_people"); sqlCtx.udf().register("stringLengthJava", new UDF1<String, Integer>() { @Override public Integer call(String str) throws Exception {
@SuppressWarnings("unchecked") @Test public void udf3Test() { spark.udf().registerJava("stringLengthTest", StringLengthTest.class.getName(), DataTypes.IntegerType); Row result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); // returnType is not provided spark.udf().registerJava("stringLengthTest2", StringLengthTest.class.getName(), null); result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); }
/**
 * Registers the {@code MyDoubleAvg} aggregate by class name and checks the value it yields
 * when applied to the {@code value} column of the temporary view {@code df}.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  // Make the generated rows visible to SQL as view "df" with a single column "value".
  spark.range(1, 10).toDF("value").createOrReplaceTempView("df");

  String aggregateClass = MyDoubleAvg.class.getName();
  spark.udf().registerJavaUDAF("myDoubleAvg", aggregateClass);

  double actualAvg =
      spark.sql("SELECT myDoubleAvg(value) as my_avg from df").head().getDouble(0);
  // NOTE(review): 105.0 presumably reflects an offset applied inside MyDoubleAvg; confirm there.
  Assert.assertEquals(105.0, actualAvg, 1.0e-6);
}
/**
 * Exercises {@code registerJavaUDAF}: the aggregate implemented by {@code MyDoubleAvg} is
 * registered under the SQL name {@code myDoubleAvg} and evaluated over the view {@code df}.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  spark.range(1, 10)
      .toDF("value")
      .createOrReplaceTempView("df");

  spark.udf().registerJavaUDAF("myDoubleAvg", MyDoubleAvg.class.getName());

  final String query = "SELECT myDoubleAvg(value) as my_avg from df";
  Row firstRow = spark.sql(query).head();

  // Compared with a small tolerance because the result is a double.
  Assert.assertEquals(105.0, firstRow.getDouble(0), 1.0e-6);
}
/**
 * Registers a two-argument lambda UDF that adds the lengths of its string arguments and
 * checks the result for the literals {@code 'test'} and {@code 'test2'} (4 + 5 = 9).
 */
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  spark.udf().register(
      "stringLengthTest",
      (String left, String right) -> left.length() + right.length(),
      DataTypes.IntegerType);

  int combinedLength = spark.sql("SELECT stringLengthTest('test', 'test2')").head().getInt(0);
  Assert.assertEquals(9, combinedLength);
}
@SuppressWarnings("unchecked") @Test public void udf3Test() { spark.udf().registerJava("stringLengthTest", StringLengthTest.class.getName(), DataTypes.IntegerType); Row result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); // returnType is not provided spark.udf().registerJava("stringLengthTest2", StringLengthTest.class.getName(), null); result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); }
/**
 * Verifies that a lambda taking two strings can be registered as a SQL function and invoked
 * from a query; the lambda returns the sum of both argument lengths.
 */
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  spark.udf().register("stringLengthTest",
      (String str1, String str2) -> str1.length() + str2.length(),
      DataTypes.IntegerType);

  final String query = "SELECT stringLengthTest('test', 'test2')";
  Row firstRow = spark.sql(query).head();

  // 'test' has 4 characters and 'test2' has 5.
  Assert.assertEquals(9, firstRow.getInt(0));
}
@SuppressWarnings("unchecked") @Test public void udf3Test() { spark.udf().registerJava("stringLengthTest", StringLengthTest.class.getName(), DataTypes.IntegerType); Row result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); // returnType is not provided spark.udf().registerJava("stringLengthTest2", StringLengthTest.class.getName(), null); result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); }
/**
 * Two-argument lambda UDF test: the function {@code stringLengthTest} is registered with an
 * integer return type and must yield 9 for {@code ('test', 'test2')}.
 */
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  spark.udf().register(
      "stringLengthTest",
      (String first, String second) -> first.length() + second.length(),
      DataTypes.IntegerType);

  Row row = spark.sql("SELECT stringLengthTest('test', 'test2')").head();
  int actual = row.getInt(0);

  Assert.assertEquals(9, actual);
}
static void initializeUDFs(Config config) { if (!config.hasPath(UDFS_SECTION_CONFIG)) return; ConfigList udfList = config.getList(UDFS_SECTION_CONFIG); for (ConfigValue udfValue : udfList) { ConfigValueType udfValueType = udfValue.valueType(); if (!udfValueType.equals(ConfigValueType.OBJECT)) { throw new RuntimeException("UDF list must contain UDF objects"); } Config udfConfig = ((ConfigObject)udfValue).toConfig(); for (String path : Lists.newArrayList(UDFS_NAME, UDFS_CLASS)) { if (!udfConfig.hasPath(path)) { throw new RuntimeException("UDF entries must provide '" + path + "'"); } } String name = udfConfig.getString(UDFS_NAME); String className = udfConfig.getString(UDFS_CLASS); // null third argument means that registerJava will infer the return type Contexts.getSparkSession().udf().registerJava(name, className, null); LOG.info("Registered Spark SQL UDF: " + name); } }
/**
 * Expects an {@code AnalysisException} when the one-argument UDF {@code inc} is invoked
 * with two arguments.
 */
@SuppressWarnings("unchecked")
@Test(expected = AnalysisException.class)
public void udf5Test() {
  spark.udf().register(
      "inc",
      (Long i) -> i + 1,
      DataTypes.LongType);

  // The rows are never inspected; the test passes only if the expected exception is thrown.
  spark.sql("SELECT inc(1, 5)").collectAsList();
}
/**
 * Negative test: {@code inc} is registered as a single-argument function, so a query that
 * passes it two arguments must fail with {@code AnalysisException}.
 */
@SuppressWarnings("unchecked")
@Test(expected = AnalysisException.class)
public void udf5Test() {
  spark.udf().register("inc", (Long value) -> value + 1, DataTypes.LongType);

  final String mismatchedArityQuery = "SELECT inc(1, 5)";
  // Result intentionally discarded; only the thrown exception matters here.
  spark.sql(mismatchedArityQuery).collectAsList();
}
/**
 * Registers a one-argument lambda UDF returning the length of its string argument and
 * checks that it yields 4 for the literal {@code 'test'}.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  spark.udf().register(
      "stringLengthTest",
      (String text) -> text.length(),
      DataTypes.IntegerType);

  int actualLength = spark.sql("SELECT stringLengthTest('test')").head().getInt(0);
  Assert.assertEquals(4, actualLength);
}
/**
 * Single-argument lambda UDF test: {@code stringLengthTest} is registered with an integer
 * return type and called from SQL on a string literal.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  spark.udf().register("stringLengthTest",
      (String str) -> str.length(),
      DataTypes.IntegerType);

  final String query = "SELECT stringLengthTest('test')";
  Row firstRow = spark.sql(query).head();

  // 'test' is four characters long.
  Assert.assertEquals(4, firstRow.getInt(0));
}
/**
 * Checks that a lambda registered under the name {@code stringLengthTest} can be invoked
 * from a SQL query and returns the length of the supplied string.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  spark.udf().register(
      "stringLengthTest", (String input) -> input.length(), DataTypes.IntegerType);

  Row row = spark.sql("SELECT stringLengthTest('test')").head();
  int actual = row.getInt(0);

  Assert.assertEquals(4, actual);
}
/**
 * Registers a zero-argument lambda UDF that always returns 1 and checks the value obtained
 * by calling it from SQL.
 */
@SuppressWarnings("unchecked")
@Test
public void udf6Test() {
  spark.udf().register(
      "returnOne",
      () -> 1,
      DataTypes.IntegerType);

  int actual = spark.sql("SELECT returnOne()").head().getInt(0);
  Assert.assertEquals(1, actual);
}
} // closes an enclosing scope opened outside this excerpt
/**
 * Zero-argument UDF test: {@code returnOne} takes no parameters and its single result row
 * must contain the integer 1.
 */
@SuppressWarnings("unchecked")
@Test
public void udf6Test() {
  spark.udf().register("returnOne", () -> 1, DataTypes.IntegerType);

  final String query = "SELECT returnOne()";
  Row firstRow = spark.sql(query).head();

  Assert.assertEquals(1, firstRow.getInt(0));
}
} // closes an enclosing scope opened outside this excerpt
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); } }