DataFrame happyPeopleSchemaRDD = sqlCtx.applySchema(happyPeopleRDD, HappyPerson.class); happyPeopleSchemaRDD.registerTempTable("happy_people"); sqlCtx.udf().register("stringLengthJava", new UDF1<String, Integer>() { @Override public Integer call(String str) throws Exception {
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  // Two-argument UDF returning the combined length of both input strings.
  spark.udf().register(
      "stringLengthTest",
      (String left, String right) -> left.length() + right.length(),
      DataTypes.IntegerType);

  // "test" has 4 characters and "test2" has 5, so the UDF yields 9.
  Row row = spark.sql("SELECT stringLengthTest('test', 'test2')").head();
  Assert.assertEquals(9, row.getInt(0));
}
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  // Two-argument UDF returning the combined length of both input strings.
  spark.udf().register(
      "stringLengthTest",
      (String left, String right) -> left.length() + right.length(),
      DataTypes.IntegerType);

  // "test" has 4 characters and "test2" has 5, so the UDF yields 9.
  Row row = spark.sql("SELECT stringLengthTest('test', 'test2')").head();
  Assert.assertEquals(9, row.getInt(0));
}
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  // Two-argument UDF returning the combined length of both input strings.
  spark.udf().register(
      "stringLengthTest",
      (String left, String right) -> left.length() + right.length(),
      DataTypes.IntegerType);

  // "test" has 4 characters and "test2" has 5, so the UDF yields 9.
  Row row = spark.sql("SELECT stringLengthTest('test', 'test2')").head();
  Assert.assertEquals(9, row.getInt(0));
}
@SuppressWarnings("unchecked")
@Test(expected = AnalysisException.class)
public void udf5Test() {
  // "inc" accepts exactly one argument, so invoking it with two must fail analysis.
  spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType);
  // The original assigned the result to an unused local; the test only cares
  // that this call throws, so the assignment is dropped.
  spark.sql("SELECT inc(1, 5)").collectAsList();
}
@SuppressWarnings("unchecked")
@Test(expected = AnalysisException.class)
public void udf5Test() {
  // "inc" accepts exactly one argument, so invoking it with two must fail analysis.
  spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType);
  // The original assigned the result to an unused local; the test only cares
  // that this call throws, so the assignment is dropped.
  spark.sql("SELECT inc(1, 5)").collectAsList();
}
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  // Single-argument UDF reporting the length of its input string.
  spark.udf().register(
      "stringLengthTest", (String value) -> value.length(), DataTypes.IntegerType);

  // "test" is 4 characters long.
  Row row = spark.sql("SELECT stringLengthTest('test')").head();
  Assert.assertEquals(4, row.getInt(0));
}
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  // Single-argument UDF reporting the length of its input string.
  spark.udf().register(
      "stringLengthTest", (String value) -> value.length(), DataTypes.IntegerType);

  // "test" is 4 characters long.
  Row row = spark.sql("SELECT stringLengthTest('test')").head();
  Assert.assertEquals(4, row.getInt(0));
}
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  // Single-argument UDF reporting the length of its input string.
  spark.udf().register(
      "stringLengthTest", (String value) -> value.length(), DataTypes.IntegerType);

  // "test" is 4 characters long.
  Row row = spark.sql("SELECT stringLengthTest('test')").head();
  Assert.assertEquals(4, row.getInt(0));
}
@SuppressWarnings("unchecked")
@Test
public void udf6Test() {
  // Zero-argument UDF that always produces the constant 1.
  spark.udf().register("returnOne", () -> 1, DataTypes.IntegerType);

  Row row = spark.sql("SELECT returnOne()").head();
  Assert.assertEquals(1, row.getInt(0));
}
}
@SuppressWarnings("unchecked")
@Test
public void udf6Test() {
  // Zero-argument UDF that always produces the constant 1.
  spark.udf().register("returnOne", () -> 1, DataTypes.IntegerType);

  Row row = spark.sql("SELECT returnOne()").head();
  Assert.assertEquals(1, row.getInt(0));
}
}
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); } }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); }
/**
 * Registers a user-defined aggregate function under the given SQL name.
 *
 * @param name       the name the UDAF is registered as
 * @param udafClass  the UDAF implementation class; must expose a no-arg constructor
 * @param sqlContext the context to register the function with
 * @throws AnalyticsUDFException if the UDAF class cannot be instantiated
 */
public void registerUDAF(String name, Class<? extends UserDefinedAggregateFunction> udafClass,
    SQLContext sqlContext) throws AnalyticsUDFException {
  try {
    // Class.newInstance() is deprecated since Java 9 (it rethrows checked
    // constructor exceptions undeclared); go through the Constructor instead.
    sqlContext.udf().register(name, udafClass.getDeclaredConstructor().newInstance());
  } catch (ReflectiveOperationException e) {
    throw new AnalyticsUDFException("Error registering UDAF: " + e.getMessage(), e);
  }
}
/** * Pushes an "in_valueset" UDF that uses an already broadcast instance of * {@link BroadcastableValueSets} for its content. * * @param spark the spark session * @param broadcast the broadcast valuesets to use in the UDF */ public static synchronized void pushUdf(SparkSession spark, Broadcast<BroadcastableValueSets> broadcast) { spark.udf() .register("in_valueset", new InValuesetUdf(broadcast), DataTypes.BooleanType); // Push the broadcast variable valueSetStack.push(broadcast); }
/** * Pushes an "in_valueset" UDF that uses an already broadcast instance of * {@link BroadcastableValueSets} for its content. * * @param spark the spark session * @param broadcast the broadcast valuesets to use in the UDF */ public static synchronized void pushUdf(SparkSession spark, Broadcast<BroadcastableValueSets> broadcast) { spark.udf() .register("in_valueset", new InValuesetUdf(broadcast), DataTypes.BooleanType); // Push the broadcast variable valueSetStack.push(broadcast); }
public static void registerGeometryFunctions(SparkSession spark) { // Distance UDF is only exception to GeomFunction interface since it // returns Double spark.udf().register("GeomDistance", geomDistanceInstance, DataTypes.DoubleType); spark.udf().register("GeomFromWKT", geomWKTInstance, GeoWaveSpatialEncoders.geometryUDT); // Register all UDF functions from RegistrySPI UDFNameAndConstructor[] supportedUDFs = UDFRegistrySPI.getSupportedUDFs(); for (int iUDF = 0; iUDF < supportedUDFs.length; iUDF += 1) { UDFNameAndConstructor udf = supportedUDFs[iUDF]; GeomFunction funcInstance = udf.getPredicateConstructor().get(); spark.udf().register(funcInstance.getRegisterName(), funcInstance, DataTypes.BooleanType); } } }
@BeforeClass public static void startSpark() { TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate(); // define UDFs used by partition tests Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4); spark.udf().register("bucket4", (UDF1<Long, Integer>) bucket4::apply, IntegerType$.MODULE$); Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone()); spark.udf().register("ts_day", (UDF1<Timestamp, Integer>) timestamp -> day.apply(fromJavaTimestamp(timestamp)), IntegerType$.MODULE$); Transform<Long, Integer> hour = Transforms.hour(Types.TimestampType.withZone()); spark.udf().register("ts_hour", (UDF1<Timestamp, Integer>) timestamp -> hour.apply(fromJavaTimestamp(timestamp)), IntegerType$.MODULE$); Transform<CharSequence, CharSequence> trunc1 = Transforms.truncate(Types.StringType.get(), 1); spark.udf().register("trunc1", (UDF1<CharSequence, CharSequence>) str -> trunc1.apply(str.toString()), StringType$.MODULE$); }
private void start() {
  SparkSession spark = SparkSession.builder()
      .appName("CSV to Dataset")
      .master("local")
      .getOrCreate();

  // Make the doubling UDF available to SQL expressions.
  spark.udf().register("x2Multiplier", new Multiplier2(), DataTypes.IntegerType);

  String filename = "data/tuple-data-file.csv";
  Dataset<Row> df = spark.read().format("csv")
      .option("inferSchema", "true")
      .option("header", "false")
      .load(filename);

  // Rename the two inferred columns, then derive "x2" through the UDF.
  df = df.withColumn("label", df.col("_c0")).drop("_c0");
  df = df.withColumn("value", df.col("_c1")).drop("_c1");
  df = df.withColumn("x2",
      callUDF("x2Multiplier", df.col("value").cast(DataTypes.IntegerType)));

  df.show();
}
}