// Inspect the schema before the transformation.
df.printSchema();
// Build the "features" column from the raw "valuefeatures" column via the
// "vectorBuilder" UDF (presumably registered earlier — registration is not
// visible in this chunk; TODO confirm).
df = df.withColumn("features", callUDF("vectorBuilder", df.col( "valuefeatures")));
// Confirm the new "features" column now appears in the schema.
df.printSchema();
private File buildPartitionedTable(String desc, PartitionSpec spec, String udf, String partitionColumn) { File location = new File(parent, desc); Table byId = TABLES.create(SCHEMA, spec, location.toString()); // do not combine splits because the tests expect a split per partition byId.updateProperties().set("read.split.target-size", "1").commit(); // copy the unpartitioned table into the partitioned table to produce the partitioned data Dataset<Row> allRows = spark.read() .format("iceberg") .load(unpartitioned.toString()); allRows .coalesce(1) // ensure only 1 file per partition is written .withColumn("part", callUDF(udf, column(partitionColumn))) .sortWithinPartitions("part") .drop("part") .write() .format("iceberg") .mode("append") .save(byId.location()); return location; }
/**
 * Loads a two-column CSV file, renames its inferred columns to "label" and "value",
 * and derives an "x2" column by running "value" through the registered
 * "x2Multiplier" UDF (backed by the Multiplier2 class).
 */
private void start() {
  SparkSession spark = SparkSession.builder()
      .appName("CSV to Dataset")
      .master("local")
      .getOrCreate();

  // Expose the Multiplier2 UDF to Spark SQL under the name "x2Multiplier".
  spark.udf().register("x2Multiplier", new Multiplier2(), DataTypes.IntegerType);

  String filename = "data/tuple-data-file.csv";
  Dataset<Row> df = spark.read()
      .format("csv")
      .option("inferSchema", "true")
      .option("header", "false")
      .load(filename);

  // Give the auto-named _c0/_c1 columns meaningful names.
  df = df.withColumn("label", df.col("_c0")).drop("_c0");
  df = df.withColumn("value", df.col("_c1")).drop("_c1");

  // Apply the UDF; the cast matches the UDF's declared Integer input type.
  df = df.withColumn("x2",
      callUDF("x2Multiplier", df.col("value").cast(DataTypes.IntegerType)));

  df.show();
}
}
private void start() { SparkSession spark = SparkSession.builder().appName("CSV to Dataset") .master("local").getOrCreate(); // registers a new internal UDF spark.udf().register("x2Multiplier", new UDF1<Integer, Integer>() { private static final long serialVersionUID = -5372447039252716846L; @Override public Integer call(Integer x) { return x * 2; } }, DataTypes.IntegerType); String filename = "data/tuple-data-file.csv"; Dataset<Row> df = spark.read().format("csv").option("inferSchema", "true") .option("header", "false").load(filename); df = df.withColumn("label", df.col("_c0")).drop("_c0"); df = df.withColumn("value", df.col("_c1")).drop("_c1"); df = df.withColumn("x2", callUDF("x2Multiplier", df.col("value").cast( DataTypes.IntegerType))); df.show(); } }
@Test public void testUDAF() { Dataset<Row> df = hc.range(0, 100).union(hc.range(0, 100)).select(col("id").as("value")); UserDefinedAggregateFunction udaf = new MyDoubleSum(); UserDefinedAggregateFunction registeredUDAF = hc.udf().register("mydoublesum", udaf); // Create Columns for the UDAF. For now, callUDF does not take an argument to specific if // we want to use distinct aggregation. Dataset<Row> aggregatedDF = df.groupBy() .agg( udaf.distinct(col("value")), udaf.apply(col("value")), registeredUDAF.apply(col("value")), callUDF("mydoublesum", col("value"))); List<Row> expectedResult = new ArrayList<>(); expectedResult.add(RowFactory.create(4950.0, 9900.0, 9900.0, 9900.0)); checkAnswer( aggregatedDF, expectedResult); } }
@Test public void testUDAF() { Dataset<Row> df = hc.range(0, 100).union(hc.range(0, 100)).select(col("id").as("value")); UserDefinedAggregateFunction udaf = new MyDoubleSum(); UserDefinedAggregateFunction registeredUDAF = hc.udf().register("mydoublesum", udaf); // Create Columns for the UDAF. For now, callUDF does not take an argument to specific if // we want to use distinct aggregation. Dataset<Row> aggregatedDF = df.groupBy() .agg( udaf.distinct(col("value")), udaf.apply(col("value")), registeredUDAF.apply(col("value")), callUDF("mydoublesum", col("value"))); List<Row> expectedResult = new ArrayList<>(); expectedResult.add(RowFactory.create(4950.0, 9900.0, 9900.0, 9900.0)); checkAnswer( aggregatedDF, expectedResult); } }