/**
 * Verifies that a two-argument Java lambda can be registered as a UDF
 * and invoked from SQL: length('test') + length('test2') == 4 + 5 == 9.
 */
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  // UDF2<String, String, Integer>: sum the lengths of both arguments.
  spark.udf().register(
      "stringLengthTest",
      (String s1, String s2) -> s1.length() + s2.length(),
      DataTypes.IntegerType);

  Row row = spark.sql("SELECT stringLengthTest('test', 'test2')").head();
  Assert.assertEquals(9, row.getInt(0));
}
/**
 * Verifies that a two-argument Java lambda can be registered as a UDF
 * and invoked from SQL: length('test') + length('test2') == 4 + 5 == 9.
 */
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  // UDF2<String, String, Integer>: sum the lengths of both arguments.
  spark.udf().register(
      "stringLengthTest",
      (String s1, String s2) -> s1.length() + s2.length(),
      DataTypes.IntegerType);

  Row row = spark.sql("SELECT stringLengthTest('test', 'test2')").head();
  Assert.assertEquals(9, row.getInt(0));
}
/**
 * Verifies that a two-argument Java lambda can be registered as a UDF
 * and invoked from SQL: length('test') + length('test2') == 4 + 5 == 9.
 */
@SuppressWarnings("unchecked")
@Test
public void udf2Test() {
  // UDF2<String, String, Integer>: sum the lengths of both arguments.
  spark.udf().register(
      "stringLengthTest",
      (String s1, String s2) -> s1.length() + s2.length(),
      DataTypes.IntegerType);

  Row row = spark.sql("SELECT stringLengthTest('test', 'test2')").head();
  Assert.assertEquals(9, row.getInt(0));
}
@SuppressWarnings("unchecked") @Test public void udf3Test() { spark.udf().registerJava("stringLengthTest", StringLengthTest.class.getName(), DataTypes.IntegerType); Row result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); // returnType is not provided spark.udf().registerJava("stringLengthTest2", StringLengthTest.class.getName(), null); result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); }
@SuppressWarnings("unchecked") @Test public void udf3Test() { spark.udf().registerJava("stringLengthTest", StringLengthTest.class.getName(), DataTypes.IntegerType); Row result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); // returnType is not provided spark.udf().registerJava("stringLengthTest2", StringLengthTest.class.getName(), null); result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); }
/**
 * Verifies that a one-argument Java lambda UDF computes string length.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  // UDF1<String, Integer>: return the length of the sole argument.
  spark.udf().register("stringLengthTest", (String s) -> s.length(), DataTypes.IntegerType);

  Row row = spark.sql("SELECT stringLengthTest('test')").head();
  Assert.assertEquals(4, row.getInt(0));
}
@SuppressWarnings("unchecked") @Test public void udf3Test() { spark.udf().registerJava("stringLengthTest", StringLengthTest.class.getName(), DataTypes.IntegerType); Row result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); // returnType is not provided spark.udf().registerJava("stringLengthTest2", StringLengthTest.class.getName(), null); result = spark.sql("SELECT stringLengthTest('test', 'test2')").head(); Assert.assertEquals(9, result.getInt(0)); }
/**
 * Verifies that a one-argument Java lambda UDF computes string length.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  // UDF1<String, Integer>: return the length of the sole argument.
  spark.udf().register("stringLengthTest", (String s) -> s.length(), DataTypes.IntegerType);

  Row row = spark.sql("SELECT stringLengthTest('test')").head();
  Assert.assertEquals(4, row.getInt(0));
}
/**
 * Verifies that a one-argument Java lambda UDF computes string length.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  // UDF1<String, Integer>: return the length of the sole argument.
  spark.udf().register("stringLengthTest", (String s) -> s.length(), DataTypes.IntegerType);

  Row row = spark.sql("SELECT stringLengthTest('test')").head();
  Assert.assertEquals(4, row.getInt(0));
}
.mapPartitions(new HBaseWriterFunction(profilerProps), Encoders.INT()) .agg(sum("value")) .head() .getLong(0); LOG.debug("{} profile measurement(s) written to HBase", count);
/**
 * Verifies that a zero-argument UDF can be registered and invoked.
 */
@SuppressWarnings("unchecked")
@Test
public void udf6Test() {
  // UDF0<Integer>: constant-producing UDF with no arguments.
  spark.udf().register("returnOne", () -> 1, DataTypes.IntegerType);

  Row row = spark.sql("SELECT returnOne()").head();
  Assert.assertEquals(1, row.getInt(0));
}
}
/**
 * Verifies that a zero-argument UDF can be registered and invoked.
 */
@SuppressWarnings("unchecked")
@Test
public void udf6Test() {
  // UDF0<Integer>: constant-producing UDF with no arguments.
  spark.udf().register("returnOne", () -> 1, DataTypes.IntegerType);

  Row row = spark.sql("SELECT returnOne()").head();
  Assert.assertEquals(1, row.getInt(0));
}
}
/**
 * Verifies that a Java UDAF registered by class name aggregates correctly.
 * Values 1..9 average to 5.0; the assertion expects 105.0, presumably
 * because MyDoubleAvg offsets the mean by 100 — see that class.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  spark.range(1, 10).toDF("value").createOrReplaceTempView("df");
  spark.udf().registerJavaUDAF("myDoubleAvg", MyDoubleAvg.class.getName());

  Row row = spark.sql("SELECT myDoubleAvg(value) as my_avg from df").head();
  Assert.assertEquals(105.0, row.getDouble(0), 1.0e-6);
}
/**
 * Verifies that a Java UDAF registered by class name aggregates correctly.
 * Values 1..9 average to 5.0; the assertion expects 105.0, presumably
 * because MyDoubleAvg offsets the mean by 100 — see that class.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  spark.range(1, 10).toDF("value").createOrReplaceTempView("df");
  spark.udf().registerJavaUDAF("myDoubleAvg", MyDoubleAvg.class.getName());

  Row row = spark.sql("SELECT myDoubleAvg(value) as my_avg from df").head();
  Assert.assertEquals(105.0, row.getDouble(0), 1.0e-6);
}
@Test public void testKSTestNamedDistribution() { double pThreshold = 0.05; // Comparing a standard normal sample to a standard normal distribution Row results = KolmogorovSmirnovTest .test(dataset, "sample", "norm", 0.0, 1.0).head(); double pValue1 = results.getDouble(0); // Cannot reject null hypothesis assert(pValue1 > pThreshold); } }
@Test public void testKSTestNamedDistribution() { double pThreshold = 0.05; // Comparing a standard normal sample to a standard normal distribution Row results = KolmogorovSmirnovTest .test(dataset, "sample", "norm", 0.0, 1.0).head(); double pValue1 = results.getDouble(0); // Cannot reject null hypothesis assert(pValue1 > pThreshold); } }
/** * Returns the concept map with the given uri and version, or null if there is no such map. * * @param uri the uri of the map to return * @param version the version of the map to return * @return the specified concept map. */ public T getConceptMap(String uri, String version) { // Load the concept maps, which may contain zero items // if the map does not exist. // Typecast necessary to placate the Java compiler calling this Scala function. T[] maps = (T[]) this.conceptMaps.filter( functions.col("url").equalTo(lit(uri)) .and(functions.col("version").equalTo(lit(version)))) .head(1); if (maps.length == 0) { return null; } else { T map = maps[0]; Dataset<Mapping> filteredMappings = getMappings(uri, version); addToConceptMap(map, filteredMappings); return map; } }
/** * Returns the value set with the given uri and version, or null if there is no such value set. * * @param uri the uri of the value set to return * @param version the version of the value set to return * @return the specified value set. */ public T getValueSet(String uri, String version) { // Load the value sets, which may contain zero items if the value set does not exist // Typecast necessary to placate the Java compiler calling this Scala function T[] valueSets = (T[]) this.valueSets.filter( col("url").equalTo(lit(uri)) .and(col("version").equalTo(lit(version)))) .head(1); if (valueSets.length == 0) { return null; } else { T valueSet = valueSets[0]; Dataset<Value> filteredValues = getValues(uri, version); addToValueSet(valueSet, filteredValues); return valueSet; } }
@Test public void testKSTestCDF() { // Create theoretical distributions NormalDistribution stdNormalDist = new NormalDistribution(0, 1); // set seeds Long seed = 10L; stdNormalDist.reseedRandomGenerator(seed); Function<Double, Double> stdNormalCDF = (x) -> stdNormalDist.cumulativeProbability(x); double pThreshold = 0.05; // Comparing a standard normal sample to a standard normal distribution Row results = KolmogorovSmirnovTest .test(dataset, "sample", stdNormalCDF).head(); double pValue1 = results.getDouble(0); // Cannot reject null hypothesis assert(pValue1 > pThreshold); }
@Test public void testKSTestCDF() { // Create theoretical distributions NormalDistribution stdNormalDist = new NormalDistribution(0, 1); // set seeds Long seed = 10L; stdNormalDist.reseedRandomGenerator(seed); Function<Double, Double> stdNormalCDF = (x) -> stdNormalDist.cumulativeProbability(x); double pThreshold = 0.05; // Comparing a standard normal sample to a standard normal distribution Row results = KolmogorovSmirnovTest .test(dataset, "sample", stdNormalCDF).head(); double pValue1 = results.getDouble(0); // Cannot reject null hypothesis assert(pValue1 > pThreshold); }