@Before
public void setUp() throws IOException {
  spark = SparkSession.builder()
      .master("local[*]")
      .appName("testing")
      .getOrCreate();
  path = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "datasource")
      .getCanonicalFile();
  if (path.exists()) {
    path.delete();
  }
  List<String> jsonObjects = new ArrayList<>(10);
  for (int i = 0; i < 10; i++) {
    jsonObjects.add("{\"a\":" + i + ", \"b\":\"str" + i + "\"}");
  }
  Dataset<String> ds = spark.createDataset(jsonObjects, Encoders.STRING());
  df = spark.read().json(ds);
  df.createOrReplaceTempView("jsonTable");
}
@Test
public void applySchemaToJSON() {
  Dataset<String> jsonDS = spark.createDataset(Arrays.asList(
      "{\"string\":\"this is a simple string.\", \"integer\":10, \"long\":21474836470, " +
          "\"bigInteger\":92233720368547758070, \"double\":1.7976931348623157E308, " +
          // The snippet is truncated at this point in the source. A minimal completion
          // follows so the example compiles; the original test goes on to apply an
          // explicit schema and assert on the parsed rows.
          "\"boolean\":true, \"null\":null}"), Encoders.STRING());
  Dataset<Row> result = spark.read().json(jsonDS);
  result.printSchema();
}
/**
 * Returns a new ConceptMaps instance that includes the given maps.
 *
 * @param conceptMaps concept maps to add
 * @return a new ConceptMaps instance with the values added
 */
public C withConceptMaps(List<T> conceptMaps) {
  return withConceptMaps(this.spark.createDataset(conceptMaps, conceptMapEncoder));
}
/**
 * Returns a new ValueSets instance that includes the given value sets.
 *
 * @param valueSets the value sets to add to the returned collection
 * @return a new ValueSets instance with the added value sets
 */
public C withValueSets(List<T> valueSets) {
  return withValueSets(this.spark.createDataset(valueSets, valueSetEncoder));
}
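Both methods appear to come from the Bunsen FHIR library's immutable collection types: each call creates a dataset from the supplied list and delegates to the dataset-based overload, returning a new collection rather than mutating the receiver. A hypothetical call site, assuming a Bunsen-style ConceptMaps.getEmpty factory and an existing ConceptMap value (neither is shown in the snippets above):

// Hypothetical usage sketch; ConceptMaps.getEmpty and someConceptMap are
// assumptions, not taken from the snippets above.
ConceptMaps empty = ConceptMaps.getEmpty(spark);
ConceptMaps updated = empty.withConceptMaps(Arrays.asList(someConceptMap));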
public UtilTable(@NonNull final Class<T> type,
    @NonNull final JavaRDD<T> javaRDD,
    @NonNull final Path destPath,
    final boolean isDatePartitioned,
    @NonNull final SparkSession sparkSession) {
  this.spark = sparkSession;
  final RDD<T> rdd = javaRDD.rdd();
  final Encoder<T> bean = Encoders.bean(type);
  this.dataset = this.spark.createDataset(rdd, bean);
  this.destPath = destPath;
  this.isDatePartitioned = isDatePartitioned;
}
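Encoders.bean(type) only works when T follows JavaBean conventions: a public class with a no-argument constructor and getter/setter pairs for every field to be serialized. A minimal sketch of a compatible class (the name Event and its fields are illustrative, not from the snippet):

// Illustrative JavaBean; the class name and fields are assumptions,
// not taken from the UtilTable snippet.
public class Event {
  private String id;
  private long timestamp;

  public Event() {}  // no-arg constructor required by Encoders.bean

  public String getId() { return id; }
  public void setId(String id) { this.id = id; }
  public long getTimestamp() { return timestamp; }
  public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
}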
@Override
public <T> Dataset<T> createDataset(final RDD<T> data, final Encoder<T> evidence) {
  final boolean userTriggered = initializeFunction(data, evidence);
  final Dataset<T> result = Dataset.from(super.createDataset(data, evidence));
  this.setIsUserTriggered(userTriggered);
  return result;
}
@Override
public <T> Dataset<T> createDataset(final scala.collection.Seq<T> data, final Encoder<T> evidence) {
  final boolean userTriggered = initializeFunction(data, evidence);
  final Dataset<T> result = Dataset.from(super.createDataset(data, evidence));
  this.setIsUserTriggered(userTriggered);
  return result;
}
private void start() { SparkSession spark = SparkSession.builder() .appName("Array to Dataset<String>") .master("local") .getOrCreate(); String[] l = new String[] { "a", "b", "c", "d" }; List<String> data = Arrays.asList(l); Dataset<String> df = spark.createDataset(data, Encoders.STRING()); df.show(); } }
private void start() {
  SparkSession spark = SparkSession.builder().master("local").getOrCreate();

  List<Integer> data = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  Dataset<Integer> df = spark.createDataset(data, Encoders.INT());
  df.show();
  df.printSchema();

  Integer sumByReduce = df.reduce(new SumByReduce());
  System.out.println("Sum should be 55 and it is... " + sumByReduce);
}
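The SumByReduce class is not shown in this snippet. For the example to compile, it would be a ReduceFunction<Integer> along these lines (a sketch of the presumed shape, not the original class):

import org.apache.spark.api.java.function.ReduceFunction;

// Presumed shape of SumByReduce; the original class is not included
// in the snippet above.
public class SumByReduce implements ReduceFunction<Integer> {
  @Override
  public Integer call(Integer left, Integer right) {
    return left + right;
  }
}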
private void start() { SparkSession spark = SparkSession.builder() .appName("Array to Dataframe (Dataset<Row>)") .master("local") .getOrCreate(); String[] l = new String[] { "a", "b", "c", "d" }; List<String> data = Arrays.asList(l); Dataset<String> ds = spark.createDataset(data, Encoders.STRING()); Dataset<Row> df = ds.toDF(); df.show(); } }
@Override
public void setUp() throws IOException {
  super.setUp();
  List<java.lang.Double> points = Arrays.asList(0.1, 1.1, 10.1, -1.1);
  dataset = spark.createDataset(points, Encoders.DOUBLE()).toDF("sample");
}
/**
 * Returns a dataset of ValueSet from the content stored at the given directory.
 */
protected Dataset<T> valueSetDatasetFromDirectory(String path) {
  JavaRDD<Tuple2<String, String>> fileNamesAndContents = this.spark.sparkContext()
      .wholeTextFiles(path, 1)
      .toJavaRDD();

  return this.spark.createDataset(fileNamesAndContents
      .map(new ToValueSet(fhirVersion))
      .rdd(), valueSetEncoder);
}
private Dataset<T> conceptMapsDatasetFromDirectory(String path) {
  JavaRDD<Tuple2<String, String>> fileNamesAndContents = this.spark.sparkContext()
      .wholeTextFiles(path, 1)
      .toJavaRDD();

  return this.spark.createDataset(fileNamesAndContents
      .map(new ToConceptMap(fhirVersion))
      .rdd(), conceptMapEncoder);
}
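In both directory-loading methods, wholeTextFiles yields (file path, file contents) pairs, and ToValueSet / ToConceptMap map each pair to a parsed FHIR resource before the resulting RDD is wrapped in a dataset. Neither mapper is shown in the snippets; a sketch of the shape such a mapper would take, assuming a HAPI FHIR JSON parser (the parsing details are assumptions, not the library's actual implementation):

import org.apache.spark.api.java.function.Function;
import scala.Tuple2;

// Sketch of a mapper like ToValueSet; the real implementation is not shown
// in the snippets, so the parsing logic here is an assumption.
class ToValueSet implements Function<Tuple2<String, String>, ValueSet> {

  private final FhirVersionEnum fhirVersion;

  ToValueSet(FhirVersionEnum fhirVersion) {
    this.fhirVersion = fhirVersion;
  }

  @Override
  public ValueSet call(Tuple2<String, String> fileAndContents) {
    // _2() holds the file contents; parse them into a FHIR ValueSet.
    return (ValueSet) FhirContext.forCached(fhirVersion)
        .newJsonParser()
        .parseResource(fileAndContents._2());
  }
}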