@Before
public void setUp() throws IOException {
  spark = SparkSession.builder()
      .master("local[*]")
      .appName("testing")
      .getOrCreate();
  path = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "datasource")
      .getCanonicalFile();
  if (path.exists()) {
    path.delete();
  }
  List<String> jsonObjects = new ArrayList<>(10);
  for (int i = 0; i < 10; i++) {
    jsonObjects.add("{\"a\":" + i + ", \"b\":\"str" + i + "\"}");
  }
  Dataset<String> ds = spark.createDataset(jsonObjects, Encoders.STRING());
  df = spark.read().json(ds);
  df.createOrReplaceTempView("jsonTable");
}
@Test
public void applySchemaToJSON() {
  Dataset<String> jsonDS = spark.createDataset(Arrays.asList(
      "{\"string\":\"this is a simple string.\", \"integer\":10, \"long\":21474836470, " +
          "\"bigInteger\":92233720368547758070, \"double\":1.7976931348623157E308, " +
          // The snippet is truncated at this point in the source. A minimal completion
          // follows so the example compiles; the original test goes on to apply an
          // explicit schema and assert on the parsed rows.
          "\"boolean\":true, \"null\":null}"), Encoders.STRING());
  Dataset<Row> result = spark.read().json(jsonDS);
  result.printSchema();
}
/**
 * Returns a new ConceptMaps instance that includes the given maps.
 *
 * @param conceptMaps concept maps to add
 * @return a new ConceptMaps instance with the values added
 */
public C withConceptMaps(List<T> conceptMaps) {
  return withConceptMaps(this.spark.createDataset(conceptMaps, conceptMapEncoder));
}
/**
 * Returns a new ValueSets instance that includes the given value sets.
 *
 * @param valueSets the value sets to add to the returned collection
 * @return a new ValueSets instance with the added value sets
 */
public C withValueSets(List<T> valueSets) {
  return withValueSets(this.spark.createDataset(valueSets, valueSetEncoder));
}
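Both methods appear to come from the Bunsen FHIR library's immutable collection types: each call creates a dataset from the supplied list and delegates to the dataset-based overload, returning a new collection rather than mutating the receiver. A hypothetical call site, assuming a Bunsen-style ConceptMaps.getEmpty factory and an existing ConceptMap value (neither is shown in the snippets above):

// Hypothetical usage sketch; ConceptMaps.getEmpty and someConceptMap are
// assumptions, not taken from the snippets above.
ConceptMaps empty = ConceptMaps.getEmpty(spark);
ConceptMaps updated = empty.withConceptMaps(Arrays.asList(someConceptMap));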
public UtilTable(@NonNull final Class<T> type,
    @NonNull final JavaRDD<T> javaRDD,
    @NonNull final Path destPath,
    final boolean isDatePartitioned,
    @NonNull final SparkSession sparkSession) {
  this.spark = sparkSession;
  final RDD<T> rdd = javaRDD.rdd();
  final Encoder<T> bean = Encoders.bean(type);
  this.dataset = this.spark.createDataset(rdd, bean);
  this.destPath = destPath;
  this.isDatePartitioned = isDatePartitioned;
}
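Encoders.bean(type) only works when T follows JavaBean conventions: a public class with a no-argument constructor and getter/setter pairs for every field to be serialized. A minimal sketch of a compatible class (the name Event and its fields are illustrative, not from the snippet):

// Illustrative JavaBean; the class name and fields are assumptions,
// not taken from the UtilTable snippet.
public class Event {
  private String id;
  private long timestamp;

  public Event() {}  // no-arg constructor required by Encoders.bean

  public String getId() { return id; }
  public void setId(String id) { this.id = id; }
  public long getTimestamp() { return timestamp; }
  public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
}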
@Override
public <T> Dataset<T> createDataset(final RDD<T> data, final Encoder<T> evidence) {
  final boolean userTriggered = initializeFunction(data, evidence);
  final Dataset<T> result = Dataset.from(super.createDataset(data, evidence));
  this.setIsUserTriggered(userTriggered);
  return result;
}
@Override
public <T> Dataset<T> createDataset(final scala.collection.Seq<T> data, final Encoder<T> evidence) {
  final boolean userTriggered = initializeFunction(data, evidence);
  final Dataset<T> result = Dataset.from(super.createDataset(data, evidence));
  this.setIsUserTriggered(userTriggered);
  return result;
}
private void start() { SparkSession spark = SparkSession.builder() .appName("Array to Dataset<String>") .master("local") .getOrCreate(); String[] l = new String[] { "a", "b", "c", "d" }; List<String> data = Arrays.asList(l); Dataset<String> df = spark.createDataset(data, Encoders.STRING()); df.show(); } }
private void start() {
  SparkSession spark = SparkSession.builder().master("local").getOrCreate();

  List<Integer> data = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  Dataset<Integer> df = spark.createDataset(data, Encoders.INT());
  df.show();
  df.printSchema();

  Integer sumByReduce = df.reduce(new SumByReduce());
  System.out.println("Sum should be 55 and it is... " + sumByReduce);
}
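The SumByReduce class is not shown in this snippet. For the example to compile, it would be a ReduceFunction<Integer> along these lines (a sketch of the presumed shape, not the original class):

import org.apache.spark.api.java.function.ReduceFunction;

// Presumed shape of SumByReduce; the original class is not included
// in the snippet above.
public class SumByReduce implements ReduceFunction<Integer> {
  @Override
  public Integer call(Integer left, Integer right) {
    return left + right;
  }
}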
private void start() { SparkSession spark = SparkSession.builder() .appName("Array to Dataframe (Dataset<Row>)") .master("local") .getOrCreate(); String[] l = new String[] { "a", "b", "c", "d" }; List<String> data = Arrays.asList(l); Dataset<String> ds = spark.createDataset(data, Encoders.STRING()); Dataset<Row> df = ds.toDF(); df.show(); } }
@Override
public void setUp() throws IOException {
  super.setUp();
  List<java.lang.Double> points = Arrays.asList(0.1, 1.1, 10.1, -1.1);
  dataset = spark.createDataset(points, Encoders.DOUBLE()).toDF("sample");
}
/**
 * Returns a dataset of ValueSet from the content stored at the given directory.
 */
protected Dataset<T> valueSetDatasetFromDirectory(String path) {
  JavaRDD<Tuple2<String, String>> fileNamesAndContents = this.spark.sparkContext()
      .wholeTextFiles(path, 1)
      .toJavaRDD();

  return this.spark.createDataset(fileNamesAndContents
      .map(new ToValueSet(fhirVersion))
      .rdd(), valueSetEncoder);
}
private Dataset<T> conceptMapsDatasetFromDirectory(String path) {
  JavaRDD<Tuple2<String, String>> fileNamesAndContents = this.spark.sparkContext()
      .wholeTextFiles(path, 1)
      .toJavaRDD();

  return this.spark.createDataset(fileNamesAndContents
      .map(new ToConceptMap(fhirVersion))
      .rdd(), conceptMapEncoder);
}
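In both directory-loading methods, wholeTextFiles yields (file path, file contents) pairs, and ToValueSet / ToConceptMap map each pair to a parsed FHIR resource before the resulting RDD is wrapped in a dataset. Neither mapper is shown in the snippets; a sketch of the shape such a mapper would take, assuming a HAPI FHIR JSON parser (the parsing details are assumptions, not the library's actual implementation):

import org.apache.spark.api.java.function.Function;
import scala.Tuple2;

// Sketch of a mapper like ToValueSet; the real implementation is not shown
// in the snippets, so the parsing logic here is an assumption.
class ToValueSet implements Function<Tuple2<String, String>, ValueSet> {

  private final FhirVersionEnum fhirVersion;

  ToValueSet(FhirVersionEnum fhirVersion) {
    this.fhirVersion = fhirVersion;
  }

  @Override
  public ValueSet call(Tuple2<String, String> fileAndContents) {
    // _2() holds the file contents; parse them into a FHIR ValueSet.
    return (ValueSet) FhirContext.forCached(fhirVersion)
        .newJsonParser()
        .parseResource(fileAndContents._2());
  }
}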