/**
 * Checks that selecting two expressions from an integer dataset and reading the
 * result back through a tuple encoder yields the expected (Integer, String) pairs.
 */
@Test
public void testSelect() {
  List<Integer> input = Arrays.asList(2, 6);
  Dataset<Integer> source = spark.createDataset(input, Encoders.INT());

  // First column: value + 1; second column: the value cast to a string.
  Dataset<Tuple2<Integer, String>> projected = source
    .select(expr("value + 1"), col("value").cast("string"))
    .as(Encoders.tuple(Encoders.INT(), Encoders.STRING()));

  List<Tuple2<Integer, String>> expected = Arrays.asList(tuple2(3, "2"), tuple2(7, "6"));
  List<Tuple2<Integer, String>> actual = projected.collectAsList();
  Assert.assertEquals(expected, actual);
}
/**
 * Builds a dataset from the integers 2 and 6, selects {@code value + 1} together with
 * {@code value} cast to string, and asserts that the typed result collects to
 * (3, "2") and (7, "6").
 */
@Test
public void testSelect() {
  // Expected rows, one per input value.
  List<Tuple2<Integer, String>> wanted = Arrays.asList(tuple2(3, "2"), tuple2(7, "6"));

  List<Integer> numbers = Arrays.asList(2, 6);
  Dataset<Integer> numbersDs = spark.createDataset(numbers, Encoders.INT());

  Dataset<Tuple2<Integer, String>> pairs =
    numbersDs
      .select(
        expr("value + 1"),
        col("value").cast("string"))
      .as(Encoders.tuple(Encoders.INT(), Encoders.STRING()));

  Assert.assertEquals(wanted, pairs.collectAsList());
}
/**
 * Verifies a two-column select over an integer dataset: the incremented value and the
 * value cast to string, decoded as a {@code Tuple2<Integer, String>}.
 */
@Test
public void testSelect() {
  List<Integer> values = Arrays.asList(2, 6);
  Dataset<Integer> valuesDs = spark.createDataset(values, Encoders.INT());

  Dataset<Tuple2<Integer, String>> result =
    valuesDs.select(expr("value + 1"), col("value").cast("string"))
      .as(Encoders.tuple(Encoders.INT(), Encoders.STRING()));

  // Materialize the result once, then compare against the expected pairs.
  List<Tuple2<Integer, String>> collected = result.collectAsList();
  Assert.assertEquals(
    Arrays.asList(
      tuple2(3, "2"),
      tuple2(7, "6")),
    collected);
}
/**
 * Checks that a nested tuple encoder produces the same schema and the same nested-field
 * values whether the dataset is created from a Java list or from an RDD obtained via
 * {@code JavaPairRDD.toRDD}.
 */
@Test
public void testTupleEncoderSchema() {
  Encoder<Tuple2<String, String>> nestedEncoder =
    Encoders.tuple(Encoders.STRING(), Encoders.STRING());
  Encoder<Tuple2<String, Tuple2<String, String>>> tupleEncoder =
    Encoders.tuple(Encoders.STRING(), nestedEncoder);

  List<Tuple2<String, Tuple2<String, String>>> records = Arrays.asList(
    tuple2("1", tuple2("a", "b")),
    tuple2("2", tuple2("c", "d")));

  // Dataset built directly from the in-memory list.
  Dataset<Row> fromList = spark.createDataset(records, tupleEncoder).toDF("value1", "value2");

  // Dataset built from the same records, routed through a pair RDD.
  JavaPairRDD<String, Tuple2<String, String>> pairs = jsc.parallelizePairs(records);
  Dataset<Row> fromRdd =
    spark.createDataset(JavaPairRDD.toRDD(pairs), tupleEncoder).toDF("value1", "value2");

  Assert.assertEquals(fromList.schema(), fromRdd.schema());

  // Compare the first element of the nested tuple column.
  List<Row> nestedFromList = fromList.select(expr("value2._1")).collectAsList();
  List<Row> nestedFromRdd = fromRdd.select(expr("value2._1")).collectAsList();
  Assert.assertEquals(nestedFromList, nestedFromRdd);
}
/**
 * Creates the same nested-tuple data as a dataset twice, once from a list and once from
 * an RDD, and asserts that both have equal schemas and equal {@code value2._1}
 * projections.
 */
@Test
public void testTupleEncoderSchema() {
  Encoder<Tuple2<String, Tuple2<String, String>>> enc =
    Encoders.tuple(
      Encoders.STRING(),
      Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

  Tuple2<String, Tuple2<String, String>> firstRecord = tuple2("1", tuple2("a", "b"));
  Tuple2<String, Tuple2<String, String>> secondRecord = tuple2("2", tuple2("c", "d"));
  List<Tuple2<String, Tuple2<String, String>>> input = Arrays.asList(firstRecord, secondRecord);

  // Path 1: list -> Dataset -> DataFrame with renamed columns.
  Dataset<Row> listBacked = spark.createDataset(input, enc)
    .toDF("value1", "value2");

  // Path 2: list -> JavaPairRDD -> RDD -> Dataset -> DataFrame with renamed columns.
  JavaPairRDD<String, Tuple2<String, String>> inputPairs = jsc.parallelizePairs(input);
  Dataset<Row> rddBacked = spark.createDataset(JavaPairRDD.toRDD(inputPairs), enc)
    .toDF("value1", "value2");

  Assert.assertEquals(listBacked.schema(), rddBacked.schema());
  Assert.assertEquals(
    listBacked.select(expr("value2._1")).collectAsList(),
    rddBacked.select(expr("value2._1")).collectAsList());
}