/**
 * A bean class with no getters should infer an empty schema, yet still
 * produce one row per input bean.
 */
@Test
public void testBeanWithoutGetter() {
  BeanWithoutGetter bean = new BeanWithoutGetter();
  List<BeanWithoutGetter> data = Arrays.asList(bean);
  Dataset<Row> df = spark.createDataFrame(data, BeanWithoutGetter.class);
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments reversed, which produces misleading failure messages.
  Assert.assertEquals(0, df.schema().length());
  Assert.assertEquals(1, df.collectAsList().size());
}
/**
 * A bean class with no getters should infer an empty schema, yet still
 * produce one row per input bean.
 */
@Test
public void testBeanWithoutGetter() {
  BeanWithoutGetter bean = new BeanWithoutGetter();
  List<BeanWithoutGetter> data = Arrays.asList(bean);
  Dataset<Row> df = spark.createDataFrame(data, BeanWithoutGetter.class);
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments reversed, which produces misleading failure messages.
  Assert.assertEquals(0, df.schema().length());
  Assert.assertEquals(1, df.collectAsList().size());
}
/**
 * A bean class with no getters should infer an empty schema, yet still
 * produce one row per input bean.
 */
@Test
public void testBeanWithoutGetter() {
  BeanWithoutGetter bean = new BeanWithoutGetter();
  List<BeanWithoutGetter> data = Arrays.asList(bean);
  Dataset<Row> df = spark.createDataFrame(data, BeanWithoutGetter.class);
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments reversed, which produces misleading failure messages.
  Assert.assertEquals(0, df.schema().length());
  Assert.assertEquals(1, df.collectAsList().size());
}
/**
 * An empty bean encoded via Encoders.bean should yield an empty schema
 * while still materializing one row per element.
 */
@Test
public void testEmptyBean() {
  EmptyBean bean = new EmptyBean();
  List<EmptyBean> data = Arrays.asList(bean);
  Dataset<EmptyBean> df = spark.createDataset(data, Encoders.bean(EmptyBean.class));
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments reversed, which produces misleading failure messages.
  Assert.assertEquals(0, df.schema().length());
  Assert.assertEquals(1, df.collectAsList().size());
}
/**
 * An empty bean encoded via Encoders.bean should yield an empty schema
 * while still materializing one row per element.
 */
@Test
public void testEmptyBean() {
  EmptyBean bean = new EmptyBean();
  List<EmptyBean> data = Arrays.asList(bean);
  Dataset<EmptyBean> df = spark.createDataset(data, Encoders.bean(EmptyBean.class));
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments reversed, which produces misleading failure messages.
  Assert.assertEquals(0, df.schema().length());
  Assert.assertEquals(1, df.collectAsList().size());
}
/**
 * An empty bean encoded via Encoders.bean should yield an empty schema
 * while still materializing one row per element.
 */
@Test
public void testEmptyBean() {
  EmptyBean bean = new EmptyBean();
  List<EmptyBean> data = Arrays.asList(bean);
  Dataset<EmptyBean> df = spark.createDataset(data, Encoders.bean(EmptyBean.class));
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments reversed, which produces misleading failure messages.
  Assert.assertEquals(0, df.schema().length());
  Assert.assertEquals(1, df.collectAsList().size());
}
/**
 * Exercises DataFrameStatFunctions.crosstab on "testData2": the first column
 * is named "a_b", the remaining columns are the distinct values of b, and
 * every (a, b) cell count is 1.
 */
@Test
public void testCrosstab() {
  Dataset<Row> df = spark.table("testData2");
  Dataset<Row> crosstab = df.stat().crosstab("a", "b");
  String[] columnNames = crosstab.schema().fieldNames();
  Assert.assertEquals("a_b", columnNames[0]);
  Assert.assertEquals("1", columnNames[1]);
  Assert.assertEquals("2", columnNames[2]);
  List<Row> rows = crosstab.collectAsList();
  rows.sort(crosstabRowComparator);
  // Primitive int avoids the pointless autoboxing the original's Integer
  // incurred on every count++.
  int count = 1;
  for (Row row : rows) {
    // JUnit convention: expected value first, then the actual row value.
    Assert.assertEquals(String.valueOf(count), row.get(0).toString());
    Assert.assertEquals(1L, row.getLong(1));
    Assert.assertEquals(1L, row.getLong(2));
    count++;
  }
}
/**
 * Exercises DataFrameStatFunctions.crosstab on "testData2": the first column
 * is named "a_b", the remaining columns are the distinct values of b, and
 * every (a, b) cell count is 1.
 */
@Test
public void testCrosstab() {
  Dataset<Row> df = spark.table("testData2");
  Dataset<Row> crosstab = df.stat().crosstab("a", "b");
  String[] columnNames = crosstab.schema().fieldNames();
  Assert.assertEquals("a_b", columnNames[0]);
  Assert.assertEquals("1", columnNames[1]);
  Assert.assertEquals("2", columnNames[2]);
  List<Row> rows = crosstab.collectAsList();
  rows.sort(crosstabRowComparator);
  // Primitive int avoids the pointless autoboxing the original's Integer
  // incurred on every count++.
  int count = 1;
  for (Row row : rows) {
    // JUnit convention: expected value first, then the actual row value.
    Assert.assertEquals(String.valueOf(count), row.get(0).toString());
    Assert.assertEquals(1L, row.getLong(1));
    Assert.assertEquals(1L, row.getLong(2));
    count++;
  }
}
/**
 * Exercises DataFrameStatFunctions.crosstab on "testData2": the first column
 * is named "a_b", the remaining columns are the distinct values of b, and
 * every (a, b) cell count is 1.
 */
@Test
public void testCrosstab() {
  Dataset<Row> df = spark.table("testData2");
  Dataset<Row> crosstab = df.stat().crosstab("a", "b");
  String[] columnNames = crosstab.schema().fieldNames();
  Assert.assertEquals("a_b", columnNames[0]);
  Assert.assertEquals("1", columnNames[1]);
  Assert.assertEquals("2", columnNames[2]);
  List<Row> rows = crosstab.collectAsList();
  rows.sort(crosstabRowComparator);
  // Primitive int avoids the pointless autoboxing the original's Integer
  // incurred on every count++.
  int count = 1;
  for (Row row : rows) {
    // JUnit convention: expected value first, then the actual row value.
    Assert.assertEquals(String.valueOf(count), row.get(0).toString());
    Assert.assertEquals(1L, row.getLong(1));
    Assert.assertEquals(1L, row.getLong(2));
    count++;
  }
}
/**
 * Datasets built from a nested-tuple encoder must produce the same schema
 * and contents whether the source is a local List or a JavaPairRDD.
 */
@Test
public void testTupleEncoderSchema() {
  Encoder<Tuple2<String, Tuple2<String, String>>> encoder =
      Encoders.tuple(Encoders.STRING(), Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
  List<Tuple2<String, Tuple2<String, String>>> data =
      Arrays.asList(tuple2("1", tuple2("a", "b")), tuple2("2", tuple2("c", "d")));
  Dataset<Row> fromList = spark.createDataset(data, encoder).toDF("value1", "value2");
  JavaPairRDD<String, Tuple2<String, String>> pairs = jsc.parallelizePairs(data);
  Dataset<Row> fromRdd =
      spark.createDataset(JavaPairRDD.toRDD(pairs), encoder).toDF("value1", "value2");
  Assert.assertEquals(fromList.schema(), fromRdd.schema());
  Assert.assertEquals(
      fromList.select(expr("value2._1")).collectAsList(),
      fromRdd.select(expr("value2._1")).collectAsList());
}
/**
 * Datasets built from a nested-tuple encoder must produce the same schema
 * and contents whether the source is a local List or a JavaPairRDD.
 */
@Test
public void testTupleEncoderSchema() {
  Encoder<Tuple2<String, Tuple2<String, String>>> encoder =
      Encoders.tuple(Encoders.STRING(), Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
  List<Tuple2<String, Tuple2<String, String>>> data =
      Arrays.asList(tuple2("1", tuple2("a", "b")), tuple2("2", tuple2("c", "d")));
  Dataset<Row> fromList = spark.createDataset(data, encoder).toDF("value1", "value2");
  JavaPairRDD<String, Tuple2<String, String>> pairs = jsc.parallelizePairs(data);
  Dataset<Row> fromRdd =
      spark.createDataset(JavaPairRDD.toRDD(pairs), encoder).toDF("value1", "value2");
  Assert.assertEquals(fromList.schema(), fromRdd.schema());
  Assert.assertEquals(
      fromList.select(expr("value2._1")).collectAsList(),
      fromRdd.select(expr("value2._1")).collectAsList());
}
// NOTE(review): truncated fragment — the remainder of this helper's body is not
// visible in this chunk, so only the visible assertion is documented.
// Shared helper: asserts the schema Spark infers from the Java bean; the visible
// check requires field "a" to be a non-nullable DoubleType with empty metadata.
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a"));
// NOTE(review): truncated fragment — the remainder of this helper's body is not
// visible in this chunk, so only the visible assertion is documented.
// Shared helper: asserts the schema Spark infers from the Java bean; the visible
// check requires field "a" to be a non-nullable DoubleType with empty metadata.
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a"));
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a"));