@Override
public long getLong(int columnIndex) throws SQLException {
  // Record null status first so a subsequent wasNull() call reflects this read
  // (JDBC contract). Column indexes are 1-based at the JDBC surface, 0-based
  // in the underlying row, hence the -1.
  wasNull = getCurrentRow().isNullAt(columnIndex - 1);
  if (wasNull) {
    // JDBC mandates 0 for a SQL NULL read through a primitive accessor.
    return 0;
  }
  return getCurrentRow().getLong(columnIndex - 1);
}
@Override public Object evaluate(Row buffer) { if (buffer.isNullAt(0)) { // If the bufferSum is still null, we return null because this function has not got // any input row. return null; } else { // Otherwise, we calculate the special average value. return buffer.getDouble(0) / buffer.getLong(1) + 100.0; } } }
@Override public Object evaluate(Row buffer) { if (buffer.isNullAt(0)) { // If the bufferSum is still null, we return null because this function has not got // any input row. return null; } else { // Otherwise, we calculate the special average value. return buffer.getDouble(0) / buffer.getLong(1) + 100.0; } } }
@Test
public void testSampleBy() {
  // 100 ids over 2 partitions, keyed by id mod 3 -> keys 0, 1, 2.
  Dataset<Row> keyed = spark.range(0, 100, 1, 2).select(col("id").mod(3).as("key"));
  // Stratified sample: keep ~10% of key 0 and ~20% of key 1; key 2 is dropped.
  Dataset<Row> sample = keyed.stat().sampleBy("key", ImmutableMap.of(0, 0.1, 1, 0.2), 0L);
  List<Row> counts = sample.groupBy("key").count().orderBy("key").collectAsList();
  Row keyZero = counts.get(0);
  Row keyOne = counts.get(1);
  Assert.assertEquals(0, keyZero.getLong(0));
  // With seed 0 the per-key counts land in fixed, loose bounds.
  Assert.assertTrue(0 <= keyZero.getLong(1) && keyZero.getLong(1) <= 8);
  Assert.assertEquals(1, keyOne.getLong(0));
  Assert.assertTrue(2 <= keyOne.getLong(1) && keyOne.getLong(1) <= 13);
}
@Test
public void testSampleBy() {
  // 100 ids over 2 partitions, keyed by id mod 3 -> keys 0, 1, 2.
  Dataset<Row> keyed = spark.range(0, 100, 1, 2).select(col("id").mod(3).as("key"));
  // Stratified sample: keep ~10% of key 0 and ~20% of key 1; key 2 is dropped.
  Dataset<Row> sample = keyed.stat().sampleBy("key", ImmutableMap.of(0, 0.1, 1, 0.2), 0L);
  List<Row> counts = sample.groupBy("key").count().orderBy("key").collectAsList();
  Row keyZero = counts.get(0);
  Row keyOne = counts.get(1);
  Assert.assertEquals(0, keyZero.getLong(0));
  // With seed 0 the per-key counts land in fixed, loose bounds.
  Assert.assertTrue(0 <= keyZero.getLong(1) && keyZero.getLong(1) <= 8);
  Assert.assertEquals(1, keyOne.getLong(0));
  Assert.assertTrue(2 <= keyOne.getLong(1) && keyOne.getLong(1) <= 13);
}
@Test
public void testSampleBy() {
  // 100 ids over 2 partitions, keyed by id mod 3 -> keys 0, 1, 2.
  Dataset<Row> keyed = spark.range(0, 100, 1, 2).select(col("id").mod(3).as("key"));
  // Stratified sample: keep ~10% of key 0 and ~20% of key 1; key 2 is dropped.
  Dataset<Row> sample = keyed.stat().sampleBy("key", ImmutableMap.of(0, 0.1, 1, 0.2), 0L);
  List<Row> counts = sample.groupBy("key").count().orderBy("key").collectAsList();
  Row keyZero = counts.get(0);
  Row keyOne = counts.get(1);
  Assert.assertEquals(0, keyZero.getLong(0));
  // With seed 0 the per-key counts land in fixed, loose bounds.
  Assert.assertTrue(0 <= keyZero.getLong(1) && keyZero.getLong(1) <= 8);
  Assert.assertEquals(1, keyOne.getLong(0));
  Assert.assertTrue(2 <= keyOne.getLong(1) && keyOne.getLong(1) <= 13);
}
@Test public void testJsonRDDToDataFrame() { // This is a test for the deprecated API in SPARK-15615. JavaRDD<String> rdd = jsc.parallelize(Arrays.asList("{\"a\": 2}")); Dataset<Row> df = spark.read().json(rdd); Assert.assertEquals(1L, df.count()); Assert.assertEquals(2L, df.collectAsList().get(0).getLong(0)); }
@Test public void testJsonRDDToDataFrame() { // This is a test for the deprecated API in SPARK-15615. JavaRDD<String> rdd = jsc.parallelize(Arrays.asList("{\"a\": 2}")); Dataset<Row> df = spark.read().json(rdd); Assert.assertEquals(1L, df.count()); Assert.assertEquals(2L, df.collectAsList().get(0).getLong(0)); }
@Test public void testJsonRDDToDataFrame() { // This is a test for the deprecated API in SPARK-15615. JavaRDD<String> rdd = jsc.parallelize(Arrays.asList("{\"a\": 2}")); Dataset<Row> df = spark.read().json(rdd); Assert.assertEquals(1L, df.count()); Assert.assertEquals(2L, df.collectAsList().get(0).getLong(0)); }
@Test
public void testCrosstab() {
  Dataset<Row> df = spark.table("testData2");
  Dataset<Row> crosstab = df.stat().crosstab("a", "b");
  // First column is the composite label "a_b"; the rest are distinct "b" values.
  String[] columnNames = crosstab.schema().fieldNames();
  Assert.assertEquals("a_b", columnNames[0]);
  Assert.assertEquals("1", columnNames[1]);
  Assert.assertEquals("2", columnNames[2]);
  List<Row> rows = crosstab.collectAsList();
  rows.sort(crosstabRowComparator);
  // Fix: use a primitive int — the original boxed Integer incurred
  // autobox/unbox churn on every increment for no benefit.
  int count = 1;
  for (Row row : rows) {
    // Fix: JUnit's assertEquals takes (expected, actual); the original had
    // them swapped, which would mislabel the values on failure.
    Assert.assertEquals(String.valueOf(count), row.get(0).toString());
    Assert.assertEquals(1L, row.getLong(1));
    Assert.assertEquals(1L, row.getLong(2));
    count++;
  }
}
// NOTE(review): fragment — the start of this expression (the Dataset chain and
// the `count` assignment target) is outside the visible region. It aggregates
// the "value" column and reads the total from the single result row.
.agg(sum("value")) .head() .getLong(0);
// Parameterized SLF4J-style debug log of how many measurements were persisted.
LOG.debug("{} profile measurement(s) written to HBase", count);
@Test
public void testCrosstab() {
  Dataset<Row> df = spark.table("testData2");
  Dataset<Row> crosstab = df.stat().crosstab("a", "b");
  // First column is the composite label "a_b"; the rest are distinct "b" values.
  String[] columnNames = crosstab.schema().fieldNames();
  Assert.assertEquals("a_b", columnNames[0]);
  Assert.assertEquals("1", columnNames[1]);
  Assert.assertEquals("2", columnNames[2]);
  List<Row> rows = crosstab.collectAsList();
  rows.sort(crosstabRowComparator);
  // Fix: use a primitive int — the original boxed Integer incurred
  // autobox/unbox churn on every increment for no benefit.
  int count = 1;
  for (Row row : rows) {
    // Fix: JUnit's assertEquals takes (expected, actual); the original had
    // them swapped, which would mislabel the values on failure.
    Assert.assertEquals(String.valueOf(count), row.get(0).toString());
    Assert.assertEquals(1L, row.getLong(1));
    Assert.assertEquals(1L, row.getLong(2));
    count++;
  }
}
@Test
public void testCrosstab() {
  Dataset<Row> df = spark.table("testData2");
  Dataset<Row> crosstab = df.stat().crosstab("a", "b");
  // First column is the composite label "a_b"; the rest are distinct "b" values.
  String[] columnNames = crosstab.schema().fieldNames();
  Assert.assertEquals("a_b", columnNames[0]);
  Assert.assertEquals("1", columnNames[1]);
  Assert.assertEquals("2", columnNames[2]);
  List<Row> rows = crosstab.collectAsList();
  rows.sort(crosstabRowComparator);
  // Fix: use a primitive int — the original boxed Integer incurred
  // autobox/unbox churn on every increment for no benefit.
  int count = 1;
  for (Row row : rows) {
    // Fix: JUnit's assertEquals takes (expected, actual); the original had
    // them swapped, which would mislabel the values on failure.
    Assert.assertEquals(String.valueOf(count), row.get(0).toString());
    Assert.assertEquals(1L, row.getLong(1));
    Assert.assertEquals(1L, row.getLong(2));
    count++;
  }
}
@Override public void merge(MutableAggregationBuffer buffer1, Row buffer2) { // buffer1 and buffer2 have the same structure. // We only update the buffer1 when the input buffer2's sum value is not null. if (!buffer2.isNullAt(0)) { if (buffer1.isNullAt(0)) { // If the buffer value (intermediate result of the sum) is still null, // we set the it as the input buffer's value. buffer1.update(0, buffer2.getDouble(0)); buffer1.update(1, buffer2.getLong(1)); } else { // Otherwise, we update the bufferSum and bufferCount. Double newValue = buffer2.getDouble(0) + buffer1.getDouble(0); buffer1.update(0, newValue); buffer1.update(1, buffer1.getLong(1) + buffer2.getLong(1)); } } }
@Override public void merge(MutableAggregationBuffer buffer1, Row buffer2) { // buffer1 and buffer2 have the same structure. // We only update the buffer1 when the input buffer2's sum value is not null. if (!buffer2.isNullAt(0)) { if (buffer1.isNullAt(0)) { // If the buffer value (intermediate result of the sum) is still null, // we set the it as the input buffer's value. buffer1.update(0, buffer2.getDouble(0)); buffer1.update(1, buffer2.getLong(1)); } else { // Otherwise, we update the bufferSum and bufferCount. Double newValue = buffer2.getDouble(0) + buffer1.getDouble(0); buffer1.update(0, newValue); buffer1.update(1, buffer1.getLong(1) + buffer2.getLong(1)); } } }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); } }
// NOTE(review): fragment of a larger test — simpleRow/intValue/longValue are
// defined outside this view. Each typed accessor must agree with the untyped
// get(...) at the same ordinal.
Assert.assertEquals(intValue, simpleRow.getInt(5)); Assert.assertEquals(intValue, simpleRow.get(5));
Assert.assertEquals(longValue, simpleRow.getLong(6)); Assert.assertEquals(longValue, simpleRow.get(6));
// Ordinal 7 holds the same long value — presumably a boxed-Long column
// mirroring the primitive one at ordinal 6; confirm against the row's schema.
Assert.assertEquals(longValue, simpleRow.getLong(7)); Assert.assertEquals(longValue, simpleRow.get(7));
// NOTE(review): fragment of a larger test — simpleRow/intValue/longValue are
// defined outside this view. Each typed accessor must agree with the untyped
// get(...) at the same ordinal.
Assert.assertEquals(intValue, simpleRow.getInt(5)); Assert.assertEquals(intValue, simpleRow.get(5));
Assert.assertEquals(longValue, simpleRow.getLong(6)); Assert.assertEquals(longValue, simpleRow.get(6));
// Ordinal 7 holds the same long value — presumably a boxed-Long column
// mirroring the primitive one at ordinal 6; confirm against the row's schema.
Assert.assertEquals(longValue, simpleRow.getLong(7)); Assert.assertEquals(longValue, simpleRow.get(7));