/**
 * Exercises the fluent DataFrameWriter API end to end with an explicit
 * SaveMode.ErrorIfExists, writing through the test data source.
 */
@Test
public void testSaveModeAPI() {
  spark
      .range(10)
      .write()
      .format("org.apache.spark.sql.test")
      .mode(SaveMode.ErrorIfExists)
      .save();
}
/**
 * Smoke test for the writer builder chain: format + mode + save must
 * compose without error against the in-memory test source.
 */
@Test
public void testSaveModeAPI() {
  spark
      .range(10)
      .write()
      .format("org.apache.spark.sql.test")
      .mode(SaveMode.ErrorIfExists)
      .save();
}
/**
 * Verifies that a write with SaveMode.ErrorIfExists can be issued via the
 * Java fluent API against the test-only data source.
 */
@Test
public void testSaveModeAPI() {
  spark
      .range(10)
      .write()
      .format("org.apache.spark.sql.test")
      .mode(SaveMode.ErrorIfExists)
      .save();
}
/**
 * Registers the MyDoubleAvg UDAF by class name and checks it over a
 * temp view of the values 1..9 (inclusive).
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  spark.range(1, 10).toDF("value").createOrReplaceTempView("df");
  spark.udf().registerJavaUDAF("myDoubleAvg", MyDoubleAvg.class.getName());
  // avg(1..9) is 5.0; MyDoubleAvg offsets the plain average, yielding 105.0.
  Row row = spark.sql("SELECT myDoubleAvg(value) as my_avg from df").head();
  Assert.assertEquals(105.0, row.getDouble(0), 1.0e-6);
}
/**
 * Checks that a Java UDAF registered via registerJavaUDAF(className) is
 * callable from SQL and aggregates correctly over a temp view.
 */
@SuppressWarnings("unchecked")
@Test
public void udf1Test() {
  spark.range(1, 10).toDF("value").createOrReplaceTempView("df");
  spark.udf().registerJavaUDAF("myDoubleAvg", MyDoubleAvg.class.getName());
  // Expected 105.0: MyDoubleAvg shifts avg(1..9) == 5.0 by a fixed offset.
  Row aggregated = spark.sql("SELECT myDoubleAvg(value) as my_avg from df").head();
  Assert.assertEquals(105.0, aggregated.getDouble(0), 1.0e-6);
}
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); }
@SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); spark.range(10).toDF("x").createOrReplaceTempView("tmp"); // This tests when Java UDFs are required to be the semantically same (See SPARK-9435). List<Row> results = spark.sql("SELECT inc(x) FROM tmp GROUP BY inc(x)").collectAsList(); Assert.assertEquals(10, results.size()); long sum = 0; for (Row result : results) { sum += result.getLong(0); } Assert.assertEquals(55, sum); } }
/**
 * Generates FIX message strings across {@code tasks} partitions and wraps
 * them in a single-column ("fix") string DataFrame.
 */
@Override
public Dataset<Row> read() throws Exception {
  // Fan out one long per task, then expand each into its batch of messages.
  JavaRDD<Row> messages = Contexts.getSparkSession()
      .range(tasks)
      .javaRDD()
      .repartition(tasks)
      .flatMap(new GenerateFIXMessages(ordersPerTask));
  StructType schema = DataTypes.createStructType(
      Lists.newArrayList(DataTypes.createStructField("fix", DataTypes.StringType, false)));
  return Contexts.getSparkSession().createDataFrame(messages, schema);
}
// Configure a batch source step whose data is the longs 5..7 mapped into
// single-column ("value") rows via LongToRowFunction.
BatchStep sourceStep = new BatchStep("source_step");
sourceStep.configure(sourceStepConfig);
Dataset<Row> sourceDF = Contexts.getSparkSession()
    .range(5, 8)
    .map(
        new LongToRowFunction(),
        RowEncoder.apply(DataTypes.createStructType(Lists.newArrayList(
            DataTypes.createStructField("value", DataTypes.LongType, false)))));
sourceStep.setData(sourceDF);
/**
 * Produces {@code numPartitions * 10} rows spread over {@code numPartitions}
 * partitions, each carrying nullable "value" and "modulo" long columns
 * (populated by LongToRowFunction).
 */
@Override
public Dataset<Row> read() throws Exception {
  return Contexts.getSparkSession()
      .range(numPartitions * 10)
      .repartition(numPartitions)
      .map(
          new LongToRowFunction(),
          RowEncoder.apply(DataTypes.createStructType(Lists.newArrayList(
              DataTypes.createStructField("value", DataTypes.LongType, true),
              DataTypes.createStructField("modulo", DataTypes.LongType, true)))));
}
/**
 * Builds a small pair RDD of {@code numPartitions} rows, each keyed by
 * itself, spread across {@code numPartitions} partitions.
 */
private JavaPairRDD<Row, Row> getDummyRDD(int numPartitions) {
  return Contexts.getSparkSession()
      .range(numPartitions)
      .javaRDD()
      .map(new LongToRowFunction())
      // Key each row by itself so downstream code sees (row, row) pairs.
      .keyBy(new ItselfFunction<Row>())
      .repartition(numPartitions);
}