public String call(Row row) { return row.getString(0); }}); System.out.println(topTweetText.collect());
@Override public String call(Row row) { return "Name: " + row.getString(0); } }).collect();
@Override public String call(Row row) { return "Name: " + row.getString(0); } }).collect();
@Override public String call(Row row) { return "Name: " + row.getString(0); } }).collect();
@Override public String getString(int columnIndex) throws SQLException { wasNull = getCurrentRow().isNullAt(columnIndex-1); return wasNull ? null : getCurrentRow().getString(columnIndex-1); }
@Override public URL getURL(int columnIndex) throws SQLException { try { return new URL(getCurrentRow().getString(columnIndex-1)); } catch (MalformedURLException e) { throw new SQLExceptionInfo.Builder(SQLExceptionCode.MALFORMED_URL).setRootCause(e) .build().buildException(); } }
@Test public void testUDF() { UserDefinedFunction foo = udf((Integer i, String s) -> i.toString() + s, DataTypes.StringType); Dataset<Row> df = spark.table("testData").select(foo.apply(col("key"), col("value"))); String[] result = df.collectAsList().stream().map(row -> row.getString(0)) .toArray(String[]::new); String[] expected = spark.table("testData").collectAsList().stream() .map(row -> row.get(0).toString() + row.getString(1)).toArray(String[]::new); Assert.assertArrayEquals(expected, result); } }
@Test public void testUDF() { UserDefinedFunction foo = udf((Integer i, String s) -> i.toString() + s, DataTypes.StringType); Dataset<Row> df = spark.table("testData").select(foo.apply(col("key"), col("value"))); String[] result = df.collectAsList().stream().map(row -> row.getString(0)) .toArray(String[]::new); String[] expected = spark.table("testData").collectAsList().stream() .map(row -> row.get(0).toString() + row.getString(1)).toArray(String[]::new); Assert.assertArrayEquals(expected, result); } }
@Override public String call(Row row) throws Exception { return row.getString(0); } });
@Override public String call(Row row) throws Exception { return row.getString(0); } });
@Override public String call(Row row) throws Exception { return row.getString(columnIndex); } }
.map((Row row) -> "Name: " + row.getString(0)).collect(); sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19"); teenagerNames = teenagers2.toJavaRDD() .map((Row row) -> "Name: " + row.getString(0)).collect(); .map((Row row) -> "Name: " + row.getString(0)).collect(); .map((Row row) -> "Name: " + row.getString(0) + ", City: " + row.getString(1)).collect();
@Test public void dataFrameRDDOperations() { List<Person> personList = new ArrayList<>(2); Person person1 = new Person(); person1.setName("Michael"); person1.setAge(29); personList.add(person1); Person person2 = new Person(); person2.setName("Yin"); person2.setAge(28); personList.add(person2); JavaRDD<Row> rowRDD = jsc.parallelize(personList).map( person -> RowFactory.create(person.getName(), person.getAge())); List<StructField> fields = new ArrayList<>(2); fields.add(DataTypes.createStructField("", DataTypes.StringType, false)); fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, false)); StructType schema = DataTypes.createStructType(fields); Dataset<Row> df = spark.createDataFrame(rowRDD, schema); df.createOrReplaceTempView("people"); List<String> actual = spark.sql("SELECT * FROM people").toJavaRDD() .map(row -> row.getString(0) + "_" + row.get(1)).collect(); List<String> expected = new ArrayList<>(2); expected.add("Michael_29"); expected.add("Yin_28"); Assert.assertEquals(expected, actual); }
@Test public void dataFrameRDDOperations() { List<Person> personList = new ArrayList<>(2); Person person1 = new Person(); person1.setName("Michael"); person1.setAge(29); personList.add(person1); Person person2 = new Person(); person2.setName("Yin"); person2.setAge(28); personList.add(person2); JavaRDD<Row> rowRDD = jsc.parallelize(personList).map( person -> RowFactory.create(person.getName(), person.getAge())); List<StructField> fields = new ArrayList<>(2); fields.add(DataTypes.createStructField("", DataTypes.StringType, false)); fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, false)); StructType schema = DataTypes.createStructType(fields); Dataset<Row> df = spark.createDataFrame(rowRDD, schema); df.createOrReplaceTempView("people"); List<String> actual = spark.sql("SELECT * FROM people").toJavaRDD() .map(row -> row.getString(0) + "_" + row.get(1)).collect(); List<String> expected = new ArrayList<>(2); expected.add("Michael_29"); expected.add("Yin_28"); Assert.assertEquals(expected, actual); }
@Test public void dataFrameRDDOperations() { List<Person> personList = new ArrayList<>(2); Person person1 = new Person(); person1.setName("Michael"); person1.setAge(29); personList.add(person1); Person person2 = new Person(); person2.setName("Yin"); person2.setAge(28); personList.add(person2); JavaRDD<Row> rowRDD = jsc.parallelize(personList).map( person -> RowFactory.create(person.getName(), person.getAge())); List<StructField> fields = new ArrayList<>(2); fields.add(DataTypes.createStructField("", DataTypes.StringType, false)); fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, false)); StructType schema = DataTypes.createStructType(fields); Dataset<Row> df = spark.createDataFrame(rowRDD, schema); df.createOrReplaceTempView("people"); List<String> actual = spark.sql("SELECT * FROM people").toJavaRDD() .map(row -> row.getString(0) + "_" + row.get(1)).collect(); List<String> expected = new ArrayList<>(2); expected.add("Michael_29"); expected.add("Yin_28"); Assert.assertEquals(expected, actual); }
public Iterable<Tuple2<Long, Long>> call(Tuple2<String, Row> tuple) throws Exception { Row row = tuple._2; String orderCategoryIds = row.getString(8); String[] orderCategoryIdsSplited = orderCategoryIds.split(","); List<Tuple2<Long, Long>> list = new ArrayList<Tuple2<Long, Long>>(); for(String orderCategoryId : orderCategoryIdsSplited) { list.add(new Tuple2<Long, Long>(Long.valueOf(orderCategoryId), 1L)); } return list; }
Assert.assertEquals(booleanValue, simpleRow.getBoolean(14)); Assert.assertEquals(booleanValue, simpleRow.get(14)); Assert.assertEquals(stringValue, simpleRow.getString(15)); Assert.assertEquals(stringValue, simpleRow.get(15)); Assert.assertEquals(binaryValue, simpleRow.get(16));
Assert.assertEquals(booleanValue, simpleRow.getBoolean(14)); Assert.assertEquals(booleanValue, simpleRow.get(14)); Assert.assertEquals(stringValue, simpleRow.getString(15)); Assert.assertEquals(stringValue, simpleRow.get(15)); Assert.assertEquals(binaryValue, simpleRow.get(16));
Assert.assertEquals(booleanValue, simpleRow.getBoolean(14)); Assert.assertEquals(booleanValue, simpleRow.get(14)); Assert.assertEquals(stringValue, simpleRow.getString(15)); Assert.assertEquals(stringValue, simpleRow.get(15)); Assert.assertEquals(binaryValue, simpleRow.get(16));