/**
 * Checks {@code lookup} on a pair RDD that holds two values under the key "Oranges",
 * both before and after the values are grouped by key.
 */
@SuppressWarnings("unchecked")
@Test
public void lookup() {
  JavaPairRDD<String, String> fruitCategories = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>("Apples", "Fruit"),
    new Tuple2<>("Oranges", "Fruit"),
    new Tuple2<>("Oranges", "Citrus")
  ));

  // Ungrouped: one result entry is expected per ("Oranges", value) pair.
  int orangeEntries = fruitCategories.lookup("Oranges").size();
  assertEquals(2, orangeEntries);

  // Grouped: the first result entry for "Oranges" is expected to iterate over both values.
  JavaPairRDD<String, Iterable<String>> grouped = fruitCategories.groupByKey();
  Iterable<String> orangeGroup = grouped.lookup("Oranges").get(0);
  assertEquals(2, Iterables.size(orangeGroup));
}
/**
 * Checks {@code lookup} on a pair RDD that holds two values under the key "Oranges",
 * both before and after the values are grouped by key.
 */
@SuppressWarnings("unchecked")
@Test
public void lookup() {
  JavaPairRDD<String, String> fruitCategories = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>("Apples", "Fruit"),
    new Tuple2<>("Oranges", "Fruit"),
    new Tuple2<>("Oranges", "Citrus")
  ));

  // Ungrouped: one result entry is expected per ("Oranges", value) pair.
  int orangeEntries = fruitCategories.lookup("Oranges").size();
  assertEquals(2, orangeEntries);

  // Grouped: the first result entry for "Oranges" is expected to iterate over both values.
  JavaPairRDD<String, Iterable<String>> grouped = fruitCategories.groupByKey();
  Iterable<String> orangeGroup = grouped.lookup("Oranges").get(0);
  assertEquals(2, Iterables.size(orangeGroup));
}
/**
 * Sums the values of each key with {@code foldByKey}, using 0 as the zero value,
 * and checks the total obtained for every key.
 */
@SuppressWarnings("unchecked")
@Test
public void foldByKey() {
  // Key 1 appears once (1); key 2 twice (1 + 1); key 3 twice (2 + 1).
  List<Tuple2<Integer, Integer>> input = Arrays.asList(
    new Tuple2<>(2, 1),
    new Tuple2<>(2, 1),
    new Tuple2<>(1, 1),
    new Tuple2<>(3, 2),
    new Tuple2<>(3, 1)
  );
  JavaPairRDD<Integer, Integer> source = sc.parallelizePairs(input);
  JavaPairRDD<Integer, Integer> totals = source.foldByKey(0, (left, right) -> left + right);

  // With this input the expected total of each key equals the key itself.
  for (int key = 1; key <= 3; key++) {
    assertEquals(key, totals.lookup(key).get(0).intValue());
  }
}
/**
 * Checks {@code lookup} on a pair RDD that holds two values under the key "Oranges",
 * both before and after the values are grouped by key.
 */
@SuppressWarnings("unchecked")
@Test
public void lookup() {
  JavaPairRDD<String, String> fruitCategories = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>("Apples", "Fruit"),
    new Tuple2<>("Oranges", "Fruit"),
    new Tuple2<>("Oranges", "Citrus")
  ));

  // Ungrouped: one result entry is expected per ("Oranges", value) pair.
  int orangeEntries = fruitCategories.lookup("Oranges").size();
  assertEquals(2, orangeEntries);

  // Grouped: the first result entry for "Oranges" is expected to iterate over both values.
  JavaPairRDD<String, Iterable<String>> grouped = fruitCategories.groupByKey();
  Iterable<String> orangeGroup = grouped.lookup("Oranges").get(0);
  assertEquals(2, Iterables.size(orangeGroup));
}
/**
 * Sums the values of each key with {@code foldByKey}, using 0 as the zero value,
 * and checks the total obtained for every key.
 */
@SuppressWarnings("unchecked")
@Test
public void foldByKey() {
  // Key 1 appears once (1); key 2 twice (1 + 1); key 3 twice (2 + 1).
  List<Tuple2<Integer, Integer>> input = Arrays.asList(
    new Tuple2<>(2, 1),
    new Tuple2<>(2, 1),
    new Tuple2<>(1, 1),
    new Tuple2<>(3, 2),
    new Tuple2<>(3, 1)
  );
  JavaPairRDD<Integer, Integer> source = sc.parallelizePairs(input);
  JavaPairRDD<Integer, Integer> totals = source.foldByKey(0, (left, right) -> left + right);

  // With this input the expected total of each key equals the key itself.
  for (int key = 1; key <= 3; key++) {
    assertEquals(key, totals.lookup(key).get(0).intValue());
  }
}
/**
 * Sums the values of each key with {@code foldByKey}, using 0 as the zero value,
 * and checks the total obtained for every key.
 */
@SuppressWarnings("unchecked")
@Test
public void foldByKey() {
  // Key 1 appears once (1); key 2 twice (1 + 1); key 3 twice (2 + 1).
  List<Tuple2<Integer, Integer>> input = Arrays.asList(
    new Tuple2<>(2, 1),
    new Tuple2<>(2, 1),
    new Tuple2<>(1, 1),
    new Tuple2<>(3, 2),
    new Tuple2<>(3, 1)
  );
  JavaPairRDD<Integer, Integer> source = sc.parallelizePairs(input);
  JavaPairRDD<Integer, Integer> totals = source.foldByKey(0, (left, right) -> left + right);

  // With this input the expected total of each key equals the key itself.
  for (int key = 1; key <= 3; key++) {
    assertEquals(key, totals.lookup(key).get(0).intValue());
  }
}
/**
 * Folds the values of each key together by addition (zero value 0) and verifies
 * the resulting per-key sums.
 */
@Test
public void foldByKey() {
  List<Tuple2<Integer, Integer>> keyedOnes = Arrays.asList(
    new Tuple2<>(2, 1),
    new Tuple2<>(2, 1),
    new Tuple2<>(1, 1),
    new Tuple2<>(3, 2),
    new Tuple2<>(3, 1)
  );
  JavaPairRDD<Integer, Integer> perKeySums =
    sc.parallelizePairs(keyedOnes).foldByKey(0, (acc, value) -> acc + value);

  // Key 1: single value 1.
  int sumForOne = perKeySums.lookup(1).get(0).intValue();
  Assert.assertEquals(1, sumForOne);

  // Key 2: 1 + 1.
  int sumForTwo = perKeySums.lookup(2).get(0).intValue();
  Assert.assertEquals(2, sumForTwo);

  // Key 3: 2 + 1.
  int sumForThree = perKeySums.lookup(3).get(0).intValue();
  Assert.assertEquals(3, sumForThree);
}
/**
 * Folds the values of each key together by addition (zero value 0) and verifies
 * the resulting per-key sums.
 */
@Test
public void foldByKey() {
  List<Tuple2<Integer, Integer>> keyedOnes = Arrays.asList(
    new Tuple2<>(2, 1),
    new Tuple2<>(2, 1),
    new Tuple2<>(1, 1),
    new Tuple2<>(3, 2),
    new Tuple2<>(3, 1)
  );
  JavaPairRDD<Integer, Integer> perKeySums =
    sc.parallelizePairs(keyedOnes).foldByKey(0, (acc, value) -> acc + value);

  // Key 1: single value 1.
  int sumForOne = perKeySums.lookup(1).get(0).intValue();
  Assert.assertEquals(1, sumForOne);

  // Key 2: 1 + 1.
  int sumForTwo = perKeySums.lookup(2).get(0).intValue();
  Assert.assertEquals(2, sumForTwo);

  // Key 3: 2 + 1.
  int sumForThree = perKeySums.lookup(3).get(0).intValue();
  Assert.assertEquals(3, sumForThree);
}
/**
 * Folds the values of each key together by addition (zero value 0) and verifies
 * the resulting per-key sums.
 */
@Test
public void foldByKey() {
  List<Tuple2<Integer, Integer>> keyedOnes = Arrays.asList(
    new Tuple2<>(2, 1),
    new Tuple2<>(2, 1),
    new Tuple2<>(1, 1),
    new Tuple2<>(3, 2),
    new Tuple2<>(3, 1)
  );
  JavaPairRDD<Integer, Integer> perKeySums =
    sc.parallelizePairs(keyedOnes).foldByKey(0, (acc, value) -> acc + value);

  // Key 1: single value 1.
  int sumForOne = perKeySums.lookup(1).get(0).intValue();
  Assert.assertEquals(1, sumForOne);

  // Key 2: 1 + 1.
  int sumForTwo = perKeySums.lookup(2).get(0).intValue();
  Assert.assertEquals(2, sumForTwo);

  // Key 3: 2 + 1.
  int sumForThree = perKeySums.lookup(3).get(0).intValue();
  Assert.assertEquals(3, sumForThree);
}
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, Boolean> areOdd = x -> (x._1() % 2 == 0) && (x._2() % 2 == 0); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<Boolean, Iterable<Tuple2<Integer, Integer>>> oddsAndEvens = pairRDD.groupBy(areOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = pairRDD.groupBy(areOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
/**
 * Cogroups a category RDD with a price RDD on their shared fruit-name key and checks
 * the rendered category and price groups found under "Oranges".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup() {
  JavaPairRDD<String, String> fruitCategories = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>("Apples", "Fruit"),
    new Tuple2<>("Oranges", "Fruit"),
    new Tuple2<>("Oranges", "Citrus")
  ));
  JavaPairRDD<String, Integer> fruitPrices = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>("Oranges", 2),
    new Tuple2<>("Apples", 3)
  ));

  JavaPairRDD<String, Tuple2<Iterable<String>, Iterable<Integer>>> joined =
    fruitCategories.cogroup(fruitPrices);

  // Left side of the cogroup: the category values recorded for "Oranges".
  Iterable<String> orangeCategories = joined.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  // Right side of the cogroup: the price values recorded for "Oranges".
  Iterable<Integer> orangePrices = joined.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  // The result is discarded; the call only has to complete without throwing.
  joined.collect();
}
@SuppressWarnings("unchecked") @Test public void keyByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, String> sumToString = x -> String.valueOf(x._1() + x._2()); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<String, Tuple2<Integer, Integer>> keyed = pairRDD.keyBy(sumToString); assertEquals(7, keyed.count()); assertEquals(1, (long) keyed.lookup("2").get(0)._1()); }
@SuppressWarnings("unchecked") @Test public void keyByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, String> sumToString = x -> String.valueOf(x._1() + x._2()); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<String, Tuple2<Integer, Integer>> keyed = pairRDD.keyBy(sumToString); assertEquals(7, keyed.count()); assertEquals(1, (long) keyed.lookup("2").get(0)._1()); }
@SuppressWarnings("unchecked") @Test public void keyByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, String> sumToString = x -> String.valueOf(x._1() + x._2()); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<String, Tuple2<Integer, Integer>> keyed = pairRDD.keyBy(sumToString); assertEquals(7, keyed.count()); assertEquals(1, (long) keyed.lookup("2").get(0)._1()); }