@Test public void glom() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2); assertEquals("[1, 2]", rdd.glom().first().toString()); }
@Test public void glom() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2); assertEquals("[1, 2]", rdd.glom().first().toString()); }
@Test public void sortBy() { List<Tuple2<Integer, Integer>> pairs = new ArrayList<>(); pairs.add(new Tuple2<>(0, 4)); pairs.add(new Tuple2<>(3, 2)); pairs.add(new Tuple2<>(-1, 1)); JavaRDD<Tuple2<Integer, Integer>> rdd = sc.parallelize(pairs); // compare on first value JavaRDD<Tuple2<Integer, Integer>> sortedRDD = rdd.sortBy(Tuple2::_1, true, 2); assertEquals(new Tuple2<>(-1, 1), sortedRDD.first()); List<Tuple2<Integer, Integer>> sortedPairs = sortedRDD.collect(); assertEquals(new Tuple2<>(0, 4), sortedPairs.get(1)); assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2)); // compare on second value sortedRDD = rdd.sortBy(Tuple2::_2, true, 2); assertEquals(new Tuple2<>(-1, 1), sortedRDD.first()); sortedPairs = sortedRDD.collect(); assertEquals(new Tuple2<>(3, 2), sortedPairs.get(1)); assertEquals(new Tuple2<>(0, 4), sortedPairs.get(2)); }
@Test public void glom() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2); assertEquals("[1, 2]", rdd.glom().first().toString()); }
@Test public void sortBy() { List<Tuple2<Integer, Integer>> pairs = new ArrayList<>(); pairs.add(new Tuple2<>(0, 4)); pairs.add(new Tuple2<>(3, 2)); pairs.add(new Tuple2<>(-1, 1)); JavaRDD<Tuple2<Integer, Integer>> rdd = sc.parallelize(pairs); // compare on first value JavaRDD<Tuple2<Integer, Integer>> sortedRDD = rdd.sortBy(Tuple2::_1, true, 2); assertEquals(new Tuple2<>(-1, 1), sortedRDD.first()); List<Tuple2<Integer, Integer>> sortedPairs = sortedRDD.collect(); assertEquals(new Tuple2<>(0, 4), sortedPairs.get(1)); assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2)); // compare on second value sortedRDD = rdd.sortBy(Tuple2::_2, true, 2); assertEquals(new Tuple2<>(-1, 1), sortedRDD.first()); sortedPairs = sortedRDD.collect(); assertEquals(new Tuple2<>(3, 2), sortedPairs.get(1)); assertEquals(new Tuple2<>(0, 4), sortedPairs.get(2)); }
@Test public void sortBy() { List<Tuple2<Integer, Integer>> pairs = new ArrayList<>(); pairs.add(new Tuple2<>(0, 4)); pairs.add(new Tuple2<>(3, 2)); pairs.add(new Tuple2<>(-1, 1)); JavaRDD<Tuple2<Integer, Integer>> rdd = sc.parallelize(pairs); // compare on first value JavaRDD<Tuple2<Integer, Integer>> sortedRDD = rdd.sortBy(Tuple2::_1, true, 2); assertEquals(new Tuple2<>(-1, 1), sortedRDD.first()); List<Tuple2<Integer, Integer>> sortedPairs = sortedRDD.collect(); assertEquals(new Tuple2<>(0, 4), sortedPairs.get(1)); assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2)); // compare on second value sortedRDD = rdd.sortBy(Tuple2::_2, true, 2); assertEquals(new Tuple2<>(-1, 1), sortedRDD.first()); sortedPairs = sortedRDD.collect(); assertEquals(new Tuple2<>(3, 2), sortedPairs.get(1)); assertEquals(new Tuple2<>(0, 4), sortedPairs.get(2)); }
@Test public void take() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); assertEquals(1, rdd.first().intValue()); rdd.take(2); rdd.takeSample(false, 2, 42); }
@Test public void take() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); assertEquals(1, rdd.first().intValue()); rdd.take(2); rdd.takeSample(false, 2, 42); }
@Test public void take() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); assertEquals(1, rdd.first().intValue()); rdd.take(2); rdd.takeSample(false, 2, 42); }
@Test public void flatMap() { JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog.")); JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator()); assertEquals("Hello", words.first()); assertEquals(11, words.count()); JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> { List<Tuple2<String, String>> pairs = new LinkedList<>(); for (String word : s.split(" ")) { pairs.add(new Tuple2<>(word, word)); } return pairs.iterator(); } ); assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first()); assertEquals(11, pairsRDD.count()); JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> { List<Double> lengths = new LinkedList<>(); for (String word : s.split(" ")) { lengths.add((double) word.length()); } return lengths.iterator(); }); assertEquals(5.0, doubles.first(), 0.01); assertEquals(11, pairsRDD.count()); }
@Test public void flatMap() { JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog.")); JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator()); assertEquals("Hello", words.first()); assertEquals(11, words.count()); JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> { List<Tuple2<String, String>> pairs = new LinkedList<>(); for (String word : s.split(" ")) { pairs.add(new Tuple2<>(word, word)); } return pairs.iterator(); } ); assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first()); assertEquals(11, pairsRDD.count()); JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> { List<Double> lengths = new LinkedList<>(); for (String word : s.split(" ")) { lengths.add((double) word.length()); } return lengths.iterator(); }); assertEquals(5.0, doubles.first(), 0.01); assertEquals(11, pairsRDD.count()); }
@Test public void flatMap() { JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog.")); JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator()); Assert.assertEquals("Hello", words.first()); Assert.assertEquals(11, words.count()); JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> { List<Tuple2<String, String>> pairs2 = new LinkedList<>(); for (String word : s.split(" ")) { pairs2.add(new Tuple2<>(word, word)); } return pairs2.iterator(); }); Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first()); Assert.assertEquals(11, pairs.count()); JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> { List<Double> lengths = new LinkedList<>(); for (String word : s.split(" ")) { lengths.add((double) word.length()); } return lengths.iterator(); }); Assert.assertEquals(5.0, doubles.first(), 0.01); Assert.assertEquals(11, pairs.count()); }
@Test public void flatMap() { JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog.")); JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator()); Assert.assertEquals("Hello", words.first()); Assert.assertEquals(11, words.count()); JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> { List<Tuple2<String, String>> pairs2 = new LinkedList<>(); for (String word : s.split(" ")) { pairs2.add(new Tuple2<>(word, word)); } return pairs2.iterator(); }); Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first()); Assert.assertEquals(11, pairs.count()); JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> { List<Double> lengths = new LinkedList<>(); for (String word : s.split(" ")) { lengths.add((double) word.length()); } return lengths.iterator(); }); Assert.assertEquals(5.0, doubles.first(), 0.01); Assert.assertEquals(11, pairs.count()); }
@Test public void flatMap() { JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog.")); JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator()); Assert.assertEquals("Hello", words.first()); Assert.assertEquals(11, words.count()); JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> { List<Tuple2<String, String>> pairs2 = new LinkedList<>(); for (String word : s.split(" ")) { pairs2.add(new Tuple2<>(word, word)); } return pairs2.iterator(); }); Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first()); Assert.assertEquals(11, pairs.count()); JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> { List<Double> lengths = new LinkedList<>(); for (String word : s.split(" ")) { lengths.add((double) word.length()); } return lengths.iterator(); }); Assert.assertEquals(5.0, doubles.first(), 0.01); Assert.assertEquals(11, pairs.count()); }
@Test public void flatMap() { JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog.")); JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator()); assertEquals("Hello", words.first()); assertEquals(11, words.count()); JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> { List<Tuple2<String, String>> pairs = new LinkedList<>(); for (String word : s.split(" ")) { pairs.add(new Tuple2<>(word, word)); } return pairs.iterator(); } ); assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first()); assertEquals(11, pairsRDD.count()); JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> { List<Double> lengths = new LinkedList<>(); for (String word : s.split(" ")) { lengths.add((double) word.length()); } return lengths.iterator(); }); assertEquals(5.0, doubles.first(), 0.01); assertEquals(11, pairsRDD.count()); }
@SuppressWarnings("unchecked") @Test public void persist() { JavaDoubleRDD doubleRDD = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0)); doubleRDD = doubleRDD.persist(StorageLevel.DISK_ONLY()); assertEquals(20, doubleRDD.sum(), 0.1); List<Tuple2<Integer, String>> pairs = Arrays.asList( new Tuple2<>(1, "a"), new Tuple2<>(2, "aa"), new Tuple2<>(3, "aaa") ); JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs); pairRDD = pairRDD.persist(StorageLevel.DISK_ONLY()); assertEquals("a", pairRDD.first()._2()); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); rdd = rdd.persist(StorageLevel.DISK_ONLY()); assertEquals(1, rdd.first().intValue()); }
@SuppressWarnings("unchecked") @Test public void persist() { JavaDoubleRDD doubleRDD = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0)); doubleRDD = doubleRDD.persist(StorageLevel.DISK_ONLY()); assertEquals(20, doubleRDD.sum(), 0.1); List<Tuple2<Integer, String>> pairs = Arrays.asList( new Tuple2<>(1, "a"), new Tuple2<>(2, "aa"), new Tuple2<>(3, "aaa") ); JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs); pairRDD = pairRDD.persist(StorageLevel.DISK_ONLY()); assertEquals("a", pairRDD.first()._2()); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); rdd = rdd.persist(StorageLevel.DISK_ONLY()); assertEquals(1, rdd.first().intValue()); }
@SuppressWarnings("unchecked") @Test public void persist() { JavaDoubleRDD doubleRDD = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0)); doubleRDD = doubleRDD.persist(StorageLevel.DISK_ONLY()); assertEquals(20, doubleRDD.sum(), 0.1); List<Tuple2<Integer, String>> pairs = Arrays.asList( new Tuple2<>(1, "a"), new Tuple2<>(2, "aa"), new Tuple2<>(3, "aaa") ); JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs); pairRDD = pairRDD.persist(StorageLevel.DISK_ONLY()); assertEquals("a", pairRDD.first()._2()); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); rdd = rdd.persist(StorageLevel.DISK_ONLY()); assertEquals(1, rdd.first().intValue()); }
@Override public Void call(JavaRDD<Tuple2<Integer, Integer>> tuple2JavaRDD) throws Exception { if (tuple2JavaRDD.count() > 0) { System.out.println("Current avg: " + tuple2JavaRDD.first()._2() / tuple2JavaRDD.first()._1()); } else { System.out.println("Got no data in this window"); } return null; } });
@Test public void testPredictJavaRDD() { JavaRDD<LabeledPoint> examples = jsc.parallelize(POINTS, 2).cache(); NaiveBayesModel model = NaiveBayes.train(examples.rdd()); JavaRDD<Vector> vectors = examples.map(LabeledPoint::features); JavaRDD<Double> predictions = model.predict(vectors); // Should be able to get the first prediction. predictions.first(); }