/**
 * For every token, counts how many distinct other tokens share at least one line with it.
 *
 * <p>Only the values of {@code data} are read. Each value is split on single spaces and
 * de-duplicated; every ordered pair of unequal tokens from the same line is emitted,
 * duplicate pairs are dropped across all lines, and the remaining pairs are tallied per
 * first token.
 *
 * @param data pair RDD whose values are space-separated lines of tokens
 * @return map from token to the number of distinct other tokens seen alongside it
 */
public static Map<String,Integer> countDistinctOtherWords(JavaPairRDD<String,String> data) {
  JavaPairRDD<String,String> coOccurrences = data.values().flatMapToPair(text -> {
    Set<String> words = new HashSet<>(Arrays.asList(text.split(" ")));
    // Cross the line's token set with itself, skipping the (word, word) diagonal.
    return words.stream()
        .flatMap(word -> words.stream()
            .filter(other -> !other.equals(word))
            .map(other -> new Tuple2<>(word, other)))
        .iterator();
  }).distinct();
  // One unit per surviving (word, other) pair, summed per word.
  return coOccurrences
      .mapValues(other -> 1)
      .reduceByKey(Integer::sum)
      .collectAsMap();
}
examplesRDD.flatMapToPair(example -> { Feature target = example.getTarget(); DecisionTree[] trees = forest.getTrees();
/**
 * Exercises {@code flatMap}, {@code flatMapToPair} and {@code flatMapToDouble} on a
 * two-sentence RDD. Splitting the sentences on single spaces yields 2 + 9 = 11 tokens,
 * so each derived RDD is expected to hold 11 elements.
 */
@Test
public void flatMap() {
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));

  // flatMap: one output element per token.
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  Assert.assertEquals("Hello", words.first());
  Assert.assertEquals(11, words.count());

  // flatMapToPair: each token is paired with itself.
  JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs2 = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs2.add(new Tuple2<>(word, word));
    }
    return pairs2.iterator();
  });
  Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
  Assert.assertEquals(11, pairs.count());

  // flatMapToDouble: each token is mapped to its length.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  Assert.assertEquals(5.0, doubles.first(), 0.01);
  // Verify the element count of the doubles RDD itself (one length per token).
  Assert.assertEquals(11, doubles.count());
}
/**
 * Exercises {@code flatMap}, {@code flatMapToPair} and {@code flatMapToDouble} on a
 * two-sentence RDD. Splitting the sentences on single spaces yields 2 + 9 = 11 tokens,
 * so each derived RDD is expected to hold 11 elements.
 */
@Test
public void flatMap() {
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));

  // flatMap: one output element per token.
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  Assert.assertEquals("Hello", words.first());
  Assert.assertEquals(11, words.count());

  // flatMapToPair: each token is paired with itself.
  JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs2 = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs2.add(new Tuple2<>(word, word));
    }
    return pairs2.iterator();
  });
  Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
  Assert.assertEquals(11, pairs.count());

  // flatMapToDouble: each token is mapped to its length.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  Assert.assertEquals(5.0, doubles.first(), 0.01);
  // Verify the element count of the doubles RDD itself (one length per token).
  Assert.assertEquals(11, doubles.count());
}
/**
 * Exercises {@code flatMap}, {@code flatMapToPair} and {@code flatMapToDouble} on a
 * two-sentence RDD. Splitting the sentences on single spaces yields 2 + 9 = 11 tokens,
 * so each derived RDD is expected to hold 11 elements.
 */
@Test
public void flatMap() {
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));

  // flatMap: one output element per token.
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  assertEquals("Hello", words.first());
  assertEquals(11, words.count());

  // flatMapToPair: each token is paired with itself.
  JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs.add(new Tuple2<>(word, word));
    }
    return pairs.iterator();
  });
  assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
  assertEquals(11, pairsRDD.count());

  // flatMapToDouble: each token is mapped to its length.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  assertEquals(5.0, doubles.first(), 0.01);
  // Verify the element count of the doubles RDD itself (one length per token).
  assertEquals(11, doubles.count());
}
/**
 * Exercises {@code flatMap}, {@code flatMapToPair} and {@code flatMapToDouble} on a
 * two-sentence RDD. Splitting the sentences on single spaces yields 2 + 9 = 11 tokens,
 * so each derived RDD is expected to hold 11 elements.
 */
@Test
public void flatMap() {
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));

  // flatMap: one output element per token.
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  Assert.assertEquals("Hello", words.first());
  Assert.assertEquals(11, words.count());

  // flatMapToPair: each token is paired with itself.
  JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs2 = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs2.add(new Tuple2<>(word, word));
    }
    return pairs2.iterator();
  });
  Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
  Assert.assertEquals(11, pairs.count());

  // flatMapToDouble: each token is mapped to its length.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  Assert.assertEquals(5.0, doubles.first(), 0.01);
  // Verify the element count of the doubles RDD itself (one length per token).
  Assert.assertEquals(11, doubles.count());
}
/**
 * Exercises {@code flatMap}, {@code flatMapToPair} and {@code flatMapToDouble} on a
 * two-sentence RDD. Splitting the sentences on single spaces yields 2 + 9 = 11 tokens,
 * so each derived RDD is expected to hold 11 elements.
 */
@Test
public void flatMap() {
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));

  // flatMap: one output element per token.
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  assertEquals("Hello", words.first());
  assertEquals(11, words.count());

  // flatMapToPair: each token is paired with itself.
  JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs.add(new Tuple2<>(word, word));
    }
    return pairs.iterator();
  });
  assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
  assertEquals(11, pairsRDD.count());

  // flatMapToDouble: each token is mapped to its length.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  assertEquals(5.0, doubles.first(), 0.01);
  // Verify the element count of the doubles RDD itself (one length per token).
  assertEquals(11, doubles.count());
}
/**
 * Exercises {@code flatMap}, {@code flatMapToPair} and {@code flatMapToDouble} on a
 * two-sentence RDD. Splitting the sentences on single spaces yields 2 + 9 = 11 tokens,
 * so each derived RDD is expected to hold 11 elements.
 */
@Test
public void flatMap() {
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));

  // flatMap: one output element per token.
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  assertEquals("Hello", words.first());
  assertEquals(11, words.count());

  // flatMapToPair: each token is paired with itself.
  JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs.add(new Tuple2<>(word, word));
    }
    return pairs.iterator();
  });
  assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
  assertEquals(11, pairsRDD.count());

  // flatMapToDouble: each token is mapped to its length.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  assertEquals(5.0, doubles.first(), 0.01);
  // Verify the element count of the doubles RDD itself (one length per token).
  assertEquals(11, doubles.count());
}
// Flat-map every input line into the pairs that Util.mapToKeyValueList(line, N) yields.
JavaPairRDD<String, String> rdd = lines.flatMapToPair((String record) -> {
  return Util.mapToKeyValueList(record, N).iterator();
});
JavaPairRDD<String,Integer> ones = records.flatMapToPair((String rec) -> {
// Flat-map every input line into the pairs that Util.mapToKeyValueList(line, N) yields.
JavaPairRDD<String, String> rdd = lines.flatMapToPair((String record) -> {
  return Util.mapToKeyValueList(record, N).iterator();
});
JavaPairRDD<Tuple2<String,String>,Integer> pairs = training.flatMapToPair((String rec) -> { List<Tuple2<Tuple2<String,String>,Integer>> result = new ArrayList<Tuple2<Tuple2<String,String>,Integer>>();
// Flat-map every input line into the pairs that Util.mapToKeyValueList(line, N) yields.
JavaPairRDD<String, String> rdd = lines.flatMapToPair((String record) -> {
  return Util.mapToKeyValueList(record, N).iterator();
});
JavaPairRDD<String, String> rdd = lines.flatMapToPair( new PairFlatMapFunction<String, String, String>() { @Override
JavaPairRDD<String, String> rdd = lines.flatMapToPair( new PairFlatMapFunction<String, String, String>() { @Override
JavaPairRDD<Tuple2<String,String>,Integer> pairs = training.flatMapToPair(new PairFlatMapFunction<
JavaPairRDD<Character,Long> chars = lines.flatMapToPair(new PairFlatMapFunction<String, Character, Long>() { @Override public Iterator<Tuple2<Character,Long>> call(String s) {
JavaPairRDD<String,Integer> ones = records.flatMapToPair(new PairFlatMapFunction
JavaPairRDD<Character,Long> chars = lines.flatMapToPair(new PairFlatMapFunction<String, Character, Long>() { @Override public Iterator<Tuple2<Character,Long>> call(String s) {
S.flatMapToPair(new PairFlatMapFunction<