@SuppressWarnings("unchecked") @Test public void mapOnPairRDD() { JavaRDD<Integer> rdd1 = sc.parallelize(Arrays.asList(1,2,3,4)); JavaPairRDD<Integer, Integer> rdd2 = rdd1.mapToPair(i -> new Tuple2<>(i, i % 2)); JavaPairRDD<Integer, Integer> rdd3 = rdd2.mapToPair(in -> new Tuple2<>(in._2(), in._1())); assertEquals(Arrays.asList( new Tuple2<>(1, 1), new Tuple2<>(0, 2), new Tuple2<>(1, 3), new Tuple2<>(0, 4)), rdd3.collect()); }
static Tuple2<String, CallLog[]> fetchResultFromRequest(ObjectMapper mapper,
    Tuple2<String, ContentExchange> signExchange) {
  String sign = signExchange._1();
  ContentExchange exchange = signExchange._2();
  return new Tuple2<>(sign, readExchangeCallLog(mapper, exchange));
}

static Tuple2<String, ContentExchange> createRequestForSign(String sign, HttpClient client)
    throws Exception {
private static void saveFeaturesRDD(JavaPairRDD<Integer, float[]> features,
                                    Path path,
                                    Broadcast<? extends Map<Integer, String>> bIndexToID) {
  log.info("Saving features RDD to {}", path);
  features.map(keyAndVector -> {
    String id = bIndexToID.value().get(keyAndVector._1());
    float[] vector = keyAndVector._2();
    return TextUtils.joinJSON(Arrays.asList(id, vector));
  }).saveAsTextFile(path.toString(), GzipCodec.class);
}
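A hypothetical call site for the helper above; the names `indexToID`, `features`, and `outputDir` are illustrative and not from the original source:

// Sketch: broadcast the index-to-ID mapping once, then persist the feature vectors.
Broadcast<Map<Integer, String>> bIndexToID = sc.broadcast(indexToID);
saveFeaturesRDD(features, new Path(outputDir, "X"), bIndexToID);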
@SuppressWarnings("unchecked") @Test public void hadoopFile() { String outputDir = new File(tempDir, "output").getAbsolutePath(); List<Tuple2<Integer, String>> pairs = Arrays.asList( new Tuple2<>(1, "a"), new Tuple2<>(2, "aa"), new Tuple2<>(3, "aaa") ); JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs); rdd.mapToPair(pair -> new Tuple2<>(new IntWritable(pair._1()), new Text(pair._2()))) .saveAsHadoopFile(outputDir, IntWritable.class, Text.class, SequenceFileOutputFormat.class); JavaPairRDD<IntWritable, Text> output = sc.hadoopFile(outputDir, SequenceFileInputFormat.class, IntWritable.class, Text.class); assertEquals(pairs.toString(), output.map(Tuple2::toString).collect().toString()); }
@Test
public void map() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
  doubles.collect();
  JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
  pairs.collect();
  JavaRDD<String> strings = rdd.map(Object::toString).cache();
  strings.collect();
}
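The test above only materializes the cached RDDs; if one also wanted to check the transformed values, assertions along these lines could be appended (a sketch, not part of the original test):

assertEquals(5, doubles.count());
assertEquals(new Tuple2<>(3, 3), pairs.collect().get(2));
assertEquals(Arrays.asList("1", "2", "3", "4", "5"), strings.collect());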
@Test
public void collectAsMapWithIntArrayValues() {
  // Regression test for SPARK-1040
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
  JavaPairRDD<Integer, int[]> pairRDD = rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
  pairRDD.collect();       // Works fine
  pairRDD.collectAsMap();  // Used to crash with ClassCastException
}
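A follow-on check one could add to pin down the fixed behavior (a sketch; assumes JUnit's `assertArrayEquals` is in scope):

Map<Integer, int[]> map = pairRDD.collectAsMap();
assertArrayEquals(new int[]{1}, map.get(1));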
@SuppressWarnings("unchecked") @Test public void writeWithNewAPIHadoopFile() { String outputDir = new File(tempDir, "output").getAbsolutePath(); List<Tuple2<Integer, String>> pairs = Arrays.asList( new Tuple2<>(1, "a"), new Tuple2<>(2, "aa"), new Tuple2<>(3, "aaa") ); JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs); rdd.mapToPair(pair -> new Tuple2<>(new IntWritable(pair._1()), new Text(pair._2()))) .saveAsNewAPIHadoopFile(outputDir, IntWritable.class, Text.class, org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class); JavaPairRDD<IntWritable, Text> output = sc.sequenceFile(outputDir, IntWritable.class, Text.class); assertEquals(pairs.toString(), output.map(Tuple2::toString).collect().toString()); }
@SuppressWarnings("unchecked") @Test public void keyByOnPairRDD() { // Regression test for SPARK-4459 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Tuple2<Integer, Integer>, String> sumToString = x -> String.valueOf(x._1() + x._2()); JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd); JavaPairRDD<String, Tuple2<Integer, Integer>> keyed = pairRDD.keyBy(sumToString); assertEquals(7, keyed.count()); assertEquals(1, (long) keyed.lookup("2").get(0)._1()); }
@Test
public void groupByOnPairRDD() {
  // Regression test for SPARK-4459
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
  // True when both components of the pair are even.
  Function<Tuple2<Integer, Integer>, Boolean> areEven =
      x -> (x._1() % 2 == 0) && (x._2() % 2 == 0);
  JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd);
  JavaPairRDD<Boolean, Iterable<Tuple2<Integer, Integer>>> oddsAndEvens =
      pairRDD.groupBy(areEven);
  assertEquals(2, oddsAndEvens.count());
  assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0)));   // Evens
  assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0)));  // Odds

  oddsAndEvens = pairRDD.groupBy(areEven, 1);
  assertEquals(2, oddsAndEvens.count());
  assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0)));   // Evens
  assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0)));  // Odds
}
final ArrayList<Product2<Object, Object>> dataToWrite = new ArrayList<>();
for (int i : new int[] { 1, 2, 3, 4, 4, 2 }) {
  dataToWrite.add(new Tuple2<>(i, i));
}
writer.closeAndWriteOutput();
final Option<MapStatus> mapStatus = writer.stop(true);
assertTrue(mapStatus.isDefined());
assertTrue(mergedOutputFile.exists());
assertEquals(2, spillFilesCreated.size());
@Test
public void keyBy() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2));
  List<Tuple2<String, Integer>> s = rdd.keyBy(Object::toString).collect();
  assertEquals(new Tuple2<>("1", 1), s.get(0));
  assertEquals(new Tuple2<>("2", 2), s.get(1));
}
private static List<PartitionInfo> getPartitionInfo(KafkaZkClient zkClient, String topic) {
  scala.collection.immutable.Set<String> topicList =
      new scala.collection.immutable.Set.Set1<>(topic);
  scala.collection.Map<Object, scala.collection.Seq<Object>> partitionAssignments =
      zkClient.getPartitionAssignmentForTopics(topicList).apply(topic);
  List<PartitionInfo> partitionInfoList = new ArrayList<>();
  scala.collection.Iterator<scala.Tuple2<Object, scala.collection.Seq<Object>>> it =
      partitionAssignments.iterator();
  while (it.hasNext()) {
    scala.Tuple2<Object, scala.collection.Seq<Object>> scalaTuple = it.next();
    Integer partition = (Integer) scalaTuple._1();
    scala.Option<Object> leaderOption =
        zkClient.getLeaderForPartition(new TopicPartition(topic, partition));
    Node leader = leaderOption.isEmpty() ? null : new Node((Integer) leaderOption.get(), "", -1);
    Node[] replicas = new Node[scalaTuple._2().size()];
    for (int i = 0; i < replicas.length; i++) {
      Integer brokerId = (Integer) scalaTuple._2().apply(i);
      replicas[i] = new Node(brokerId, "", -1);
    }
    partitionInfoList.add(new PartitionInfo(topic, partition, leader, replicas, null));
  }
  return partitionInfoList;
}
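A hypothetical usage of the helper above, assuming an already-connected `KafkaZkClient` named `zkClient` and a topic that exists:

for (PartitionInfo info : getPartitionInfo(zkClient, "my-topic")) {
  // Print partition id, current leader (or null), and replica count.
  System.out.printf("partition=%d leader=%s replicas=%d%n",
      info.partition(), info.leader(), info.replicas().length);
}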
final String akkaHostname = AkkaUtils.getAddress(actorSystem).host().get();
final int akkaPort = (Integer) AkkaUtils.getAddress(actorSystem).port().get();

    highAvailabilityServices,
    metricRegistry,
    webMonitor == null ? Option.empty() : Option.apply(webMonitor.getRestAddress()),
    new Some<>(JobMaster.JOB_MANAGER_NAME),
    Option.<String>empty(),
    getJobManagerClass(),
    getArchivistClass())._1();
@Test
public void mapsFromPairsToPairs() {
  List<Tuple2<Integer, String>> pairs = Arrays.asList(
      new Tuple2<>(1, "a"),
      new Tuple2<>(2, "aa"),
      new Tuple2<>(3, "aaa")
  );
  JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);

  // Regression test for SPARK-668:
  JavaPairRDD<String, Integer> swapped =
      pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap()).iterator());
  swapped.collect();

  // There was never a bug here, but it's worth testing:
  pairRDD.map(Tuple2::swap).collect();
}
@Override
public Tuple2<String, String> call(String x) {
  return new Tuple2<>(x.split(" ")[0], x);
}
};
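For context, a minimal sketch of the anonymous class this excerpt comes from; the surrounding names (`keyByFirstWord`, `lines`) are assumptions, not from the original. It is a `PairFunction` that keys each input line by its first whitespace-delimited token:

PairFunction<String, String, String> keyByFirstWord = new PairFunction<String, String, String>() {
  @Override
  public Tuple2<String, String> call(String x) {
    // Key each line by its first space-separated token; keep the full line as the value.
    return new Tuple2<>(x.split(" ")[0], x);
  }
};
JavaPairRDD<String, String> keyedLines = lines.mapToPair(keyByFirstWord);  // `lines` is hypothetical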
@SuppressWarnings("unchecked") @Test public void objectFilesOfComplexTypes() { String outputDir = new File(tempDir, "output").getAbsolutePath(); List<Tuple2<Integer, String>> pairs = Arrays.asList( new Tuple2<>(1, "a"), new Tuple2<>(2, "aa"), new Tuple2<>(3, "aaa") ); JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs); rdd.saveAsObjectFile(outputDir); // Try reading the output back as an object file JavaRDD<Tuple2<Integer, String>> readRDD = sc.objectFile(outputDir); assertEquals(pairs, readRDD.collect()); }
public static Map<String, Integer> countDistinctOtherWords(JavaPairRDD<String, String> data) {
  return data.values().flatMapToPair(line -> {
    Set<String> distinctTokens = new HashSet<>(Arrays.asList(line.split(" ")));
    // Emit every ordered pair of distinct tokens that co-occur on this line.
    return distinctTokens.stream().flatMap(a ->
        distinctTokens.stream().filter(b -> !a.equals(b)).map(b -> new Tuple2<>(a, b))
    ).iterator();
  }).distinct().mapValues(a -> 1).reduceByKey((c1, c2) -> c1 + c2).collectAsMap();
}
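A small usage sketch for `countDistinctOtherWords`; the sample data and the `sc` context are illustrative:

JavaPairRDD<String, String> data = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>("doc1", "apple banana cherry"),
    new Tuple2<>("doc2", "apple banana")));
Map<String, Integer> counts = countDistinctOtherWords(data);
// Each word is counted once per distinct other word it ever co-occurs with,
// so counts.get("apple") == 2 here ("banana" and "cherry").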
@Override
public FetchResponse fetch(FetchRequest request) {
  scala.collection.Traversable<Tuple2<TopicAndPartition, PartitionFetchInfo>> requestInfo =
      request.requestInfo();
  java.util.Map<TopicAndPartition, Short> errorMap = new HashMap<>();

  while (requestInfo.headOption().isDefined()) {
    // jfim: IntelliJ erroneously flags the following line as an incompatible type error, but only
    // because it doesn't understand Scala covariance when called from Java (i.e. it thinks head() is
    // of type A even though it's really of type Tuple2[TopicAndPartition, PartitionFetchInfo])
    Tuple2<TopicAndPartition, PartitionFetchInfo> t2 = requestInfo.head();

    TopicAndPartition topicAndPartition = t2._1();
    PartitionFetchInfo partitionFetchInfo = t2._2();

    if (!topicAndPartition.topic().equals(topicName)) {
      errorMap.put(topicAndPartition, Errors.UNKNOWN_TOPIC_OR_PARTITION.code());
    } else if (topicAndPartition.partition() >= partitionLeaderIndices.length) {
      // Bounds check: a partition index equal to the array length is also out of range.
      errorMap.put(topicAndPartition, Errors.UNKNOWN_TOPIC_OR_PARTITION.code());
    } else if (partitionLeaderIndices[topicAndPartition.partition()] != index) {
      errorMap.put(topicAndPartition, Errors.NOT_LEADER_FOR_PARTITION.code());
    } else {
      // Do nothing, we'll generate a fake message
    }

    requestInfo = requestInfo.tail();
  }

  return new MockFetchResponse(errorMap);
}
logger.info("Using thriftmux."); Tuple2<Transport.Liveness, Stack.Param<Transport.Liveness>> livenessParam = new Transport.Liveness( Duration.Top(), Duration.Top(), Option.apply((Object) Boolean.valueOf(true))).mk(); serverBuilder = serverBuilder.stack(ThriftMuxServer$.MODULE$.configured(livenessParam._1(), livenessParam._2()));