/**
 * Builds a pair RDD restricted to the keys for which both inputs hold at least one value.
 *
 * <p>The two inputs are cogrouped by key. For every key whose value group is non-empty on
 * both sides, all values from {@code rdd1} are emitted first, followed by all values from
 * {@code rdd2}. Keys with an empty group on either side contribute nothing.
 *
 * @param rdd1 first keyed RDD
 * @param rdd2 second keyed RDD
 * @return the values of both inputs for keys that have values in both
 */
public static <K, V> JavaPairRDD<K, V> intersectByKey(JavaPairRDD<K, V> rdd1, JavaPairRDD<K, V> rdd2) {
  return rdd1.cogroup(rdd2).flatMapValues(
      new Function<Tuple2<Iterable<V>, Iterable<V>>, Iterable<V>>() {
        @Override
        public Iterable<V> call(Tuple2<Iterable<V>, Iterable<V>> sides) {
          ArrayList<V> merged = new ArrayList<V>();
          // A key survives only when both sides have at least one value for it.
          if (Iterables.isEmpty(sides._1()) || Iterables.isEmpty(sides._2())) {
            return merged;
          }
          Iterables.addAll(merged, sides._1());
          Iterables.addAll(merged, sides._2());
          return merged;
        }
      });
}

public static void main(String[] args) throws Exception {
/**
 * Cogroups a (product, category) RDD with a (product, price) RDD and checks the
 * grouped categories and prices found under the key "Oranges".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));

  JavaPairRDD<String, Tuple2<Iterable<String>, Iterable<Integer>>> grouped =
      categoryRdd.cogroup(priceRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  grouped.collect();
}
/**
 * Cogroups a (product, category) RDD with a (product, price) RDD and checks the
 * grouped categories and prices found under the key "Oranges".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));

  JavaPairRDD<String, Tuple2<Iterable<String>, Iterable<Integer>>> grouped =
      categoryRdd.cogroup(priceRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  grouped.collect();
}
/**
 * Cogroups a (product, category) RDD with a (product, price) RDD and checks the
 * grouped categories and prices found under the key "Oranges".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));

  JavaPairRDD<String, Tuple2<Iterable<String>, Iterable<Integer>>> grouped =
      categoryRdd.cogroup(priceRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  grouped.collect();
}
/**
 * Cogroups three keyed RDDs (categories, prices, quantities) and checks one grouped
 * value list per tuple position: categories and prices for "Oranges", quantities for
 * "Apples".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup3() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));
  JavaPairRDD<String, Integer> quantityRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 21),
      new Tuple2<>("Apples", 42)));

  JavaPairRDD<String, Tuple3<Iterable<String>, Iterable<Integer>, Iterable<Integer>>> grouped =
      categoryRdd.cogroup(priceRdd, quantityRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  Iterable<Integer> appleQuantities = grouped.lookup("Apples").get(0)._3();
  assertEquals("[42]", Iterables.toString(appleQuantities));

  grouped.collect();
}
/**
 * Cogroups four keyed RDDs (categories, prices, quantities, countries) and checks one
 * grouped value list per tuple position: categories, prices and countries for
 * "Oranges", quantities for "Apples".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup4() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));
  JavaPairRDD<String, Integer> quantityRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 21),
      new Tuple2<>("Apples", 42)));
  JavaPairRDD<String, String> countryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", "BR"),
      new Tuple2<>("Apples", "US")));

  JavaPairRDD<String, Tuple4<Iterable<String>, Iterable<Integer>, Iterable<Integer>,
      Iterable<String>>> grouped = categoryRdd.cogroup(priceRdd, quantityRdd, countryRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  Iterable<Integer> appleQuantities = grouped.lookup("Apples").get(0)._3();
  assertEquals("[42]", Iterables.toString(appleQuantities));

  Iterable<String> orangeCountries = grouped.lookup("Oranges").get(0)._4();
  assertEquals("[BR]", Iterables.toString(orangeCountries));

  grouped.collect();
}
/**
 * Cogroups three keyed RDDs (categories, prices, quantities) and checks one grouped
 * value list per tuple position: categories and prices for "Oranges", quantities for
 * "Apples".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup3() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));
  JavaPairRDD<String, Integer> quantityRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 21),
      new Tuple2<>("Apples", 42)));

  JavaPairRDD<String, Tuple3<Iterable<String>, Iterable<Integer>, Iterable<Integer>>> grouped =
      categoryRdd.cogroup(priceRdd, quantityRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  Iterable<Integer> appleQuantities = grouped.lookup("Apples").get(0)._3();
  assertEquals("[42]", Iterables.toString(appleQuantities));

  grouped.collect();
}
/**
 * Cogroups three keyed RDDs (categories, prices, quantities) and checks one grouped
 * value list per tuple position: categories and prices for "Oranges", quantities for
 * "Apples".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup3() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));
  JavaPairRDD<String, Integer> quantityRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 21),
      new Tuple2<>("Apples", 42)));

  JavaPairRDD<String, Tuple3<Iterable<String>, Iterable<Integer>, Iterable<Integer>>> grouped =
      categoryRdd.cogroup(priceRdd, quantityRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  Iterable<Integer> appleQuantities = grouped.lookup("Apples").get(0)._3();
  assertEquals("[42]", Iterables.toString(appleQuantities));

  grouped.collect();
}
/**
 * Cogroups four keyed RDDs (categories, prices, quantities, countries) and checks one
 * grouped value list per tuple position: categories, prices and countries for
 * "Oranges", quantities for "Apples".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup4() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));
  JavaPairRDD<String, Integer> quantityRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 21),
      new Tuple2<>("Apples", 42)));
  JavaPairRDD<String, String> countryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", "BR"),
      new Tuple2<>("Apples", "US")));

  JavaPairRDD<String, Tuple4<Iterable<String>, Iterable<Integer>, Iterable<Integer>,
      Iterable<String>>> grouped = categoryRdd.cogroup(priceRdd, quantityRdd, countryRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  Iterable<Integer> appleQuantities = grouped.lookup("Apples").get(0)._3();
  assertEquals("[42]", Iterables.toString(appleQuantities));

  Iterable<String> orangeCountries = grouped.lookup("Oranges").get(0)._4();
  assertEquals("[BR]", Iterables.toString(orangeCountries));

  grouped.collect();
}
/**
 * Cogroups four keyed RDDs (categories, prices, quantities, countries) and checks one
 * grouped value list per tuple position: categories, prices and countries for
 * "Oranges", quantities for "Apples".
 */
@SuppressWarnings("unchecked")
@Test
public void cogroup4() {
  JavaPairRDD<String, String> categoryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Apples", "Fruit"),
      new Tuple2<>("Oranges", "Fruit"),
      new Tuple2<>("Oranges", "Citrus")));
  JavaPairRDD<String, Integer> priceRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 2),
      new Tuple2<>("Apples", 3)));
  JavaPairRDD<String, Integer> quantityRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", 21),
      new Tuple2<>("Apples", 42)));
  JavaPairRDD<String, String> countryRdd = sc.parallelizePairs(Arrays.asList(
      new Tuple2<>("Oranges", "BR"),
      new Tuple2<>("Apples", "US")));

  JavaPairRDD<String, Tuple4<Iterable<String>, Iterable<Integer>, Iterable<Integer>,
      Iterable<String>>> grouped = categoryRdd.cogroup(priceRdd, quantityRdd, countryRdd);

  // Each assertion performs its own lookup, exactly one per check.
  Iterable<String> orangeCategories = grouped.lookup("Oranges").get(0)._1();
  assertEquals("[Fruit, Citrus]", Iterables.toString(orangeCategories));

  Iterable<Integer> orangePrices = grouped.lookup("Oranges").get(0)._2();
  assertEquals("[2]", Iterables.toString(orangePrices));

  Iterable<Integer> appleQuantities = grouped.lookup("Apples").get(0)._3();
  assertEquals("[42]", Iterables.toString(appleQuantities));

  Iterable<String> orangeCountries = grouped.lookup("Oranges").get(0)._4();
  assertEquals("[BR]", Iterables.toString(orangeCountries));

  grouped.collect();
}
throw new Exception("[OverlayOperator][JoinImage] The back image is not distributed. Please don't use distributed format."); this.distributedBackRasterImage = this.distributedBackRasterImage.cogroup(distributedFontImage).mapToPair(new PairFunction<Tuple2<Integer, Tuple2<Iterable<ImageSerializableWrapper>, Iterable<ImageSerializableWrapper>>>, Integer, ImageSerializableWrapper>()
leftTier.cogroup(rightTier, partitioner);
/**
 * Derives a dataset by cogrouping the "into" and "from" dependencies on their key fields.
 *
 * <p>Both dependencies are keyed with an {@code ExtractFieldsFunction} built from
 * {@code keyFieldNames}, cogrouped, and each group is passed through a
 * {@code NestFunction}. The result schema is the "into" schema plus one extra field,
 * named {@code nestedFieldName}, whose type is an array of the "from" schema.
 *
 * @param dependencies available datasets by dependency name; must contain both
 *     {@code intoDependency} and {@code fromDependency}
 * @return the data frame created from the mapped rows and the extended schema
 * @throws RuntimeException if either dependency name is absent from {@code dependencies}
 */
@Override
public Dataset<Row> derive(Map<String, Dataset<Row>> dependencies) throws Exception {
  if (!dependencies.containsKey(intoDependency)) {
    throw new RuntimeException("Nest deriver points to non-existent nest-into dependency");
  }
  Dataset<Row> intoDataset = dependencies.get(intoDependency);

  if (!dependencies.containsKey(fromDependency)) {
    throw new RuntimeException("Nest deriver points to non-existent nest-from dependency");
  }
  Dataset<Row> fromDataset = dependencies.get(fromDependency);

  // The same key extractor is applied to both sides so the keys are comparable.
  ExtractFieldsFunction keyExtractor = new ExtractFieldsFunction(keyFieldNames);
  JavaPairRDD<List<Object>, Row> intoByKey = intoDataset.javaRDD().keyBy(keyExtractor);
  JavaPairRDD<List<Object>, Row> fromByKey = fromDataset.javaRDD().keyBy(keyExtractor);

  NestFunction nester = new NestFunction();
  JavaRDD<Row> nestedRows = intoByKey.cogroup(fromByKey).values().map(nester);

  StructType nestedSchema = intoDataset.schema()
      .add(nestedFieldName, DataTypes.createArrayType(fromDataset.schema()));

  return intoDataset.sqlContext().createDataFrame(nestedRows, nestedSchema);
}
leftJV.cogroup(rightJV);
leftJV.cogroup(rightJV);
JavaPairRDD<Long, Tuple2<Iterable<Tuple2<Long, FieldT>>, Iterable<Tuple2<Long, FieldT>>>> cogroupResult = aFullRDD.cogroup(bFullRDD);
@Override public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, SparkExecutor sparkExecutor, OptimizationContext.OperatorContext operatorContext) { assert inputs.length == this.getNumInputs(); assert outputs.length == this.getNumOutputs(); final RddChannel.Instance input0 = (RddChannel.Instance) inputs[0]; final RddChannel.Instance input1 = (RddChannel.Instance) inputs[1]; final RddChannel.Instance output = (RddChannel.Instance) outputs[0]; final JavaRDD<In0> inputRdd0 = input0.provideRdd(); final JavaRDD<In1> inputRdd1 = input1.provideRdd(); FunctionCompiler compiler = sparkExecutor.getCompiler(); final PairFunction<In0, Key, In0> keyExtractor0 = compiler.compileToKeyExtractor(this.keyDescriptor0); final PairFunction<In1, Key, In1> keyExtractor1 = compiler.compileToKeyExtractor(this.keyDescriptor1); JavaPairRDD<Key, In0> pairRdd0 = inputRdd0.mapToPair(keyExtractor0); JavaPairRDD<Key, In1> pairRdd1 = inputRdd1.mapToPair(keyExtractor1); final JavaPairRDD<Key, scala.Tuple2<Iterable<In0>, Iterable<In1>>> outputPair = pairRdd0.cogroup(pairRdd1, sparkExecutor.getNumDefaultPartitions()); this.name(outputPair); // Map the output to what Rheem expects. final JavaRDD<Tuple2<Iterable<In0>, Iterable<In1>>> outputRdd = outputPair.map(new TupleConverter<>()); this.name(outputRdd); output.accept(outputRdd, sparkExecutor); return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext); }
JavaPairRDD<Long, FieldT> xm = xTransformed.cogroup(meanTransformed).flatMapToPair(x -> { return xMinusMeanAssignment(fieldFactory, x._1(), x._2(), n, d, outputAssignmentIndexer); });