public static String rddGraphToString(JavaPairRDD rdd) {
  StringBuilder sb = new StringBuilder();
  rddToString(rdd.rdd(), sb, "");
  return sb.toString();
}
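The recursive rddToString helper that the method above delegates to is not shown here. The following is a hypothetical sketch, not the original implementation, of what such a helper could look like: it walks the dependency graph of the unwrapped RDD, printing one indented line per ancestor (the exact formatting and traversal are assumptions).

// Hypothetical helper: one line per RDD in the lineage, children indented,
// recursing over each RDD's dependencies.
private static void rddToString(RDD<?> rdd, StringBuilder sb, String offset) {
  sb.append(offset)
    .append(rdd.getClass().getSimpleName())
    .append(" [").append(rdd.id()).append("]\n");
  for (Dependency<?> dep : JavaConversions.seqAsJavaList(rdd.dependencies())) {
    rddToString(dep.rdd(), sb, offset + "  ");
  }
}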
@Override
public void runUpdate(JavaSparkContext sparkContext,
                      long timestamp,
                      JavaPairRDD<K,M> newData,
                      JavaPairRDD<K,M> pastData,
                      String modelDirString,
                      TopicProducer<String,U> modelUpdateTopic) {
  scalaUpdate.configureUpdate(sparkContext.sc(),
                              timestamp,
                              newData.rdd(),
                              pastData == null ? null : pastData.rdd(),
                              modelDirString,
                              modelUpdateTopic);
}
@Override
public Iterable<U> buildUpdates(JavaPairRDD<K, M> newData) {
  return JavaConversions.asJavaIterable(scalaManager.buildUpdates(newData.rdd()));
}
private static JavaPairRDD<Integer,Iterable<Rating>> predictAll(
    MatrixFactorizationModel mfModel,
    JavaRDD<Rating> data,
    JavaPairRDD<Integer,Integer> userProducts) {
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> userProductsRDD =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) userProducts.rdd();
  return data.wrapRDD(mfModel.predict(userProductsRDD)).groupBy(Rating::user);
}
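A hypothetical caller sketch for the method above (the names scoreObserved, trainData, and mfModel are assumptions, not from the original source): pair each observed user with the products it has rated and re-score those pairs with the factorization model.

static JavaPairRDD<Integer,Iterable<Rating>> scoreObserved(MatrixFactorizationModel mfModel,
                                                           JavaRDD<Rating> trainData) {
  // Candidate (user, product) pairs drawn from the observed ratings themselves.
  JavaPairRDD<Integer,Integer> userProducts =
      trainData.mapToPair(r -> new Tuple2<>(r.user(), r.product()));
  return predictAll(mfModel, trainData, userProducts);
}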
private static RDD<Tuple2<Object,double[]>> readAndConvertFeatureRDD(
    JavaPairRDD<String,float[]> javaRDD,
    Broadcast<? extends Map<String,Integer>> bIdToIndex) {

  RDD<Tuple2<Integer,double[]>> scalaRDD = javaRDD.mapToPair(t ->
      new Tuple2<>(bIdToIndex.value().get(t._1()), t._2())
  ).mapValues(f -> {
      double[] d = new double[f.length];
      for (int i = 0; i < d.length; i++) {
        d[i] = f[i];
      }
      return d;
    }
  ).rdd();

  // This mimics the persistence level established by ALS training methods
  scalaRDD.persist(StorageLevel.MEMORY_AND_DISK());

  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,double[]>> objKeyRDD = (RDD<Tuple2<Object,double[]>>) (RDD<?>) scalaRDD;
  return objKeyRDD;
}
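A hedged follow-on sketch (the method and parameter names here are assumptions): the final cast to Object keys exists because, from Java, MatrixFactorizationModel's Scala Int-keyed feature RDDs appear with erased Object keys, so the converted user and item feature RDDs can be handed straight to its constructor.

static MatrixFactorizationModel buildModel(
    int rank,
    JavaPairRDD<String,float[]> userFeaturesJava,
    JavaPairRDD<String,float[]> itemFeaturesJava,
    Broadcast<? extends Map<String,Integer>> bUserIdToIndex,
    Broadcast<? extends Map<String,Integer>> bItemIdToIndex) {
  RDD<Tuple2<Object,double[]>> userFeatures =
      readAndConvertFeatureRDD(userFeaturesJava, bUserIdToIndex);
  RDD<Tuple2<Object,double[]>> itemFeatures =
      readAndConvertFeatureRDD(itemFeaturesJava, bItemIdToIndex);
  // The constructor accepts the Object-keyed feature RDDs produced above.
  return new MatrixFactorizationModel(rank, userFeatures, itemFeatures);
}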
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
    JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
  JavaPairRDD<HiveKey, BytesWritable> rdd;
  if (totalOrder) {
    if (numPartitions > 0) {
      if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
        input.persist(StorageLevel.DISK_ONLY());
        sparkPlan.addCachedRDDId(input.id());
      }
      rdd = input.sortByKey(true, numPartitions);
    } else {
      rdd = input.sortByKey(true);
    }
  } else {
    Partitioner partitioner = new HashPartitioner(numPartitions);
    rdd = input.repartitionAndSortWithinPartitions(partitioner);
  }
  if (shuffleSerializer != null) {
    if (rdd.rdd() instanceof ShuffledRDD) {
      ((ShuffledRDD) rdd.rdd()).setSerializer(shuffleSerializer);
    }
  }
  return rdd;
}
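A minimal illustration of the two shuffle shapes chosen above, using plain Integer/String pairs instead of HiveKey/BytesWritable and assuming an existing JavaSparkContext jsc: the totalOrder branch reduces to sortByKey, while the other branch hash-partitions and sorts keys only within each partition.

JavaPairRDD<Integer,String> pairs = jsc.parallelizePairs(Arrays.asList(
    new Tuple2<>(3, "c"), new Tuple2<>(1, "a"), new Tuple2<>(2, "b")));
// Globally sorted across 2 partitions (the totalOrder branch).
JavaPairRDD<Integer,String> totalOrderSorted = pairs.sortByKey(true, 2);
// Hash-partitioned, with keys sorted only inside each partition (the other branch).
JavaPairRDD<Integer,String> partitionSorted =
    pairs.repartitionAndSortWithinPartitions(new HashPartitioner(2));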
@Test
public void combineByKey() {
  JavaRDD<Integer> originalRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6));
  Function<Integer, Integer> keyFunction = v1 -> v1 % 3;
  Function<Integer, Integer> createCombinerFunction = v1 -> v1;
  Function2<Integer, Integer, Integer> mergeValueFunction = (v1, v2) -> v1 + v2;

  JavaPairRDD<Integer, Integer> combinedRDD = originalRDD.keyBy(keyFunction)
      .combineByKey(createCombinerFunction, mergeValueFunction, mergeValueFunction);
  Map<Integer, Integer> results = combinedRDD.collectAsMap();
  ImmutableMap<Integer, Integer> expected = ImmutableMap.of(0, 9, 1, 5, 2, 7);
  assertEquals(expected, results);

  Partitioner defaultPartitioner = Partitioner.defaultPartitioner(
      combinedRDD.rdd(),
      JavaConverters.collectionAsScalaIterableConverter(
          Collections.<RDD<?>>emptyList()).asScala().toSeq());
  combinedRDD = originalRDD.keyBy(keyFunction)
      .combineByKey(
          createCombinerFunction,
          mergeValueFunction,
          mergeValueFunction,
          defaultPartitioner,
          false,
          new KryoSerializer(new SparkConf()));
  results = combinedRDD.collectAsMap();
  assertEquals(expected, results);
}
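A minimal companion check, assuming the same sc test fixture: because the combiner here is plain addition, reduceByKey over the same keyed data should produce the identical {0=9, 1=5, 2=7} map.

@Test
public void reduceByKeyMatchesCombineByKey() {
  JavaPairRDD<Integer, Integer> keyed =
      sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6)).keyBy(v -> v % 3);
  // Summing values per key reproduces the combineByKey result above.
  Map<Integer, Integer> results = keyed.reduceByKey(Integer::sum).collectAsMap();
  assertEquals(ImmutableMap.of(0, 9, 1, 5, 2, 7), results);
}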
SparkPlan sparkPlan = sparkPlanGenerator.generate(sparkTask.getWork());
RDD<Tuple2<HiveKey, BytesWritable>> reducerRdd = sparkPlan.generateGraph().rdd();
@Override
public RiakRDD<Tuple2<K, V>> rdd() {
  return (RiakRDD<Tuple2<K, V>>) super.rdd();
}
@Override
public CassandraRDD<Tuple2<K, V>> rdd() {
  return (CassandraRDD<Tuple2<K, V>>) super.rdd();
}
/**
 * A static factory method to create a {@link PairRDDJavaFunctions} based on an existing
 * {@link JavaPairRDD} instance.
 */
public static <K, V> PairRDDJavaFunctions<K, V> javaFunctions(JavaPairRDD<K, V> rdd) {
  return new PairRDDJavaFunctions<>(rdd.rdd());
}
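Finally, a minimal round-trip sketch, assuming an existing JavaSparkContext jsc: every snippet above relies on JavaPairRDD being a thin wrapper whose rdd() exposes the underlying RDD<Tuple2<K,V>>, and JavaPairRDD.fromRDD (given the ClassTags) wraps such an RDD back up for the Java API.

JavaPairRDD<String,Integer> pairs = jsc.parallelizePairs(Arrays.asList(
    new Tuple2<>("a", 1), new Tuple2<>("b", 2)));
// Unwrap to the Scala-side RDD...
RDD<Tuple2<String,Integer>> scalaRdd = pairs.rdd();
// ...and wrap it back up for the Java API.
JavaPairRDD<String,Integer> rewrapped = JavaPairRDD.fromRDD(
    scalaRdd,
    scala.reflect.ClassTag$.MODULE$.apply(String.class),
    scala.reflect.ClassTag$.MODULE$.apply(Integer.class));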