/**
 * Collects runs of adjacent items that share a key into one entry per run.
 *
 * <p>Items with the same key are assumed to already be next to each other in the collection.
 * No shuffle is performed, which makes this much faster than the more universal Spark RDD
 * {@code groupByKey}. For this method to be useful with Cassandra tables, the key must represent
 * a prefix of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @param keyClassTag class tag of the key type, handed to the resulting {@code JavaPairRDD}
 * @return a pair RDD associating each key with the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey(ClassTag<K> keyClassTag) {
    // Class tags for the value side of the result and for the tuples produced by the mapping.
    ClassTag<Collection<V>> valuesTag = classTag(Collection.class);
    ClassTag<Tuple2<K, Collection<V>>> pairTag = classTag(Tuple2.class);

    // Span by key, then map every (key, Seq) pair so its values are exposed as a Java Collection.
    RDD<Tuple2<K, Collection<V>>> spanned =
            pairRDDFunctions
                    .spanByKey()
                    .map(JavaApiHelper.<K, V, Seq<V>>valuesAsJavaCollection(), pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, valuesTag);
}
}
/**
 * Collects runs of adjacent items that share a key into one entry per run.
 *
 * <p>Items with the same key are assumed to already be next to each other in the collection.
 * No shuffle is performed, which makes this much faster than the more universal Spark RDD
 * {@code groupByKey}. For this method to be useful with Cassandra tables, the key must represent
 * a prefix of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @param keyClassTag class tag of the key type, handed to the resulting {@code JavaPairRDD}
 * @return a pair RDD associating each key with the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey(ClassTag<K> keyClassTag) {
    // Class tags for the value side of the result and for the tuples produced by the mapping.
    ClassTag<Collection<V>> valuesTag = classTag(Collection.class);
    ClassTag<Tuple2<K, Collection<V>>> pairTag = classTag(Tuple2.class);

    // Span by key, then map every (key, Seq) pair so its values are exposed as a Java Collection.
    RDD<Tuple2<K, Collection<V>>> spanned =
            pairRDDFunctions
                    .spanByKey()
                    .map(JavaApiHelper.<K, V, Seq<V>>valuesAsJavaCollection(), pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, valuesTag);
}
}
/**
 * Collects runs of adjacent items that share a key into one entry per run.
 *
 * <p>Items with the same key are assumed to already be next to each other in the collection.
 * No shuffle is performed, which makes this much faster than the more universal Spark RDD
 * {@code groupByKey}. For this method to be useful with Cassandra tables, the key must represent
 * a prefix of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @param keyClassTag class tag of the key type, handed to the resulting {@code JavaPairRDD}
 * @return a pair RDD associating each key with the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey(ClassTag<K> keyClassTag) {
    // Class tags for the value side of the result and for the tuples produced by the mapping.
    ClassTag<Collection<V>> valuesTag = classTag(Collection.class);
    ClassTag<Tuple2<K, Collection<V>>> pairTag = classTag(Tuple2.class);

    // Span by key, then map every (key, Seq) pair so its values are exposed as a Java Collection.
    RDD<Tuple2<K, Collection<V>>> spanned =
            pairRDDFunctions
                    .spanByKey()
                    .map(JavaApiHelper.<K, V, Seq<V>>valuesAsJavaCollection(), pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, valuesTag);
}
}
/**
 * Collects runs of adjacent items that share a key into one entry per run.
 *
 * <p>Items with the same key are assumed to already be next to each other in the collection.
 * No shuffle is performed, which makes this much faster than the more universal Spark RDD
 * {@code groupByKey}. For this method to be useful with Cassandra tables, the key must represent
 * a prefix of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @param keyClassTag class tag of the key type, handed to the resulting {@code JavaPairRDD}
 * @return a pair RDD associating each key with the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey(ClassTag<K> keyClassTag) {
    // Class tags for the value side of the result and for the tuples produced by the mapping.
    ClassTag<Collection<V>> valuesTag = classTag(Collection.class);
    ClassTag<Tuple2<K, Collection<V>>> pairTag = classTag(Tuple2.class);

    // Span by key, then map every (key, Seq) pair so its values are exposed as a Java Collection.
    RDD<Tuple2<K, Collection<V>>> spanned =
            pairRDDFunctions
                    .spanByKey()
                    .map(JavaApiHelper.<K, V, Seq<V>>valuesAsJavaCollection(), pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, valuesTag);
}
}
/**
 * Collects runs of adjacent items that share a key into one entry per run.
 *
 * <p>Items with the same key are assumed to already be next to each other in the collection.
 * No shuffle is performed, which makes this much faster than the more universal Spark RDD
 * {@code groupByKey}. For this method to be useful with Cassandra tables, the key must represent
 * a prefix of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @param keyClassTag class tag of the key type, handed to the resulting {@code JavaPairRDD}
 * @return a pair RDD associating each key with the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey(ClassTag<K> keyClassTag) {
    // Class tags for the value side of the result and for the tuples produced by the mapping.
    ClassTag<Collection<V>> valuesTag = classTag(Collection.class);
    ClassTag<Tuple2<K, Collection<V>>> pairTag = classTag(Tuple2.class);

    // Span by key, then map every (key, Seq) pair so its values are exposed as a Java Collection.
    RDD<Tuple2<K, Collection<V>>> spanned =
            pairRDDFunctions
                    .spanByKey()
                    .map(JavaApiHelper.<K, V, Seq<V>>valuesAsJavaCollection(), pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, valuesTag);
}
}
/**
 * Derives a key for every item with {@code f} and groups consecutive items whose keys are equal.
 *
 * <p>Unlike {@code groupBy}, items belonging to the same group must already be next to each
 * other in the original collection. The grouping works locally on each partition, so items from
 * different partitions will never be placed in the same group.
 *
 * @param f function applied to each item to obtain the value it is grouped by
 * @param keyClassTag class tag of the grouping value type, handed to the resulting
 *     {@code JavaPairRDD}
 * @param <U> type of the grouping value produced by {@code f}
 * @return a pair RDD associating each grouping value with the run of items that produced it
 */
public <U> JavaPairRDD<U, Iterable<T>> spanBy(final Function<T, U> f, ClassTag<U> keyClassTag) {
    // Class tags for the tuples produced by the mapping and for the value side of the result.
    ClassTag<Tuple2<U, Iterable<T>>> pairTag = classTag(Tuple2.class);
    ClassTag<Iterable<T>> groupTag = CassandraJavaUtil.classTag(Iterable.class);

    // Span with the Scala-adapted function, then expose each group as a java.lang.Iterable.
    RDD<Tuple2<U, Iterable<T>>> spanned =
            rddFunctions
                    .spanBy(toScalaFunction1(f))
                    .map(
                            JavaApiHelper.<U, T, scala.collection.Iterable<T>>valuesAsJavaIterable(),
                            pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, groupTag);
}
/**
 * Derives a key for every item with {@code f} and groups consecutive items whose keys are equal.
 *
 * <p>Unlike {@code groupBy}, items belonging to the same group must already be next to each
 * other in the original collection. The grouping works locally on each partition, so items from
 * different partitions will never be placed in the same group.
 *
 * @param f function applied to each item to obtain the value it is grouped by
 * @param keyClassTag class tag of the grouping value type, handed to the resulting
 *     {@code JavaPairRDD}
 * @param <U> type of the grouping value produced by {@code f}
 * @return a pair RDD associating each grouping value with the run of items that produced it
 */
public <U> JavaPairRDD<U, Iterable<T>> spanBy(final Function<T, U> f, ClassTag<U> keyClassTag) {
    // Class tags for the tuples produced by the mapping and for the value side of the result.
    ClassTag<Tuple2<U, Iterable<T>>> pairTag = classTag(Tuple2.class);
    ClassTag<Iterable<T>> groupTag = CassandraJavaUtil.classTag(Iterable.class);

    // Span with the Scala-adapted function, then expose each group as a java.lang.Iterable.
    RDD<Tuple2<U, Iterable<T>>> spanned =
            rddFunctions
                    .spanBy(toScalaFunction1(f))
                    .map(
                            JavaApiHelper.<U, T, scala.collection.Iterable<T>>valuesAsJavaIterable(),
                            pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, groupTag);
}
/**
 * Derives a key for every item with {@code f} and groups consecutive items whose keys are equal.
 *
 * <p>Unlike {@code groupBy}, items belonging to the same group must already be next to each
 * other in the original collection. The grouping works locally on each partition, so items from
 * different partitions will never be placed in the same group.
 *
 * @param f function applied to each item to obtain the value it is grouped by
 * @param keyClassTag class tag of the grouping value type, handed to the resulting
 *     {@code JavaPairRDD}
 * @param <U> type of the grouping value produced by {@code f}
 * @return a pair RDD associating each grouping value with the run of items that produced it
 */
public <U> JavaPairRDD<U, Iterable<T>> spanBy(final Function<T, U> f, ClassTag<U> keyClassTag) {
    // Class tags for the tuples produced by the mapping and for the value side of the result.
    ClassTag<Tuple2<U, Iterable<T>>> pairTag = classTag(Tuple2.class);
    ClassTag<Iterable<T>> groupTag = CassandraJavaUtil.classTag(Iterable.class);

    // Span with the Scala-adapted function, then expose each group as a java.lang.Iterable.
    RDD<Tuple2<U, Iterable<T>>> spanned =
            rddFunctions
                    .spanBy(toScalaFunction1(f))
                    .map(
                            JavaApiHelper.<U, T, scala.collection.Iterable<T>>valuesAsJavaIterable(),
                            pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, groupTag);
}
/**
 * Derives a key for every item with {@code f} and groups consecutive items whose keys are equal.
 *
 * <p>Unlike {@code groupBy}, items belonging to the same group must already be next to each
 * other in the original collection. The grouping works locally on each partition, so items from
 * different partitions will never be placed in the same group.
 *
 * @param f function applied to each item to obtain the value it is grouped by
 * @param keyClassTag class tag of the grouping value type, handed to the resulting
 *     {@code JavaPairRDD}
 * @param <U> type of the grouping value produced by {@code f}
 * @return a pair RDD associating each grouping value with the run of items that produced it
 */
public <U> JavaPairRDD<U, Iterable<T>> spanBy(final Function<T, U> f, ClassTag<U> keyClassTag) {
    // Class tags for the tuples produced by the mapping and for the value side of the result.
    ClassTag<Tuple2<U, Iterable<T>>> pairTag = classTag(Tuple2.class);
    ClassTag<Iterable<T>> groupTag = CassandraJavaUtil.classTag(Iterable.class);

    // Span with the Scala-adapted function, then expose each group as a java.lang.Iterable.
    RDD<Tuple2<U, Iterable<T>>> spanned =
            rddFunctions
                    .spanBy(toScalaFunction1(f))
                    .map(
                            JavaApiHelper.<U, T, scala.collection.Iterable<T>>valuesAsJavaIterable(),
                            pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, groupTag);
}
/**
 * Derives a key for every item with {@code f} and groups consecutive items whose keys are equal.
 *
 * <p>Unlike {@code groupBy}, items belonging to the same group must already be next to each
 * other in the original collection. The grouping works locally on each partition, so items from
 * different partitions will never be placed in the same group.
 *
 * @param f function applied to each item to obtain the value it is grouped by
 * @param keyClassTag class tag of the grouping value type, handed to the resulting
 *     {@code JavaPairRDD}
 * @param <U> type of the grouping value produced by {@code f}
 * @return a pair RDD associating each grouping value with the run of items that produced it
 */
public <U> JavaPairRDD<U, Iterable<T>> spanBy(final Function<T, U> f, ClassTag<U> keyClassTag) {
    // Class tags for the tuples produced by the mapping and for the value side of the result.
    ClassTag<Tuple2<U, Iterable<T>>> pairTag = classTag(Tuple2.class);
    ClassTag<Iterable<T>> groupTag = CassandraJavaUtil.classTag(Iterable.class);

    // Span with the Scala-adapted function, then expose each group as a java.lang.Iterable.
    RDD<Tuple2<U, Iterable<T>>> spanned =
            rddFunctions
                    .spanBy(toScalaFunction1(f))
                    .map(
                            JavaApiHelper.<U, T, scala.collection.Iterable<T>>valuesAsJavaIterable(),
                            pairTag);

    return new JavaPairRDD<>(spanned, keyClassTag, groupTag);
}