/**
 * Evaluates {@code function} for every item and gathers runs of neighbouring items that yield
 * the same value into one group per run.
 *
 * <p>Unlike {@code groupBy}, only items that are already adjacent in the original collection
 * are merged. Grouping is performed locally within each partition, so items that live in
 * different partitions never end up in the same group.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClassTag class tag of the grouping value type, handed to the delegate unchanged
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    ClassTag<U> uClassTag
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, uClassTag);
}
/**
 * Gathers items sharing a key into groups, relying on items with equal keys already being
 * adjacent in the collection.
 *
 * <p>No shuffle is performed, which makes this much faster than the far more universal Spark
 * RDD {@code groupByKey}. To be useful with Cassandra tables, the key must represent a prefix
 * of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @return a pair RDD mapping each key to the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey() {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanByKey(kClassTag());
}
/**
 * Static factory that builds a {@link PairRDDJavaFunctions} on top of an existing
 * {@link JavaPairRDD}.
 *
 * @param rdd the Java pair RDD whose {@code rdd()} result is handed to the new instance
 * @param <K> key type of the pair RDD
 * @param <V> value type of the pair RDD
 * @return a new {@link PairRDDJavaFunctions} constructed from {@code rdd.rdd()}
 */
public static <K, V> PairRDDJavaFunctions<K, V> javaFunctions(JavaPairRDD<K, V> rdd) {
    return new PairRDDJavaFunctions<K, V>(rdd.rdd());
}
/**
 * Gathers items sharing a key into groups, relying on items with equal keys already being
 * adjacent in the collection.
 *
 * <p>No shuffle is performed, which makes this much faster than the far more universal Spark
 * RDD {@code groupByKey}. To be useful with Cassandra tables, the key must represent a prefix
 * of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @return a pair RDD mapping each key to the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey() {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanByKey(kClassTag());
}
/**
 * Static factory that builds a {@link PairRDDJavaFunctions} on top of an existing
 * {@link JavaPairRDD}.
 *
 * @param rdd the Java pair RDD whose {@code rdd()} result is handed to the new instance
 * @param <K> key type of the pair RDD
 * @param <V> value type of the pair RDD
 * @return a new {@link PairRDDJavaFunctions} constructed from {@code rdd.rdd()}
 */
public static <K, V> PairRDDJavaFunctions<K, V> javaFunctions(JavaPairRDD<K, V> rdd) {
    return new PairRDDJavaFunctions<K, V>(rdd.rdd());
}
/**
 * Evaluates {@code function} for every item and gathers runs of neighbouring items that yield
 * the same value into one group per run.
 *
 * <p>Unlike {@code groupBy}, only items that are already adjacent in the original collection
 * are merged. Grouping is performed locally within each partition, so items that live in
 * different partitions never end up in the same group.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClassTag class tag of the grouping value type, handed to the delegate unchanged
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    ClassTag<U> uClassTag
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, uClassTag);
}
/**
 * Gathers items sharing a key into groups, relying on items with equal keys already being
 * adjacent in the collection.
 *
 * <p>No shuffle is performed, which makes this much faster than the far more universal Spark
 * RDD {@code groupByKey}. To be useful with Cassandra tables, the key must represent a prefix
 * of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @return a pair RDD mapping each key to the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey() {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanByKey(kClassTag());
}
/**
 * Static factory that builds a {@link PairRDDJavaFunctions} on top of an existing
 * {@link JavaPairRDD}.
 *
 * @param rdd the Java pair RDD whose {@code rdd()} result is handed to the new instance
 * @param <K> key type of the pair RDD
 * @param <V> value type of the pair RDD
 * @return a new {@link PairRDDJavaFunctions} constructed from {@code rdd.rdd()}
 */
public static <K, V> PairRDDJavaFunctions<K, V> javaFunctions(JavaPairRDD<K, V> rdd) {
    return new PairRDDJavaFunctions<K, V>(rdd.rdd());
}
/**
 * Evaluates {@code function} for every item and gathers runs of neighbouring items that yield
 * the same value into one group per run.
 *
 * <p>Unlike {@code groupBy}, only items that are already adjacent in the original collection
 * are merged. Grouping is performed locally within each partition, so items that live in
 * different partitions never end up in the same group.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClassTag class tag of the grouping value type, handed to the delegate unchanged
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    ClassTag<U> uClassTag
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, uClassTag);
}
/**
 * Gathers items sharing a key into groups, relying on items with equal keys already being
 * adjacent in the collection.
 *
 * <p>No shuffle is performed, which makes this much faster than the far more universal Spark
 * RDD {@code groupByKey}. To be useful with Cassandra tables, the key must represent a prefix
 * of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @return a pair RDD mapping each key to the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey() {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanByKey(kClassTag());
}
/**
 * Static factory that builds a {@link PairRDDJavaFunctions} on top of an existing
 * {@link JavaPairRDD}.
 *
 * @param rdd the Java pair RDD whose {@code rdd()} result is handed to the new instance
 * @param <K> key type of the pair RDD
 * @param <V> value type of the pair RDD
 * @return a new {@link PairRDDJavaFunctions} constructed from {@code rdd.rdd()}
 */
public static <K, V> PairRDDJavaFunctions<K, V> javaFunctions(JavaPairRDD<K, V> rdd) {
    return new PairRDDJavaFunctions<K, V>(rdd.rdd());
}
/**
 * Evaluates {@code function} for every item and gathers runs of neighbouring items that yield
 * the same value into one group per run.
 *
 * <p>Unlike {@code groupBy}, only items that are already adjacent in the original collection
 * are merged. Grouping is performed locally within each partition, so items that live in
 * different partitions never end up in the same group.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClassTag class tag of the grouping value type, handed to the delegate unchanged
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    ClassTag<U> uClassTag
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, uClassTag);
}
/**
 * Gathers items sharing a key into groups, relying on items with equal keys already being
 * adjacent in the collection.
 *
 * <p>No shuffle is performed, which makes this much faster than the far more universal Spark
 * RDD {@code groupByKey}. To be useful with Cassandra tables, the key must represent a prefix
 * of the primary key that contains at least the partition key of the Cassandra table.
 *
 * @return a pair RDD mapping each key to the collection of values grouped under it
 */
public JavaPairRDD<K, Collection<V>> spanByKey() {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanByKey(kClassTag());
}
/**
 * Static factory that builds a {@link PairRDDJavaFunctions} on top of an existing
 * {@link JavaPairRDD}.
 *
 * @param rdd the Java pair RDD whose {@code rdd()} result is handed to the new instance
 * @param <K> key type of the pair RDD
 * @param <V> value type of the pair RDD
 * @return a new {@link PairRDDJavaFunctions} constructed from {@code rdd.rdd()}
 */
public static <K, V> PairRDDJavaFunctions<K, V> javaFunctions(JavaPairRDD<K, V> rdd) {
    return new PairRDDJavaFunctions<K, V>(rdd.rdd());
}
/**
 * Evaluates {@code function} for every item and gathers runs of neighbouring items that yield
 * the same value into one group per run.
 *
 * <p>Unlike {@code groupBy}, only items that are already adjacent in the original collection
 * are merged. Grouping is performed locally within each partition, so items that live in
 * different partitions never end up in the same group.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClassTag class tag of the grouping value type, handed to the delegate unchanged
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    ClassTag<U> uClassTag
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, uClassTag);
}
/**
 * Convenience overload that accepts a plain {@link Class} for the grouping value type. The
 * class is converted with {@code getClassTag} and the call is then delegated exactly like the
 * class-tag based variant.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClass class of the grouping value type
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 * @see #spanBy(Function, ClassTag)
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    Class<U> uClass
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, getClassTag(uClass));
}
/**
 * Convenience overload that accepts a plain {@link Class} for the grouping value type. The
 * class is converted with {@code getClassTag} and the call is then delegated exactly like the
 * class-tag based variant.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClass class of the grouping value type
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 * @see #spanBy(Function, ClassTag)
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    Class<U> uClass
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, getClassTag(uClass));
}
/**
 * Convenience overload that accepts a plain {@link Class} for the grouping value type. The
 * class is converted with {@code getClassTag} and the call is then delegated exactly like the
 * class-tag based variant.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClass class of the grouping value type
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 * @see #spanBy(Function, ClassTag)
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    Class<U> uClass
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, getClassTag(uClass));
}
/**
 * Convenience overload that accepts a plain {@link Class} for the grouping value type. The
 * class is converted with {@code getClassTag} and the call is then delegated exactly like the
 * class-tag based variant.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClass class of the grouping value type
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 * @see #spanBy(Function, ClassTag)
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    Class<U> uClass
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, getClassTag(uClass));
}
/**
 * Convenience overload that accepts a plain {@link Class} for the grouping value type. The
 * class is converted with {@code getClassTag} and the call is then delegated exactly like the
 * class-tag based variant.
 *
 * @param function computes the grouping value for each key-value pair
 * @param uClass class of the grouping value type
 * @param <U> type of the grouping value
 * @return a pair RDD mapping each grouping value to the consecutive items that produced it
 * @see #spanBy(Function, ClassTag)
 */
public <U> JavaPairRDD<U, Iterable<Tuple2<K, V>>> spanBy(
    Function<Tuple2<K, V>, U> function,
    Class<U> uClass
) {
    PairRDDJavaFunctions<K, V> pairFunctions = new PairRDDJavaFunctions<>(rdd());
    return pairFunctions.spanBy(function, getClassTag(uClass));
}