/**
 * Predicts a rating for every (user, product) pair and groups the resulting
 * {@link Rating}s by user id.
 *
 * @param mfModel      trained matrix-factorization model used for prediction
 * @param data         existing ratings RDD; used only to wrap the raw Scala RDD of predictions
 * @param userProducts (user, product) pairs to score
 * @return predictions keyed by user id
 */
private static JavaPairRDD<Integer, Iterable<Rating>> predictAll(
    MatrixFactorizationModel mfModel,
    JavaRDD<Rating> data,
    JavaPairRDD<Integer, Integer> userProducts) {
  // MLlib's predict() takes a Scala RDD of (Object, Object) tuples; the erased
  // double cast is the standard bridge from JavaPairRDD<Integer, Integer>.
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object, Object>> userProductPairs =
      (RDD<Tuple2<Object, Object>>) (RDD<?>) userProducts.rdd();
  RDD<Rating> predictions = mfModel.predict(userProductPairs);
  return data.wrapRDD(predictions).groupBy(Rating::user);
}
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); Assert.assertEquals(2, oddsAndEvens.count()); Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
@Test public void groupBy() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function<Integer, Boolean> isOdd = x -> x % 2 == 0; JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds oddsAndEvens = rdd.groupBy(isOdd, 1); assertEquals(2, oddsAndEvens.count()); assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds }
public static JavaPairRDD<Integer, Iterable<Vector>> groupByIndex( final JavaRDD<Vector> inputPoints, final KMeansModel clusterModel) { // Group the input points by their kmeans centroid index return inputPoints.groupBy(point -> { return clusterModel.predict(point); }); }
// NOTE(review): incomplete fragment — the loop body continues past this excerpt, so only
// comments are added here. Each attempt re-parallelizes the still-unprocessed paths and
// groups them by parent directory, presumably so each directory is listed once per
// partition rather than once per file — TODO confirm against the rest of the loop body.
while (attempt++ < maxAttempts) {
  remainingPaths = jsc.parallelize(remainingPaths, parallelism)
      .groupBy(p -> new Path(basePath, p).getParent()) // list by partition
      .map(pair -> {
        // A fresh FileSystem handle is obtained on the executor (not serialized from the driver).
        FileSystem fs = FSUtils.getFs(basePath, hadoopConf.get());
/**
 * Groups this stream's elements by the key produced by {@code function}.
 *
 * @param function key extractor applied to each element (must not be null)
 * @param <U>      key type
 * @return a pair stream mapping each key to the elements that produced it
 */
@Override
public <U> SparkPairStream<U, Iterable<T>> groupBy(@NonNull SerializableFunction<? super T, ? extends U> function) {
  JavaPairRDD<U, Iterable<T>> grouped = rdd.groupBy(element -> {
    // Re-apply the broadcast configuration on the executor before invoking the user
    // function, so it observes the driver's settings.
    Configurator.INSTANCE.configure(configBroadcast.value());
    return function.apply(element);
  });
  return new SparkPairStream<>(grouped);
}
// NOTE(review): incomplete fragment — the loop body continues past this excerpt, so only
// comments are added here. Each attempt re-parallelizes the still-unprocessed paths and
// groups them by parent directory, presumably so each directory is listed once per
// partition rather than once per file — TODO confirm against the rest of the loop body.
while (attempt++ < maxAttempts) {
  remainingPaths = jsc.parallelize(remainingPaths, parallelism)
      .groupBy(p -> new Path(basePath, p).getParent()) // list by partition
      .map(pair -> {
        // A fresh FileSystem handle is obtained on the executor (not serialized from the driver).
        FileSystem fs = FSUtils.getFs(basePath, hadoopConf.get());
/**
 * Runs the dependencies job: reads spans from the given Elasticsearch span resource,
 * groups them into traces by trace id, derives dependency links, and stores the result
 * in the dependency resource.
 *
 * @param spanResource Elasticsearch index/type to read spans from
 * @param depResource  Elasticsearch index/type to write dependency links to
 */
void run(String spanResource, String depResource) {
  log.info("Running Dependencies job for {}, reading from {} index, result storing to {}", day, spanResource, depResource);
  JavaSparkContext sc = new JavaSparkContext(conf);
  try {
    // One trace = all spans sharing a trace id.
    JavaPairRDD<String, Iterable<Span>> traces =
        JavaEsSpark.esJsonRDD(sc, spanResource)
            .map(new ElasticTupleToSpan())
            .groupBy(Span::getTraceId);
    List<Dependency> links = DependenciesSparkHelper.derive(traces);
    store(sc, links, depResource);
    log.info("Done, {} dependency objects created and stored to {}", links.size(), depResource);
  } finally {
    // Always release the Spark context, even if derivation or storage fails.
    sc.stop();
  }
}
// NOTE(review): incomplete fragment of a longer call chain — only comments added.
// Loads rows, groups them by trace id, expands each trace's rows into dependency
// links, and keeps only the link values (trace-id keys are dropped).
.load()
.toJavaRDD()
.groupBy(rowTraceId)
.flatMapValues(new RowsToDependencyLinks(logInitializer, hasTraceIdHigh))
.values()
JavaPairRDD<String, DependencyLink> flatMapToLinksByTraceId( CassandraTableScanJavaRDD<CassandraRow> spans, long microsUpper, long microsLower, boolean inTest ) { if (strictTraceId) { return spans.spanBy(ROW_TRACE_ID, String.class) .flatMapValues( new CassandraRowsToDependencyLinks(logInitializer, microsLower, microsUpper, inTest)); } return spans.map(new CassandraRowToSpan(inTest)) .groupBy(SPAN_TRACE_ID) // groupBy instead of spanBy because trace_id is mixed length .flatMapValues(new SpansToDependencyLinks(logInitializer, microsLower, microsUpper)); }
/**
 * Executes this group-by operator on Spark: groups the single input RDD's elements by the
 * compiled key extractor, then projects away the keys so the output RDD contains only the
 * per-key element iterables.
 *
 * Expects exactly one input and one output channel (asserted below). The returned tuple
 * follows the framework's lazy-execution model via
 * {@code ExecutionOperator.modelLazyExecution}.
 */
@Override public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate( ChannelInstance[] inputs, ChannelInstance[] outputs, SparkExecutor sparkExecutor, OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();
    RddChannel.Instance input = (RddChannel.Instance) inputs[0];
    RddChannel.Instance output = (RddChannel.Instance) outputs[0];
    final JavaRDD<Type> inputRdd = input.provideRdd();
    // Compile the user-level key descriptor into a Spark-serializable function.
    final Function<Type, KeyType> keyExtractor = sparkExecutor.getCompiler().compile(this.keyDescriptor, this, operatorContext, inputs);
    // Projector drops the key from each (key, group) pair, keeping only the group.
    final Function<scala.Tuple2<KeyType, Iterable<Type>>, Iterable<Type>> projector = new GroupProjector<>();
    final JavaPairRDD<KeyType, Iterable<Type>> groupedKeyRdd = inputRdd.groupBy(keyExtractor, sparkExecutor.getNumDefaultPartitions());
    // name(...) tags intermediate RDDs for the Spark UI / lineage.
    this.name(groupedKeyRdd);
    final JavaRDD<Iterable<Type>> outputRdd = groupedKeyRdd.map(projector);
    this.name(outputRdd);
    output.accept(outputRdd, sparkExecutor);
    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
// NOTE(review): incomplete fragment — the anonymous Function's call() body (and the key it
// computes from each Row) continues past this excerpt; only comments are added here.
// Groups the DataFrame's rows by a String key produced per row.
JavaPairRDD<String, Iterable<Row>> grouped = dataFrame.get().javaRDD().groupBy(new Function<Row, String>() {
  @Override public String call(Row row) throws Exception {
// NOTE(review): incomplete fragment — the anonymous Function's call() body (and the key it
// computes from each Row) continues past this excerpt; only comments are added here.
// Groups the DataFrame's rows by a String key produced per row.
JavaPairRDD<String, Iterable<Row>> grouped = dataFrame.get().javaRDD().groupBy(new Function<Row, String>() {
  @Override public String call(Row row) throws Exception {