private Map<Integer,Collection<String>> getDistinctValues(JavaRDD<String[]> parsedRDD) {
  int[] categoricalIndices = IntStream.range(0, inputSchema.getNumFeatures()).
      filter(inputSchema::isCategorical).toArray();
  return parsedRDD.mapPartitions(data -> {
    Map<Integer,Collection<String>> categoryValues = new HashMap<>();
    for (int i : categoricalIndices) {
      categoryValues.put(i, new HashSet<>());
    }
    data.forEachRemaining(datum ->
        categoryValues.forEach((category, values) -> values.add(datum[category]))
    );
    return Collections.singleton(categoryValues).iterator();
  }).reduce((v1, v2) -> {
    // Assumes both have the same key set
    v1.forEach((category, values) -> values.addAll(v2.get(category)));
    return v1;
  });
}
AvgCount result = rdd.mapPartitions(setup).reduce(combine);
System.out.println(result.avg());
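The snippet above relies on an AvgCount class and setup/combine functions that are not shown. A minimal sketch of what they might look like, assuming rdd is a JavaRDD<Integer> and a Spark 2.x FlatMapFunction (whose call returns an Iterator):

class AvgCount implements java.io.Serializable {
  int total_;
  int num_;
  AvgCount(int total, int num) { total_ = total; num_ = num; }
  double avg() { return total_ / (double) num_; }
}

// One AvgCount per partition: sum and count the partition's elements locally.
FlatMapFunction<Iterator<Integer>, AvgCount> setup = input -> {
  AvgCount a = new AvgCount(0, 0);
  while (input.hasNext()) {
    a.total_ += input.next();
    a.num_ += 1;
  }
  return Collections.singletonList(a).iterator();
};

// Combine the per-partition accumulators into one global AvgCount.
Function2<AvgCount, AvgCount, AvgCount> combine =
    (a, b) -> new AvgCount(a.total_ + b.total_, a.num_ + b.num_);

The point of the pattern is that object creation happens once per partition rather than once per element, which is the usual reason to prefer mapPartitions over map here.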
// Fragment of a larger mapPartitions(...).reduce(...) pipeline; the enclosing
// method and the merge inside the loop are not shown in the original.
    return Collections.singleton(treeNodeIDCounts).iterator();
  }).reduce((a, b) -> {
    // Assumes both lists have one entry per tree
    Preconditions.checkArgument(a.size() == b.size());
    for (int i = 0; i < a.size(); i++) {
      // ... per-tree merge elided in the original fragment,
      // plausibly merging b.get(i) into a.get(i)
    }
    return a; // assumed: the reducer must return the merged accumulator
  });
/**
 * @param trainPointData data to run down trees
 * @param model random decision forest model to count on
 * @return map of predictor index to the number of training examples that reached a
 *  node whose decision is based on that feature. The index is among predictors, not all
 *  features, since there are fewer predictors than features. That is, the index will
 *  match the one used in the {@link RandomForestModel}.
 */
private static IntLongHashMap predictorExampleCounts(JavaRDD<? extends LabeledPoint> trainPointData,
                                                     RandomForestModel model) {
  return trainPointData.mapPartitions(data -> {
    IntLongHashMap featureIndexCount = new IntLongHashMap();
    data.forEachRemaining(datum -> {
      double[] featureVector = datum.features().toArray();
      for (DecisionTreeModel tree : model.trees()) {
        org.apache.spark.mllib.tree.model.Node node = tree.topNode();
        // This logic cloned from Node.predict:
        while (!node.isLeaf()) {
          Split split = node.split().get();
          int featureIndex = split.feature();
          // Count feature
          featureIndexCount.addToValue(featureIndex, 1);
          node = nextNode(featureVector, node, split, featureIndex);
        }
      }
    });
    return Collections.singleton(featureIndexCount).iterator();
  }).reduce(RDFUpdate::merge);
}
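The method above calls two helpers, RDFUpdate::merge and nextNode, that are not shown. A plausible sketch of each, assuming Eclipse Collections' IntLongHashMap and Spark MLlib's Split/Node API; the categorical branch of nextNode in particular is an assumption:

// Sketch: merge the smaller count map into the larger one and return it.
private static IntLongHashMap merge(IntLongHashMap a, IntLongHashMap b) {
  if (b.size() > a.size()) {
    return merge(b, a);
  }
  b.forEachKeyValue(a::addToValue); // addToValue(key, delta) accumulates counts
  return a;
}

// Sketch: follow one split down the tree, mirroring Node.predict.
private static Node nextNode(double[] featureVector, Node node, Split split, int featureIndex) {
  double featureValue = featureVector[featureIndex];
  if (split.featureType().equals(FeatureType.Continuous())) {
    // Continuous split: go left iff the value is at or below the threshold
    return featureValue <= split.threshold() ? node.leftNode().get() : node.rightNode().get();
  }
  // Categorical split: go left iff the (boxed) value is among the split's categories
  return split.categories().contains(featureValue) ? node.leftNode().get() : node.rightNode().get();
}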
@Test
public void reduce() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  int sum = rdd.reduce(new AddInts());
  assertEquals(10, sum);
}
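AddInts is not defined in the snippet. A minimal sketch, assuming Spark's org.apache.spark.api.java.function.Function2 (a named class rather than a lambda is sometimes preferred in tests for serialization clarity):

// Sketch of the reducer assumed by the test above.
static class AddInts implements Function2<Integer, Integer, Integer> {
  @Override
  public Integer call(Integer a, Integer b) {
    return a + b;
  }
}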
@Test
public void testJavaJdbcRDD() throws Exception {
  JavaRDD<Integer> rdd = JdbcRDD.create(
      sc,
      () -> DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb"),
      "SELECT DATA FROM FOO WHERE ? <= ID AND ID <= ?",
      1, 100, 1,
      r -> r.getInt(1)
  ).cache();

  Assert.assertEquals(100, rdd.count());
  Assert.assertEquals(Integer.valueOf(10100), rdd.reduce((i1, i2) -> i1 + i2));
}
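The assertions imply a FOO table with 100 rows whose DATA column sums to 10100, e.g. DATA = 2*ID for ID 1..100, since 2*(1+...+100) = 10100. A plausible fixture consistent with that, assuming embedded Derby:

// Sketch of a @Before fixture matching the assertions above.
Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
try (Connection connection =
         DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb;create=true")) {
  try (Statement create = connection.createStatement()) {
    create.execute("CREATE TABLE FOO (" +
        "ID INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1, INCREMENT BY 1), " +
        "DATA INTEGER)");
  }
  try (PreparedStatement insert =
           connection.prepareStatement("INSERT INTO FOO(DATA) VALUES(?)")) {
    for (int i = 1; i <= 100; i++) {
      insert.setInt(1, i * 2); // DATA = 2*ID, so the 100 rows sum to 10100
      insert.executeUpdate();
    }
  }
}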
@Test
public void foldReduce() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
  Function2<Integer, Integer, Integer> add = (a, b) -> a + b;
  int sum = rdd.fold(0, add);
  assertEquals(33, sum);
  sum = rdd.reduce(add);
  assertEquals(33, sum);
}
@Override
public Optional<T> reduce(@NonNull SerializableBinaryOperator<T> reducer) {
  return Optional.of(rdd.reduce((t, u) -> {
    // Re-apply the broadcast configuration on the executor before reducing
    Configurator.INSTANCE.configure(configBroadcast.value());
    return reducer.apply(t, u);
  }));
}
public static <FieldT extends AbstractFieldElementExpanded<FieldT>> FieldT distributedVariableBaseMSM(
    final JavaPairRDD<Long, FieldT> scalars,
    final JavaPairRDD<Long, FieldT> bases) {
  // Join scalars and bases on their index, multiply pairwise, and sum the products
  return scalars.join(bases).map(pair -> pair._2._1.mul(pair._2._2)).reduce(FieldT::add);
}
@SuppressWarnings("unchecked")
@Override
public Summary[] getSummaryImpl() throws DDFException {
  RDD<Object[]> rdd = (RDD<Object[]>) this.getDDF().getRepresentationHandler()
      .get(RDD.class, Object[].class);
  JavaRDD<Object[]> data = rdd.toJavaRDD();
  Summary[] stats = data.map(new GetSummaryMapper()).reduce(new GetSummaryReducer());
  return stats;
}