public static void main(String[] args) throws Exception {
  String master;
  if (args.length > 0) {
    master = args[0];
  } else {
    master = "local";
  }
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicmaptodouble", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  // Square each element, producing a JavaDoubleRDD of primitive doubles
  JavaDoubleRDD result = rdd.mapToDouble(
      new DoubleFunction<Integer>() {
        public double call(Integer x) {
          double y = (double) x;
          return y * y;
        }
      });
  System.out.println(StringUtils.join(result.collect(), ","));
}
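// A minimal modern-style sketch (an addition, not from the original source):
// the same squaring job written with SparkConf and a lambda, which is how a
// current Spark application would typically be set up. The class name
// BasicMapToDoubleLambda is an assumption for illustration.
public final class BasicMapToDoubleLambda {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf()
        .setMaster(args.length > 0 ? args[0] : "local")
        .setAppName("basicmaptodouble");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaDoubleRDD result = sc.parallelize(Arrays.asList(1, 2, 3, 4))
          .mapToDouble(x -> (double) x * x);
      System.out.println(StringUtils.join(result.collect(), ","));
    }
  }
}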
@Override
double evaluate(JavaRDD<Vector> evalData) {
  // Sum of squared distances to assigned cluster centroids; lower is better
  return fetchClusterMetrics(evalData).values()
      .mapToDouble(ClusterMetric::getSumSquaredDist)
      .sum();
}
public static final @Nullable Tuple4<Long, Long, Long, Long> contentSizeStats(
    JavaRDD<ApacheAccessLog> accessLogRDD) {
  JavaDoubleRDD contentSizes = accessLogRDD.mapToDouble(new GetContentSize()).cache();
  long count = contentSizes.count();
  if (count == 0) {
    return null;
  }
  // Ordering.natural() is a serializable Comparator, which Spark's min/max require;
  // assigning it directly avoids the unchecked cast through Object
  final Comparator<Double> cmp = Ordering.natural();
  return new Tuple4<>(count,
      contentSizes.reduce(new SumReducer()).longValue(),
      contentSizes.min(cmp).longValue(),
      contentSizes.max(cmp).longValue());
}
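// Hedged usage sketch for contentSizeStats above. The input path and the
// ApacheAccessLog.parseFromLogLine parser are assumptions about the
// surrounding codebase, shown only to illustrate the call and the Tuple4
// unpacking (count, sum, min, max).
JavaRDD<ApacheAccessLog> accessLogs =
    sc.textFile("access.log").map(ApacheAccessLog::parseFromLogLine);
Tuple4<Long, Long, Long, Long> stats = contentSizeStats(accessLogs);
if (stats != null) {
  System.out.println("Count: " + stats._1() + ", Sum: " + stats._2()
      + ", Min: " + stats._3() + ", Max: " + stats._4());
}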
static double rmse(DecisionForest forest, JavaRDD<Example> examples) {
  // Mean squared difference between predicted and actual target values
  double mse = examples.mapToDouble(example -> {
    NumericPrediction prediction = (NumericPrediction) forest.predict(example);
    NumericFeature target = (NumericFeature) example.getTarget();
    double diff = prediction.getPrediction() - target.getValue();
    return diff * diff;
  }).mean();
  return Math.sqrt(mse);
}
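// Hedged usage sketch: evaluate held-out RMSE with the method above.
// randomSplit is a standard JavaRDD method; "allExamples" and "forest"
// (a DecisionForest trained on the first split elsewhere) are assumed names.
JavaRDD<Example>[] parts = allExamples.randomSplit(new double[] {0.9, 0.1}, 123L);
double heldOutRMSE = rmse(forest, parts[1]);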
/**
 * @param evalData data for evaluation
 * @return the Dunn Index of a given clustering
 *  (https://en.wikipedia.org/wiki/Cluster_analysis#Internal_evaluation); higher is better
 */
@Override
double evaluate(JavaRDD<Vector> evalData) {
  // Intra-cluster distance is mean distance to centroid
  double maxIntraClusterDistance =
      fetchClusterMetrics(evalData).values().mapToDouble(ClusterMetric::getMeanDist).max();
  // Inter-cluster distance is distance between centroids
  double minInterClusterDistance = Double.POSITIVE_INFINITY;
  List<ClusterInfo> clusters = new ArrayList<>(getClustersByID().values());
  DistanceFn<double[]> distanceFn = getDistanceFn();
  for (int i = 0; i < clusters.size(); i++) {
    double[] centerI = clusters.get(i).getCenter();
    // Distances are symmetric, hence d(i,j) == d(j,i)
    for (int j = i + 1; j < clusters.size(); j++) {
      double[] centerJ = clusters.get(j).getCenter();
      minInterClusterDistance =
          Math.min(minInterClusterDistance, distanceFn.applyAsDouble(centerI, centerJ));
    }
  }
  return minInterClusterDistance / maxIntraClusterDistance;
}
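// Hedged sketch of a distance function of the shape evaluate() uses above:
// plain Euclidean distance between two centroids. Written as a static helper
// rather than a DistanceFn to avoid assuming that interface's exact declaration.
static double euclideanDistance(double[] a, double[] b) {
  double sumSq = 0.0;
  for (int i = 0; i < a.length; i++) {
    double d = a[i] - b[i];
    sumSq += d * d;
  }
  return Math.sqrt(sumSq);
}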
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage: BasicLoadJson [sparkMaster] [cassandraHost]");
  }
  String sparkMaster = args[0];
  String cassandraHost = args[1];
  SparkConf conf = new SparkConf(true)
      .set("spark.cassandra.connection.host", cassandraHost);
  JavaSparkContext sc = new JavaSparkContext(sparkMaster, "basicquerycassandra", conf);
  // Entire table as an RDD;
  // assumes the table was created as: CREATE TABLE test.kv(key text PRIMARY KEY, value int);
  JavaRDD<CassandraRow> data = javaFunctions(sc).cassandraTable("test", "kv");
  // Print some basic stats (count, mean, stdev, max, min)
  System.out.println(data.mapToDouble(new DoubleFunction<CassandraRow>() {
    public double call(CassandraRow row) {
      return row.getInt("value");
    }
  }).stats());
  // Write some basic data to Cassandra
  ArrayList<KeyValue> input = new ArrayList<KeyValue>();
  input.add(KeyValue.newInstance("mostmagic", 3));
  JavaRDD<KeyValue> kvRDD = sc.parallelize(input);
  javaFunctions(kvRDD, KeyValue.class).saveToCassandra("test", "kv");
}

public static class KeyValue implements Serializable {
/**
 * Computes root mean squared error of {@link Rating#rating()} versus predicted value.
 */
static double rmse(MatrixFactorizationModel mfModel, JavaRDD<Rating> testData) {
  // Key each actual rating by its (user, product) pair
  JavaPairRDD<Tuple2<Integer,Integer>,Double> testUserProductValues =
      testData.mapToPair(rating ->
          new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating()));
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> testUserProducts =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) testUserProductValues.keys().rdd();
  JavaRDD<Rating> predictions = testData.wrapRDD(mfModel.predict(testUserProducts));
  // Join predictions with actuals on (user, product) and average the squared error
  double mse = predictions.mapToPair(rating ->
      new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating())
  ).join(testUserProductValues).values().mapToDouble(valuePrediction -> {
    double diff = valuePrediction._1() - valuePrediction._2();
    return diff * diff;
  }).mean();
  return Math.sqrt(mse);
}
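// Hedged usage sketch for rmse above: train an ALS model on 90% of ratings and
// score the rest. ALS.train and JavaRDD.toRDD are standard Spark MLlib Java API
// calls; the variable names and hyperparameters (rank 10, 10 iterations,
// lambda 0.01) are illustrative assumptions.
JavaRDD<Rating>[] parts = allRatings.randomSplit(new double[] {0.9, 0.1}, 123L);
MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(parts[0]), 10, 10, 0.01);
double heldOutRMSE = rmse(model, parts[1]);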
/**
 * Implementation which splits based solely on time. It will return approximately
 * the earliest (1 - {@link #getTestFraction()}) fraction of input, ordered by timestamp,
 * as new training data, and the most recent {@link #getTestFraction()} as test data.
 */
@Override
protected Pair<JavaRDD<String>,JavaRDD<String>> splitNewDataToTrainTest(JavaRDD<String> newData) {
  // Rough approximation; assumes timestamps are fairly evenly distributed
  StatCounter maxMin = newData.mapToDouble(
      line -> MLFunctions.TO_TIMESTAMP_FN.call(line).doubleValue()).stats();
  long minTime = (long) maxMin.min();
  long maxTime = (long) maxMin.max();
  log.info("New data timestamp range: {} - {}", minTime, maxTime);
  // E.g. with testFraction 0.1, minTime 1000 and maxTime 2000, the boundary is
  // 2000 - 0.1 * (2000 - 1000) = 1900, so the newest ~10% of lines become test data
  long approxTestTrainBoundary = (long) (maxTime - getTestFraction() * (maxTime - minTime));
  log.info("Splitting at timestamp {}", approxTestTrainBoundary);
  JavaRDD<String> newTrainData = newData.filter(
      line -> MLFunctions.TO_TIMESTAMP_FN.call(line) < approxTestTrainBoundary);
  JavaRDD<String> testData = newData.filter(
      line -> MLFunctions.TO_TIMESTAMP_FN.call(line) >= approxTestTrainBoundary);
  return new Pair<>(newTrainData, testData);
}
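// Hedged usage sketch: calling the splitter and logging the resulting sizes.
// Pair's getFirst()/getSecond() accessors are assumptions about the Pair
// class used in this codebase, and "newLines" is an assumed input RDD.
Pair<JavaRDD<String>,JavaRDD<String>> trainTest = splitNewDataToTrainTest(newLines);
log.info("Train/test sizes: {} / {}",
    trainTest.getFirst().count(), trainTest.getSecond().count());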
@Test
public void zip() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x);
  JavaPairRDD<Integer, Double> zipped = rdd.zip(doubles);
  zipped.count();
}
@Test
public void zip() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue);
  JavaPairRDD<Integer, Double> zipped = rdd.zip(doubles);
  zipped.count();
}
@Test
public void map() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
  doubles.collect();
  JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
  pairs.collect();
  JavaRDD<String> strings = rdd.map(Object::toString).cache();
  strings.collect();
}
@Test
public void map() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x).cache();
  doubles.collect();
  JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
  pairs.collect();
  JavaRDD<String> strings = rdd.map(Object::toString).cache();
  strings.collect();
}