/**
 * Computes the root mean squared error (RMSE) of the forest's numeric
 * predictions against each example's numeric target.
 *
 * @param forest   model whose predictions are evaluated
 * @param examples labeled examples to score
 * @return square root of the mean of squared prediction errors
 */
static double rmse(DecisionForest forest, JavaRDD<Example> examples) {
  double meanSquaredError = examples.mapToDouble(example -> {
    NumericPrediction predicted = (NumericPrediction) forest.predict(example);
    NumericFeature actual = (NumericFeature) example.getTarget();
    double error = predicted.getPrediction() - actual.getValue();
    return error * error;
  }).mean();
  return Math.sqrt(meanSquaredError);
}
/**
 * Computes root mean squared error of {@link Rating#rating()} versus predicted value.
 */
static double rmse(MatrixFactorizationModel mfModel, JavaRDD<Rating> testData) {
  // Key each held-out rating by its (user, product) pair.
  JavaPairRDD<Tuple2<Integer,Integer>,Double> actualByUserProduct = testData.mapToPair(
      rating -> new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating()));
  // predict() wants RDD<Tuple2<Object,Object>>; the double cast through
  // RDD<?> is the usual erasure workaround and is provably safe here.
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> userProducts =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) actualByUserProduct.keys().rdd();
  JavaRDD<Rating> predicted = testData.wrapRDD(mfModel.predict(userProducts));
  JavaPairRDD<Tuple2<Integer,Integer>,Double> predictedByUserProduct = predicted.mapToPair(
      rating -> new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating()));
  // Join predicted with actual on (user, product) and average the squared error.
  double meanSquaredError = predictedByUserProduct
      .join(actualByUserProduct)
      .values()
      .mapToDouble(predictedActual -> {
        double error = predictedActual._1() - predictedActual._2();
        return error * error;
      }).mean();
  return Math.sqrt(meanSquaredError);
}
/** Exercises JavaDoubleRDD transformations and its statistics API. */
@Test
public void javaDoubleRDD() {
  JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));
  JavaDoubleRDD distinct = rdd.distinct();
  assertEquals(5, distinct.count());
  JavaDoubleRDD filter = rdd.filter(x -> x > 2.0);
  assertEquals(3, filter.count());
  JavaDoubleRDD union = rdd.union(rdd);
  assertEquals(12, union.count());
  union = union.cache();
  assertEquals(12, union.count());
  assertEquals(20, rdd.sum(), 0.01);
  StatCounter stats = rdd.stats();
  assertEquals(20, stats.sum(), 0.01);
  // FIX: this mean assertion was duplicated verbatim; assert it once.
  assertEquals(20/6.0, rdd.mean(), 0.01);
  assertEquals(6.22222, rdd.variance(), 0.01);
  // variance()/stdev() are population statistics, so they must match the
  // explicitly named pop* accessors exactly.
  assertEquals(rdd.variance(), rdd.popVariance(), 1e-14);
  assertEquals(7.46667, rdd.sampleVariance(), 0.01);
  assertEquals(2.49444, rdd.stdev(), 0.01);
  assertEquals(rdd.stdev(), rdd.popStdev(), 1e-14);
  assertEquals(2.73252, rdd.sampleStdev(), 0.01);
  // Smoke-test actions that return elements rather than statistics.
  rdd.first();
  rdd.take(5);
}
/** Exercises JavaDoubleRDD transformations and its statistics API. */
@Test
public void javaDoubleRDD() {
  JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));
  JavaDoubleRDD distinct = rdd.distinct();
  assertEquals(5, distinct.count());
  JavaDoubleRDD filter = rdd.filter(x -> x > 2.0);
  assertEquals(3, filter.count());
  JavaDoubleRDD union = rdd.union(rdd);
  assertEquals(12, union.count());
  union = union.cache();
  assertEquals(12, union.count());
  assertEquals(20, rdd.sum(), 0.01);
  StatCounter stats = rdd.stats();
  assertEquals(20, stats.sum(), 0.01);
  // FIX: this mean assertion was duplicated verbatim; assert it once.
  assertEquals(20/6.0, rdd.mean(), 0.01);
  assertEquals(6.22222, rdd.variance(), 0.01);
  // variance()/stdev() are population statistics, so they must match the
  // explicitly named pop* accessors exactly.
  assertEquals(rdd.variance(), rdd.popVariance(), 1e-14);
  assertEquals(7.46667, rdd.sampleVariance(), 0.01);
  assertEquals(2.49444, rdd.stdev(), 0.01);
  assertEquals(rdd.stdev(), rdd.popStdev(), 1e-14);
  assertEquals(2.73252, rdd.sampleStdev(), 0.01);
  // Smoke-test actions that return elements rather than statistics.
  rdd.first();
  rdd.take(5);
}
/** Exercises JavaDoubleRDD transformations and its statistics API. */
@Test
public void javaDoubleRDD() {
  JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));
  JavaDoubleRDD distinct = rdd.distinct();
  assertEquals(5, distinct.count());
  JavaDoubleRDD filter = rdd.filter(x -> x > 2.0);
  assertEquals(3, filter.count());
  JavaDoubleRDD union = rdd.union(rdd);
  assertEquals(12, union.count());
  union = union.cache();
  assertEquals(12, union.count());
  assertEquals(20, rdd.sum(), 0.01);
  StatCounter stats = rdd.stats();
  assertEquals(20, stats.sum(), 0.01);
  // FIX: this mean assertion was duplicated verbatim; assert it once.
  assertEquals(20/6.0, rdd.mean(), 0.01);
  assertEquals(6.22222, rdd.variance(), 0.01);
  // variance()/stdev() are population statistics, so they must match the
  // explicitly named pop* accessors exactly.
  assertEquals(rdd.variance(), rdd.popVariance(), 1e-14);
  assertEquals(7.46667, rdd.sampleVariance(), 0.01);
  assertEquals(2.49444, rdd.stdev(), 0.01);
  assertEquals(rdd.stdev(), rdd.popStdev(), 1e-14);
  assertEquals(2.73252, rdd.sampleStdev(), 0.01);
  // Smoke-test actions that return elements rather than statistics.
  rdd.first();
  rdd.take(5);
}
).rdd()).mean();
/** Returns the mean as computed by the wrapped {@code doubleStream}. */
@Override
public double mean() {
  final double average = doubleStream.mean();
  return average;
}
/**
 * Entry point: loads structure data, filters C-alpha chains by length, and
 * prints summary statistics of the chain-length distribution plus elapsed time.
 *
 * @param args the arguments for the function (unused here)
 * @throws IOException due to an error reading from the URL
 */
public static void main(String[] args) throws IOException {
  final long startMillis = System.currentTimeMillis();
  StructureDataRDD structures = new StructureDataRDD("/path/to/hadoopfolder");
  // Keep only C-alpha chains between 10 and 300 residues long.
  SegmentDataRDD filteredChains = structures.getCalpha().filterLength(10, 300);
  // Cache the distribution: it feeds four separate actions below.
  JavaDoubleRDD chainLengths = filteredChains.getLengthDist().cache();
  System.out.println(chainLengths.mean());
  System.out.println(chainLengths.min());
  System.out.println(chainLengths.max());
  System.out.println(chainLengths.count());
  System.out.println(System.currentTimeMillis() - startMillis);
}
public static void main(String[] args) { //Sample test data - All numbers from 1 to 99999 List<Double> testData = IntStream.range(1, 100000).mapToDouble(d -> d).collect(ArrayList::new, ArrayList::add, ArrayList::addAll); JavaDoubleRDD rdd = sc.parallelizeDoubles(testData); LOGGER.info("Mean: " + rdd.mean()); //For efficiency, use StatCounter if more than one stats are required. StatCounter statCounter = rdd.stats(); LOGGER.info("Using StatCounter"); LOGGER.info("Count: " + statCounter.count()); LOGGER.info("Min: " + statCounter.min()); LOGGER.info("Max: " + statCounter.max()); LOGGER.info("Sum: " + statCounter.sum()); LOGGER.info("Mean: " + statCounter.mean()); LOGGER.info("Variance: " + statCounter.variance()); LOGGER.info("Stdev: " + statCounter.stdev()); } }
/**
 * Entry point: populates a Hazelcast map with users, builds a Spark RDD from
 * it, and prints the average user age.
 */
public static void main(String[] args) {
  fillHazelcastMapWithUsers();
  SparkConf conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("Create RDD From Hazelcast")
      .set("hazelcast.server.addresses", "127.0.0.1:5701")
      .set("spark.driver.host", "127.0.0.1");
  JavaSparkContext sparkContext = new JavaSparkContext(conf);
  HazelcastSparkContext hazelcastSparkContext = new HazelcastSparkContext(sparkContext);
  HazelcastJavaRDD<String, User> usersRdd = hazelcastSparkContext.fromHazelcastMap("users");
  // Idiom fix: a lambda replaces the verbose anonymous DoubleFlatMapFunction;
  // each map entry contributes its user's age as a single double.
  Double averageAge = usersRdd.flatMapToDouble(
      entry -> singletonList((double) entry._2().getAge()).iterator()
  ).mean();
  System.out.println("Average user age = " + averageAge);
}
Double meanTemp = javaDoubleRDD.mean(); System.out.printf("\nAVERAGE TEMPERATURE: %f C\n", meanTemp);
return Math.abs(pair._1() - pair._2()); }).rdd()).mean();
return Math.abs(pair._1() - pair._2()); }).rdd()).mean();