/**
 * Summarizes the content sizes found in a set of access-log records.
 *
 * <p>The sizes are extracted with {@code GetContentSize} and cached; the cached RDD is
 * then used by the count, reduce, min and max actions below.
 *
 * @param accessLogRDD the access-log records to summarize
 * @return a tuple of (count, value reduced with {@code SumReducer}, minimum, maximum),
 *     with the last three converted through {@code Double.longValue()}, or {@code null}
 *     when the RDD holds no records
 */
public static final @Nullable Tuple4<Long, Long, Long, Long> contentSizeStats(
    JavaRDD<ApacheAccessLog> accessLogRDD) {
  JavaDoubleRDD sizes = accessLogRDD.mapToDouble(new GetContentSize()).cache();

  long total = sizes.count();
  if (total == 0) {
    // Nothing to aggregate; callers must handle the null result.
    return null;
  }

  // The natural ordering is widened to Object first and then viewed as a
  // Comparator<Double>, exactly as in the original code.
  Object naturalOrder = Ordering.natural();
  final Comparator<Double> byValue = (Comparator<Double>) naturalOrder;

  // Evaluated in the same order as before: reduce, then min, then max.
  long reduced = sizes.reduce(new SumReducer()).longValue();
  long smallest = sizes.min(byValue).longValue();
  long largest = sizes.max(byValue).longValue();

  return new Tuple4<>(total, reduced, smallest, largest);
}
@SuppressWarnings("unchecked") @Test public void sparkContextUnion() { // Union of non-specialized JavaRDDs List<String> strings = Arrays.asList("Hello", "World"); JavaRDD<String> s1 = sc.parallelize(strings); JavaRDD<String> s2 = sc.parallelize(strings); // Varargs JavaRDD<String> sUnion = sc.union(s1, s2); assertEquals(4, sUnion.count()); // List List<JavaRDD<String>> list = new ArrayList<>(); list.add(s2); sUnion = sc.union(s1, list); assertEquals(4, sUnion.count()); // Union of JavaDoubleRDDs List<Double> doubles = Arrays.asList(1.0, 2.0); JavaDoubleRDD d1 = sc.parallelizeDoubles(doubles); JavaDoubleRDD d2 = sc.parallelizeDoubles(doubles); JavaDoubleRDD dUnion = sc.union(d1, d2); assertEquals(4, dUnion.count()); // Union of JavaPairRDDs List<Tuple2<Integer, Integer>> pairs = new ArrayList<>(); pairs.add(new Tuple2<>(1, 2)); pairs.add(new Tuple2<>(3, 4)); JavaPairRDD<Integer, Integer> p1 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> p2 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> pUnion = sc.union(p1, p2); assertEquals(4, pUnion.count()); }
@SuppressWarnings("unchecked") @Test public void sparkContextUnion() { // Union of non-specialized JavaRDDs List<String> strings = Arrays.asList("Hello", "World"); JavaRDD<String> s1 = sc.parallelize(strings); JavaRDD<String> s2 = sc.parallelize(strings); // Varargs JavaRDD<String> sUnion = sc.union(s1, s2); assertEquals(4, sUnion.count()); // List List<JavaRDD<String>> list = new ArrayList<>(); list.add(s2); sUnion = sc.union(s1, list); assertEquals(4, sUnion.count()); // Union of JavaDoubleRDDs List<Double> doubles = Arrays.asList(1.0, 2.0); JavaDoubleRDD d1 = sc.parallelizeDoubles(doubles); JavaDoubleRDD d2 = sc.parallelizeDoubles(doubles); JavaDoubleRDD dUnion = sc.union(d1, d2); assertEquals(4, dUnion.count()); // Union of JavaPairRDDs List<Tuple2<Integer, Integer>> pairs = new ArrayList<>(); pairs.add(new Tuple2<>(1, 2)); pairs.add(new Tuple2<>(3, 4)); JavaPairRDD<Integer, Integer> p1 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> p2 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> pUnion = sc.union(p1, p2); assertEquals(4, pUnion.count()); }
@SuppressWarnings("unchecked") @Test public void sparkContextUnion() { // Union of non-specialized JavaRDDs List<String> strings = Arrays.asList("Hello", "World"); JavaRDD<String> s1 = sc.parallelize(strings); JavaRDD<String> s2 = sc.parallelize(strings); // Varargs JavaRDD<String> sUnion = sc.union(s1, s2); assertEquals(4, sUnion.count()); // List List<JavaRDD<String>> list = new ArrayList<>(); list.add(s2); sUnion = sc.union(s1, list); assertEquals(4, sUnion.count()); // Union of JavaDoubleRDDs List<Double> doubles = Arrays.asList(1.0, 2.0); JavaDoubleRDD d1 = sc.parallelizeDoubles(doubles); JavaDoubleRDD d2 = sc.parallelizeDoubles(doubles); JavaDoubleRDD dUnion = sc.union(d1, d2); assertEquals(4, dUnion.count()); // Union of JavaPairRDDs List<Tuple2<Integer, Integer>> pairs = new ArrayList<>(); pairs.add(new Tuple2<>(1, 2)); pairs.add(new Tuple2<>(3, 4)); JavaPairRDD<Integer, Integer> p1 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> p2 = sc.parallelizePairs(pairs); JavaPairRDD<Integer, Integer> pUnion = sc.union(p1, p2); assertEquals(4, pUnion.count()); }
/**
 * Checks {@code intersection} on plain, empty, double and pair RDDs by counting the
 * elements of each result.
 */
@SuppressWarnings("unchecked")
@Test
public void intersection() {
  // Plain RDDs: the two inputs have exactly 1, 2 and 3 in common.
  JavaRDD<Integer> left = sc.parallelize(Arrays.asList(1, 10, 2, 3, 4, 5));
  JavaRDD<Integer> right = sc.parallelize(Arrays.asList(1, 6, 2, 3, 7, 8));
  JavaRDD<Integer> common = left.intersection(right);
  assertEquals(3, common.count());

  // An empty RDD intersected with a non-empty one is expected to stay empty.
  JavaRDD<Integer> nothing = sc.emptyRDD();
  JavaRDD<Integer> stillNothing = nothing.intersection(right);
  assertEquals(0, stillNothing.count());

  // Double RDDs built from the same list keep both elements.
  List<Double> values = Arrays.asList(1.0, 2.0);
  JavaDoubleRDD leftDoubles = sc.parallelizeDoubles(values);
  JavaDoubleRDD rightDoubles = sc.parallelizeDoubles(values);
  assertEquals(2, leftDoubles.intersection(rightDoubles).count());

  // Pair RDDs built from the same list keep both tuples.
  List<Tuple2<Integer, Integer>> tuples = new ArrayList<>();
  tuples.add(new Tuple2<>(1, 2));
  tuples.add(new Tuple2<>(3, 4));
  JavaPairRDD<Integer, Integer> leftPairs = sc.parallelizePairs(tuples);
  JavaPairRDD<Integer, Integer> rightPairs = sc.parallelizePairs(tuples);
  assertEquals(2, leftPairs.intersection(rightPairs).count());
}
/**
 * Checks {@code intersection} on plain, empty, double and pair RDDs by counting the
 * elements of each result.
 */
@SuppressWarnings("unchecked")
@Test
public void intersection() {
  // Plain RDDs: the two inputs have exactly 1, 2 and 3 in common.
  JavaRDD<Integer> left = sc.parallelize(Arrays.asList(1, 10, 2, 3, 4, 5));
  JavaRDD<Integer> right = sc.parallelize(Arrays.asList(1, 6, 2, 3, 7, 8));
  JavaRDD<Integer> common = left.intersection(right);
  assertEquals(3, common.count());

  // An empty RDD intersected with a non-empty one is expected to stay empty.
  JavaRDD<Integer> nothing = sc.emptyRDD();
  JavaRDD<Integer> stillNothing = nothing.intersection(right);
  assertEquals(0, stillNothing.count());

  // Double RDDs built from the same list keep both elements.
  List<Double> values = Arrays.asList(1.0, 2.0);
  JavaDoubleRDD leftDoubles = sc.parallelizeDoubles(values);
  JavaDoubleRDD rightDoubles = sc.parallelizeDoubles(values);
  assertEquals(2, leftDoubles.intersection(rightDoubles).count());

  // Pair RDDs built from the same list keep both tuples.
  List<Tuple2<Integer, Integer>> tuples = new ArrayList<>();
  tuples.add(new Tuple2<>(1, 2));
  tuples.add(new Tuple2<>(3, 4));
  JavaPairRDD<Integer, Integer> leftPairs = sc.parallelizePairs(tuples);
  JavaPairRDD<Integer, Integer> rightPairs = sc.parallelizePairs(tuples);
  assertEquals(2, leftPairs.intersection(rightPairs).count());
}
/**
 * Checks {@code intersection} on plain, empty, double and pair RDDs by counting the
 * elements of each result.
 */
@SuppressWarnings("unchecked")
@Test
public void intersection() {
  // Plain RDDs: the two inputs have exactly 1, 2 and 3 in common.
  JavaRDD<Integer> left = sc.parallelize(Arrays.asList(1, 10, 2, 3, 4, 5));
  JavaRDD<Integer> right = sc.parallelize(Arrays.asList(1, 6, 2, 3, 7, 8));
  JavaRDD<Integer> common = left.intersection(right);
  assertEquals(3, common.count());

  // An empty RDD intersected with a non-empty one is expected to stay empty.
  JavaRDD<Integer> nothing = sc.emptyRDD();
  JavaRDD<Integer> stillNothing = nothing.intersection(right);
  assertEquals(0, stillNothing.count());

  // Double RDDs built from the same list keep both elements.
  List<Double> values = Arrays.asList(1.0, 2.0);
  JavaDoubleRDD leftDoubles = sc.parallelizeDoubles(values);
  JavaDoubleRDD rightDoubles = sc.parallelizeDoubles(values);
  assertEquals(2, leftDoubles.intersection(rightDoubles).count());

  // Pair RDDs built from the same list keep both tuples.
  List<Tuple2<Integer, Integer>> tuples = new ArrayList<>();
  tuples.add(new Tuple2<>(1, 2));
  tuples.add(new Tuple2<>(3, 4));
  JavaPairRDD<Integer, Integer> leftPairs = sc.parallelizePairs(tuples);
  JavaPairRDD<Integer, Integer> rightPairs = sc.parallelizePairs(tuples);
  assertEquals(2, leftPairs.intersection(rightPairs).count());
}
/**
 * Exercises {@code JavaDoubleRDD} transformations (distinct, filter, union, cache) and
 * its numeric summaries against a fixed six-element data set whose values sum to 20.
 */
@Test
public void javaDoubleRDD() {
  JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));

  // 1.0 appears twice, so five distinct values remain.
  JavaDoubleRDD distinct = rdd.distinct();
  assertEquals(5, distinct.count());

  JavaDoubleRDD filter = rdd.filter(x -> x > 2.0);
  assertEquals(3, filter.count());

  // The count must be the same before and after caching the union.
  JavaDoubleRDD union = rdd.union(rdd);
  assertEquals(12, union.count());
  union = union.cache();
  assertEquals(12, union.count());

  assertEquals(20, rdd.sum(), 0.01);
  StatCounter stats = rdd.stats();
  assertEquals(20, stats.sum(), 0.01);
  assertEquals(20/6.0, rdd.mean(), 0.01);
  // Check the StatCounter's mean as well, mirroring the pair of sum checks above.
  assertEquals(20/6.0, stats.mean(), 0.01);

  // variance()/stdev() are asserted equal to their population counterparts.
  assertEquals(6.22222, rdd.variance(), 0.01);
  assertEquals(rdd.variance(), rdd.popVariance(), 1e-14);
  assertEquals(7.46667, rdd.sampleVariance(), 0.01);
  assertEquals(2.49444, rdd.stdev(), 0.01);
  assertEquals(rdd.stdev(), rdd.popStdev(), 1e-14);
  assertEquals(2.73252, rdd.sampleStdev(), 0.01);

  // Results are discarded; these calls only need to complete without throwing.
  rdd.first();
  rdd.take(5);
}
/**
 * Exercises {@code JavaDoubleRDD} transformations (distinct, filter, union, cache) and
 * its numeric summaries against a fixed six-element data set whose values sum to 20.
 */
@Test
public void javaDoubleRDD() {
  JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));

  // 1.0 appears twice, so five distinct values remain.
  JavaDoubleRDD distinct = rdd.distinct();
  assertEquals(5, distinct.count());

  JavaDoubleRDD filter = rdd.filter(x -> x > 2.0);
  assertEquals(3, filter.count());

  // The count must be the same before and after caching the union.
  JavaDoubleRDD union = rdd.union(rdd);
  assertEquals(12, union.count());
  union = union.cache();
  assertEquals(12, union.count());

  assertEquals(20, rdd.sum(), 0.01);
  StatCounter stats = rdd.stats();
  assertEquals(20, stats.sum(), 0.01);
  assertEquals(20/6.0, rdd.mean(), 0.01);
  // Check the StatCounter's mean as well, mirroring the pair of sum checks above.
  assertEquals(20/6.0, stats.mean(), 0.01);

  // variance()/stdev() are asserted equal to their population counterparts.
  assertEquals(6.22222, rdd.variance(), 0.01);
  assertEquals(rdd.variance(), rdd.popVariance(), 1e-14);
  assertEquals(7.46667, rdd.sampleVariance(), 0.01);
  assertEquals(2.49444, rdd.stdev(), 0.01);
  assertEquals(rdd.stdev(), rdd.popStdev(), 1e-14);
  assertEquals(2.73252, rdd.sampleStdev(), 0.01);

  // Results are discarded; these calls only need to complete without throwing.
  rdd.first();
  rdd.take(5);
}
/**
 * Exercises {@code JavaDoubleRDD} transformations (distinct, filter, union, cache) and
 * its numeric summaries against a fixed six-element data set whose values sum to 20.
 */
@Test
public void javaDoubleRDD() {
  JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));

  // 1.0 appears twice, so five distinct values remain.
  JavaDoubleRDD distinct = rdd.distinct();
  assertEquals(5, distinct.count());

  JavaDoubleRDD filter = rdd.filter(x -> x > 2.0);
  assertEquals(3, filter.count());

  // The count must be the same before and after caching the union.
  JavaDoubleRDD union = rdd.union(rdd);
  assertEquals(12, union.count());
  union = union.cache();
  assertEquals(12, union.count());

  assertEquals(20, rdd.sum(), 0.01);
  StatCounter stats = rdd.stats();
  assertEquals(20, stats.sum(), 0.01);
  assertEquals(20/6.0, rdd.mean(), 0.01);
  // Check the StatCounter's mean as well, mirroring the pair of sum checks above.
  assertEquals(20/6.0, stats.mean(), 0.01);

  // variance()/stdev() are asserted equal to their population counterparts.
  assertEquals(6.22222, rdd.variance(), 0.01);
  assertEquals(rdd.variance(), rdd.popVariance(), 1e-14);
  assertEquals(7.46667, rdd.sampleVariance(), 0.01);
  assertEquals(2.49444, rdd.stdev(), 0.01);
  assertEquals(rdd.stdev(), rdd.popStdev(), 1e-14);
  assertEquals(2.73252, rdd.sampleStdev(), 0.01);

  // Results are discarded; these calls only need to complete without throwing.
  rdd.first();
  rdd.take(5);
}
/**
 * Reports whether at least one element satisfies the given predicate.
 *
 * <p>{@code Configurator} is configured from the broadcast value before each predicate
 * evaluation.
 *
 * @param predicate the test applied to each element
 * @return {@code true} if the number of elements passing the predicate is non-zero
 */
@Override
public boolean anyMatch(@NonNull SerializableDoublePredicate predicate) {
  long matching = doubleStream
      .filter(value -> {
        Configurator.INSTANCE.configure(configBroadcast.value());
        return predicate.test(value);
      })
      .count();
  return matching != 0;
}
/**
 * Reports whether every element satisfies the given predicate.
 *
 * <p>{@code Configurator} is configured from the broadcast value before each predicate
 * evaluation. The answer is obtained by comparing the number of elements that pass the
 * predicate with the result of {@code count()}.
 *
 * @param predicate the test applied to each element
 * @return {@code true} if the filtered count equals {@code count()}
 */
@Override
public boolean allMatch(@NonNull SerializableDoublePredicate predicate) {
  // Computed first, as in the original left-to-right comparison.
  long matching = doubleStream
      .filter(value -> {
        Configurator.INSTANCE.configure(configBroadcast.value());
        return predicate.test(value);
      })
      .count();
  return matching == count();
}
/**
 * Calls the three- , four- and five-argument forms of {@code poissonJavaRDD} and
 * checks that each resulting RDD holds the requested number of elements.
 */
@Test
public void testPoissonRDD() {
  final double mean = 2.0;
  final long expectedCount = 1000L;
  final int p = 2; // presumably the partition count; confirm against poissonJavaRDD
  final long seed = 1L;

  JavaDoubleRDD[] generated = {
    poissonJavaRDD(jsc, mean, expectedCount),
    poissonJavaRDD(jsc, mean, expectedCount, p),
    poissonJavaRDD(jsc, mean, expectedCount, p, seed)
  };

  for (JavaDoubleRDD candidate : generated) {
    Assert.assertEquals(expectedCount, candidate.count());
  }
}
/**
 * Calls the two- , three- and four-argument forms of {@code normalJavaRDD} and checks
 * that each resulting RDD holds the requested number of elements.
 */
@Test
public void testNormalRDD() {
  final long expectedCount = 1000L;
  final int p = 2; // presumably the partition count; confirm against normalJavaRDD
  final long seed = 1L;

  JavaDoubleRDD[] generated = {
    normalJavaRDD(jsc, expectedCount),
    normalJavaRDD(jsc, expectedCount, p),
    normalJavaRDD(jsc, expectedCount, p, seed)
  };

  for (JavaDoubleRDD candidate : generated) {
    Assert.assertEquals(expectedCount, candidate.count());
  }
}
/**
 * Calls the two- , three- and four-argument forms of {@code normalJavaRDD} and checks
 * that each resulting RDD holds the requested number of elements.
 */
@Test
public void testNormalRDD() {
  final long expectedCount = 1000L;
  final int p = 2; // presumably the partition count; confirm against normalJavaRDD
  final long seed = 1L;

  JavaDoubleRDD[] generated = {
    normalJavaRDD(jsc, expectedCount),
    normalJavaRDD(jsc, expectedCount, p),
    normalJavaRDD(jsc, expectedCount, p, seed)
  };

  for (JavaDoubleRDD candidate : generated) {
    Assert.assertEquals(expectedCount, candidate.count());
  }
}
/**
 * Calls the two- , three- and four-argument forms of {@code uniformJavaRDD} and checks
 * that each resulting RDD holds the requested number of elements.
 */
@Test
public void testUniformRDD() {
  final long expectedCount = 1000L;
  final int p = 2; // presumably the partition count; confirm against uniformJavaRDD
  final long seed = 1L;

  JavaDoubleRDD[] generated = {
    uniformJavaRDD(jsc, expectedCount),
    uniformJavaRDD(jsc, expectedCount, p),
    uniformJavaRDD(jsc, expectedCount, p, seed)
  };

  for (JavaDoubleRDD candidate : generated) {
    Assert.assertEquals(expectedCount, candidate.count());
  }
}
/**
 * Calls the two- , three- and four-argument forms of {@code normalJavaRDD} and checks
 * that each resulting RDD holds the requested number of elements.
 */
@Test
public void testNormalRDD() {
  final long expectedCount = 1000L;
  final int p = 2; // presumably the partition count; confirm against normalJavaRDD
  final long seed = 1L;

  JavaDoubleRDD[] generated = {
    normalJavaRDD(jsc, expectedCount),
    normalJavaRDD(jsc, expectedCount, p),
    normalJavaRDD(jsc, expectedCount, p, seed)
  };

  for (JavaDoubleRDD candidate : generated) {
    Assert.assertEquals(expectedCount, candidate.count());
  }
}
/**
 * Calls the two- , three- and four-argument forms of {@code uniformJavaRDD} and checks
 * that each resulting RDD holds the requested number of elements.
 */
@Test
public void testUniformRDD() {
  final long expectedCount = 1000L;
  final int p = 2; // presumably the partition count; confirm against uniformJavaRDD
  final long seed = 1L;

  JavaDoubleRDD[] generated = {
    uniformJavaRDD(jsc, expectedCount),
    uniformJavaRDD(jsc, expectedCount, p),
    uniformJavaRDD(jsc, expectedCount, p, seed)
  };

  for (JavaDoubleRDD candidate : generated) {
    Assert.assertEquals(expectedCount, candidate.count());
  }
}
/**
 * Calls the two- , three- and four-argument forms of {@code uniformJavaRDD} and checks
 * that each resulting RDD holds the requested number of elements.
 */
@Test
public void testUniformRDD() {
  final long expectedCount = 1000L;
  final int p = 2; // presumably the partition count; confirm against uniformJavaRDD
  final long seed = 1L;

  JavaDoubleRDD[] generated = {
    uniformJavaRDD(jsc, expectedCount),
    uniformJavaRDD(jsc, expectedCount, p),
    uniformJavaRDD(jsc, expectedCount, p, seed)
  };

  for (JavaDoubleRDD candidate : generated) {
    Assert.assertEquals(expectedCount, candidate.count());
  }
}
/**
 * Calls the three- , four- and five-argument forms of {@code exponentialJavaRDD} and
 * checks that each resulting RDD holds the requested number of elements.
 */
@Test
public void testExponentialRDD() {
  final double mean = 2.0;
  final long expectedCount = 1000L;
  final int p = 2; // presumably the partition count; confirm against exponentialJavaRDD
  final long seed = 1L;

  JavaDoubleRDD[] generated = {
    exponentialJavaRDD(jsc, mean, expectedCount),
    exponentialJavaRDD(jsc, mean, expectedCount, p),
    exponentialJavaRDD(jsc, mean, expectedCount, p, seed)
  };

  for (JavaDoubleRDD candidate : generated) {
    Assert.assertEquals(expectedCount, candidate.count());
  }
}