// sort by the timestamp key, then keep only the rating values (now in timestamp order)
return timestampRatingRDD.sortByKey().values();
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
    JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
  JavaPairRDD<HiveKey, BytesWritable> rdd;
  if (totalOrder) {
    if (numPartitions > 0) {
      if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
        // Cache the input so the sort's range-sampling pass does not recompute it.
        input.persist(StorageLevel.DISK_ONLY());
        sparkPlan.addCachedRDDId(input.id());
      }
      // Total order: range-partitioned global sort into the requested number of partitions.
      rdd = input.sortByKey(true, numPartitions);
    } else {
      rdd = input.sortByKey(true);
    }
  } else {
    // No total order required: hash-partition, then sort keys only within each partition.
    Partitioner partitioner = new HashPartitioner(numPartitions);
    rdd = input.repartitionAndSortWithinPartitions(partitioner);
  }
  return rdd;
}
@Test
public void sortByKey() {
  List<Tuple2<Integer, Integer>> pairs = new ArrayList<>();
  pairs.add(new Tuple2<>(0, 4));
  pairs.add(new Tuple2<>(3, 2));
  pairs.add(new Tuple2<>(-1, 1));

  JavaPairRDD<Integer, Integer> rdd = sc.parallelizePairs(pairs);

  // Default comparator
  JavaPairRDD<Integer, Integer> sortedRDD = rdd.sortByKey();
  assertEquals(new Tuple2<>(-1, 1), sortedRDD.first());
  List<Tuple2<Integer, Integer>> sortedPairs = sortedRDD.collect();
  assertEquals(new Tuple2<>(0, 4), sortedPairs.get(1));
  assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2));

  // Custom comparator
  sortedRDD = rdd.sortByKey(Collections.reverseOrder(), false);
  assertEquals(new Tuple2<>(-1, 1), sortedRDD.first());
  sortedPairs = sortedRDD.collect();
  assertEquals(new Tuple2<>(0, 4), sortedPairs.get(1));
  assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2));
}
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
    JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
  JavaPairRDD<HiveKey, BytesWritable> rdd;
  if (totalOrder) {
    if (numPartitions > 0) {
      if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
        // Cache the input so the sort's range-sampling pass does not recompute it.
        input.persist(StorageLevel.DISK_ONLY());
        sparkPlan.addCachedRDDId(input.id());
      }
      // Total order: range-partitioned global sort into the requested number of partitions.
      rdd = input.sortByKey(true, numPartitions);
    } else {
      rdd = input.sortByKey(true);
    }
  } else {
    // No total order required: hash-partition, then sort keys only within each partition.
    Partitioner partitioner = new HashPartitioner(numPartitions);
    rdd = input.repartitionAndSortWithinPartitions(partitioner);
  }
  if (shuffleSerializer != null) {
    if (rdd.rdd() instanceof ShuffledRDD) {
      // Use the configured serializer for the shuffle backing the sorted RDD.
      ((ShuffledRDD) rdd.rdd()).setSerializer(shuffleSerializer);
    }
  }
  return rdd;
}
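// Hypothetical, self-contained sketch of the two code paths in the shuffle() method above
// (global total-order sort vs. hash-partition plus sort within partitions), using plain
// Integer/String pairs instead of Hive's HiveKey/BytesWritable. The class name, variable
// names, and sample data are illustrative assumptions, not part of the original code.
import java.util.Arrays;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class ShufflePathsDemo {
  public static void main(String[] args) {
    try (JavaSparkContext sc = new JavaSparkContext("local[*]", "shuffle-paths-demo")) {
      JavaPairRDD<Integer, String> input = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>(4, "d"), new Tuple2<>(1, "a"),
          new Tuple2<>(3, "c"), new Tuple2<>(2, "b")), 2);

      // totalOrder == true: range-partitioned global sort across 2 partitions.
      JavaPairRDD<Integer, String> totalOrder = input.sortByKey(true, 2);

      // totalOrder == false: hash-partition, then sort keys only within each partition.
      JavaPairRDD<Integer, String> withinPartitions =
          input.repartitionAndSortWithinPartitions(new HashPartitioner(2));

      System.out.println(totalOrder.collect());        // globally ordered by key
      System.out.println(withinPartitions.collect());  // ordered only within each partition
    }
  }
}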
static JavaPairRDD<Integer, String> sort(
    final JavaPairRDD<Integer, String> frequencies,
    final String orderBy) throws Exception {
  if (orderBy.equals("ascending")) {
    // sort in "ascending" order
    return frequencies.sortByKey(true);
  } else {
    // sort in "descending" order
    return frequencies.sortByKey(false);
  }
}
static JavaPairRDD<String, Long> assignRank(JavaPairRDD<String, Double> rdd) throws Exception {
  // swap key and value (will be used for sorting by key)
  // convert value to abs(value)
  JavaPairRDD<Double, String> swappedRDD = rdd.mapToPair(
      (Tuple2<String, Double> s) -> new Tuple2<Double, String>(Math.abs(s._2), s._1));

  // sort copa scores descending
  // we need 1 partition so that we can zip numbers into this RDD by zipWithIndex()
  JavaPairRDD<Double, String> sorted = swappedRDD.sortByKey(false, 1);

  // JavaPairRDD<T,Long> zipWithIndex()
  // Long values will be 0, 1, 2, ...
  // for ranking, we need 1, 2, 3, ..., therefore, we will add 1 when calculating the ranked product
  JavaPairRDD<Tuple2<Double, String>, Long> indexed = sorted.zipWithIndex();

  // next convert JavaPairRDD<Tuple2<Double,String>,Long> into JavaPairRDD<String,Long>
  // JavaPairRDD<Tuple2<value,mapped_id>,rank> into JavaPairRDD<mapped_id,rank>
  JavaPairRDD<String, Long> ranked = indexed.mapToPair(
      (Tuple2<Tuple2<Double, String>, Long> s) ->
          new Tuple2<String, Long>(s._1._2, s._2 + 1) // ranks are 1, 2, ..., n
  );

  return ranked;
}
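// Hedged, self-contained sketch of the ranking technique used in assignRank() above:
// sort descending into a single partition, zipWithIndex(), then shift the 0-based index
// to a 1-based rank. The RankDemo scaffolding and the sample scores are illustrative
// assumptions, not part of the original code.
import java.util.Arrays;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class RankDemo {
  public static void main(String[] args) {
    try (JavaSparkContext sc = new JavaSparkContext("local[*]", "rank-demo")) {
      JavaPairRDD<String, Double> scores = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>("geneA", -2.5), new Tuple2<>("geneB", 0.7), new Tuple2<>("geneC", 1.9)));

      JavaPairRDD<String, Long> ranks = scores
          // swap to (|score|, id) so sortByKey orders by absolute score
          .mapToPair(s -> new Tuple2<>(Math.abs(s._2), s._1))
          // descending, single partition so zipWithIndex yields a global ordering
          .sortByKey(false, 1)
          // pairs become ((|score|, id), index) with index = 0, 1, 2, ...
          .zipWithIndex()
          // keep the id and turn the 0-based index into a 1-based rank
          .mapToPair(t -> new Tuple2<>(t._1._2, t._2 + 1));

      System.out.println(ranks.collect()); // [(geneA,1), (geneC,2), (geneB,3)]
    }
  }
}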
public static <K, V> JavaPairRDD<K, V> executeMap(
    final JavaPairRDD<Object, VertexWritable> graphRDD,
    final MapReduce<K, V, ?, ?, ?> mapReduce,
    final Configuration graphComputerConfiguration) {
  // Run the MapReduce job's map phase over each partition of the graph RDD.
  JavaPairRDD<K, V> mapRDD = graphRDD.mapPartitionsToPair(partitionIterator -> {
    KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
    return new MapIterator<>(
        MapReduce.<MapReduce<K, V, ?, ?, ?>>createMapReduce(
            HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration),
        partitionIterator);
  });
  // If the job declares a map-key ordering, apply it as a single-partition sort.
  if (mapReduce.getMapKeySort().isPresent())
    mapRDD = mapRDD.sortByKey(mapReduce.getMapKeySort().get(), true, 1);
  return mapRDD;
}
JavaPairRDD<Double,String> sorted = swappedRDD.sortByKey(false, 1);
public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
    final JavaPairRDD<K, V> mapOrCombineRDD,
    final MapReduce<K, V, OK, OV, ?> mapReduce,
    final Configuration graphComputerConfiguration) {
  // Group map/combine output by key, then run the reduce phase over each partition.
  JavaPairRDD<OK, OV> reduceRDD = mapOrCombineRDD.groupByKey().mapPartitionsToPair(partitionIterator -> {
    KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
    return new ReduceIterator<>(
        MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(
            HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration),
        partitionIterator);
  });
  // If the job declares a reduce-key ordering, apply it as a single-partition sort.
  if (mapReduce.getReduceKeySort().isPresent())
    reduceRDD = reduceRDD.sortByKey(mapReduce.getReduceKeySort().get(), true, 1);
  return reduceRDD;
}
counts.sortByKey().saveAsTextFile(outputPath);
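// Hedged sketch of one way a `counts` pair RDD like the one above could be produced
// (a classic word count). The input/output paths, class name, and variable names are
// assumptions, not part of the original code.
import java.util.Arrays;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class WordCountDemo {
  public static void main(String[] args) {
    String inputPath = args[0];
    String outputPath = args[1];
    try (JavaSparkContext sc = new JavaSparkContext("local[*]", "wordcount-demo")) {
      JavaRDD<String> lines = sc.textFile(inputPath);
      JavaPairRDD<String, Integer> counts = lines
          .flatMap(line -> Arrays.asList(line.split("\\s+")).iterator()) // split into words
          .mapToPair(word -> new Tuple2<>(word, 1))                      // (word, 1)
          .reduceByKey(Integer::sum);                                    // (word, total)
      // Sort by word, then write one part file per partition, as in the line above.
      counts.sortByKey().saveAsTextFile(outputPath);
    }
  }
}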
@Override
public MPairStream<T, U> sortByKey(@NonNull SerializableComparator<T> comparator) {
  return new SparkPairStream<>(rdd.sortByKey(comparator));
}
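// Hedged illustration of the underlying call the wrapper above delegates to:
// JavaPairRDD.sortByKey(Comparator). Spark ships the comparator to executors, so it must
// be serializable, which is presumably why a SerializableComparator type appears in the
// signature above. The class name and sample data here are assumptions.
import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class ComparatorSortDemo {
  public static void main(String[] args) {
    try (JavaSparkContext sc = new JavaSparkContext("local[*]", "comparator-sort-demo")) {
      JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>("pear", 3), new Tuple2<>("fig", 1), new Tuple2<>("banana", 2)));
      // The intersection cast makes the lambda both a Comparator and Serializable.
      JavaPairRDD<String, Integer> byLength = pairs.sortByKey(
          (Comparator<String> & Serializable) (a, b) -> Integer.compare(a.length(), b.length()));
      System.out.println(byLength.collect()); // [(fig,1), (pear,3), (banana,2)]
    }
  }
}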