// sort by the timestamp key, then keep only the rating values (now in timestamp order)
return timestampRatingRDD.sortByKey().values();
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
    JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
  JavaPairRDD<HiveKey, BytesWritable> rdd;
  if (totalOrder) {
    if (numPartitions > 0) {
      if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
        // Cache the input so the sort's range-sampling pass does not recompute it.
        input.persist(StorageLevel.DISK_ONLY());
        sparkPlan.addCachedRDDId(input.id());
      }
      // Total order: range-partitioned global sort into the requested number of partitions.
      rdd = input.sortByKey(true, numPartitions);
    } else {
      rdd = input.sortByKey(true);
    }
  } else {
    // No total order required: hash-partition, then sort keys only within each partition.
    Partitioner partitioner = new HashPartitioner(numPartitions);
    rdd = input.repartitionAndSortWithinPartitions(partitioner);
  }
  return rdd;
}
@Test
public void sortByKey() {
  List<Tuple2<Integer, Integer>> pairs = new ArrayList<>();
  pairs.add(new Tuple2<>(0, 4));
  pairs.add(new Tuple2<>(3, 2));
  pairs.add(new Tuple2<>(-1, 1));

  JavaPairRDD<Integer, Integer> rdd = sc.parallelizePairs(pairs);

  // Default comparator
  JavaPairRDD<Integer, Integer> sortedRDD = rdd.sortByKey();
  assertEquals(new Tuple2<>(-1, 1), sortedRDD.first());
  List<Tuple2<Integer, Integer>> sortedPairs = sortedRDD.collect();
  assertEquals(new Tuple2<>(0, 4), sortedPairs.get(1));
  assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2));

  // Custom comparator
  sortedRDD = rdd.sortByKey(Collections.reverseOrder(), false);
  assertEquals(new Tuple2<>(-1, 1), sortedRDD.first());
  sortedPairs = sortedRDD.collect();
  assertEquals(new Tuple2<>(0, 4), sortedPairs.get(1));
  assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2));
}
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
    JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
  JavaPairRDD<HiveKey, BytesWritable> rdd;
  if (totalOrder) {
    if (numPartitions > 0) {
      if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
        // Cache the input so the sort's range-sampling pass does not recompute it.
        input.persist(StorageLevel.DISK_ONLY());
        sparkPlan.addCachedRDDId(input.id());
      }
      // Total order: range-partitioned global sort into the requested number of partitions.
      rdd = input.sortByKey(true, numPartitions);
    } else {
      rdd = input.sortByKey(true);
    }
  } else {
    // No total order required: hash-partition, then sort keys only within each partition.
    Partitioner partitioner = new HashPartitioner(numPartitions);
    rdd = input.repartitionAndSortWithinPartitions(partitioner);
  }
  if (shuffleSerializer != null) {
    if (rdd.rdd() instanceof ShuffledRDD) {
      // Use the configured serializer for the shuffle backing the sorted RDD.
      ((ShuffledRDD) rdd.rdd()).setSerializer(shuffleSerializer);
    }
  }
  return rdd;
}
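// Hypothetical, self-contained sketch of the two code paths in the shuffle() method above
// (global total-order sort vs. hash-partition plus sort within partitions), using plain
// Integer/String pairs instead of Hive's HiveKey/BytesWritable. The class name, variable
// names, and sample data are illustrative assumptions, not part of the original code.
import java.util.Arrays;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class ShufflePathsDemo {
  public static void main(String[] args) {
    try (JavaSparkContext sc = new JavaSparkContext("local[*]", "shuffle-paths-demo")) {
      JavaPairRDD<Integer, String> input = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>(4, "d"), new Tuple2<>(1, "a"),
          new Tuple2<>(3, "c"), new Tuple2<>(2, "b")), 2);

      // totalOrder == true: range-partitioned global sort across 2 partitions.
      JavaPairRDD<Integer, String> totalOrder = input.sortByKey(true, 2);

      // totalOrder == false: hash-partition, then sort keys only within each partition.
      JavaPairRDD<Integer, String> withinPartitions =
          input.repartitionAndSortWithinPartitions(new HashPartitioner(2));

      System.out.println(totalOrder.collect());        // globally ordered by key
      System.out.println(withinPartitions.collect());  // ordered only within each partition
    }
  }
}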
static JavaPairRDD<Integer, String> sort(
    final JavaPairRDD<Integer, String> frequencies,
    final String orderBy) throws Exception {
  if (orderBy.equals("ascending")) {
    // sort in "ascending" order
    return frequencies.sortByKey(true);
  } else {
    // sort in "descending" order
    return frequencies.sortByKey(false);
  }
}
static JavaPairRDD<String, Long> assignRank(JavaPairRDD<String, Double> rdd) throws Exception {
  // swap key and value (will be used for sorting by key)
  // convert value to abs(value)
  JavaPairRDD<Double, String> swappedRDD = rdd.mapToPair(
      (Tuple2<String, Double> s) -> new Tuple2<Double, String>(Math.abs(s._2), s._1));

  // sort copa scores descending
  // we need 1 partition so that we can zip numbers into this RDD by zipWithIndex()
  JavaPairRDD<Double, String> sorted = swappedRDD.sortByKey(false, 1);

  // JavaPairRDD<T,Long> zipWithIndex()
  // Long values will be 0, 1, 2, ...
  // for ranking, we need 1, 2, 3, ..., therefore, we will add 1 when calculating the ranked product
  JavaPairRDD<Tuple2<Double, String>, Long> indexed = sorted.zipWithIndex();

  // next convert JavaPairRDD<Tuple2<Double,String>,Long> into JavaPairRDD<String,Long>
  // JavaPairRDD<Tuple2<value,mapped_id>,rank> into JavaPairRDD<mapped_id,rank>
  JavaPairRDD<String, Long> ranked = indexed.mapToPair(
      (Tuple2<Tuple2<Double, String>, Long> s) ->
          new Tuple2<String, Long>(s._1._2, s._2 + 1) // ranks are 1, 2, ..., n
  );

  return ranked;
}
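// Hedged, self-contained sketch of the ranking technique used in assignRank() above:
// sort descending into a single partition, zipWithIndex(), then shift the 0-based index
// to a 1-based rank. The RankDemo scaffolding and the sample scores are illustrative
// assumptions, not part of the original code.
import java.util.Arrays;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class RankDemo {
  public static void main(String[] args) {
    try (JavaSparkContext sc = new JavaSparkContext("local[*]", "rank-demo")) {
      JavaPairRDD<String, Double> scores = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>("geneA", -2.5), new Tuple2<>("geneB", 0.7), new Tuple2<>("geneC", 1.9)));

      JavaPairRDD<String, Long> ranks = scores
          // swap to (|score|, id) so sortByKey orders by absolute score
          .mapToPair(s -> new Tuple2<>(Math.abs(s._2), s._1))
          // descending, single partition so zipWithIndex yields a global ordering
          .sortByKey(false, 1)
          // pairs become ((|score|, id), index) with index = 0, 1, 2, ...
          .zipWithIndex()
          // keep the id and turn the 0-based index into a 1-based rank
          .mapToPair(t -> new Tuple2<>(t._1._2, t._2 + 1));

      System.out.println(ranks.collect()); // [(geneA,1), (geneC,2), (geneB,3)]
    }
  }
}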
public static <K, V> JavaPairRDD<K, V> executeMap(
    final JavaPairRDD<Object, VertexWritable> graphRDD,
    final MapReduce<K, V, ?, ?, ?> mapReduce,
    final Configuration graphComputerConfiguration) {
  // Run the MapReduce job's map phase over each partition of the graph RDD.
  JavaPairRDD<K, V> mapRDD = graphRDD.mapPartitionsToPair(partitionIterator -> {
    KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
    return new MapIterator<>(
        MapReduce.<MapReduce<K, V, ?, ?, ?>>createMapReduce(
            HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration),
        partitionIterator);
  });
  // If the job declares a map-key ordering, apply it as a single-partition sort.
  if (mapReduce.getMapKeySort().isPresent())
    mapRDD = mapRDD.sortByKey(mapReduce.getMapKeySort().get(), true, 1);
  return mapRDD;
}
JavaPairRDD<Double,String> sorted = swappedRDD.sortByKey(false, 1);
public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
    final JavaPairRDD<K, V> mapOrCombineRDD,
    final MapReduce<K, V, OK, OV, ?> mapReduce,
    final Configuration graphComputerConfiguration) {
  // Group map/combine output by key, then run the reduce phase over each partition.
  JavaPairRDD<OK, OV> reduceRDD = mapOrCombineRDD.groupByKey().mapPartitionsToPair(partitionIterator -> {
    KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
    return new ReduceIterator<>(
        MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(
            HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration),
        partitionIterator);
  });
  // If the job declares a reduce-key ordering, apply it as a single-partition sort.
  if (mapReduce.getReduceKeySort().isPresent())
    reduceRDD = reduceRDD.sortByKey(mapReduce.getReduceKeySort().get(), true, 1);
  return reduceRDD;
}
counts.sortByKey().saveAsTextFile(outputPath);
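// Hedged sketch of one way a `counts` pair RDD like the one above could be produced
// (a classic word count). The input/output paths, class name, and variable names are
// assumptions, not part of the original code.
import java.util.Arrays;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class WordCountDemo {
  public static void main(String[] args) {
    String inputPath = args[0];
    String outputPath = args[1];
    try (JavaSparkContext sc = new JavaSparkContext("local[*]", "wordcount-demo")) {
      JavaRDD<String> lines = sc.textFile(inputPath);
      JavaPairRDD<String, Integer> counts = lines
          .flatMap(line -> Arrays.asList(line.split("\\s+")).iterator()) // split into words
          .mapToPair(word -> new Tuple2<>(word, 1))                      // (word, 1)
          .reduceByKey(Integer::sum);                                    // (word, total)
      // Sort by word, then write one part file per partition, as in the line above.
      counts.sortByKey().saveAsTextFile(outputPath);
    }
  }
}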
@Override
public MPairStream<T, U> sortByKey(@NonNull SerializableComparator<T> comparator) {
  return new SparkPairStream<>(rdd.sortByKey(comparator));
}
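// Hedged illustration of the underlying call the wrapper above delegates to:
// JavaPairRDD.sortByKey(Comparator). Spark ships the comparator to executors, so it must
// be serializable, which is presumably why a SerializableComparator type appears in the
// signature above. The class name and sample data here are assumptions.
import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public final class ComparatorSortDemo {
  public static void main(String[] args) {
    try (JavaSparkContext sc = new JavaSparkContext("local[*]", "comparator-sort-demo")) {
      JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>("pear", 3), new Tuple2<>("fig", 1), new Tuple2<>("banana", 2)));
      // The intersection cast makes the lambda both a Comparator and Serializable.
      JavaPairRDD<String, Integer> byLength = pairs.sortByKey(
          (Comparator<String> & Serializable) (a, b) -> Integer.compare(a.length(), b.length()));
      System.out.println(byLength.collect()); // [(fig,1), (pear,3), (banana,2)]
    }
  }
}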