@Override
public JavaPairRDD<HiveKey, BytesWritable> transform(JavaPairRDD<HiveKey, BytesWritable> input) {
  JavaPairRDD<HiveKey, BytesWritable> result = shuffler.shuffle(input, numOfPartitions);
  if (toCache) {
    sparkPlan.addCachedRDDId(result.id());
    result = result.persist(StorageLevel.MEMORY_AND_DISK());
  }
  return result;
}
@Override
public JavaPairRDD<WritableComparable, Writable> transform(
    JavaPairRDD<WritableComparable, Writable> input) {
  Preconditions.checkArgument(input == null,
      "AssertionError: MapInput doesn't take any input");
  JavaPairRDD<WritableComparable, Writable> result;
  if (toCache) {
    // Copy each record before caching; the underlying Hadoop RDD reuses Writable objects.
    result = hadoopRDD.mapToPair(new CopyFunction());
    sparkPlan.addCachedRDDId(result.id());
    result = result.persist(StorageLevel.MEMORY_AND_DISK());
  } else {
    result = hadoopRDD;
  }
  result.setName(this.name);
  return result;
}
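The copy step matters because Hadoop record readers hand back the same Writable instances for every record, so persisting hadoopRDD directly would cache many references to one mutated object. A pair function of roughly this shape produces independent copies before the persist; this is only an illustrative sketch, not Hive's actual CopyFunction, and the use of WritableUtils.clone with a lazily created Configuration is an assumption:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

public class CopyPairSketch
    implements PairFunction<Tuple2<WritableComparable, Writable>, WritableComparable, Writable> {

  private transient Configuration conf;

  @Override
  public Tuple2<WritableComparable, Writable> call(
      Tuple2<WritableComparable, Writable> record) throws Exception {
    if (conf == null) {
      conf = new Configuration();
    }
    // Clone key and value so the cached RDD does not keep references to
    // Writable objects that the record reader will overwrite.
    WritableComparable key = WritableUtils.clone(record._1(), conf);
    Writable value = WritableUtils.clone(record._2(), conf);
    return new Tuple2<WritableComparable, Writable>(key, value);
  }
}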
@Override
public JavaPairRDD<HiveKey, BytesWritable> transform(JavaPairRDD<HiveKey, BytesWritable> input) {
  JavaPairRDD<HiveKey, BytesWritable> result = shuffler.shuffle(input, numOfPartitions);
  if (toCache) {
    sparkPlan.addCachedRDDId(result.id());
    result = result.persist(StorageLevel.MEMORY_AND_DISK());
  }
  // Name the RDD after the shuffle type, partition count, and caching so it is
  // easy to identify in the Spark UI.
  return result.setName(this.name + " (" + edge.getShuffleType() + ", " + numOfPartitions
      + (toCache ? ", cached)" : ")"));
}
@Override
public JavaPairRDD<WritableComparable, Writable> transform(
    JavaPairRDD<WritableComparable, Writable> input) {
  Preconditions.checkArgument(input == null,
      "AssertionError: MapInput doesn't take any input");
  JavaPairRDD<WritableComparable, Writable> result;
  if (toCache) {
    result = hadoopRDD.mapToPair(new CopyFunction());
    sparkPlan.addCachedRDDId(result.id());
    result = result.persist(StorageLevel.MEMORY_AND_DISK());
  } else {
    result = hadoopRDD;
  }
  return result;
}
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
    JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
  JavaPairRDD<HiveKey, BytesWritable> rdd;
  if (totalOrder) {
    if (numPartitions > 0) {
      if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
        // sortByKey first samples the input to compute range boundaries; persist
        // the input so it is not recomputed for the sort itself.
        input.persist(StorageLevel.DISK_ONLY());
        sparkPlan.addCachedRDDId(input.id());
      }
      rdd = input.sortByKey(true, numPartitions);
    } else {
      rdd = input.sortByKey(true);
    }
  } else {
    Partitioner partitioner = new HashPartitioner(numPartitions);
    rdd = input.repartitionAndSortWithinPartitions(partitioner);
  }
  return rdd;
}
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
    JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
  JavaPairRDD<HiveKey, BytesWritable> rdd;
  if (totalOrder) {
    if (numPartitions > 0) {
      if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
        input.persist(StorageLevel.DISK_ONLY());
        sparkPlan.addCachedRDDId(input.id());
      }
      rdd = input.sortByKey(true, numPartitions);
    } else {
      rdd = input.sortByKey(true);
    }
  } else {
    Partitioner partitioner = new HashPartitioner(numPartitions);
    rdd = input.repartitionAndSortWithinPartitions(partitioner);
  }
  if (shuffleSerializer != null && rdd.rdd() instanceof ShuffledRDD) {
    // Apply the custom shuffle serializer when the result is a ShuffledRDD.
    ((ShuffledRDD) rdd.rdd()).setSerializer(shuffleSerializer);
  }
  return rdd;
}
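The two branches above correspond to two different shuffle strategies: sortByKey gives a globally ordered, range-partitioned result, while repartitionAndSortWithinPartitions hash-partitions the data and only sorts inside each partition. A standalone illustration on plain String/Integer pairs follows; the local SparkConf/JavaSparkContext setup is only for demonstration and is not part of the Hive code:

import java.util.Arrays;
import org.apache.spark.HashPartitioner;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class ShuffleStrategiesDemo {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("shuffle-demo").setMaster("local[2]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>("b", 2), new Tuple2<>("a", 1), new Tuple2<>("c", 3)));

      // Total order: a range-partitioned global sort, as in the totalOrder branch.
      JavaPairRDD<String, Integer> totalOrder = pairs.sortByKey(true, 2);

      // Partial order: hash-partition, then sort only within each partition,
      // as in the repartitionAndSortWithinPartitions branch.
      JavaPairRDD<String, Integer> withinPartitions =
          pairs.repartitionAndSortWithinPartitions(new HashPartitioner(2));

      System.out.println(totalOrder.collect());
      System.out.println(withinPartitions.collect());
    }
  }
}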
@Override
public JavaPairRDD<HiveKey, Iterable<BytesWritable>> transform(
    JavaPairRDD<HiveKey, BytesWritable> input) {
  JavaPairRDD<HiveKey, Iterable<BytesWritable>> result = shuffler.shuffle(input, numOfPartitions);
  if (toCache) {
    sparkPlan.addCachedRDDId(result.id());
    result = result.persist(StorageLevel.MEMORY_AND_DISK());
  }
  return result;
}
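This variant returns an Iterable of values per key, which suggests a group-by style shuffler behind it. Assuming that shuffler simply delegates to Spark's groupByKey, it could look roughly like the sketch below; this is an assumption about its shape, not the actual Hive implementation:

import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.BytesWritable;
import org.apache.spark.api.java.JavaPairRDD;

public class GroupByShuffleSketch {
  public JavaPairRDD<HiveKey, Iterable<BytesWritable>> shuffle(
      JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
    // groupByKey produces one Iterable of values per key, partitioned by key hash.
    return numPartitions > 0 ? input.groupByKey(numPartitions) : input.groupByKey();
  }
}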