// Resolve the input directory from the CLI path pattern + date, load it as lines,
// and rebalance into 4 partitions (full shuffle).
// NOTE(review): partition count is hard-coded — confirm 4 matches the intended parallelism.
JavaRDD<String> dataSet = jsc.textFile(JobUtils.getSourceDirFromDate(cmdLineArgs.input_path_pattern, cmdLineArgs.input_date_string)).repartition(4);
/**
 * Verifies {@code JavaRDD.repartition} in both directions: growing 2 -> 4 partitions
 * and shrinking 4 -> 2 partitions. In each case the resulting partition count must match
 * the request and no partition may end up empty (8 elements spread over <= 4 partitions).
 */
@Test public void repartition() {
    // Growing number of partitions: 2 -> 4 (original comment said "Shrinking" — that was swapped)
    JavaRDD<Integer> in1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 2);
    JavaRDD<Integer> repartitioned1 = in1.repartition(4);
    // glom() turns each partition into a List, so the outer list size == partition count
    List<List<Integer>> result1 = repartitioned1.glom().collect();
    assertEquals(4, result1.size());
    for (List<Integer> l : result1) {
        assertFalse(l.isEmpty());
    }
    // Shrinking number of partitions: 4 -> 2 (original comment said "Growing" — that was swapped)
    JavaRDD<Integer> in2 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 4);
    JavaRDD<Integer> repartitioned2 = in2.repartition(2);
    List<List<Integer>> result2 = repartitioned2.glom().collect();
    assertEquals(2, result2.size());
    for (List<Integer> l: result2) {
        assertFalse(l.isEmpty());
    }
}
/**
 * Verifies {@code JavaRDD.repartition} in both directions: growing 2 -> 4 partitions
 * and shrinking 4 -> 2 partitions, asserting the requested partition count and that
 * no resulting partition is empty.
 */
@Test public void repartition() {
    // Growing number of partitions: 2 -> 4 (original "Shrinking" comment was mislabeled)
    JavaRDD<Integer> in1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 2);
    JavaRDD<Integer> repartitioned1 = in1.repartition(4);
    // glom() lists the contents of each partition, so outer size == partition count
    List<List<Integer>> result1 = repartitioned1.glom().collect();
    assertEquals(4, result1.size());
    for (List<Integer> l : result1) {
        assertFalse(l.isEmpty());
    }
    // Shrinking number of partitions: 4 -> 2 (original "Growing" comment was mislabeled)
    JavaRDD<Integer> in2 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 4);
    JavaRDD<Integer> repartitioned2 = in2.repartition(2);
    List<List<Integer>> result2 = repartitioned2.glom().collect();
    assertEquals(2, result2.size());
    for (List<Integer> l: result2) {
        assertFalse(l.isEmpty());
    }
}
@Test public void repartition() { // Shrinking number of partitions JavaRDD<Integer> in1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 2); JavaRDD<Integer> repartitioned1 = in1.repartition(4); List<List<Integer>> result1 = repartitioned1.glom().collect(); assertEquals(4, result1.size()); for (List<Integer> l : result1) { assertFalse(l.isEmpty()); } // Growing number of partitions JavaRDD<Integer> in2 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 4); JavaRDD<Integer> repartitioned2 = in2.repartition(2); List<List<Integer>> result2 = repartitioned2.glom().collect(); assertEquals(2, result2.size()); for (List<Integer> l: result2) { assertFalse(l.isEmpty()); } }
/**
 * Returns a new stream whose backing RDD has been reshuffled into the requested
 * number of partitions. The current stream is left untouched.
 *
 * @param numPartitions target partition count for the underlying RDD
 * @return a fresh {@code SparkStream} wrapping the repartitioned RDD
 */
@Override
public SparkStream<T> repartition(int numPartitions) {
    return new SparkStream<>(this.rdd.repartition(numPartitions));
}
// Prepares record data for local consumption: first reshuffles the input so there is
// one partition per worker (workerSize partitions).
// NOTE(review): the method body continues beyond this excerpt — only the header and
// first statement are visible here.
public void prepareLocal(JavaRDD<Record> data, final String output, int workerSize) { JavaRDD<Record> reparted_data = data.repartition(workerSize);
// Byte-record variant of prepareLocal: reshuffles the input into workerSize partitions
// (one per worker) before further processing.
// NOTE(review): the method body continues beyond this excerpt — only the header and
// first statement are visible here.
public void prepareLocalByte(JavaRDD<ByteRecord> data, final String output, int workerSize) { JavaRDD<ByteRecord> reparted_data = data.repartition(workerSize);
break;
// TEXT format: collapse to a single partition so saveAsTextFile emits one part file.
// NOTE(review): enclosing switch is not visible in this excerpt.
case TEXT: toTaggedSentence(output).repartition(1).saveAsTextFile(outputFileName);
// Number of decimal digits needed to render partNum (e.g. partNum=100 -> 3).
final int desiredDigit = (int) Math.floor(Math.log10(partNum) + 1);
// Equivalent to Math.max(desiredDigit, DEFAULT_DIGIT_NUM): never go below the default width.
this.digitNum = desiredDigit > DEFAULT_DIGIT_NUM ? desiredDigit : DEFAULT_DIGIT_NUM;
// Reshuffle the converted lines into exactly partNum output partitions.
final JavaRDD<String> dataRepartitioned = convertedData.repartition(partNum);
// Assigned in the branches below — the header branch is cut off in this excerpt.
final JavaRDD<String> dataToWrite; if (this.conf.isColumnHeader()) {
/**
 * Produces a DataFrame of generated FIX messages with a single non-null string
 * column named {@code "fix"}. One seed value is created per task via
 * {@code range(tasks)}, spread over {@code tasks} partitions, and each seed is
 * expanded into {@code ordersPerTask} messages.
 *
 * @return a single-column ("fix") DataFrame of generated FIX message strings
 * @throws Exception propagated from Spark session / job setup
 */
@Override
public Dataset<Row> read() throws Exception {
    // One Long seed per task, spread across `tasks` partitions via a full shuffle.
    JavaRDD<Long> seeds = Contexts.getSparkSession().range(tasks).javaRDD().repartition(tasks);
    // Each seed fans out into a batch of FIX message Rows.
    JavaRDD<Row> messages = seeds.flatMap(new GenerateFIXMessages(ordersPerTask));
    StructType schema = DataTypes.createStructType(
        Lists.newArrayList(DataTypes.createStructField("fix", DataTypes.StringType, false)));
    return Contexts.getSparkSession().createDataFrame(messages, schema);
}
// NOTE(review): this excerpt is garbled/truncated — both map(...) lambda bodies are cut
// off mid-expression (the surrounding constructor/method call is missing), so the code as
// shown is not syntactically valid. What is visible: two pipelines mapping tuple data
// (head value, tail value, min/max reference bounds) and forcing each result into a
// single partition with repartition(1). Recover the full lambdas from the original file.
in.getHeadTupleValue(), in.getTailTupleData().getValue(), refMinMax._1(), refMinMax._2()); }).repartition(1); JavaRDD<Tuple5<Long, Type0, Type0, Type1, Type1>> rdd2TinyObjects = listObjectDataRDD2 .map(in -> { in.getHeadTupleValue(), in.getTailTupleData().getValue(), refMinMax._1(), refMinMax._2()); }).repartition(1);
// NOTE(review): truncated excerpt — the start of the enclosing lambda/call is missing,
// so this line is not syntactically valid on its own. Visible tail: tuple fields plus
// min/max reference bounds being passed somewhere, then repartition(1) to force a
// single output partition. Recover the leading context from the original file.
in.getHeadTupleValue(), in.getTailTupleData().getValue(), refMinMax._1(), refMinMax._2()); }).repartition(1);