org.apache.flink.api.java.operators.DataSink.setParallelism java code examples

/**
 * Attaches a text-file sink to the data set registered under {@code info.parentID}.
 *
 * <p>The parent's {@code byte[]} records are mapped through {@code StringDeserializerMap}, and the
 * mapped records are written with {@code writeAsText} to {@code info.path} using
 * {@code info.writeMode}. Both the map step and the sink run with {@code info.parallelism};
 * only the sink is given a name ("TextSink").
 *
 * @param info operation descriptor supplying the parent id, target path, write mode and parallelism
 */
private void createTextSink(PythonOperationInfo info) {
  DataSet<byte[]> serialized = sets.getDataSet(info.parentID);

  // Deserialization step: runs with the same parallelism as the sink.
  DataSet<String> lines = serialized
    .map(new StringDeserializerMap())
    .setParallelism(info.parallelism);

  lines
    .writeAsText(info.path, info.writeMode)
    .setParallelism(info.parallelism)
    .name("TextSink");
}

/**
 * Attaches a printing sink to the data set registered under {@code info.parentID}.
 *
 * <p>The parent's {@code byte[]} records are mapped through {@code StringDeserializerMap}
 * (step named "PrintSinkPreStep") and handed to a {@code PrintingOutputFormat<String>}
 * constructed with {@code info.toError}. Both steps run with {@code info.parallelism}.
 * The sink itself is not given a name.
 *
 * @param info operation descriptor supplying the parent id, the {@code toError} flag and parallelism
 */
private void createPrintSink(PythonOperationInfo info) {
  DataSet<byte[]> serialized = sets.getDataSet(info.parentID);

  DataSet<String> printable = serialized
    .map(new StringDeserializerMap())
    .setParallelism(info.parallelism)
    .name("PrintSinkPreStep");

  printable
    .output(new PrintingOutputFormat<String>(info.toError))
    .setParallelism(info.parallelism);
}

/**
 * Attaches a CSV sink to the data set registered under {@code info.parentID}.
 *
 * <p>The parent's {@code byte[]} records are mapped through {@code StringTupleDeserializerMap}
 * (step named "CsvSinkPreStep") and written with {@code writeAsCsv} to {@code info.path},
 * using the line delimiter, field delimiter and write mode carried by {@code info}.
 * Both the map step and the sink ("CsvSink") run with {@code info.parallelism}.
 *
 * @param info operation descriptor supplying the parent id, path, delimiters, write mode and parallelism
 */
private void createCsvSink(PythonOperationInfo info) {
  DataSet<byte[]> serialized = sets.getDataSet(info.parentID);

  serialized
    // pre-step: deserialize the raw bytes
    .map(new StringTupleDeserializerMap())
    .setParallelism(info.parallelism)
    .name("CsvSinkPreStep")
    // sink: write the mapped records as CSV
    .writeAsCsv(info.path, info.lineDelimiter, info.fieldDelimiter, info.writeMode)
    .setParallelism(info.parallelism)
    .name("CsvSink");
}

/**
 * Writes 1001 pseudo-random longs through a text sink with ascending local sort and
 * sink parallelism 4, then verifies that every result reader yields values in
 * non-decreasing order.
 *
 * <p>Only the order within each reader is checked; no order across readers is asserted.
 *
 * @throws Exception if job execution, reading a result, or parsing a line fails
 */
@Test
public void testSortingParallelism4() throws Exception {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Long> ds = env.generateSequence(0, 1000);
  // randomize: replace each sequence element with a value from a fixed-seed generator
  ds.map(new MapFunction<Long, Long>() {
    Random rand = new Random(1234L);
    @Override
    public Long map(Long value) throws Exception {
      return rand.nextLong();
    }
  }).writeAsText(resultPath)
    .sortLocalOutput("*", Order.ASCENDING)
    .setParallelism(4);
  env.execute();
  BufferedReader[] resReaders = getResultReader(resultPath);
  for (BufferedReader br : resReaders) {
    try {
      long cmp = Long.MIN_VALUE;
      String line;
      // readLine() returning null is the reliable end-of-stream signal; ready() only
      // reports whether a read would block and must not be used as an EOF test.
      while ((line = br.readLine()) != null) {
        long cur = Long.parseLong(line);
        assertTrue("Invalid order of sorted output", cmp <= cur);
        cmp = cur;
      }
    } finally {
      // close the reader even when an assertion or parse error aborts the loop
      br.close();
    }
  }
}

/**
 * Writes the integer fixture through a text sink that sorts its local output on the
 * whole record ("*") in descending order with parallelism 1, and compares the result
 * file line by line, in order, against the expected listing.
 */
@Test
public void testIntSortingParallelism1() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Integer> numbers = CollectionDataSets.getIntegerDataSet(environment);

  numbers
    .writeAsText(resultPath)
    .sortLocalOutput("*", Order.DESCENDING)
    .setParallelism(1);
  environment.execute();

  // one literal per distinct value, highest first
  String expectedLines =
      "5\n5\n5\n5\n5\n" +
      "4\n4\n4\n4\n" +
      "3\n3\n3\n" +
      "2\n2\n" +
      "1\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

/**
 * Writes the string fixture through a text sink that sorts its local output on the
 * whole record ("*") in ascending order with parallelism 1, and compares the result
 * file line by line, in order, against the expected listing.
 */
@Test
public void testStringSortingParallelism1() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<String> sentences = CollectionDataSets.getStringDataSet(environment);

  sentences
    .writeAsText(resultPath)
    .sortLocalOutput("*", Order.ASCENDING)
    .setParallelism(1);
  environment.execute();

  String expectedLines =
      "Hello\n" + "Hello world\n" + "Hello world, how are you?\n" +
      "Hi\n" +
      "I am fine.\n" +
      "LOL\n" + "Luke Skywalker\n" +
      "Random comment\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

/**
 * Writes the mixed-POJO fixture through a text sink that sorts its local output on the
 * {@code number} field in ascending order with parallelism 1, and compares the result
 * file line by line, in order, against the expected listing.
 */
@Test
public void testPojoSortingSingleParallelism1() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<CollectionDataSets.POJO> pojos = CollectionDataSets.getMixedPojoDataSet(environment);

  pojos
    .writeAsText(resultPath)
    .sortLocalOutput("number", Order.ASCENDING)
    .setParallelism(1);
  environment.execute();

  // the leading column of each expected line is the sort key
  String expectedLines =
      "1 First (10,100,1000,One) 10100\n"
      + "2 First_ (10,105,1000,One) 10200\n"
      + "3 First (11,102,3000,One) 10200\n"
      + "4 First_ (11,106,1000,One) 10300\n"
      + "5 First (11,102,2000,One) 10100\n"
      + "6 Second_ (20,200,2000,Two) 10100\n"
      + "7 Third (31,301,2000,Three) 10200\n"
      + "8 Third_ (30,300,1000,Three) 10100\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

/**
 * Word-count job that reads and writes through Hadoop {@code mapred} input/output formats.
 *
 * <p>Expects two arguments: the input path and the result path. With fewer than two
 * arguments a usage line is printed to standard error and the method returns without
 * running a job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the result path
 * @throws Exception propagated from job set-up or execution
 */
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <result path>");
    return;
  }

  final String source = args[0];
  final String destination = args[1];
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // Input side: a Hadoop TextInputFormat wrapped for use as a Flink input
  HadoopInputFormat<LongWritable, Text> wrappedInput =
      new HadoopInputFormat<LongWritable, Text>(
          new TextInputFormat(),
          LongWritable.class,
          Text.class,
          new JobConf());
  TextInputFormat.addInputPath(wrappedInput.getJobConf(), new Path(source));

  // Pipeline: tokenize with the Hadoop mapper, group on field 0, reduce/combine with the Hadoop reducer
  DataSet<Tuple2<LongWritable, Text>> lines = env.createInput(wrappedInput);
  DataSet<Tuple2<Text, LongWritable>> counts = lines
      .flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()))
      .groupBy(0)
      .reduceGroup(
          new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(
              new Counter(), new Counter()));

  // Output side: a Hadoop TextOutputFormat using a single space as the key/value separator
  HadoopOutputFormat<Text, LongWritable> wrappedOutput =
      new HadoopOutputFormat<Text, LongWritable>(
          new TextOutputFormat<Text, LongWritable>(),
          new JobConf());
  wrappedOutput.getJobConf().set("mapred.textoutputformat.separator", " ");
  TextOutputFormat.setOutputPath(wrappedOutput.getJobConf(), new Path(destination));

  // Emit through the sink with parallelism 1, then run the job
  counts.output(wrappedOutput).setParallelism(1);
  env.execute("Hadoop Compat WordCount");
}

/**
 * Writes the nested-tuple fixture through a text sink that sorts its local output on
 * tuple position 1 ascending and then position 2 descending, with parallelism 1, and
 * compares the result file line by line, in order, against the expected listing.
 */
@Test
public void testTupleSortingNestedParallelism1_2() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple3<Tuple2<Integer, Integer>, String, Integer>> nestedTuples =
      CollectionDataSets.getGroupSortedNestedTupleDataSet2(environment);

  nestedTuples
    .writeAsText(resultPath)
    .sortLocalOutput(1, Order.ASCENDING)    // primary key: position 1
    .sortLocalOutput(2, Order.DESCENDING)   // secondary key: position 2
    .setParallelism(1);
  environment.execute();

  // one literal per value of position 1
  String expectedLines =
      "((2,1),a,3)\n" + "((1,3),a,2)\n" + "((1,2),a,1)\n" +
      "((2,2),b,4)\n" +
      "((4,9),c,7)\n" + "((3,6),c,6)\n" + "((3,3),c,5)\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

/**
 * Writes the mixed-POJO fixture through a text sink that sorts its local output on
 * {@code str} ascending and then {@code number} descending, with parallelism 1, and
 * compares the result file line by line, in order, against the expected listing.
 */
@Test
public void testPojoSortingDualParallelism1() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<CollectionDataSets.POJO> pojos = CollectionDataSets.getMixedPojoDataSet(environment);

  pojos
    .writeAsText(resultPath)
    .sortLocalOutput("str", Order.ASCENDING)      // primary key
    .sortLocalOutput("number", Order.DESCENDING)  // secondary key
    .setParallelism(1);
  environment.execute();

  String expectedLines =
      "5 First (11,102,2000,One) 10100\n"
      + "3 First (11,102,3000,One) 10200\n"
      + "1 First (10,100,1000,One) 10100\n"
      + "4 First_ (11,106,1000,One) 10300\n"
      + "2 First_ (10,105,1000,One) 10200\n"
      + "6 Second_ (20,200,2000,Two) 10100\n"
      + "7 Third (31,301,2000,Three) 10200\n"
      + "8 Third_ (30,300,1000,Three) 10100\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

/**
 * Writes the nested-tuple fixture through a text sink that sorts its local output on
 * the field expression {@code f0.f1} ascending and then {@code f1} descending, with
 * parallelism 1, and compares the result file line by line, in order, against the
 * expected listing.
 */
@Test
public void testTupleSortingNestedParallelism1() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple3<Tuple2<Integer, Integer>, String, Integer>> nestedTuples =
      CollectionDataSets.getGroupSortedNestedTupleDataSet2(environment);

  nestedTuples
    .writeAsText(resultPath)
    .sortLocalOutput("f0.f1", Order.ASCENDING)  // primary key: second field of the nested tuple
    .sortLocalOutput("f1", Order.DESCENDING)    // secondary key: the string field
    .setParallelism(1);
  environment.execute();

  String expectedLines =
      "((2,1),a,3)\n"
      + "((2,2),b,4)\n"
      + "((1,2),a,1)\n"
      + "((3,3),c,5)\n"
      + "((1,3),a,2)\n"
      + "((3,6),c,6)\n"
      + "((4,9),c,7)\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

/**
 * Writes the 3-tuple fixture through a CSV sink that sorts its local output on tuple
 * position 0 in ascending order with parallelism 1, and compares the result file line
 * by line, in order, against the expected listing.
 */
@Test
public void testTupleSortingSingleAscParallelism1() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(environment);

  tuples
    .writeAsCsv(resultPath)
    .sortLocalOutput(0, Order.ASCENDING)
    .setParallelism(1);
  environment.execute();

  // rows are grouped per literal by their second column purely for readability
  String expectedLines =
      "1,1,Hi\n"
      + "2,2,Hello\n3,2,Hello world\n"
      + "4,3,Hello world, how are you?\n5,3,I am fine.\n6,3,Luke Skywalker\n"
      + "7,4,Comment#1\n8,4,Comment#2\n9,4,Comment#3\n10,4,Comment#4\n"
      + "11,5,Comment#5\n12,5,Comment#6\n13,5,Comment#7\n14,5,Comment#8\n15,5,Comment#9\n"
      + "16,6,Comment#10\n17,6,Comment#11\n18,6,Comment#12\n"
      + "19,6,Comment#13\n20,6,Comment#14\n21,6,Comment#15\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

/**
 * Writes the 3-tuple fixture through a CSV sink that sorts its local output on tuple
 * position 0 in descending order with parallelism 1, and compares the result file line
 * by line, in order, against the expected listing.
 */
@Test
public void testTupleSortingSingleDescParallelism1() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(environment);

  tuples
    .writeAsCsv(resultPath)
    .sortLocalOutput(0, Order.DESCENDING)
    .setParallelism(1);
  environment.execute();

  // rows are grouped per literal by their second column purely for readability
  String expectedLines =
      "21,6,Comment#15\n20,6,Comment#14\n19,6,Comment#13\n"
      + "18,6,Comment#12\n17,6,Comment#11\n16,6,Comment#10\n"
      + "15,5,Comment#9\n14,5,Comment#8\n13,5,Comment#7\n12,5,Comment#6\n11,5,Comment#5\n"
      + "10,4,Comment#4\n9,4,Comment#3\n8,4,Comment#2\n7,4,Comment#1\n"
      + "6,3,Luke Skywalker\n5,3,I am fine.\n4,3,Hello world, how are you?\n"
      + "3,2,Hello world\n2,2,Hello\n"
      + "1,1,Hi\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

/**
 * Writes the mixed-POJO fixture through a text sink that sorts its local output on three
 * nested field expressions, in this priority: {@code nestedTupleWithCustom.f0} ascending,
 * {@code nestedTupleWithCustom.f1.myInt} descending, {@code nestedPojo.longNumber}
 * ascending. The sink runs with parallelism 1 and the result file is compared line by
 * line, in order, against the expected listing.
 */
@Test
public void testPojoSortingNestedParallelism1() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<CollectionDataSets.POJO> pojos = CollectionDataSets.getMixedPojoDataSet(environment);

  pojos
    .writeAsText(resultPath)
    .sortLocalOutput("nestedTupleWithCustom.f0", Order.ASCENDING)         // 1st key
    .sortLocalOutput("nestedTupleWithCustom.f1.myInt", Order.DESCENDING)  // 2nd key
    .sortLocalOutput("nestedPojo.longNumber", Order.ASCENDING)            // 3rd key
    .setParallelism(1);
  environment.execute();

  String expectedLines =
      "2 First_ (10,105,1000,One) 10200\n"
      + "1 First (10,100,1000,One) 10100\n"
      + "4 First_ (11,106,1000,One) 10300\n"
      + "5 First (11,102,2000,One) 10100\n"
      + "3 First (11,102,3000,One) 10200\n"
      + "6 Second_ (20,200,2000,Two) 10100\n"
      + "8 Third_ (30,300,1000,Three) 10100\n"
      + "7 Third (31,301,2000,Three) 10200\n";
  compareResultsByLinesInMemoryWithStrictOrder(expectedLines, resultPath);
}

// Excerpt (enclosing method and the declaration of ds are not shown here):
// writes ds as CSV to resultPath, sorting the sink's local output on position 1
// descending and then position 0 ascending, with sink parallelism 1.
ds.writeAsCsv(resultPath)
  .sortLocalOutput(1, Order.DESCENDING).sortLocalOutput(0, Order.ASCENDING)
  .setParallelism(1);

/**
 * Builds the edge-metrics pipeline on {@code input} and registers its sink.
 *
 * <p>After delegating to {@code super.run}, the graph's edges are annotated via
 * {@code EdgeDegreePair}, mapped to per-edge statistics, summed per key (position 0)
 * with a hash-based combine hint, and finally emitted into a freshly created
 * {@code EdgeMetricsHelper} that is stored in {@code edgeMetricsHelper}. Every operator
 * and the sink are configured with {@code parallelism}.
 *
 * @param input the graph to analyze
 * @return this instance
 * @throws Exception propagated from {@code super.run} or from building the pipeline
 */
@Override
public EdgeMetrics<K, VV, EV> run(Graph<K, VV, EV> input)
    throws Exception {
  super.run(input);

  // per edge: s, t, (d(s), d(t))
  DataSet<Edge<K, Tuple3<EV, LongValue, LongValue>>> degreeAnnotatedEdges =
    input.run(
      new EdgeDegreePair<K, VV, EV>()
        .setReduceOnTargetId(reduceOnTargetId)
        .setParallelism(parallelism));

  // per vertex: s, d(s), count of (u, v) where deg(u) < deg(v) or (deg(u) == deg(v) and u < v)
  DataSet<Tuple3<K, LongValue, LongValue>> perVertexStats =
    degreeAnnotatedEdges
      .map(new EdgeStats<>())
        .setParallelism(parallelism)
        .name("Edge stats")
      .groupBy(0)
      .reduce(new SumEdgeStats<>())
      .setCombineHint(CombineHint.HASH)
        .setParallelism(parallelism)
        .name("Sum edge stats");

  // the helper acts as the output of the pipeline
  edgeMetricsHelper = new EdgeMetricsHelper<>();
  perVertexStats
    .output(edgeMetricsHelper)
      .setParallelism(parallelism)
      .name("Edge metrics");

  return this;
}

/**
 * Builds the edge-metrics pipeline on {@code input} and registers its sink.
 *
 * <p>After delegating to {@code super.run}, the graph's edges are annotated via
 * {@code EdgeDegreesPair}, flat-mapped to edge statistics, group-reduced on positions
 * (0, 1), summed per key (position 0) with a hash-based combine hint, and finally
 * emitted into a freshly created {@code EdgeMetricsHelper} that is stored in
 * {@code edgeMetricsHelper}. Every operator and the sink are configured with
 * {@code parallelism}.
 *
 * @param input the graph to analyze
 * @return this instance
 * @throws Exception propagated from {@code super.run} or from building the pipeline
 */
@Override
public EdgeMetrics<K, VV, EV> run(Graph<K, VV, EV> input)
    throws Exception {
  super.run(input);

  // per edge: s, t, (d(s), d(t))
  DataSet<Edge<K, Tuple3<EV, Degrees, Degrees>>> degreeAnnotatedEdges =
    input.run(
      new EdgeDegreesPair<K, VV, EV>()
        .setParallelism(parallelism));

  // per vertex: s, d(s), count of (u, v) where deg(u) < deg(v) or (deg(u) == deg(v) and u < v)
  DataSet<Tuple3<K, Degrees, LongValue>> perVertexStats =
    degreeAnnotatedEdges
      .flatMap(new EdgeStats<>())
        .setParallelism(parallelism)
        .name("Edge stats")
      .groupBy(0, 1)
      .reduceGroup(new ReduceEdgeStats<>())
        .setParallelism(parallelism)
        .name("Reduce edge stats")
      .groupBy(0)
      .reduce(new SumEdgeStats<>())
      .setCombineHint(CombineHint.HASH)
        .setParallelism(parallelism)
        .name("Sum edge stats");

  // the helper acts as the output of the pipeline
  edgeMetricsHelper = new EdgeMetricsHelper<>();
  perVertexStats
    .output(edgeMetricsHelper)
      .setParallelism(parallelism)
      .name("Edge metrics");

  return this;
}

.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
  .withForwardedFields("*").setParallelism(p).name("Reduce2")
.output(new DiscardingOutputFormat<Long>()).setParallelism(p).name("Sink");

Javadoc

Sets the parallelism for this data sink. The degree must be 1 or more.

Popular methods of DataSink

name
<init>
getDataSet
getMinResources
Returns the minimum resources of this data sink. If no minimum resources have been set, this returns
getPreferredResources
Returns the preferred resources of this data sink. If no preferred resources have been set, this ret
sortLocalOutput
Sorts each local partition of a data set on the field(s) specified by the field expression in the sp
translateToDataFlow
withParameters
Pass a configuration to the OutputFormat.

Popular in Java

Reading from database using SQL prepared statement
notifyDataSetChanged (ArrayAdapter)
onCreateOptionsMenu (Activity)
setRequestProperty (URLConnection)
FileWriter (java.io)
A specialized Writer that writes to a file in the file system. All write requests made by calling me
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
Arrays (java.util)
This class contains various methods for manipulating arrays (such as sorting and searching). This cl
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in t
Font (java.awt)
The Font class represents fonts, which are used to render text in a visible way. A font provides the
CodeWhisperer alternatives

How to use the setParallelism method in org.apache.flink.api.java.operators.DataSink

Best Java code snippets using org.apache.flink.api.java.operators.DataSink.setParallelism (Showing top 20 results out of 315)

How to use
setParallelism
method
in
org.apache.flink.api.java.operators.DataSink