eu.stratosphere.api.java.record.operators.MapOperator$Builder.name java code examples

/**
 * Builds the word-count plan that wires a {@code HadoopDataSource} through a tokenizing map and
 * a counting reduce into a {@code HadoopDataSink}.
 *
 * @param args optional positional job parameters: {@code args[0]} is parsed as the number of
 *     sub-tasks (default 1), {@code args[1]} is the input path (default ""), {@code args[2]} is
 *     the output path (default "")
 * @return the plan rooted at the Hadoop sink, with its default parallelism set
 */
@Override
public Plan getPlan(String... args) {
  // Positional job parameters; anything not supplied keeps its default.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  // Source: the input path is registered on the source's own JobConf.
  HadoopDataSource<LongWritable, Text> lines =
      new HadoopDataSource<LongWritable, Text>(new TextInputFormat(), new JobConf(), "Input Lines");
  TextInputFormat.addInputPath(lines.getJobConf(), new Path(inputPath));

  MapOperator tokenizer =
      MapOperator.builder(new TokenizeLine()).input(lines).name("Tokenize Lines").build();

  // Reduce keyed on field 0, declared as StringValue.
  ReduceOperator counter =
      ReduceOperator.builder(CountWords.class, StringValue.class, 0)
          .input(tokenizer)
          .name("Count Words")
          .build();

  // Sink: the output path is registered on the sink's own JobConf.
  HadoopDataSink<Text, IntWritable> sink =
      new HadoopDataSink<Text, IntWritable>(
          new TextOutputFormat<Text, IntWritable>(),
          new JobConf(),
          "Hadoop TextOutputFormat",
          counter,
          Text.class,
          IntWritable.class);
  TextOutputFormat.setOutputPath(sink.getJobConf(), new Path(outputPath));

  Plan wordCount = new Plan(sink, "Hadoop OutputFormat Example");
  wordCount.setDefaultParallelism(parallelism);
  return wordCount;
}

  .name("Tokenize Lines")
  .build();
ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)

.input(edges).name("Project Edge").build();

MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();

FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();
  .setBroadcastVariable("centers", iter.getPartialSolution())
  .input(dataPoints)
  .name("Find Nearest Centers")
  .build();

FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();
  .setBroadcastVariable("centers", clusterPoints)
  .input(dataPoints)
  .name("Find Nearest Centers")
  .build();

  .name("LineItem Filter")
  .build();
lineItemFilter.setDegreeOfParallelism(this.degreeOfParallelism);

.input(edges).name("Project Edge").build();
.input(countJoiner).name("Select lower-degree Edge").build();
.input(countJoiner).name("Project out Counts").build();

.name("Compute termination criterion (Map)")
.build();

.name("Compute termination criterion (Map)")
.build();

/**
 * Builds the "WordCount Example" plan: a text file source, a tokenizing map, a counting reduce
 * and a CSV file sink.
 *
 * @param args optional positional job parameters: {@code args[0]} is parsed as the number of
 *     sub-tasks (default 1), {@code args[1]} is the input path (default ""), {@code args[2]} is
 *     the output path (default "")
 * @return the plan rooted at the file sink, with its default parallelism set
 */
@Override
public Plan getPlan(String... args) {
  // Positional job parameters; anything not supplied keeps its default.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");

  MapOperator tokenizer =
      MapOperator.builder(new TokenizeLine()).input(lines).name("Tokenize Lines").build();

  // Reduce keyed on field 0, declared as StringValue.
  ReduceOperator counter =
      ReduceOperator.builder(CountWords.class, StringValue.class, 0)
          .input(tokenizer)
          .name("Count Words")
          .build();

  // CsvOutputFormat is constructed with "\n", " " and the two field types.
  @SuppressWarnings("unchecked")
  FileDataSink sink =
      new FileDataSink(
          new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class),
          outputPath,
          counter,
          "Word Counts");

  Plan wordCount = new Plan(sink, "WordCount Example");
  wordCount.setDefaultParallelism(parallelism);
  return wordCount;
}

/**
 * Builds the "WordCount Example" plan, with the text source's charset parameter set to ASCII.
 *
 * @param numSubTasks default parallelism applied to the returned plan
 * @param input path handed to the text file source
 * @param output path handed to the CSV file sink
 * @return the plan rooted at the file sink
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
  FileDataSource lines = new FileDataSource(new TextInputFormat(), input, "Input Lines");
  lines.setParameter(TextInputFormat.CHARSET_NAME, "ASCII");

  MapOperator tokenizer =
      MapOperator.builder(new TokenizeLine()).input(lines).name("Tokenize Lines").build();

  // Reduce keyed on field 0, declared as StringValue.
  ReduceOperator counter =
      ReduceOperator.builder(CountWords.class, StringValue.class, 0)
          .input(tokenizer)
          .name("Count Words")
          .build();

  // CsvOutputFormat is constructed with "\n", " " and the two field types.
  @SuppressWarnings("unchecked")
  FileDataSink sink =
      new FileDataSink(
          new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class),
          output,
          counter,
          "Word Counts");

  Plan wordCount = new Plan(sink, "WordCount Example");
  wordCount.setDefaultParallelism(numSubTasks);
  return wordCount;
}

/**
 * Builds the "Distributed Cache" plan: a text file source feeding a single map operator whose
 * output goes straight to a CSV file sink.
 *
 * @param numSubTasks default parallelism applied to the returned plan
 * @param dataInput path handed to the text file source
 * @param output path handed to the CSV file sink
 * @return the plan rooted at the file sink
 */
public Plan getPlan(int numSubTasks, String dataInput, String output) {
  FileDataSource lines = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

  // NOTE(review): the original comment says the selection uses a cached file; that logic
  // would live inside TokenizeLine, which is not visible here.
  MapOperator selection =
      MapOperator.builder(new TokenizeLine()).input(lines).name("Tokenize Lines").build();

  FileDataSink sink = new FileDataSink(new CsvOutputFormat(), output, selection, "Selection");

  // Record layout: newline between records, space between fields,
  // field 0 as StringValue and field 1 as IntValue.
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  Plan cachePlan = new Plan(sink, "Distributed Cache");
  cachePlan.setDefaultParallelism(numSubTasks);
  return cachePlan;
}

/**
 * Builds the "WordCount Example" plan, configuring the CSV sink through
 * {@code CsvOutputFormat.configureRecordFormat} rather than through constructor arguments.
 *
 * @param args optional positional job parameters: {@code args[0]} is parsed as the number of
 *     sub-tasks (default 1), {@code args[1]} is the input path (default ""), {@code args[2]} is
 *     the output path (default "")
 * @return the plan rooted at the file sink, with its default parallelism set
 */
@Override
public Plan getPlan(String... args) {
  // Positional job parameters; anything not supplied keeps its default.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");

  MapOperator tokenizer =
      MapOperator.builder(new TokenizeLine())
          .input(lines)
          .name("Tokenize Lines")
          .build();

  // Reduce keyed on field 0, declared as StringValue.
  ReduceOperator counter =
      ReduceOperator.builder(CountWords.class, StringValue.class, 0)
          .input(tokenizer)
          .name("Count Words")
          .build();

  FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, counter, "Word Counts");

  // Record layout: newline between records, space between fields,
  // field 0 as StringValue and field 1 as IntValue.
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  Plan wordCount = new Plan(sink, "WordCount Example");
  wordCount.setDefaultParallelism(parallelism);
  return wordCount;
}

/**
 * Builds the "Iteration with union test" plan: a bulk iteration whose step is a single map
 * operator that receives the iteration's partial solution twice as its input.
 *
 * @param numSubTasks default parallelism applied to the returned plan
 * @param input path handed to the file source
 * @param output path handed to the file sink
 * @return the plan rooted at the file sink
 */
private static Plan getPlan(int numSubTasks, String input, String output) {
  // The source is pinned to a degree of parallelism of 1.
  FileDataSource points = new FileDataSource(new PointInFormat(), input, "Input");
  points.setDegreeOfParallelism(1);

  BulkIteration loop = new BulkIteration("Loop");
  loop.setInput(points);
  loop.setMaximumNumberOfIterations(2);

  // The partial solution is passed as both inputs of the map.
  @SuppressWarnings("unchecked")
  MapOperator step =
      MapOperator.builder(new IdentityMapper())
          .input(loop.getPartialSolution(), loop.getPartialSolution())
          .name("map")
          .build();
  loop.setNextPartialSolution(step);

  FileDataSink sink = new FileDataSink(new PointOutFormat(), output, loop, "Output");

  Plan unionPlan = new Plan(sink, "Iteration with union test");
  unionPlan.setDefaultParallelism(numSubTasks);
  return unionPlan;
}

/**
 * Builds a source -> {@code DelayingIdentityMapper} -> discarding sink plan with default
 * parallelism 4 and passes it to {@code runAndCancelJob}.
 *
 * @throws Exception propagated from {@code runAndCancelJob}
 */
public void testSlowMapCancelling() throws Exception {
  GenericDataSource<InfiniteIntegerInputFormat> integers =
      new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

  MapOperator delayMapper =
      MapOperator.builder(DelayingIdentityMapper.class).input(integers).name("Delay Mapper").build();

  GenericDataSink discard = new GenericDataSink(new DiscardingOutputFormat(), delayMapper, "Sink");

  Plan plan = new Plan(discard);
  plan.setDefaultParallelism(4);

  // The meaning of the two numeric arguments is defined by runAndCancelJob (not visible here).
  runAndCancelJob(plan, 5 * 1000, 10 * 1000);
}

/**
 * Assembles a source -> map -> reduce -> sink plan with default parallelism 4 and runs it
 * through {@code LocalExecutor.execute}.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception propagated from plan construction or execution
 */
public static void main(String[] args) throws Exception {
  GenericDataSource<UserGeneratingInputFormat> users =
      new GenericDataSource<UserGeneratingInputFormat>(UserGeneratingInputFormat.class);

  MapOperator extractor =
      MapOperator.builder(new NumberExtractingMapper())
          .input(users)
          .name("le mapper")
          .build();

  // Reduce keyed on field 1, declared as IntValue.
  ReduceOperator concatenator =
      ReduceOperator.builder(new ConcatenatingReducer(), IntValue.class, 1)
          .input(extractor)
          .name("le reducer")
          .build();

  GenericDataSink printer = new GenericDataSink(PrintingOutputFormat.class, concatenator);

  Plan plan = new Plan(printer);
  plan.setDefaultParallelism(4);

  LocalExecutor.execute(plan);
}

/**
 * Builds a source -> {@code IdentityMapper} -> discarding sink plan with default parallelism 4
 * and passes it to {@code runAndCancelJob}.
 *
 * @throws Exception propagated from {@code runAndCancelJob}
 */
public void testMapCancelling() throws Exception {
  GenericDataSource<InfiniteIntegerInputFormat> integers =
      new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

  MapOperator identityMapper =
      MapOperator.builder(IdentityMapper.class).input(integers).name("Identity Mapper").build();

  GenericDataSink discard =
      new GenericDataSink(new DiscardingOutputFormat(), identityMapper, "Sink");

  Plan plan = new Plan(discard);
  plan.setDefaultParallelism(4);

  // The meaning of the two numeric arguments is defined by runAndCancelJob (not visible here).
  runAndCancelJob(plan, 5 * 1000, 10 * 1000);
}

/**
 * Builds a source -> {@code LongCancelTimeIdentityMapper} -> discarding sink plan with default
 * parallelism 4 and passes it to {@code runAndCancelJob}.
 *
 * @throws Exception propagated from {@code runAndCancelJob}
 */
public void testMapWithLongCancellingResponse() throws Exception {
  GenericDataSource<InfiniteIntegerInputFormat> integers =
      new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

  MapOperator slowCancelMapper =
      MapOperator.builder(LongCancelTimeIdentityMapper.class)
          .input(integers)
          .name("Long Cancelling Time Mapper")
          .build();

  GenericDataSink discard =
      new GenericDataSink(new DiscardingOutputFormat(), slowCancelMapper, "Sink");

  Plan plan = new Plan(discard);
  plan.setDefaultParallelism(4);

  // The meaning of the two numeric arguments is defined by runAndCancelJob (not visible here).
  runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}

/**
 * Builds a source -> {@code StuckInOpenIdentityMapper} -> discarding sink plan with default
 * parallelism 4 and passes it to {@code runAndCancelJob}.
 *
 * @throws Exception propagated from {@code runAndCancelJob}
 */
public void testMapPriorToFirstRecordReading() throws Exception {
  GenericDataSource<InfiniteIntegerInputFormat> integers =
      new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

  MapOperator stuckMapper =
      MapOperator.builder(StuckInOpenIdentityMapper.class)
          .input(integers)
          .name("Stuck-In-Open Mapper")
          .build();

  GenericDataSink discard = new GenericDataSink(new DiscardingOutputFormat(), stuckMapper, "Sink");

  Plan plan = new Plan(discard);
  plan.setDefaultParallelism(4);

  // The meaning of the two numeric arguments is defined by runAndCancelJob (not visible here).
  runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}

Javadoc

Sets the name of this operator.

Popular methods of MapOperator$Builder

build
Creates and returns a MapOperator using the values given to the builder.
input
<init>
setBroadcastVariable
Binds the result produced by a plan rooted at root to a variable used by the UDF wrapped in this operator.

Popular in Java

Start an intent from android
onRequestPermissionsResult (Fragment)
onCreateOptionsMenu (Activity)
getSystemService (Context)
UnknownHostException (java.net)
Thrown when a hostname can not be resolved.
Date (java.util)
A specific moment in time, with millisecond precision. Values typically come from System#currentTime
Hashtable (java.util)
A plug-in replacement for JDK1.5 java.util.Hashtable. This version is based on org.cliffc.high_scale
Iterator (java.util)
An iterator over a sequence of objects, such as a collection. If a collection has been changed since
JTextField (javax.swing)
Loader (org.hibernate.loader)
Abstract superclass of object loading (and querying) strategies. This class implements useful common
Top 12 Jupyter Notebook extensions

How to use the name method in eu.stratosphere.api.java.record.operators.MapOperator$Builder

Best Java code snippets using eu.stratosphere.api.java.record.operators.MapOperator$Builder.name (Showing top 20 results out of 315)

How to use
name
method
in
eu.stratosphere.api.java.record.operators.MapOperator$Builder