eu.stratosphere.api.java.record.operators.MapOperator$Builder.input java code examples

/**
 * Builds the "Hadoop OutputFormat Example" plan: a Hadoop-backed text source feeds the
 * {@code TokenizeLine} mapper, whose output goes to the {@code CountWords} reducer and
 * finally into a Hadoop {@code TextOutputFormat} sink.
 *
 * @param args optional positional arguments; index 0 is parsed as the default parallelism
 *             (1 when absent), index 1 is the input path and index 2 the output path
 *             (both the empty string when absent)
 * @return the plan rooted at the Hadoop sink
 */
@Override
public Plan getPlan(String... args) {
  // Resolve the job parameters, falling back to defaults for missing positions.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  // Source: Hadoop text input; the input path is registered on the source's own JobConf.
  HadoopDataSource<LongWritable, Text> lines =
      new HadoopDataSource<LongWritable, Text>(new TextInputFormat(), new JobConf(), "Input Lines");
  TextInputFormat.addInputPath(lines.getJobConf(), new Path(inputPath));

  MapOperator tokenizer = MapOperator.builder(new TokenizeLine()).input(lines).name("Tokenize Lines").build();

  ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
      .input(tokenizer).name("Count Words").build();

  // Sink: Hadoop text output; the output path is registered on the sink's own JobConf.
  HadoopDataSink<Text, IntWritable> sink = new HadoopDataSink<Text, IntWritable>(
      new TextOutputFormat<Text, IntWritable>(),
      new JobConf(),
      "Hadoop TextOutputFormat",
      counter,
      Text.class,
      IntWritable.class);
  TextOutputFormat.setOutputPath(sink.getJobConf(), new Path(outputPath));

  Plan result = new Plan(sink, "Hadoop OutputFormat Example");
  result.setDefaultParallelism(parallelism);
  return result;
}

.input(source)
.name("Tokenize Lines")
.build();

.input(edges).name("Project Edge").build();

FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();
  .input(dataPoints)
  .name("Find Nearest Centers")
  .build();

FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();
  .input(dataPoints)
  .name("Find Nearest Centers")
  .build();

.input(edges).name("Project Edge").build();
.input(countJoiner).name("Select lower-degree Edge").build();
.input(countJoiner).name("Project out Counts").build();

.input(sumReduce)
.name("Compute termination criterion (Map)")
.build();

.input(iteration.getPartialSolution())
.name("Compute termination criterion (Map)")
.build();

/**
 * Builds the test plan: three CSV sources, two joins, and two maps where the result of the
 * first map is bound to the second map as broadcast variable {@code "z"}.
 *
 * @return the plan rooted at a single file sink writing to {@code resultPath}
 */
@Override
protected Plan getTestJob() {
  // Source 1: M parameter sets (a, b, c) of second-degree polynomials
  // P(x) = ax^2 + bx + c, each identified by an id.
  FileDataSource polynomials = new FileDataSource(new CsvInputFormat(), sc1Path);
  CsvInputFormat.configureRecordFormat(polynomials)
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1)
      .field(IntValue.class, 2)
      .field(IntValue.class, 3);

  // Source 2: N x values to be evaluated with the polynomial identified by id.
  FileDataSource xValues = new FileDataSource(new CsvInputFormat(), sc2Path);
  CsvInputFormat.configureRecordFormat(xValues)
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  // Source 3: N y values to be evaluated with the polynomial identified by id.
  FileDataSource yValues = new FileDataSource(new CsvInputFormat(), sc3Path);
  CsvInputFormat.configureRecordFormat(yValues)
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  // First join: matches x and y values on id and emits (id, x, y) triples.
  JoinOperator xyJoin = JoinOperator.builder(Jn1.class, StringValue.class, 0, 0)
      .input1(xValues)
      .input2(yValues)
      .build();

  // Second join: matches polynomial and arguments by id, computes p = min(P(x), P(y))
  // and emits (id, p) tuples.
  JoinOperator evalJoin = JoinOperator.builder(Jn2.class, StringValue.class, 0, 0)
      .input1(xyJoin)
      .input2(polynomials)
      .build();

  // First map: selects (id, x, y) triples where x = y; its output z (= x = y) is
  // broadcast to the second map.
  MapOperator diagonal = MapOperator.builder(Mp1.class)
      .input(xyJoin)
      .build();

  // Second map: filters out all p values which can be divided by z.
  MapOperator filtered = MapOperator.builder(Mp2.class)
      .setBroadcastVariable("z", diagonal)
      .input(evalJoin)
      .build();

  FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
  sink.setDegreeOfParallelism(1);
  sink.setInput(filtered);

  return new Plan(sink);
}

/**
 * Builds the "WordCount Example" plan: text file source, {@code TokenizeLine} mapper,
 * {@code CountWords} reducer, and a CSV file sink.
 *
 * @param args optional positional arguments; index 0 is parsed as the default parallelism
 *             (1 when absent), index 1 is the input path and index 2 the output path
 *             (both the empty string when absent)
 * @return the plan rooted at the CSV sink
 */
@Override
public Plan getPlan(String... args) {
  // Resolve the job parameters, using defaults for any position not supplied.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");

  MapOperator tokenizer = MapOperator.builder(new TokenizeLine()).input(lines).name("Tokenize Lines").build();

  ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
      .input(tokenizer).name("Count Words").build();

  // The CSV format is constructed with "\n" and " " followed by the two field types.
  @SuppressWarnings("unchecked")
  FileDataSink sink = new FileDataSink(
      new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class),
      outputPath,
      counter,
      "Word Counts");

  Plan result = new Plan(sink, "WordCount Example");
  result.setDefaultParallelism(parallelism);
  return result;
}

  /**
   * Builds the "Iteration with chained map test" plan: a bulk iteration (at most two
   * iterations) whose step is a {@code DummyReducer} followed by an {@code IdentityMapper}.
   *
   * @param numSubTasks default parallelism applied to the plan
   * @param input path handed to the file source
   * @param output path handed to the file sink
   * @return the plan rooted at the file sink
   */
  static Plan getTestPlan(int numSubTasks, String input, String output) {
    // The source is pinned to a degree of parallelism of 1.
    FileDataSource points = new FileDataSource(new PointInFormat(), input, "Input");
    points.setDegreeOfParallelism(1);

    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(points);
    loop.setMaximumNumberOfIterations(2);

    // Step function: reducer over the partial solution, then a map on the reducer's output.
    ReduceOperator reduceStep = ReduceOperator.builder(new DummyReducer(), IntValue.class, 0)
        .input(loop.getPartialSolution()).name("Reduce something").build();
    MapOperator mapStep = MapOperator.builder(new IdentityMapper())
        .input(reduceStep)
        .build();
    loop.setNextPartialSolution(mapStep);

    FileDataSink sink = new FileDataSink(new PointOutFormat(), output, loop, "Output");

    Plan result = new Plan(sink, "Iteration with chained map test");
    result.setDefaultParallelism(numSubTasks);
    return result;
  }
}

/**
 * Builds the "Distributed Cache" plan: text file source, {@code TokenizeLine} mapper, and
 * a CSV file sink configured with a String field at position 0 and an int field at position 1.
 *
 * @param numSubTasks default parallelism applied to the plan
 * @param dataInput path handed to the file source
 * @param output path handed to the file sink
 * @return the plan rooted at the CSV sink
 */
public Plan getPlan(int numSubTasks, String dataInput, String output) {
  // The input is a {word, count} pair.
  FileDataSource lines = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

  // Do a selection using the cached file.
  MapOperator selector = MapOperator.builder(new TokenizeLine()).input(lines).name("Tokenize Lines").build();

  FileDataSink sink = new FileDataSink(new CsvOutputFormat(), output, selector, "Selection");
  CsvOutputFormat.configureRecordFormat(sink).recordDelimiter('\n').fieldDelimiter(' ')
      .field(StringValue.class, 0).field(IntValue.class, 1);

  Plan result = new Plan(sink, "Distributed Cache");
  result.setDefaultParallelism(numSubTasks);
  return result;
}

/**
 * Builds the "WordCount Example" plan over a text source whose charset parameter is set
 * to "ASCII": {@code TokenizeLine} mapper, {@code CountWords} reducer, CSV file sink.
 *
 * @param numSubTasks default parallelism applied to the plan
 * @param input path handed to the file source
 * @param output path handed to the file sink
 * @return the plan rooted at the CSV sink
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
  FileDataSource lines = new FileDataSource(new TextInputFormat(), input, "Input Lines");
  lines.setParameter(TextInputFormat.CHARSET_NAME, "ASCII");

  MapOperator tokenizer = MapOperator.builder(new TokenizeLine()).input(lines).name("Tokenize Lines").build();

  ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
      .input(tokenizer).name("Count Words").build();

  // The CSV format is constructed with "\n" and " " followed by the two field types.
  @SuppressWarnings("unchecked")
  FileDataSink sink = new FileDataSink(
      new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class),
      output,
      counter,
      "Word Counts");

  Plan result = new Plan(sink, "WordCount Example");
  result.setDefaultParallelism(numSubTasks);
  return result;
}

/**
 * Builds the "WordCount Example" plan: text file source, {@code TokenizeLine} mapper,
 * {@code CountWords} reducer, and a CSV file sink whose record format is configured
 * after construction.
 *
 * @param args optional positional arguments; index 0 is parsed as the default parallelism
 *             (1 when absent), index 1 is the input path and index 2 the output path
 *             (both the empty string when absent)
 * @return the plan rooted at the CSV sink
 */
@Override
public Plan getPlan(String... args) {
  // Resolve the job parameters, using defaults for any position not supplied.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");

  MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
      .input(lines)
      .name("Tokenize Lines")
      .build();

  ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
      .input(tokenizer)
      .name("Count Words")
      .build();

  FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, counter, "Word Counts");

  // Records are newline-delimited, fields space-delimited: a String at 0 and an int at 1.
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  Plan result = new Plan(sink, "WordCount Example");
  result.setDefaultParallelism(parallelism);
  return result;
}

/**
 * Builds the "Iteration with union test" plan: a bulk iteration (at most two iterations)
 * whose step is an {@code IdentityMapper} that receives the partial solution twice as input.
 *
 * @param numSubTasks default parallelism applied to the plan
 * @param input path handed to the file source
 * @param output path handed to the file sink
 * @return the plan rooted at the file sink
 */
private static Plan getPlan(int numSubTasks, String input, String output) {
  // The source is pinned to a degree of parallelism of 1.
  FileDataSource points = new FileDataSource(new PointInFormat(), input, "Input");
  points.setDegreeOfParallelism(1);

  BulkIteration loop = new BulkIteration("Loop");
  loop.setInput(points);
  loop.setMaximumNumberOfIterations(2);

  // The partial solution is passed as both inputs of the mapper.
  @SuppressWarnings("unchecked")
  MapOperator unionMap = MapOperator.builder(new IdentityMapper())
      .input(loop.getPartialSolution(), loop.getPartialSolution())
      .name("map")
      .build();
  loop.setNextPartialSolution(unionMap);

  FileDataSink sink = new FileDataSink(new PointOutFormat(), output, loop, "Output");

  Plan result = new Plan(sink, "Iteration with union test");
  result.setDefaultParallelism(numSubTasks);
  return result;
}

/**
 * Runs and cancels a job made of an {@code InfiniteIntegerInputFormat} source, a
 * {@code DelayingIdentityMapper}, and a discarding sink, at a default parallelism of 4.
 *
 * @throws Exception propagated from {@code runAndCancelJob}
 */
public void testSlowMapCancelling() throws Exception {
  GenericDataSource<InfiniteIntegerInputFormat> numbers =
      new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

  MapOperator delayMap = MapOperator.builder(DelayingIdentityMapper.class).input(numbers).name("Delay Mapper").build();

  GenericDataSink discard = new GenericDataSink(new DiscardingOutputFormat(), mapperOrSelf(delayMap), "Sink");

  Plan plan = new Plan(discard);
  plan.setDefaultParallelism(4);

  // NOTE(review): the two numeric arguments are presumably millisecond timings
  // (time until cancel, max wait) — confirm against runAndCancelJob.
  runAndCancelJob(plan, 5 * 1000, 10 * 1000);
}

/**
 * Runs and cancels a job made of an {@code InfiniteIntegerInputFormat} source, a
 * {@code LongCancelTimeIdentityMapper}, and a discarding sink, at a default parallelism of 4.
 *
 * @throws Exception propagated from {@code runAndCancelJob}
 */
public void testMapWithLongCancellingResponse() throws Exception {
  GenericDataSource<InfiniteIntegerInputFormat> numbers =
      new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

  MapOperator slowCancelMap = MapOperator.builder(LongCancelTimeIdentityMapper.class)
      .input(numbers).name("Long Cancelling Time Mapper").build();

  GenericDataSink discard = new GenericDataSink(new DiscardingOutputFormat(), slowCancelMap, "Sink");

  Plan plan = new Plan(discard);
  plan.setDefaultParallelism(4);

  // NOTE(review): the two numeric arguments are presumably millisecond timings
  // (time until cancel, max wait) — confirm against runAndCancelJob.
  runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}

/**
 * Assembles a source, mapper, reducer, and printing sink into a plan with a default
 * parallelism of 4 and runs it through {@code LocalExecutor}.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception propagated from plan execution
 */
public static void main(String[] args) throws Exception {
  GenericDataSource<UserGeneratingInputFormat> users =
      new GenericDataSource<UserGeneratingInputFormat>(UserGeneratingInputFormat.class);

  MapOperator extractor = MapOperator.builder(new NumberExtractingMapper())
      .input(users)
      .name("le mapper")
      .build();

  ReduceOperator concatenator = ReduceOperator.builder(new ConcatenatingReducer(), IntValue.class, 1)
      .input(extractor)
      .name("le reducer")
      .build();

  GenericDataSink printer = new GenericDataSink(PrintingOutputFormat.class, concatenator);

  Plan plan = new Plan(printer);
  plan.setDefaultParallelism(4);

  LocalExecutor.execute(plan);
}

/**
 * Runs and cancels a job made of an {@code InfiniteIntegerInputFormat} source, an
 * {@code IdentityMapper}, and a discarding sink, at a default parallelism of 4.
 *
 * @throws Exception propagated from {@code runAndCancelJob}
 */
public void testMapCancelling() throws Exception {
  GenericDataSource<InfiniteIntegerInputFormat> numbers =
      new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

  MapOperator identityMap = MapOperator.builder(IdentityMapper.class).input(numbers).name("Identity Mapper").build();

  GenericDataSink discard = new GenericDataSink(new DiscardingOutputFormat(), identityMap, "Sink");

  Plan plan = new Plan(discard);
  plan.setDefaultParallelism(4);

  // NOTE(review): the two numeric arguments are presumably millisecond timings
  // (time until cancel, max wait) — confirm against runAndCancelJob.
  runAndCancelJob(plan, 5 * 1000, 10 * 1000);
}

/**
 * Runs and cancels a job made of an {@code InfiniteIntegerInputFormat} source, a
 * {@code StuckInOpenIdentityMapper}, and a discarding sink, at a default parallelism of 4.
 *
 * @throws Exception propagated from {@code runAndCancelJob}
 */
public void testMapPriorToFirstRecordReading() throws Exception {
  GenericDataSource<InfiniteIntegerInputFormat> numbers =
      new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

  MapOperator stuckMap = MapOperator.builder(StuckInOpenIdentityMapper.class)
      .input(numbers).name("Stuck-In-Open Mapper").build();

  GenericDataSink discard = new GenericDataSink(new DiscardingOutputFormat(), stuckMap, "Sink");

  Plan plan = new Plan(discard);
  plan.setDefaultParallelism(4);

  // NOTE(review): the two numeric arguments are presumably millisecond timings
  // (time until cancel, max wait) — confirm against runAndCancelJob.
  runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}

Javadoc

Sets the input.

Popular methods of MapOperator$Builder

build
Creates and returns a MapOperator using the values given to the builder.
name
<init>
setBroadcastVariable
Binds the result produced by a plan rooted at root to a variable used by the UDF wrapped in this ope

Popular in Java

Parsing JSON documents to java classes using gson
getApplicationContext (Context)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
Pointer (com.sun.jna)
An abstraction for a native pointer data type. A Pointer instance represents, on the Java side, a na
RandomAccessFile (java.io)
Allows reading from and writing to a file in a random-access manner. This is different from the uni-
TimerTask (java.util)
The TimerTask class represents a task to run at a specified time. The task may be run once or repeat
ThreadPoolExecutor (java.util.concurrent)
An ExecutorService that executes each submitted task using one of possibly several pooled threads, n
GridBagLayout (java.awt)
The GridBagLayout class is a flexible layout manager that aligns components vertically and horizonta
Modifier (javassist)
The Modifier class provides static methods and constants to decode class and member access modifiers
Top PhpStorm plugins

How to use the input method in eu.stratosphere.api.java.record.operators.MapOperator$Builder

Best Java code snippets using eu.stratosphere.api.java.record.operators.MapOperator$Builder.input (Showing top 20 results out of 315)

How to use
input
method
in
eu.stratosphere.api.java.record.operators.MapOperator$Builder