eu.stratosphere.api.java.record.io.CsvOutputFormat$ConfigBuilder.field java code examples

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(StringValue.class, 0)
.field(IntValue.class, 1);

/**
 * Assembles a plan that joins an iterator-backed collection source with an in-memory
 * collection source and writes the joined records through a {@code CsvOutputFormat}.
 *
 * @param numSubTasks default parallelism applied to the resulting plan
 * @param output      path handed to the file sink
 * @return the plan rooted at the CSV sink
 */
public Plan getPlan(int numSubTasks, String output) {
  // Build one {id, count} record per line of the reference word counts; the id is
  // simply the zero-based line index and the count is the second space-separated token.
  final String[] countLines = WordCountData.COUNTS.split("\n");
  final List<Object> idCountRecords = new ArrayList<Object>();
  for (int id = 0; id < countLines.length; id++) {
    final List<Object> record = new ArrayList<Object>();
    record.add(id);
    record.add(Integer.parseInt(countLines[id].split(" ")[1]));
    idCountRecords.add(record);
  }

  // First input: serializable iterator, records are {id, word}.
  final CollectionDataSource iteratorSource =
      new CollectionDataSource(new SerializableIteratorTest(), "test_iterator");
  // Second input: plain collection, records are {id, count}.
  final CollectionDataSource collectionSource =
      new CollectionDataSource(idCountRecords, "test_collection");

  // Join both inputs; the builder is given IntValue with positions 0 and 0.
  final JoinOperator joined = JoinOperator.builder(Join.class, IntValue.class, 0, 0)
      .input1(iteratorSource)
      .input2(collectionSource)
      .build();

  // Sink: newline-terminated records, space-separated fields (StringValue, IntValue).
  final FileDataSink sink = new FileDataSink(new CsvOutputFormat(), output, joined, "Collection Join");
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  final Plan result = new Plan(sink, "CollectionDataSource");
  result.setDefaultParallelism(numSubTasks);
  return result;
}

/**
 * Builds a plan that reads single-column integer records from a CSV file and writes them
 * back out with a global ascending order on field 0.
 *
 * <p>Arguments, all optional: {@code args[0]} degree of parallelism (default 1),
 * {@code args[1]} input path (default empty), {@code args[2]} output path (default empty).
 *
 * @param args program parameters as described above
 * @return the plan rooted at the ordered sink
 * @throws IllegalArgumentException declared by the signature; a non-numeric {@code args[0]}
 *         surfaces as the {@code NumberFormatException} raised by {@code Integer.parseInt}
 */
@Override
public Plan getPlan(String... args) throws IllegalArgumentException {
  // Resolve program parameters, falling back to defaults for anything not supplied.
  int numSubtasks = 1;
  if (args.length > 0) {
    numSubtasks = Integer.parseInt(args[0]);
  }
  String recordsPath = "";
  if (args.length > 1) {
    recordsPath = args[1];
  }
  String output = "";
  if (args.length > 2) {
    output = args[2];
  }

  // Source: '|'-separated records, one IntValue column.
  final FileDataSource records = new FileDataSource(CsvInputFormat.class, recordsPath);
  records.setDegreeOfParallelism(numSubtasks);
  CsvInputFormat.configureRecordFormat(records)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(IntValue.class, 0);

  // Sink: same layout as the source, with the lenient flag switched on.
  final FileDataSink sorted = new FileDataSink(CsvOutputFormat.class, output);
  sorted.setDegreeOfParallelism(numSubtasks);
  CsvOutputFormat.configureRecordFormat(sorted)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .lenient(true)
      .field(IntValue.class, 0);

  // Global ascending order on field 0, with a uniform distribution over the whole int range.
  final Ordering ascendingOnFirstField = new Ordering(0, IntValue.class, Order.ASCENDING);
  final UniformIntegerDistribution fullIntRange =
      new UniformIntegerDistribution(Integer.MIN_VALUE, Integer.MAX_VALUE);
  sorted.setGlobalOrder(ascendingOnFirstField, fullIntRange);
  sorted.setInput(records);

  return new Plan(sorted);
}

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(LongValue.class, 0)
.field(DoubleValue.class, 1);

.fieldDelimiter(' ')
.lenient(true)
.field(StringValue.class, 0)
.field(IntValue.class, 1);

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(LongValue.class, 0)
.field(LongValue.class, 1);

.fieldDelimiter('|')
.lenient(true)
.field(IntValue.class, 0)
.field(IntValue.class, 1)
.field(IntValue.class, 2);

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(LongValue.class, 0)
.field(LongValue.class, 1);

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(LongValue.class, 0)
.field(LongValue.class, 1);

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(LongValue.class, 0)
.field(LongValue.class, 1);

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(StringValue.class, 0)
.field(IntValue.class, 1);

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(StringValue.class, 0)
.field(StringValue.class, 1)
.field(StringValue.class, 2);

.fieldDelimiter(',')
.lenient(true)
.field(IntValue.class, 0)
.field(IntValue.class, 1)
.field(IntValue.class, 2);

/**
 * Builds the "SecondarySort Example" plan: records of two integers are grouped on field 0
 * by an {@code IdentityReducer}, with each group ordered ascending on field 1, and then
 * written as space-separated CSV.
 *
 * <p>Arguments, all optional: {@code args[0]} parallelism (default 1), {@code args[1]}
 * input path (default empty), {@code args[2]} output path (default empty).
 *
 * @param args job parameters as described above
 * @return the configured plan
 */
@Override
public Plan getPlan(String... args) {
  // Job parameters, each with a fallback when the argument is absent.
  int numSubTasks = 1;
  if (args.length > 0) {
    numSubTasks = Integer.parseInt(args[0]);
  }
  String dataInput = "";
  if (args.length > 1) {
    dataInput = args[1];
  }
  String output = "";
  if (args.length > 2) {
    output = args[2];
  }

  // Input: space-delimited pairs of IntValue.
  @SuppressWarnings("unchecked")
  CsvInputFormat pairFormat = new CsvInputFormat(' ', IntValue.class, IntValue.class);
  final FileDataSource pairs = new FileDataSource(pairFormat, dataInput, "Input");

  // Reduce contract keyed on the first field.
  final ReduceOperator groupByFirst =
      ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0)
          .input(pairs)
          .name("Reducer")
          .build();
  // Within each group, order by the second field.
  groupByFirst.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING));

  // Output: newline-terminated records with two space-separated IntValue fields.
  final FileDataSink sink =
      new FileDataSink(new CsvOutputFormat(), output, groupByFirst, "Sorted Output");
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(IntValue.class, 0)
      .field(IntValue.class, 1);

  final Plan result = new Plan(sink, "SecondarySort Example");
  result.setDefaultParallelism(numSubTasks);
  return result;
}

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(StringValue.class, 0);

.recordDelimiter('\n')
.fieldDelimiter(' ')
.field(StringValue.class, 0);

/**
 * Creates the test plan: a bulk iteration (at most 5 rounds) over text input whose step
 * function is a key-less reduce using {@code PickOneReducer}, followed by a CSV sink.
 *
 * <p>The method asserts that both the iteration limit and the plan's default parallelism
 * are greater than 1, so callers must pass {@code numSubTasks > 1}.
 *
 * @param numSubTasks default parallelism for the plan
 * @param input       path of the text input
 * @param output      path for the CSV result
 * @return the configured plan
 */
private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
  final FileDataSource textSource = new FileDataSource(TextInputFormat.class, input, "input");

  // Iteration head: fed by the text source, capped at five rounds.
  final BulkIteration loop = new BulkIteration("Loop");
  loop.setInput(textSource);
  loop.setMaximumNumberOfIterations(5);

  Assert.assertTrue(loop.getMaximumNumberOfIterations() > 1);

  // Step function: a reducer built without a key field, applied to the partial solution.
  final ReduceOperator stepReducer = ReduceOperator.builder(new PickOneReducer())
      .input(loop.getPartialSolution())
      .name("Compute sum (Reduce)")
      .build();
  loop.setNextPartialSolution(stepReducer);

  // Sink: one StringValue field per newline-terminated record.
  final FileDataSink sink = new FileDataSink(CsvOutputFormat.class, output, loop, "Output");
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0);

  final Plan testPlan = new Plan(sink, "Iteration with AllReducer (keyless Reducer)");
  testPlan.setDefaultParallelism(numSubTasks);
  Assert.assertTrue(testPlan.getDefaultParallelism() > 1);

  return testPlan;
}

/**
 * Builds the group-order test job: comma-separated integer pairs are reduced by
 * {@code CheckingReducer} keyed on field 0, with groups ordered ascending on field 1, and
 * the result is written as comma-separated CSV.
 *
 * <p>Parallelism is read from the config key {@code "GroupOrderTest#NumSubtasks"}
 * (default 1).
 *
 * @return the configured plan
 */
@Override
protected Plan getTestJob() {
  final int parallelism = this.config.getInteger("GroupOrderTest#NumSubtasks", 1);

  // Source: two IntValue columns separated by ','.
  @SuppressWarnings("unchecked")
  CsvInputFormat pairFormat = new CsvInputFormat(',', IntValue.class, IntValue.class);
  final FileDataSource pairs = new FileDataSource(pairFormat, this.textPath, "Source");

  // Reducer keyed on field 0; each group is ordered ascending on field 1.
  final ReduceOperator orderedReducer = ReduceOperator.builder(CheckingReducer.class)
      .keyField(IntValue.class, 0)
      .input(pairs)
      .name("Ordered Reducer")
      .build();
  orderedReducer.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING));

  // Sink mirrors the input layout.
  final FileDataSink result =
      new FileDataSink(CsvOutputFormat.class, this.resultPath, orderedReducer, "Sink");
  CsvOutputFormat.configureRecordFormat(result)
      .recordDelimiter('\n')
      .fieldDelimiter(',')
      .field(IntValue.class, 0)
      .field(IntValue.class, 1);

  final Plan job = new Plan(result);
  job.setDefaultParallelism(parallelism);
  return job;
}

/**
 * Builds the "Distributed Cache" plan: text lines are passed through a
 * {@code TokenizeLine} mapper and written as space-separated (StringValue, IntValue) CSV.
 *
 * @param numSubTasks default parallelism for the plan
 * @param dataInput   path of the text input
 * @param output      path for the CSV result
 * @return the configured plan
 */
public Plan getPlan(int numSubTasks, String dataInput, String output) {
  // Per the original author's note, the input holds {word, count} pairs.
  final FileDataSource lines = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

  // Original note: the mapper performs a selection using the cached file.
  final MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
      .input(lines)
      .name("Tokenize Lines")
      .build();

  final FileDataSink sink = new FileDataSink(new CsvOutputFormat(), output, tokenizer, "Selection");
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  final Plan result = new Plan(sink, "Distributed Cache");
  result.setDefaultParallelism(numSubTasks);
  return result;
}

/**
 * Builds the "WordCount Example" plan: text lines go through a {@code TokenizeLine}
 * mapper, are reduced by {@code CountWords} keyed on the StringValue at field 0, and are
 * written as space-separated (StringValue, IntValue) CSV.
 *
 * <p>Arguments, all optional: {@code args[0]} parallelism (default 1), {@code args[1]}
 * input path (default empty), {@code args[2]} output path (default empty).
 *
 * @param args job parameters as described above
 * @return the configured plan
 */
@Override
public Plan getPlan(String... args) {
  // Job parameters with defaults for missing arguments.
  int numSubTasks = 1;
  if (args.length > 0) {
    numSubTasks = Integer.parseInt(args[0]);
  }
  String dataInput = "";
  if (args.length > 1) {
    dataInput = args[1];
  }
  String output = "";
  if (args.length > 2) {
    output = args[2];
  }

  final FileDataSource lines = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

  final MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
      .input(lines)
      .name("Tokenize Lines")
      .build();

  final ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
      .input(tokenizer)
      .name("Count Words")
      .build();

  final FileDataSink sink = new FileDataSink(new CsvOutputFormat(), output, counter, "Word Counts");
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  final Plan result = new Plan(sink, "WordCount Example");
  result.setDefaultParallelism(numSubTasks);
  return result;
}

Popular methods of CsvOutputFormat$ConfigBuilder

fieldDelimiter
recordDelimiter
<init>
Creates a new builder for the given configuration.
lenient

Popular in Java

Running tasks concurrently on multiple threads
setRequestProperty (URLConnection)
getSharedPreferences (Context)
getSupportFragmentManager (FragmentActivity)
MalformedURLException (java.net)
This exception is thrown when a program attempts to create a URL from an incorrect specification.
Proxy (java.net)
This class represents proxy server settings. A created instance of Proxy stores a type and an addres
ResultSet (java.sql)
An interface for an object which represents a database table entry, returned as the result of the qu
Dictionary (java.util)
Note: Do not use this class since it is obsolete. Please use the Map interface for new implementatio
ThreadPoolExecutor (java.util.concurrent)
An ExecutorService that executes each submitted task using one of possibly several pooled threads, n
Base64 (org.apache.commons.codec.binary)
Provides Base64 encoding and decoding as defined by RFC 2045. This class implements section 6.8. Base…
Github Copilot alternatives

How to use the field method in eu.stratosphere.api.java.record.io.CsvOutputFormat$ConfigBuilder

Best Java code snippets using eu.stratosphere.api.java.record.io.CsvOutputFormat$ConfigBuilder.field (Showing top 20 results out of 315)

How to use
field
method
in
eu.stratosphere.api.java.record.io.CsvOutputFormat$ConfigBuilder