.name("Count Words") .build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, reducer, "Word Counts"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n')
public Plan getPlan(int numSubTasks, String output) { List<Object> tmp = new ArrayList<Object>(); int pos = 0; for (String s : WordCountData.COUNTS.split("\n")) { List<Object> tmpInner = new ArrayList<Object>(); tmpInner.add(pos++); tmpInner.add(Integer.parseInt(s.split(" ")[1])); tmp.add(tmpInner); } // test serializable iterator input, the input record is {id, word} CollectionDataSource source = new CollectionDataSource(new SerializableIteratorTest(), "test_iterator"); // test collection input, the input record is {id, count} CollectionDataSource source2 = new CollectionDataSource(tmp, "test_collection"); JoinOperator join = JoinOperator.builder(Join.class, IntValue.class, 0, 0) .input1(source).input2(source2).build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, join, "Collection Join"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(StringValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "CollectionDataSource"); plan.setDefaultParallelism(numSubTasks); return plan; }
/**
 * Builds a bulk-iteration plan whose step function is a key-less (all-)reduce:
 * each round the entire partial solution is summed by {@code SumReducer}.
 *
 * @param numSubTasks default parallelism for the plan
 * @param input       path of the text input
 * @param output      path of the CSV result file
 * @return the assembled plan
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    // Initial data set, read as plain text lines.
    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
    // Iteration runs at most NUM_ITERATIONS rounds over the partial solution.
    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(initialInput);
    loop.setMaximumNumberOfIterations(NUM_ITERATIONS);
    // Reducer without a key: aggregates the whole partial solution each round.
    ReduceOperator totalSum = ReduceOperator.builder(new SumReducer())
        .input(loop.getPartialSolution())
        .name("Compute sum (Reduce)")
        .build();
    loop.setNextPartialSolution(totalSum);
    @SuppressWarnings("unchecked")
    FileDataSink finalResult =
        new FileDataSink(new CsvOutputFormat("\n", " ", StringValue.class), output, loop, "Output");
    Plan plan = new Plan(finalResult, "Iteration with AllReducer (keyless Reducer)");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines"); MapOperator mapper = MapOperator.builder(new TokenizeLine()) .input(source) .name("Tokenize Lines") .build(); ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0) .input(mapper) .name("Count Words") .build(); @SuppressWarnings("unchecked") FileDataSink out = new FileDataSink(new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class), output, reducer, "Word Counts"); Plan plan = new Plan(out, "WordCount Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
/**
 * Creates and configures the file-output vertex of the job graph: one input
 * channel with the given serializer, writing three long fields per record
 * as space-separated, newline-terminated CSV to {@code resultPath}.
 *
 * @param jobGraph    graph the output vertex is added to
 * @param resultPath  destination path of the CSV file
 * @param numSubTasks degree of parallelism (and subtasks per instance)
 * @param serializer  serializer for the incoming records
 * @return the configured output vertex
 */
private static JobOutputVertex createOutput(JobGraph jobGraph, String resultPath, int numSubTasks,
        TypeSerializerFactory<?> serializer) {
    JobOutputVertex outputVertex = JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);
    {
        TaskConfig config = new TaskConfig(outputVertex.getConfiguration());
        config.addInputToGroup(0);
        config.setInputSerializer(serializer, 0);
        @SuppressWarnings("unchecked")
        CsvOutputFormat format =
            new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class, LongValue.class);
        format.setOutputFilePath(new Path(resultPath));
        config.setStubWrapper(new UserCodeObjectWrapper<CsvOutputFormat>(format));
    }
    return outputVertex;
}
/**
 * Builds a word-count plan that reads its input as ASCII-encoded text.
 *
 * @param numSubTasks default parallelism for the plan
 * @param input       path of the text input
 * @param output      path of the CSV result file
 * @return the assembled plan, named "WordCount Example"
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    FileDataSource lines = new FileDataSource(new TextInputFormat(), input, "Input Lines");
    // Force ASCII decoding of the input file.
    lines.setParameter(TextInputFormat.CHARSET_NAME, "ASCII");
    // Split each line into (word, 1) records.
    MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
        .input(lines)
        .name("Tokenize Lines")
        .build();
    // Group by the word (field 0) and sum the counts.
    ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
        .input(tokenizer)
        .name("Count Words")
        .build();
    @SuppressWarnings("unchecked")
    FileDataSink sink = new FileDataSink(
        new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class), output, counter, "Word Counts");
    Plan plan = new Plan(sink, "WordCount Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
/**
 * Builds the word-count plan, configuring the CSV sink via
 * {@code configureRecordFormat} rather than the typed constructor.
 *
 * @param args optional positional parameters: [0] parallelism (default 1),
 *             [1] input path (default ""), [2] output path (default "")
 * @return the assembled plan, named "WordCount Example"
 */
@Override
public Plan getPlan(String... args) {
    // Parse the job parameters, falling back to defaults when absent.
    int parallelism = args.length > 0 ? Integer.parseInt(args[0]) : 1;
    String inputPath = args.length > 1 ? args[1] : "";
    String outputPath = args.length > 2 ? args[2] : "";
    FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");
    // Split each line into (word, 1) records.
    MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
        .input(lines)
        .name("Tokenize Lines")
        .build();
    // Group by the word (field 0) and sum the counts.
    ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
        .input(tokenizer)
        .name("Count Words")
        .build();
    FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, counter, "Word Counts");
    // Output format: "<word> <count>\n" per record.
    CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(' ')
        .field(StringValue.class, 0)
        .field(IntValue.class, 1);
    Plan plan = new Plan(sink, "WordCount Example");
    plan.setDefaultParallelism(parallelism);
    return plan;
}
public Plan getPlan(int numSubTasks, String dataInput, String output) { // input is {word, count} pair FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines"); //do a selection using cached file MapOperator mapper = MapOperator.builder(new TokenizeLine()) .input(source) .name("Tokenize Lines") .build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, mapper, "Selection"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(StringValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "Distributed Cache"); plan.setDefaultParallelism(numSubTasks); return plan; }
@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); @SuppressWarnings("unchecked") CsvInputFormat format = new CsvInputFormat(' ', IntValue.class, IntValue.class); FileDataSource input = new FileDataSource(format, dataInput, "Input"); // create the reduce contract and sets the key to the first field ReduceOperator sorter = ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0) .input(input) .name("Reducer") .build(); // sets the group sorting to the second field sorter.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING)); // create and configure the output format FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, sorter, "Sorted Output"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(IntValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "SecondarySort Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, joinLiO, "Output"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
closeTriads.setParameter("LOCAL_STRATEGY", "LOCAL_STRATEGY_HASH_BUILD_SECOND"); FileDataSink triangles = new FileDataSink(new CsvOutputFormat(), output, "Output"); CsvOutputFormat.configureRecordFormat(triangles) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, "Output"); result.setDegreeOfParallelism(numSubtasks); CsvOutputFormat.configureRecordFormat(result)
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class), output, iteration, "Result");
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
.name("Count Words") .build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), OUT_FILE, reduceNode, "Word Counts"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, aggLiO, "Output"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')