.name("Count Words") .build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, reducer, "Word Counts"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n')
public Plan getPlan(int numSubTasks, String output) { List<Object> tmp = new ArrayList<Object>(); int pos = 0; for (String s : WordCountData.COUNTS.split("\n")) { List<Object> tmpInner = new ArrayList<Object>(); tmpInner.add(pos++); tmpInner.add(Integer.parseInt(s.split(" ")[1])); tmp.add(tmpInner); } // test serializable iterator input, the input record is {id, word} CollectionDataSource source = new CollectionDataSource(new SerializableIteratorTest(), "test_iterator"); // test collection input, the input record is {id, count} CollectionDataSource source2 = new CollectionDataSource(tmp, "test_collection"); JoinOperator join = JoinOperator.builder(Join.class, IntValue.class, 0, 0) .input1(source).input2(source2).build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, join, "Collection Join"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(StringValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "CollectionDataSource"); plan.setDefaultParallelism(numSubTasks); return plan; }
/**
 * Builds a bulk-iteration plan whose step function is a key-less (all-)reduce:
 * each round the entire partial solution is summed by {@code SumReducer}.
 *
 * @param numSubTasks default parallelism for the plan
 * @param input       path of the text input
 * @param output      path of the CSV result file
 * @return the assembled plan
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    // Initial data set, read as plain text lines.
    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
    // Iteration runs at most NUM_ITERATIONS rounds over the partial solution.
    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(initialInput);
    loop.setMaximumNumberOfIterations(NUM_ITERATIONS);
    // Reducer without a key: aggregates the whole partial solution each round.
    ReduceOperator totalSum = ReduceOperator.builder(new SumReducer())
        .input(loop.getPartialSolution())
        .name("Compute sum (Reduce)")
        .build();
    loop.setNextPartialSolution(totalSum);
    @SuppressWarnings("unchecked")
    FileDataSink finalResult =
        new FileDataSink(new CsvOutputFormat("\n", " ", StringValue.class), output, loop, "Output");
    Plan plan = new Plan(finalResult, "Iteration with AllReducer (keyless Reducer)");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines"); MapOperator mapper = MapOperator.builder(new TokenizeLine()) .input(source) .name("Tokenize Lines") .build(); ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0) .input(mapper) .name("Count Words") .build(); @SuppressWarnings("unchecked") FileDataSink out = new FileDataSink(new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class), output, reducer, "Word Counts"); Plan plan = new Plan(out, "WordCount Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
/**
 * Creates and configures the file-output vertex of the job graph: one input
 * channel with the given serializer, writing three long fields per record
 * as space-separated, newline-terminated CSV to {@code resultPath}.
 *
 * @param jobGraph    graph the output vertex is added to
 * @param resultPath  destination path of the CSV file
 * @param numSubTasks degree of parallelism (and subtasks per instance)
 * @param serializer  serializer for the incoming records
 * @return the configured output vertex
 */
private static JobOutputVertex createOutput(JobGraph jobGraph, String resultPath, int numSubTasks,
        TypeSerializerFactory<?> serializer) {
    JobOutputVertex outputVertex = JobGraphUtils.createFileOutput(jobGraph, "Output", numSubTasks, numSubTasks);
    {
        TaskConfig config = new TaskConfig(outputVertex.getConfiguration());
        config.addInputToGroup(0);
        config.setInputSerializer(serializer, 0);
        @SuppressWarnings("unchecked")
        CsvOutputFormat format =
            new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class, LongValue.class);
        format.setOutputFilePath(new Path(resultPath));
        config.setStubWrapper(new UserCodeObjectWrapper<CsvOutputFormat>(format));
    }
    return outputVertex;
}
/**
 * Builds a word-count plan that reads its input as ASCII-encoded text.
 *
 * @param numSubTasks default parallelism for the plan
 * @param input       path of the text input
 * @param output      path of the CSV result file
 * @return the assembled plan, named "WordCount Example"
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    FileDataSource lines = new FileDataSource(new TextInputFormat(), input, "Input Lines");
    // Force ASCII decoding of the input file.
    lines.setParameter(TextInputFormat.CHARSET_NAME, "ASCII");
    // Split each line into (word, 1) records.
    MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
        .input(lines)
        .name("Tokenize Lines")
        .build();
    // Group by the word (field 0) and sum the counts.
    ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
        .input(tokenizer)
        .name("Count Words")
        .build();
    @SuppressWarnings("unchecked")
    FileDataSink sink = new FileDataSink(
        new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class), output, counter, "Word Counts");
    Plan plan = new Plan(sink, "WordCount Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
/**
 * Builds the word-count plan, configuring the CSV sink via
 * {@code configureRecordFormat} rather than the typed constructor.
 *
 * @param args optional positional parameters: [0] parallelism (default 1),
 *             [1] input path (default ""), [2] output path (default "")
 * @return the assembled plan, named "WordCount Example"
 */
@Override
public Plan getPlan(String... args) {
    // Parse the job parameters, falling back to defaults when absent.
    int parallelism = args.length > 0 ? Integer.parseInt(args[0]) : 1;
    String inputPath = args.length > 1 ? args[1] : "";
    String outputPath = args.length > 2 ? args[2] : "";
    FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");
    // Split each line into (word, 1) records.
    MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
        .input(lines)
        .name("Tokenize Lines")
        .build();
    // Group by the word (field 0) and sum the counts.
    ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
        .input(tokenizer)
        .name("Count Words")
        .build();
    FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, counter, "Word Counts");
    // Output format: "<word> <count>\n" per record.
    CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(' ')
        .field(StringValue.class, 0)
        .field(IntValue.class, 1);
    Plan plan = new Plan(sink, "WordCount Example");
    plan.setDefaultParallelism(parallelism);
    return plan;
}
public Plan getPlan(int numSubTasks, String dataInput, String output) { // input is {word, count} pair FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines"); //do a selection using cached file MapOperator mapper = MapOperator.builder(new TokenizeLine()) .input(source) .name("Tokenize Lines") .build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, mapper, "Selection"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(StringValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "Distributed Cache"); plan.setDefaultParallelism(numSubTasks); return plan; }
@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); @SuppressWarnings("unchecked") CsvInputFormat format = new CsvInputFormat(' ', IntValue.class, IntValue.class); FileDataSource input = new FileDataSource(format, dataInput, "Input"); // create the reduce contract and sets the key to the first field ReduceOperator sorter = ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0) .input(input) .name("Reducer") .build(); // sets the group sorting to the second field sorter.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING)); // create and configure the output format FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, sorter, "Sorted Output"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(IntValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "SecondarySort Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, joinLiO, "Output"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
closeTriads.setParameter("LOCAL_STRATEGY", "LOCAL_STRATEGY_HASH_BUILD_SECOND"); FileDataSink triangles = new FileDataSink(new CsvOutputFormat(), output, "Output"); CsvOutputFormat.configureRecordFormat(triangles) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, "Output"); result.setDegreeOfParallelism(numSubtasks); CsvOutputFormat.configureRecordFormat(result)
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class), output, iteration, "Result");
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')
.name("Count Words") .build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), OUT_FILE, reduceNode, "Word Counts"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n')
FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, aggLiO, "Output"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n')