@Override
public Plan getPlan(String... args) {
    // parse job parameters
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    HadoopDataSource<LongWritable, Text> source = new HadoopDataSource<LongWritable, Text>(
            new TextInputFormat(), new JobConf(), "Input Lines");
    TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));

    MapOperator mapper = MapOperator.builder(new TokenizeLine())
            .input(source)
            .name("Tokenize Lines")
            .build();

    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
            .input(mapper)
            .name("Count Words")
            .build();

    HadoopDataSink<Text, IntWritable> out = new HadoopDataSink<Text, IntWritable>(
            new TextOutputFormat<Text, IntWritable>(), new JobConf(), "Hadoop TextOutputFormat",
            reducer, Text.class, IntWritable.class);
    TextOutputFormat.setOutputPath(out.getJobConf(), new Path(output));

    Plan plan = new Plan(out, "Hadoop OutputFormat Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
@SuppressWarnings({ "deprecation", "unchecked" }) @Override protected Plan getTestJob() { String input1Path = config.getString("UnionTest#Input1Path", "").equals("empty") ? emptyInPath : inPath; String input2Path = config.getString("UnionTest#Input2Path", "").equals("empty") ? emptyInPath : inPath; FileDataSource input1 = new FileDataSource( new ContractITCaseInputFormat(), input1Path); DelimitedInputFormat.configureDelimitedFormat(input1) .recordDelimiter('\n'); input1.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1)); FileDataSource input2 = new FileDataSource( new ContractITCaseInputFormat(), input2Path); DelimitedInputFormat.configureDelimitedFormat(input2) .recordDelimiter('\n'); input2.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1)); MapOperator testMapper = MapOperator.builder(new TestMapper()).build(); testMapper.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1)); FileDataSink output = new FileDataSink( new ContractITCaseOutputFormat(), resultPath); output.setDegreeOfParallelism(1); output.setInput(testMapper); testMapper.addInput(input1); testMapper.addInput(input2); return new Plan(output); }
@Override
public Plan getPlan(String... args) {
    // parse job parameters
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

    MapOperator mapper = MapOperator.builder(new TokenizeLine())
            .input(source)
            .name("Tokenize Lines")
            .build();

    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
            .input(mapper)
            .name("Count Words")
            .build();

    @SuppressWarnings("unchecked")
    FileDataSink out = new FileDataSink(
            new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class), output, reducer, "Word Counts");

    Plan plan = new Plan(out, "WordCount Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
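The user functions referenced in the WordCount plans (TokenizeLine, CountWords) are not part of this listing. A minimal sketch, assuming the Record-API MapFunction and ReduceFunction signatures, could look like this; the bodies are illustrative assumptions, not the original implementations:

// hedged sketch of the referenced user functions
public static class TokenizeLine extends MapFunction {
    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
        String line = record.getField(0, StringValue.class).getValue();
        // emit one (word, 1) record per token
        for (String token : line.toLowerCase().split("\\W+")) {
            if (!token.isEmpty()) {
                out.collect(new Record(new StringValue(token), new IntValue(1)));
            }
        }
    }
}

public static class CountWords extends ReduceFunction {
    @Override
    public void reduce(Iterator<Record> records, Collector<Record> out) throws Exception {
        Record current = null;
        int sum = 0;
        while (records.hasNext()) {
            current = records.next();
            sum += current.getField(1, IntValue.class).getValue();
        }
        current.setField(1, new IntValue(sum));  // total count for this word
        out.collect(current);
    }
}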
        .input(sumReduce)
        .name("Compute termination criterion (Map)")
        .build();
        .input(iteration.getPartialSolution())
        .name("Compute termination criterion (Map)")
        .build();
@Override
protected Plan getTestJob() {
    // Sc1 generates M parameters a,b,c for second degree polynomials P(x) = ax^2 + bx + c, identified by id
    FileDataSource sc1 = new FileDataSource(new CsvInputFormat(), sc1Path);
    CsvInputFormat.configureRecordFormat(sc1)
            .fieldDelimiter(' ')
            .field(StringValue.class, 0)
            .field(IntValue.class, 1)
            .field(IntValue.class, 2)
            .field(IntValue.class, 3);

    // Sc2 generates N x values to be evaluated with the polynomial identified by id
    FileDataSource sc2 = new FileDataSource(new CsvInputFormat(), sc2Path);
    CsvInputFormat.configureRecordFormat(sc2)
            .fieldDelimiter(' ')
            .field(StringValue.class, 0)
            .field(IntValue.class, 1);

    // Sc3 generates N y values to be evaluated with the polynomial identified by id
    FileDataSource sc3 = new FileDataSource(new CsvInputFormat(), sc3Path);
    CsvInputFormat.configureRecordFormat(sc3)
            .fieldDelimiter(' ')
            .field(StringValue.class, 0)
            .field(IntValue.class, 1);

    // Jn1 matches x and y values on id and emits (id, x, y) triples
    JoinOperator jn1 = JoinOperator.builder(Jn1.class, StringValue.class, 0, 0)
            .input1(sc2).input2(sc3).build();

    // Jn2 matches polynomial and arguments by id, computes p = min(P(x), P(y)) and emits (id, p) tuples
    JoinOperator jn2 = JoinOperator.builder(Jn2.class, StringValue.class, 0, 0)
            .input1(jn1).input2(sc1).build();

    // Mp1 selects (id, x, y) triples where x = y and broadcasts z (= x = y) to Mp2
    MapOperator mp1 = MapOperator.builder(Mp1.class).input(jn1).build();

    // Mp2 filters out all p values which can be divided by z
    MapOperator mp2 = MapOperator.builder(Mp2.class)
            .setBroadcastVariable("z", mp1)
            .input(jn2).build();

    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    output.setDegreeOfParallelism(1);
    output.setInput(mp2);

    return new Plan(output);
}
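Mp2's body is not shown here. A hedged sketch of how it might read the broadcast set "z" at runtime; the getRuntimeContext() access pattern and the field layout are assumptions:

public static class Mp2 extends MapFunction {
    private Collection<Record> zSet;

    @Override
    public void open(Configuration parameters) throws Exception {
        // a set registered via setBroadcastVariable("z", ...) is fetched
        // by the same name from the runtime context (assumed API)
        zSet = getRuntimeContext().getBroadcastVariable("z");
    }

    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
        int p = record.getField(1, IntValue.class).getValue();
        for (Record z : zSet) {
            if (p % z.getField(1, IntValue.class).getValue() == 0) {
                return;  // drop p values divisible by some z
            }
        }
        out.collect(record);
    }
}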
@Override
protected JobGraph getFailingJobGraph() throws Exception {
    // init data source
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);

    // init failing map task
    MapOperator testMapper = MapOperator.builder(FailingMapper.class).build();

    // init data sink
    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);

    // compose failing program
    output.setInput(testMapper);
    testMapper.setInput(input);

    // generate plan
    Plan plan = new Plan(output);
    plan.setDefaultParallelism(4);

    // optimize and compile plan
    PactCompiler pc = new PactCompiler(new DataStatistics());
    OptimizedPlan op = pc.compile(plan);

    // return job graph of failing job
    NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
    return jgg.compileJobGraph(op);
}
@Override
protected JobGraph getJobGraph() throws Exception {
    // init data source
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);

    // init (working) map task
    MapOperator testMapper = MapOperator.builder(TestMapper.class).build();

    // init data sink
    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);

    // compose working program
    output.setInput(testMapper);
    testMapper.setInput(input);

    // generate plan
    Plan plan = new Plan(output);
    plan.setDefaultParallelism(4);

    // optimize and compile plan
    PactCompiler pc = new PactCompiler(new DataStatistics());
    OptimizedPlan op = pc.compile(plan);

    // return job graph of working job
    NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
    return jgg.compileJobGraph(op);
}
@SuppressWarnings({ "deprecation", "unchecked" }) @Override protected JobGraph getJobGraph() throws Exception { String path1 = config.getBoolean("input1PathHasData", false) ? textInput : emptyInput; String path2 = config.getBoolean("input2PathHasData", false) ? textInput : emptyInput; FileDataSource input1 = new FileDataSource(new ContractITCaseInputFormat(), path1); FileDataSource input2 = new FileDataSource(new ContractITCaseInputFormat(), path2); MapOperator testMapper1 = MapOperator.builder(new TestMapper()).build(); MapOperator testMapper2 = MapOperator.builder(new TestMapper()).build(); FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultDir); testMapper1.setInput(input1); testMapper2.setInput(input2); output.addInput(testMapper1); output.addInput(testMapper2); Plan plan = new Plan(output); plan.setDefaultParallelism(4); PactCompiler pc = new PactCompiler(new DataStatistics()); OptimizedPlan op = pc.compile(plan); NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator(); return jgg.compileJobGraph(op); }
static Plan getTestPlan(int numSubTasks, String input, String output) {
    FileDataSource initialInput = new FileDataSource(new PointInFormat(), input, "Input");
    initialInput.setDegreeOfParallelism(1);

    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(2);

    ReduceOperator dummyReduce = ReduceOperator.builder(new DummyReducer(), IntValue.class, 0)
            .input(iteration.getPartialSolution())
            .name("Reduce something")
            .build();

    MapOperator dummyMap = MapOperator.builder(new IdentityMapper()).input(dummyReduce).build();
    iteration.setNextPartialSolution(dummyMap);

    FileDataSink finalResult = new FileDataSink(new PointOutFormat(), output, iteration, "Output");

    Plan plan = new Plan(finalResult, "Iteration with chained map test");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
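Only the name IdentityMapper appears in this listing; a plausible sketch, assuming the Record-API MapFunction signature:

public static class IdentityMapper extends MapFunction {
    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
        out.collect(record);  // forward every record unchanged
    }
}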
public Plan getPlan(int numSubTasks, String dataInput, String output) {
    // input is a {word, count} pair
    FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

    // do a selection using the cached file
    MapOperator mapper = MapOperator.builder(new TokenizeLine())
            .input(source)
            .name("Tokenize Lines")
            .build();

    FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, mapper, "Selection");
    CsvOutputFormat.configureRecordFormat(out)
            .recordDelimiter('\n')
            .fieldDelimiter(' ')
            .field(StringValue.class, 0)
            .field(IntValue.class, 1);

    Plan plan = new Plan(out, "Distributed Cache");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    FileDataSource source = new FileDataSource(new TextInputFormat(), input, "Input Lines");
    source.setParameter(TextInputFormat.CHARSET_NAME, "ASCII");

    MapOperator mapper = MapOperator.builder(new TokenizeLine())
            .input(source)
            .name("Tokenize Lines")
            .build();

    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
            .input(mapper)
            .name("Count Words")
            .build();

    @SuppressWarnings("unchecked")
    FileDataSink out = new FileDataSink(
            new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class), output, reducer, "Word Counts");

    Plan plan = new Plan(out, "WordCount Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
@Override
protected Plan getTestJob() {
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inPath);
    DelimitedInputFormat.configureDelimitedFormat(input)
            .recordDelimiter('\n');
    input.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    MapOperator testMapper = MapOperator.builder(new TestMapper()).build();
    testMapper.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    output.setDegreeOfParallelism(1);

    output.setInput(testMapper);
    testMapper.setInput(input);

    return new Plan(output);
}
@Override
public Plan getPlan(String... args) {
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

    MapOperator mapper = MapOperator.builder(new TokenizeLine())
            .input(source)
            .name("Tokenize Lines")
            .build();

    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
            .input(mapper)
            .name("Count Words")
            .build();

    FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, reducer, "Word Counts");
    CsvOutputFormat.configureRecordFormat(out)
            .recordDelimiter('\n')
            .fieldDelimiter(' ')
            .field(StringValue.class, 0)
            .field(IntValue.class, 1);

    Plan plan = new Plan(out, "WordCount Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
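A hedged usage sketch for the plan factories above, using LocalExecutor.execute as in the main() method later in this listing; the enclosing class name WordCountPlan and the file paths are placeholders:

public static void main(String[] args) throws Exception {
    // assumed class name and paths, for illustration only
    Plan plan = new WordCountPlan().getPlan("4", "file:///tmp/input.txt", "file:///tmp/counts");
    LocalExecutor.execute(plan);
}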
private static Plan getPlan(int numSubTasks, String input, String output) {
    FileDataSource initialInput = new FileDataSource(new PointInFormat(), input, "Input");
    initialInput.setDegreeOfParallelism(1);

    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(2);

    // passing the partial solution twice unions it with itself
    @SuppressWarnings("unchecked")
    MapOperator map2 = MapOperator.builder(new IdentityMapper())
            .input(iteration.getPartialSolution(), iteration.getPartialSolution())
            .name("map")
            .build();
    iteration.setNextPartialSolution(map2);

    FileDataSink finalResult = new FileDataSink(new PointOutFormat(), output, iteration, "Output");

    Plan plan = new Plan(finalResult, "Iteration with union test");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
public void testSlowMapCancelling() throws Exception {
    GenericDataSource<InfiniteIntegerInputFormat> source = new GenericDataSource<InfiniteIntegerInputFormat>(
            new InfiniteIntegerInputFormat(), "Source");

    MapOperator mapper = MapOperator.builder(DelayingIdentityMapper.class)
            .input(source)
            .name("Delay Mapper")
            .build();

    GenericDataSink sink = new GenericDataSink(new DiscardingOutputFormat(), mapper, "Sink");

    Plan p = new Plan(sink);
    p.setDefaultParallelism(4);

    runAndCancelJob(p, 5 * 1000, 10 * 1000);
}
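DelayingIdentityMapper is only named here; a plausible sketch of its behavior, assuming an identity map that sleeps per record so the job is still running when the cancel request arrives (the delay constant is an assumption):

public static class DelayingIdentityMapper extends MapFunction {
    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
        Thread.sleep(250);  // assumed per-record delay
        out.collect(record);
    }
}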
public void testMapWithLongCancellingResponse() throws Exception {
    GenericDataSource<InfiniteIntegerInputFormat> source = new GenericDataSource<InfiniteIntegerInputFormat>(
            new InfiniteIntegerInputFormat(), "Source");

    MapOperator mapper = MapOperator.builder(LongCancelTimeIdentityMapper.class)
            .input(source)
            .name("Long Cancelling Time Mapper")
            .build();

    GenericDataSink sink = new GenericDataSink(new DiscardingOutputFormat(), mapper, "Sink");

    Plan p = new Plan(sink);
    p.setDefaultParallelism(4);

    runAndCancelJob(p, 10 * 1000, 10 * 1000);
}
public static void main(String[] args) throws Exception {
    GenericDataSource<UserGeneratingInputFormat> source =
            new GenericDataSource<UserGeneratingInputFormat>(UserGeneratingInputFormat.class);

    MapOperator mapper = MapOperator.builder(new NumberExtractingMapper())
            .input(source).name("le mapper").build();

    ReduceOperator reducer = ReduceOperator.builder(new ConcatenatingReducer(), IntValue.class, 1)
            .input(mapper).name("le reducer").build();

    GenericDataSink sink = new GenericDataSink(PrintingOutputFormat.class, reducer);

    Plan p = new Plan(sink);
    p.setDefaultParallelism(4);

    LocalExecutor.execute(p);
}
public void testMapCancelling() throws Exception {
    GenericDataSource<InfiniteIntegerInputFormat> source = new GenericDataSource<InfiniteIntegerInputFormat>(
            new InfiniteIntegerInputFormat(), "Source");

    MapOperator mapper = MapOperator.builder(IdentityMapper.class)
            .input(source)
            .name("Identity Mapper")
            .build();

    GenericDataSink sink = new GenericDataSink(new DiscardingOutputFormat(), mapper, "Sink");

    Plan p = new Plan(sink);
    p.setDefaultParallelism(4);

    runAndCancelJob(p, 5 * 1000, 10 * 1000);
}
public void testMapPriorToFirstRecordReading() throws Exception {
    GenericDataSource<InfiniteIntegerInputFormat> source = new GenericDataSource<InfiniteIntegerInputFormat>(
            new InfiniteIntegerInputFormat(), "Source");

    MapOperator mapper = MapOperator.builder(StuckInOpenIdentityMapper.class)
            .input(source)
            .name("Stuck-In-Open Mapper")
            .build();

    GenericDataSink sink = new GenericDataSink(new DiscardingOutputFormat(), mapper, "Sink");

    Plan p = new Plan(sink);
    p.setDefaultParallelism(4);

    runAndCancelJob(p, 10 * 1000, 10 * 1000);
}