/**
 * Merges two {@link Word} records into a single one.
 *
 * <p>The result takes its word from {@code value1} and carries the sum of both frequencies.
 *
 * @param value1 first record; supplies the word of the result
 * @param value2 second record; only its frequency is read
 * @return a new {@code Word} holding the combined frequency
 * @throws Exception declared in the signature; this body contains no explicit throw
 */
@Override
public Word reduce(Word value1, Word value2) throws Exception {
    return new Word(
            value1.word,
            value1.frequency + value2.frequency);
}
});
/**
 * Runs {@code WordCount.main} with {@code textPath} as input and {@code resultPath} as output.
 *
 * @throws Exception propagated from {@code WordCount.main}
 */
@Override
protected void testProgram() throws Exception {
    final String[] programArgs = {"--input", textPath, "--output", resultPath};
    WordCount.main(programArgs);
}
}
text = WordCountData.getDefaultTextLineDataSet(env); text.flatMap(new Tokenizer())
text = WordCountData.getDefaultTextLineDataSet(env); text.flatMap(new Tokenizer())
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .sum(1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); // execute program env.execute("WordCount Example"); } else { counts.print(); } }
/**
 * Builds a word-count job over {@code WordCountData.TEXT} and collects its result locally.
 *
 * <p>The text is tokenized with {@code WordCount.Tokenizer}, grouped on tuple field 0 and
 * aggregated with {@code Aggregations.SUM} on tuple field 1. The result is written into
 * {@code resultsCollected} through a {@code LocalCollectionOutputFormat}.
 *
 * @throws Exception propagated from building or executing the job
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<String> text = env.fromElements(WordCountData.TEXT);

    final DataSet<Tuple2<String, Integer>> result = text
            .flatMap(new WordCount.Tokenizer())
            .groupBy(0)
            .aggregate(Aggregations.SUM, 1);

    final LocalCollectionOutputFormat<Tuple2<String, Integer>> collectingSink =
            new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected);
    result.output(collectingSink);

    env.execute("Word Count Collection");
}
}
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; DataSet<String> text = getTextDataSet(env); text.flatMap(new Tokenizer())
/**
 * Combines two {@link Word} records.
 *
 * <p>The word is taken from {@code value1}; the frequency is the sum of both inputs'
 * frequencies.
 *
 * @param value1 first record; supplies the word of the result
 * @param value2 second record; only its frequency is read
 * @return a new {@code Word} with the summed frequency
 * @throws Exception declared in the signature; this body contains no explicit throw
 */
@Override
public Word reduce(Word value1, Word value2) throws Exception {
    return new Word(
            value1.word,
            value1.frequency + value2.frequency);
}
});
@Override public void flatMap(String value, Collector<Word> out) { // normalize and split the line String[] tokens = value.toLowerCase().split("\\W+"); // emit the pairs for (String token : tokens) { if (token.length() > 0) { out.collect(new Word(token, 1)); } } } }
@Test public void dumpWordCount() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { WordCount.main(new String[] { "--input", IN_FILE, "--output", OUT_FILE}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("WordCount failed with an exception"); } dump(env.getPlan()); }
@Override public void flatMap(String value, Collector<Word> out) { // normalize and split the line String[] tokens = value.toLowerCase().split("\\W+"); // emit the pairs for (String token : tokens) { if (token.length() > 0) { out.collect(new Word(token, 1)); } } } }
/**
 * Merges two {@link Word} records into a single one.
 *
 * <p>The result takes its word from {@code value1} and carries the sum of both frequencies.
 *
 * @param value1 first record; supplies the word of the result
 * @param value2 second record; only its frequency is read
 * @return a new {@code Word} holding the combined frequency
 * @throws Exception declared in the signature; this body contains no explicit throw
 */
@Override
public Word reduce(Word value1, Word value2) throws Exception {
    return new Word(
            value1.word,
            value1.frequency + value2.frequency);
}
});
@Test public void dumpWordCount() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { WordCount.main(new String[] { "--input", IN_FILE, "--output", OUT_FILE}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("WordCount failed with an exception"); } dump(env.getPlan()); }
/**
 * Combines two {@link Word} records.
 *
 * <p>The word is taken from {@code value1}; the frequency is the sum of both inputs'
 * frequencies.
 *
 * @param value1 first record; supplies the word of the result
 * @param value2 second record; only its frequency is read
 * @return a new {@code Word} with the summed frequency
 * @throws Exception declared in the signature; this body contains no explicit throw
 */
@Override
public Word reduce(Word value1, Word value2) throws Exception {
    return new Word(
            value1.word,
            value1.frequency + value2.frequency);
}
});
@Test public void testWordCountPlan() { try { // without arguments try { final int parallelism = 1; // some ops have DOP 1 forced JsonValidator validator = new GenericValidator(parallelism, 3); TestingExecutionEnvironment.setAsNext(validator, parallelism); WordCount.main(new String[0]); } catch (AbortError ignored) {} // with arguments try { final int parallelism = 17; JsonValidator validator = new GenericValidator(parallelism, 3); TestingExecutionEnvironment.setAsNext(validator, parallelism); String tmpDir = tempFolder.newFolder().getAbsolutePath(); WordCount.main(new String[] { "--input", tmpDir, "--output", tmpDir}); } catch (AbortError ignored) {} } catch (Exception e) { restoreStreams(); e.printStackTrace(); fail(e.getMessage()); } }
/**
 * Merges two {@link Word} records into a single one.
 *
 * <p>The result takes its word from {@code value1} and carries the sum of both frequencies.
 *
 * @param value1 first record; supplies the word of the result
 * @param value2 second record; only its frequency is read
 * @return a new {@code Word} holding the combined frequency
 * @throws Exception declared in the signature; this body contains no explicit throw
 */
@Override
public Word reduce(Word value1, Word value2) throws Exception {
    return new Word(
            value1.word,
            value1.frequency + value2.frequency);
}
});
/**
 * Delegates to {@code WordCount.main}, forwarding the arguments unchanged.
 *
 * @param args command-line arguments passed straight through
 * @throws Exception propagated from {@code WordCount.main}
 */
public static void main(String[] args) throws Exception {
    WordCount.main(args);
}
@Override public void flatMap(String value, Collector<Word> out) { // normalize and split the line String[] tokens = value.toLowerCase().split("\\W+"); // emit the pairs for (String token : tokens) { if (token.length() > 0) { out.collect(new Word(token, 1)); } } } }
@Override public void flatMap(String value, Collector<Word> out) { // normalize and split the line String[] tokens = value.toLowerCase().split("\\W+"); // emit the pairs for (String token : tokens) { if (token.length() > 0) { out.collect(new Word(token, 1)); } } } }
@Override public void flatMap(String value, Collector<Word> out) { // normalize and split the line String[] tokens = value.toLowerCase().split("\\W+"); // emit the pairs for (String token : tokens) { if (token.length() > 0) { out.collect(new Word(token, 1)); } } } }