/**
 * Bumps the result vector entry at {@code position} by one.
 *
 * @param position index of the vector component to increment
 */
@Override
public void add(Integer position) {
    final int increment = 1;
    updateResultVector(position, increment);
}
@Override public boolean filter(final StringTriple t) { boolean containsEmptyFields = false; // iterate over the tuple fields looking for empty ones for (int pos = 0; pos < t.getArity(); pos++) { final String field = t.getField(pos); if (field == null || field.trim().isEmpty()) { containsEmptyFields = true; // if an empty field is encountered, update the // accumulator this.emptyFieldCounter.add(pos); } } return !containsEmptyFields; } }
/**
 * Creates a new {@code VectorAccumulator} backed by a fresh copy of the
 * current result vector.
 *
 * @return a new accumulator holding a copy of {@code resultVector}
 */
@Override
public Accumulator<Integer, ArrayList<Integer>> clone() {
    final ArrayList<Integer> vectorCopy = new ArrayList<Integer>(resultVector);
    return new VectorAccumulator(vectorCopy);
}
// Excerpt of a job definition: obtains the documents, ranks and visits data sets
// from getDocumentsDataSet / getRanksDataSet / getVisitsDataSet using env and params,
// then applies FilterDocByKeyWords, FilterByRank and FilterVisitsByDate filters and
// finally co-groups joinDocsRanks with filterVisits via where(1).equalTo(0) and
// AntiJoinVisits.
// NOTE(review): the .filter(...)/.project(...) chains have no visible receiver or
// assignment target in this excerpt, and joinDocsRanks / filterVisits are not declared
// here - presumably the left-hand sides were lost; confirm against the full file.
DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params); DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params); DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params); .filter(new FilterDocByKeyWords()) .project(0); .filter(new FilterByRank()); .filter(new FilterVisitsByDate()) .project(0); joinDocsRanks.coGroup(filterVisits) .where(1).equalTo(0) .with(new AntiJoinVisits());
// Calls the customer, orders, lineitem and nation data set helpers, passing env and
// the value of the matching "customer" / "orders" / "lineitem" / "nation" parameter.
// NOTE(review): the return values are discarded in this excerpt - presumably they are
// assigned to variables in the full file; confirm.
getCustomerDataSet(env, params.get("customer")); getOrdersDataSet(env, params.get("orders")); getLineitemDataSet(env, params.get("lineitem")); getNationsDataSet(env, params.get("nation"));
// Declares the lineitems, customers and orders data sets; each one is obtained from
// its helper method using env and the value of the "lineitem" / "customer" / "orders"
// parameter respectively.
DataSet<Lineitem> lineitems = getLineitemDataSet(env, params.get("lineitem")); DataSet<Customer> customers = getCustomerDataSet(env, params.get("customer")); DataSet<Order> orders = getOrdersDataSet(env, params.get("orders"));
public static void main(final String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get the data set final DataSet<StringTriple> file = getDataSet(env, params); // filter lines with empty fields final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter()); // Here, we could do further processing with the filtered lines... JobExecutionResult result; // output the filtered lines if (params.has("output")) { filteredLines.writeAsCsv(params.get("output")); // execute program result = env.execute("Accumulator example"); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); filteredLines.print(); result = env.getLastJobExecutionResult(); } // get the accumulator result via its registration key final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR); System.out.format("Number of detected empty fields per column: %s\n", emptyFields); }
@Override public void open(final Configuration parameters) throws Exception { super.open(parameters); // register the accumulator instance getRuntimeContext().addAccumulator(EMPTY_FIELD_ACCUMULATOR, this.emptyFieldCounter); }
/**
 * Provides the data set to analyze. If an {@code input} parameter is present,
 * the referenced CSV file is read with {@code ;} as field delimiter;
 * otherwise the built-in example tuples are used.
 *
 * @param env    execution environment used to create the data set
 * @param params parsed program parameters
 * @return the data set of {@code StringTriple} records
 */
@SuppressWarnings("unchecked")
private static DataSet<StringTriple> getDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (!params.has("input")) {
        // no input given: fall back to the default example data
        System.out.println("Executing EmptyFieldsCountAccumulator example with default input data set.");
        System.out.println("Use --input to specify file input.");
        return env.fromCollection(getExampleInputTuples());
    }

    final String inputPath = params.get("input");
    return env.readCsvFile(inputPath)
            .fieldDelimiter(";")
            .pojoType(StringTriple.class);
}
/**
 * Builds the default example input: five triples, some of which contain
 * {@code null}, empty or whitespace-only fields.
 *
 * @return a new, mutable collection with the example tuples in fixed order
 */
private static Collection<StringTriple> getExampleInputTuples() {
    // one row per tuple; columns are passed to the StringTriple constructor in order
    final String[][] rows = {
        {"John", "Doe", "Foo Str."},
        {"Joe", "Johnson", ""},
        {null, "Kate Morn", "Bar Blvd."},
        {"Tim", "Rinny", ""},
        {"Alicia", "Jackson", " "}
    };

    final Collection<StringTriple> examples = new ArrayList<StringTriple>();
    for (final String[] row : rows) {
        examples.add(new StringTriple(row[0], row[1], row[2]));
    }
    return examples;
}
/**
 * Runs {@code WebLogAnalysis.main} with the documents, ranks, visits and
 * output paths held in {@code docsPath}, {@code ranksPath},
 * {@code visitsPath} and {@code resultPath}.
 *
 * @throws Exception declared by this method; not caught or wrapped here
 */
@Override
protected void testProgram() throws Exception {
    final String[] programArgs = {
        "--documents", docsPath,
        "--ranks", ranksPath,
        "--visits", visitsPath,
        "--output", resultPath
    };
    WebLogAnalysis.main(programArgs);
}
}
@Test public void dumpTPCH3() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { TPCHQuery3.main(new String[] { "--lineitem", IN_FILE, "--customer", IN_FILE, "--orders", OUT_FILE, "--output", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("TPCH3 failed with an exception"); } dump(env.getPlan()); }
@Override public void merge(final Accumulator<Integer, ArrayList<Integer>> other) { // merge two vector accumulators by adding their up their vector components final List<Integer> otherVector = other.getLocalValue(); for (int index = 0; index < otherVector.size(); index++) { updateResultVector(index, otherVector.get(index)); } }
@Test public void dumpWebLogAnalysis() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { WebLogAnalysis.main(new String[] { "--documents", IN_FILE, "--ranks", IN_FILE, "--visits", OUT_FILE, "--output", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("WebLogAnalysis failed with an exception"); } dump(env.getPlan()); }
@Test public void dumpTPCH3() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { TPCHQuery3.main(new String[] { "--lineitem", IN_FILE, "--customer", IN_FILE, "--orders", OUT_FILE, "--output", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("TPCH3 failed with an exception"); } dump(env.getPlan()); }
/**
 * Adds one to the component of the result vector located at the given position.
 *
 * @param position index of the component that is increased
 */
@Override
public void add(Integer position) {
    final int step = 1;
    updateResultVector(position, step);
}
@Test public void dumpWebLogAnalysis() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { org.apache.flink.examples.java.relational.WebLogAnalysis.main(new String[] { "--documents", IN_FILE, "--ranks", IN_FILE, "--visits", OUT_FILE, "--output", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("WebLogAnalysis failed with an exception"); } dump(env.getPlan()); }
/**
 * Increments by one the result vector component found at {@code position}.
 *
 * @param position index of the component to raise
 */
@Override
public void add(Integer position) {
    final int one = 1;
    updateResultVector(position, one);
}
/**
 * Raises the result vector component at the given position by exactly one.
 *
 * @param position index of the affected vector component
 */
@Override
public void add(Integer position) {
    final int unit = 1;
    updateResultVector(position, unit);
}