/** * Gets the accumulator with the given name. Returns {@code null}, if no accumulator with * that name was produced. * * @param accumulatorName The name of the accumulator * @param <A> The generic type of the accumulator value * @return The value of the accumulator with the given name */ public <A> A getAccumulator(ExecutionEnvironment env, String accumulatorName) { JobExecutionResult result = env.getLastJobExecutionResult(); Preconditions.checkNotNull(result, "No result found for job, was execute() called before getting the result?"); return result.getAccumulatorResult(id + SEPARATOR + accumulatorName); } }
/** * Convenience method to get the count (number of elements) of a DataSet * as well as the checksum (sum over element hashes). * * @return A ChecksumHashCode that represents the count and checksum of elements in the data set. * @deprecated replaced with {@code org.apache.flink.graph.asm.dataset.ChecksumHashCode} in Gelly */ @Deprecated public static <T> Utils.ChecksumHashCode checksumHashCode(DataSet<T> input) throws Exception { final String id = new AbstractID().toString(); input.output(new Utils.ChecksumHashCodeHelper<T>(id)).name("ChecksumHashCode"); JobExecutionResult res = input.getExecutionEnvironment().execute(); return res.<Utils.ChecksumHashCode> getAccumulatorResult(id); }
@Override protected void postSubmit() throws Exception { compareResultsByLinesInMemory(EXPECTED, resultPath); // Test accumulator results System.out.println("Accumulator results:"); JobExecutionResult res = this.result; System.out.println(AccumulatorHelper.getResultsFormatted(res.getAllAccumulatorResults())); Assert.assertEquals(Integer.valueOf(3), res.getAccumulatorResult("num-lines")); Assert.assertEquals(Integer.valueOf(3), res.getIntCounterResult("num-lines")); Assert.assertEquals(Double.valueOf(getParallelism()), res.getAccumulatorResult("open-close-counter")); // Test histogram (words per line distribution) Map<Integer, Integer> dist = new HashMap<>(); dist.put(1, 1); dist.put(2, 1); dist.put(3, 1); Assert.assertEquals(dist, res.getAccumulatorResult("words-per-line")); // Test distinct words (custom accumulator) Set<StringValue> distinctWords = new HashSet<>(); distinctWords.add(new StringValue("one")); distinctWords.add(new StringValue("two")); distinctWords.add(new StringValue("three")); Assert.assertEquals(distinctWords, res.getAccumulatorResult("distinct-words")); }
/** * Convenience method to get the count (number of elements) of a DataSet. * * @return A long integer that represents the number of elements in the data set. */ public long count() throws Exception { final String id = new AbstractID().toString(); output(new Utils.CountHelper<T>(id)).name("count()"); JobExecutionResult res = getExecutionEnvironment().execute(); return res.<Long> getAccumulatorResult(id); }
/** * Convenience method to get the elements of a DataSet as a List. * As DataSet can contain a lot of data, this method should be used with caution. * * @return A List containing the elements of the DataSet */ public List<T> collect() throws Exception { final String id = new AbstractID().toString(); final TypeSerializer<T> serializer = getType().createSerializer(getExecutionEnvironment().getConfig()); this.output(new Utils.CollectHelper<>(id, serializer)).name("collect()"); JobExecutionResult res = getExecutionEnvironment().execute(); ArrayList<byte[]> accResult = res.getAccumulatorResult(id); if (accResult != null) { try { return SerializedListAccumulator.deserializeList(accResult, serializer); } catch (ClassNotFoundException e) { throw new RuntimeException("Cannot find type class of collected data type.", e); } catch (IOException e) { throw new RuntimeException("Serialization error while deserializing collected data", e); } } else { throw new RuntimeException("The call to collect() could not retrieve the DataSet."); } }
public static void main(String[] args) throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.fromElements(1, 2).output(new DiscardingOutputFormat<Integer>()); env.execute().getAccumulatorResult(ACCUMULATOR_NAME); } }
public static void main(final String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get the data set final DataSet<StringTriple> file = getDataSet(env, params); // filter lines with empty fields final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter()); // Here, we could do further processing with the filtered lines... JobExecutionResult result; // output the filtered lines if (params.has("output")) { filteredLines.writeAsCsv(params.get("output")); // execute program result = env.execute("Accumulator example"); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); filteredLines.print(); result = env.getLastJobExecutionResult(); } // get the accumulator result via its registration key final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR); System.out.format("Number of detected empty fields per column: %s\n", emptyFields); }
@Test public void testAccumulator() { try { final int numElements = 100; ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment(); env.generateSequence(1, numElements) .map(new CountingMapper()) .output(new DiscardingOutputFormat<Long>()); JobExecutionResult result = env.execute(); assertTrue(result.getNetRuntime() >= 0); assertEquals(numElements, (int) result.getAccumulatorResult(ACCUMULATOR_NAME)); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
private void runTest( SourceFunction<SessionEvent<Integer, TestEventPayload>> dataSource, WindowFunction<SessionEvent<Integer, TestEventPayload>, String, Tuple, TimeWindow> windowFunction) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); WindowedStream<SessionEvent<Integer, TestEventPayload>, Tuple, TimeWindow> windowedStream = env.addSource(dataSource).keyBy("sessionKey") .window(EventTimeSessionWindows.withGap(Time.milliseconds(MAX_SESSION_EVENT_GAP_MS))); if (ALLOWED_LATENESS_MS != Long.MAX_VALUE) { windowedStream = windowedStream.allowedLateness(Time.milliseconds(ALLOWED_LATENESS_MS)); } if (PURGE_WINDOW_ON_FIRE) { windowedStream = windowedStream.trigger(PurgingTrigger.of(EventTimeTrigger.create())); } windowedStream.apply(windowFunction).print(); JobExecutionResult result = env.execute(); // check that overall event counts match with our expectations. remember that late events within lateness will // each trigger a window! Assert.assertEquals( (LATE_EVENTS_PER_SESSION + 1) * NUMBER_OF_SESSIONS * EVENTS_PER_SESSION, (long) result.getAccumulatorResult(SESSION_COUNTER_ON_TIME_KEY)); Assert.assertEquals( NUMBER_OF_SESSIONS * (LATE_EVENTS_PER_SESSION * (LATE_EVENTS_PER_SESSION + 1) / 2), (long) result.getAccumulatorResult(SESSION_COUNTER_LATE_KEY)); }
@Override protected void testProgram() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(NUM_SUBTASKS); IterativeDataSet<Integer> iteration = env.fromElements(1, 2, 3).iterate(NUM_ITERATIONS); iteration.closeWith(iteration.reduceGroup(new SumReducer())).output(new DiscardingOutputFormat<Integer>()); Assert.assertEquals(NUM_ITERATIONS * 6, (int) env.execute().getAccumulatorResult(ACC_NAME)); }
/** * Gets the accumulator with the given name. Returns {@code null}, if no accumulator with * that name was produced. * * @param accumulatorName The name of the accumulator * @param <A> The generic type of the accumulator value * @return The value of the accumulator with the given name */ public <A> A getAccumulator(ExecutionEnvironment env, String accumulatorName) { JobExecutionResult result = env.getLastJobExecutionResult(); Preconditions.checkNotNull(result, "No result found for job, was execute() called before getting the result?"); return result.getAccumulatorResult(id + SEPARATOR + accumulatorName); } }
/** * Gets the accumulator with the given name. Returns {@code null}, if no accumulator with * that name was produced. * * @param accumulatorName The name of the accumulator * @param <A> The generic type of the accumulator value * @return The value of the accumulator with the given name */ public <A> A getAccumulator(ExecutionEnvironment env, String accumulatorName) { JobExecutionResult result = env.getLastJobExecutionResult(); Preconditions.checkNotNull(result, "No result found for job, was execute() called before getting the result?"); return result.getAccumulatorResult(id + SEPARATOR + accumulatorName); } }
/** * Gets the accumulator with the given name. Returns {@code null}, if no accumulator with * that name was produced. * * @param accumulatorName The name of the accumulator * @param <A> The generic type of the accumulator value * @return The value of the accumulator with the given name */ public <A> A getAccumulator(ExecutionEnvironment env, String accumulatorName) { JobExecutionResult result = env.getLastJobExecutionResult(); Preconditions.checkNotNull(result, "No result found for job, was execute() called before getting the result?"); return result.getAccumulatorResult(id + SEPARATOR + accumulatorName); } }
/** * Convenience method to get the count (number of elements) of a DataSet * as well as the checksum (sum over element hashes). * * @return A ChecksumHashCode that represents the count and checksum of elements in the data set. * @deprecated replaced with {@code org.apache.flink.graph.asm.dataset.ChecksumHashCode} in Gelly */ @Deprecated public static <T> Utils.ChecksumHashCode checksumHashCode(DataSet<T> input) throws Exception { final String id = new AbstractID().toString(); input.output(new Utils.ChecksumHashCodeHelper<T>(id)).name("ChecksumHashCode"); JobExecutionResult res = input.getExecutionEnvironment().execute(); return res.<Utils.ChecksumHashCode> getAccumulatorResult(id); }
/** * Convenience method to get the count (number of elements) of a DataSet * as well as the checksum (sum over element hashes). * * @return A ChecksumHashCode that represents the count and checksum of elements in the data set. * @deprecated replaced with {@code org.apache.flink.graph.asm.dataset.ChecksumHashCode} in Gelly */ @Deprecated public static <T> Utils.ChecksumHashCode checksumHashCode(DataSet<T> input) throws Exception { final String id = new AbstractID().toString(); input.output(new Utils.ChecksumHashCodeHelper<T>(id)).name("ChecksumHashCode"); JobExecutionResult res = input.getExecutionEnvironment().execute(); return res.<Utils.ChecksumHashCode> getAccumulatorResult(id); }
/** * Convenience method to get the count (number of elements) of a DataSet. * * @return A long integer that represents the number of elements in the data set. */ public long count() throws Exception { final String id = new AbstractID().toString(); output(new Utils.CountHelper<T>(id)).name("count()"); JobExecutionResult res = getExecutionEnvironment().execute(); return res.<Long> getAccumulatorResult(id); }
/** * Convenience method to get the count (number of elements) of a DataSet. * * @return A long integer that represents the number of elements in the data set. */ public long count() throws Exception { final String id = new AbstractID().toString(); output(new Utils.CountHelper<T>(id)).name("count()"); JobExecutionResult res = getExecutionEnvironment().execute(); return res.<Long> getAccumulatorResult(id); }
/** * Convenience method to get the elements of a DataSet as a List. * As DataSet can contain a lot of data, this method should be used with caution. * * @return A List containing the elements of the DataSet */ public List<T> collect() throws Exception { final String id = new AbstractID().toString(); final TypeSerializer<T> serializer = getType().createSerializer(getExecutionEnvironment().getConfig()); this.output(new Utils.CollectHelper<>(id, serializer)).name("collect()"); JobExecutionResult res = getExecutionEnvironment().execute(); ArrayList<byte[]> accResult = res.getAccumulatorResult(id); if (accResult != null) { try { return SerializedListAccumulator.deserializeList(accResult, serializer); } catch (ClassNotFoundException e) { throw new RuntimeException("Cannot find type class of collected data type.", e); } catch (IOException e) { throw new RuntimeException("Serialization error while deserializing collected data", e); } } else { throw new RuntimeException("The call to collect() could not retrieve the DataSet."); } }
public static void main(final String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get the data set final DataSet<StringTriple> file = getDataSet(env, params); // filter lines with empty fields final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter()); // Here, we could do further processing with the filtered lines... JobExecutionResult result; // output the filtered lines if (params.has("output")) { filteredLines.writeAsCsv(params.get("output")); // execute program result = env.execute("Accumulator example"); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); filteredLines.print(); result = env.getLastJobExecutionResult(); } // get the accumulator result via its registration key final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR); System.out.format("Number of detected empty fields per column: %s\n", emptyFields); }
public static void main(final String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get the data set final DataSet<StringTriple> file = getDataSet(env, params); // filter lines with empty fields final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter()); // Here, we could do further processing with the filtered lines... JobExecutionResult result; // output the filtered lines if (params.has("output")) { filteredLines.writeAsCsv(params.get("output")); // execute program result = env.execute("Accumulator example"); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); filteredLines.print(); result = env.getLastJobExecutionResult(); } // get the accumulator result via its registration key final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR); System.out.format("Number of detected empty fields per column: %s\n", emptyFields); }