/**
 * Builds a Mockito mock of {@link StreamTask} whose accessors are stubbed with fixed values:
 * the name {@code "Mock Task"}, a freshly created lock object, the supplied configuration and
 * environment, and an {@link ExecutionConfig} on which object reuse has been enabled.
 *
 * @param streamConfig value returned by the mock's {@code getConfiguration()}
 * @param environment value returned by the mock's {@code getEnvironment()}
 * @return the stubbed mock task
 */
private <IN, OT extends StreamOperator<IN>> StreamTask<IN, OT> createMockTask(
        StreamConfig streamConfig,
        Environment environment) {

    // Created once up front; the stub below hands out this instance.
    final ExecutionConfig objectReuseConfig = new ExecutionConfig();
    objectReuseConfig.enableObjectReuse();

    @SuppressWarnings("unchecked")
    final StreamTask<IN, OT> task = mock(StreamTask.class);

    // Identity and synchronization.
    when(task.getName()).thenReturn("Mock Task");
    when(task.getCheckpointLock()).thenReturn(new Object());

    // Configuration and runtime context.
    when(task.getConfiguration()).thenReturn(streamConfig);
    when(task.getEnvironment()).thenReturn(environment);
    when(task.getExecutionConfig()).thenReturn(objectReuseConfig);

    return task;
}
/**
 * Runs a grouped reduce (grouped on field {@code "0"}) over records produced by the given
 * iterator, with object reuse enabled, prints the resulting record count and optionally
 * prints how long computing that count took.
 *
 * @param iterator prototype iterator handed to the {@code SplittableRandomIterator}
 * @param typeInfo type information of the generated records
 * @param hint combine hint applied to the reduce operator
 * @param numRecords number of records passed to the {@code SplittableRandomIterator}
 * @param print whether to print the elapsed-time line
 * @throws Exception if building or executing the program fails
 */
private static <T, B extends CopyableIterator<T>> void testReducePerformance(
        B iterator,
        TypeInformation<T> typeInfo,
        CombineHint hint,
        int numRecords,
        boolean print) throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    @SuppressWarnings("unchecked")
    DataSet<T> output = env
        .fromParallelCollection(new SplittableRandomIterator<T, B>(numRecords, iterator), typeInfo)
        .groupBy("0")
        .reduce(new SumReducer())
        .setCombineHint(hint);

    // System.nanoTime() is the JDK's elapsed-time source; currentTimeMillis() follows the
    // wall clock and can jump (e.g. on clock adjustments), which would skew the measurement.
    long startNanos = System.nanoTime();
    long count = output.count();
    long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;

    // Printed after the clock is stopped so console I/O is not part of the measurement.
    System.out.println(count);

    if (print) {
        System.out.println("=== Time for " + iterator.getClass().getSimpleName()
            + " with hint " + hint + ": " + elapsedMillis + "ms ===");
    }
}
/**
 * Executes {@code baseOperator} on the given collections twice, first with object reuse
 * disabled and then with it enabled, and checks that both runs produce {@code expected}
 * and that the joiner's {@code opened} and {@code closed} flags are set afterwards.
 *
 * @param leftInput elements of the left join input
 * @param rightInput elements of the right join input
 * @param expected the result both executions must produce
 * @throws Exception if executing the operator fails
 */
private void testOuterJoin(
        List<String> leftInput,
        List<String> rightInput,
        List<String> expected) throws Exception {

    // Both executions run before any assertion is evaluated.
    executionConfig.disableObjectReuse();
    final List<String> withoutObjectReuse =
        baseOperator.executeOnCollections(leftInput, rightInput, runtimeContext, executionConfig);

    executionConfig.enableObjectReuse();
    final List<String> withObjectReuse =
        baseOperator.executeOnCollections(leftInput, rightInput, runtimeContext, executionConfig);

    assertEquals(expected, withoutObjectReuse);
    assertEquals(expected, withObjectReuse);

    assertTrue(joiner.opened.get());
    assertTrue(joiner.closed.get());
}
/**
 * Runs {@code testMultiChaining} against an execution environment on which object reuse
 * has been enabled.
 */
@Test
public void testMultiChainingWithObjectReuse() throws Exception {
    final StreamExecutionEnvironment objectReuseEnv =
        StreamExecutionEnvironment.getExecutionEnvironment();
    objectReuseEnv.getConfig().enableObjectReuse();

    testMultiChaining(objectReuseEnv);
}
/**
 * Runs {@code testMultiChainingWithSplit} against an execution environment on which object
 * reuse has been enabled.
 */
@Test
public void testMultiChainingWithSplitWithObjectReuse() throws Exception {
    final StreamExecutionEnvironment objectReuseEnv =
        StreamExecutionEnvironment.getExecutionEnvironment();
    objectReuseEnv.getConfig().enableObjectReuse();

    testMultiChainingWithSplit(objectReuseEnv);
}
private void testExecuteOnCollection(FlatMapFunction<String, String> udf, List<String> input, boolean mutableSafe) throws Exception { ExecutionConfig executionConfig = new ExecutionConfig(); if (mutableSafe) { executionConfig.disableObjectReuse(); } else { executionConfig.enableObjectReuse(); } final TaskInfo taskInfo = new TaskInfo("Test UDF", 4, 0, 4, 0); // run on collections final List<String> result = getTestFlatMapOperator(udf) .executeOnCollections(input, new RuntimeUDFContext( taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()), executionConfig); Assert.assertEquals(input.size(), result.size()); Assert.assertEquals(input, result); }
/**
 * Runs a string-to-integer map function through {@code MapOperatorBase.executeOnCollections}
 * twice, once with object reuse disabled and once with it enabled, and checks that both
 * runs yield the parsed integers 1 through 6.
 *
 * <p>Exceptions propagate to the test runner instead of being caught and converted with
 * {@code fail(e.getMessage())}; that keeps the exception type and stack trace in the test
 * report and avoids an empty failure message when the exception has no message.
 *
 * @throws Exception if executing the operator fails
 */
@Test
public void testMapPlain() throws Exception {
    final MapFunction<String, Integer> parser = new MapFunction<String, Integer>() {
        @Override
        public Integer map(String value) {
            return Integer.parseInt(value);
        }
    };

    MapOperatorBase<String, Integer, MapFunction<String, Integer>> op =
        new MapOperatorBase<String, Integer, MapFunction<String, Integer>>(
            parser,
            new UnaryOperatorInformation<String, Integer>(
                BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
            "TestMapper");

    List<String> input = new ArrayList<String>(asList("1", "2", "3", "4", "5", "6"));

    // The same config instance is used for both runs; only the object-reuse flag changes.
    ExecutionConfig executionConfig = new ExecutionConfig();

    executionConfig.disableObjectReuse();
    List<Integer> resultMutableSafe = op.executeOnCollections(input, null, executionConfig);

    executionConfig.enableObjectReuse();
    List<Integer> resultRegular = op.executeOnCollections(input, null, executionConfig);

    assertEquals(asList(1, 2, 3, 4, 5, 6), resultMutableSafe);
    assertEquals(asList(1, 2, 3, 4, 5, 6), resultRegular);
}
/**
 * Creates a test environment with the given executor, parallelism, object-reuse setting,
 * jar files and class paths. Code analysis is always disabled and {@code lastEnv} is reset
 * to {@code null}.
 *
 * @param jobExecutor executor stored in this environment; must not be {@code null}
 * @param parallelism parallelism passed to {@code setParallelism}
 * @param isObjectReuseEnabled whether object reuse is enabled or disabled on the config
 * @param jarFiles jar files stored in this environment; must not be {@code null}
 * @param classPaths class paths stored in this environment; must not be {@code null}
 */
public TestEnvironment(
        JobExecutor jobExecutor,
        int parallelism,
        boolean isObjectReuseEnabled,
        Collection<Path> jarFiles,
        Collection<URL> classPaths) {

    // Null checks run in this order, before any other state is touched.
    this.jobExecutor = Preconditions.checkNotNull(jobExecutor);
    this.jarFiles = Preconditions.checkNotNull(jarFiles);
    this.classPaths = Preconditions.checkNotNull(classPaths);

    setParallelism(parallelism);

    // disabled to improve build time
    getConfig().setCodeAnalysisMode(CodeAnalysisMode.DISABLE);

    if (!isObjectReuseEnabled) {
        getConfig().disableObjectReuse();
    } else {
        getConfig().enableObjectReuse();
    }

    lastEnv = null;
}
@Before public void setupEnvironment() { TestEnvironment testEnvironment; switch(mode){ case CLUSTER: // This only works because of the quirks we built in the TestEnvironment. // We should refactor this in the future!!! testEnvironment = miniClusterResource.getTestEnvironment(); testEnvironment.getConfig().disableObjectReuse(); testEnvironment.setAsContext(); break; case CLUSTER_OBJECT_REUSE: // This only works because of the quirks we built in the TestEnvironment. // We should refactor this in the future!!! testEnvironment = miniClusterResource.getTestEnvironment(); testEnvironment.getConfig().enableObjectReuse(); testEnvironment.setAsContext(); break; case COLLECTION: new CollectionTestEnvironment().setAsContext(); break; } }
/**
 * Executes a job, with object reuse enabled, whose flat-map operator has no downstream
 * consumer attached. The test contains no assertions; it passes if execution completes
 * without throwing.
 */
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
    final StreamExecutionEnvironment objectReuseEnv =
        StreamExecutionEnvironment.getExecutionEnvironment();
    objectReuseEnv.getConfig().enableObjectReuse();

    // Emits each element shifted left by one bit.
    final FlatMapFunction<Integer, Integer> shiftLeftByOne = new FlatMapFunction<Integer, Integer>() {
        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value << 1);
        }
    };

    final DataStream<Integer> numbers = objectReuseEnv.fromElements(1, 2, 3);
    // The returned stream is deliberately ignored: nothing consumes the operator's output.
    numbers.flatMap(shiftLeftByOne);

    objectReuseEnv.execute();
}
}
/**
 * Reads from a {@code TestNonRichInputFormat} through
 * {@code GenericDataSourceBase.executeOnCollections} twice, once with object reuse disabled
 * and once with it enabled, and checks that both runs return {@code TestIOData.NAMES}.
 *
 * <p>Exceptions propagate to the test runner instead of being caught and converted with
 * {@code fail(e.getMessage())}; that keeps the exception type and stack trace in the test
 * report and avoids an empty failure message when the exception has no message.
 *
 * @throws Exception if executing the source fails
 */
@Test
public void testDataSourcePlain() throws Exception {
    TestNonRichInputFormat in = new TestNonRichInputFormat();
    GenericDataSourceBase<String, TestNonRichInputFormat> source =
        new GenericDataSourceBase<String, TestNonRichInputFormat>(
            in,
            new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO),
            "testSource");

    // The same config instance is used for both runs; only the object-reuse flag changes.
    ExecutionConfig executionConfig = new ExecutionConfig();

    executionConfig.disableObjectReuse();
    List<String> resultMutableSafe = source.executeOnCollections(null, executionConfig);

    // The input format is reset between the two executions.
    in.reset();

    executionConfig.enableObjectReuse();
    List<String> resultRegular = source.executeOnCollections(null, executionConfig);

    assertEquals(asList(TestIOData.NAMES), resultMutableSafe);
    assertEquals(asList(TestIOData.NAMES), resultRegular);
}
/**
 * Runs the same grouped reduce (grouped on field 0, using {@code OverwriteObjectsReduce})
 * once with object reuse enabled and once with it disabled, sorts both results with
 * {@code comparator}, and asserts that they match.
 *
 * @param env the execution environment whose config is toggled between the two runs
 * @throws Exception if executing either program fails
 */
public void testGroupedReduce(ExecutionEnvironment env) throws Exception {
    /*
     * Test ReduceCombineDriver and ReduceDriver
     */

    LOG.info("Testing grouped reduce");

    // Run 1: object reuse on.
    env.getConfig().enableObjectReuse();
    final List<Tuple2<IntValue, IntValue>> withReuse = getDataSet(env)
        .groupBy(0)
        .reduce(new OverwriteObjectsReduce(true))
        .collect();
    withReuse.sort(comparator);

    // Run 2: object reuse off.
    env.getConfig().disableObjectReuse();
    final List<Tuple2<IntValue, IntValue>> withoutReuse = getDataSet(env)
        .groupBy(0)
        .reduce(new OverwriteObjectsReduce(true))
        .collect();
    withoutReuse.sort(comparator);

    Assert.assertThat(withoutReuse, is(withReuse));
}
/**
 * Writes {@code TestIOData.NAMES} to a {@code TestNonRichOutputFormat} through
 * {@code GenericDataSinkBase.executeOnCollections} twice, once with object reuse disabled
 * and once with it enabled, and checks after each run that the format captured exactly
 * those names.
 *
 * <p>{@code source} and {@code in} are declared outside this method.
 *
 * <p>Exceptions propagate to the test runner instead of being caught and converted with
 * {@code fail(e.getMessage())}, so the exception type and stack trace stay in the test
 * report. The {@code assertEquals} calls pass the expected value first, so a failure
 * message labels "expected" and "actual" correctly.
 *
 * @throws Exception if executing the sink fails
 */
@Test
public void testDataSourcePlain() throws Exception {
    TestNonRichOutputFormat out = new TestNonRichOutputFormat();
    GenericDataSinkBase<String> sink = new GenericDataSinkBase<String>(
        out,
        new UnaryOperatorInformation<String, Nothing>(
            BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.getInfoFor(Nothing.class)),
        "test_sink");
    sink.setInput(source);

    // The same config instance is used for both runs; only the object-reuse flag changes.
    ExecutionConfig executionConfig = new ExecutionConfig();

    executionConfig.disableObjectReuse();
    in.reset();
    sink.executeOnCollections(asList(TestIOData.NAMES), null, executionConfig);
    assertEquals(asList(TestIOData.NAMES), out.output);

    executionConfig.enableObjectReuse();
    // Clear what the first run captured so the second assertion only sees the second run.
    out.clear();
    in.reset();
    sink.executeOnCollections(asList(TestIOData.NAMES), null, executionConfig);
    assertEquals(asList(TestIOData.NAMES), out.output);
}
/**
 * Reads {@code User} records from {@code inFile} with an {@code AvroInputFormat}, groups
 * them by the {@code "name"} field with object reuse enabled, emits a
 * {@code (name, 1)} tuple per record, writes the result as text to {@code resultPath},
 * and sets the {@code expected} output.
 */
@Test
public void testKeySelection() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    final Path avroFile = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> userFormat = new AvroInputFormat<>(avroFile, User.class);
    final DataSet<User> userRecords = env.createInput(userFormat);

    // One (name, 1) tuple is emitted for every user in a group.
    final GroupReduceFunction<User, Tuple2<String, Integer>> tagEachUser = (values, out) -> {
        for (User u : values) {
            out.collect(new Tuple2<>(u.getName().toString(), 1));
        }
    };

    final DataSet<Tuple2<String, Integer>> taggedNames = userRecords
        .groupBy("name")
        .reduceGroup(tagEachUser)
        .returns(Types.TUPLE(Types.STRING, Types.INT));

    taggedNames.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Alyssa,1)\n(Charlie,1)\n";
}
/**
 * Runs the same non-grouped reduce (using {@code OverwriteObjectsReduce}) once with object
 * reuse enabled and once with it disabled, then asserts that the {@code f1} value of each
 * result equals {@code NUMBER_OF_ELEMENTS} and that the two results are equal.
 *
 * @param env the execution environment whose config is toggled between the two runs
 * @throws Exception if executing either program fails
 */
public void testReduce(ExecutionEnvironment env) throws Exception {
    /*
     * Test ChainedAllReduceDriver
     */

    LOG.info("Testing reduce");

    // Run 1: object reuse on.
    env.getConfig().enableObjectReuse();
    final Tuple2<IntValue, IntValue> withReuse = getDataSet(env)
        .reduce(new OverwriteObjectsReduce(false))
        .collect()
        .get(0);

    // Run 2: object reuse off.
    env.getConfig().disableObjectReuse();
    final Tuple2<IntValue, IntValue> withoutReuse = getDataSet(env)
        .reduce(new OverwriteObjectsReduce(false))
        .collect()
        .get(0);

    Assert.assertEquals(NUMBER_OF_ELEMENTS, withReuse.f1.getValue());
    Assert.assertEquals(NUMBER_OF_ELEMENTS, withoutReuse.f1.getValue());

    Assert.assertEquals(withoutReuse, withReuse);
}
/**
 * Reduces {@code REDUCE_DATA} grouped on field 0, with object reuse set according to the
 * {@code objectReuse} flag, using a function that adds the first value's count into the
 * second value and returns the second. Asserts that the first collected result is
 * {@code ("a", 60)}.
 */
@Test
public void testKeyedReduce() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (!objectReuse) {
        env.getConfig().disableObjectReuse();
    } else {
        env.getConfig().enableObjectReuse();
    }

    // Accumulates into, and returns, the second argument.
    final ReduceFunction<Tuple2<String, Integer>> sumIntoSecond =
        new ReduceFunction<Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> reduce(
                    Tuple2<String, Integer> value1,
                    Tuple2<String, Integer> value2) {
                value2.f1 += value1.f1;
                return value2;
            }
        };

    final DataSet<Tuple2<String, Integer>> source = env.fromCollection(REDUCE_DATA);
    final DataSet<Tuple2<String, Integer>> reduced = source.groupBy(0).reduce(sumIntoSecond);

    final Tuple2<String, Integer> first = reduced.collect().get(0);
    assertEquals(new Tuple2<>("a", 60), first);
}
/**
 * Reduces all of {@code REDUCE_DATA} without grouping, with object reuse set according to
 * the {@code objectReuse} flag. The reduce function accumulates into and returns the first
 * value when the first value's count is divisible by three, and the second value
 * otherwise. Asserts that the first collected result is {@code ("a", 60)}.
 */
@Test
public void testGlobalReduce() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (!objectReuse) {
        env.getConfig().disableObjectReuse();
    } else {
        env.getConfig().enableObjectReuse();
    }

    // Which argument is mutated and returned depends on the first argument's count.
    final ReduceFunction<Tuple2<String, Integer>> sumIntoEither =
        new ReduceFunction<Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> reduce(
                    Tuple2<String, Integer> value1,
                    Tuple2<String, Integer> value2) {
                if (value1.f1 % 3 != 0) {
                    value2.f1 += value1.f1;
                    return value2;
                }
                value1.f1 += value2.f1;
                return value1;
            }
        };

    final DataSet<Tuple2<String, Integer>> source = env.fromCollection(REDUCE_DATA);
    final DataSet<Tuple2<String, Integer>> reduced = source.reduce(sumIntoEither);

    final Tuple2<String, Integer> first = reduced.collect().get(0);
    assertEquals(new Tuple2<>("a", 60), first);
}
/**
 * With object reuse enabled and parallelism 3, forwards every element of {@code elements}
 * to the main output and emits {@code "sideout-" + element} to one side output that is
 * consumed by two separate sinks. Asserts the sorted contents of both side-output sinks
 * and of the main-output sink.
 */
@Test
public void testSideOutputWithMultipleConsumersWithObjectReuse() throws Exception {
    final OutputTag<String> sideOutputTag = new OutputTag<String>("side"){};

    final TestListResultSink<String> firstSideSink = new TestListResultSink<>();
    final TestListResultSink<String> secondSideSink = new TestListResultSink<>();
    final TestListResultSink<Integer> mainSink = new TestListResultSink<>();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    env.setParallelism(3);

    // Passes each element through and mirrors it, as a string, to the side output.
    final ProcessFunction<Integer, Integer> passThroughWithSideOutput =
        new ProcessFunction<Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void processElement(
                    Integer value, Context ctx, Collector<Integer> out) throws Exception {
                out.collect(value);
                ctx.output(sideOutputTag, "sideout-" + value);
            }
        };

    final DataStream<Integer> source = env.fromCollection(elements);
    final SingleOutputStreamOperator<Integer> passThroughStream =
        source.process(passThroughWithSideOutput);

    // The same side output is attached to two independent sinks.
    passThroughStream.getSideOutput(sideOutputTag).addSink(firstSideSink);
    passThroughStream.getSideOutput(sideOutputTag).addSink(secondSideSink);
    passThroughStream.addSink(mainSink);

    env.execute();

    assertEquals(
        Arrays.asList("sideout-1", "sideout-2", "sideout-3", "sideout-4", "sideout-5"),
        firstSideSink.getSortedResult());
    assertEquals(
        Arrays.asList("sideout-1", "sideout-2", "sideout-3", "sideout-4", "sideout-5"),
        secondSideSink.getSortedResult());
    assertEquals(Arrays.asList(1, 2, 3, 4, 5), mainSink.getSortedResult());
}
/**
 * With object reuse enabled and parallelism 3, forwards every element of {@code elements}
 * to the main output while emitting {@code "sideout-" + element} to a string-typed side
 * output and the constant {@code 13} to an integer-typed side output. Asserts the sorted
 * contents of all three sinks.
 */
@Test
public void testDifferentSideOutputTypes() throws Exception {
    final OutputTag<String> stringTag = new OutputTag<String>("string"){};
    final OutputTag<Integer> intTag = new OutputTag<Integer>("int"){};

    final TestListResultSink<String> stringSideSink = new TestListResultSink<>();
    final TestListResultSink<Integer> intSideSink = new TestListResultSink<>();
    final TestListResultSink<Integer> mainSink = new TestListResultSink<>();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    env.setParallelism(3);

    // Passes each element through and writes to both differently typed side outputs.
    final ProcessFunction<Integer, Integer> passThroughWithSideOutputs =
        new ProcessFunction<Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void processElement(
                    Integer value, Context ctx, Collector<Integer> out) throws Exception {
                out.collect(value);
                ctx.output(stringTag, "sideout-" + value);
                ctx.output(intTag, 13);
            }
        };

    final DataStream<Integer> source = env.fromCollection(elements);
    final SingleOutputStreamOperator<Integer> passThroughStream =
        source.process(passThroughWithSideOutputs);

    passThroughStream.getSideOutput(stringTag).addSink(stringSideSink);
    passThroughStream.getSideOutput(intTag).addSink(intSideSink);
    passThroughStream.addSink(mainSink);

    env.execute();

    assertEquals(
        Arrays.asList("sideout-1", "sideout-2", "sideout-3", "sideout-4", "sideout-5"),
        stringSideSink.getSortedResult());
    assertEquals(Arrays.asList(13, 13, 13, 13, 13), intSideSink.getSortedResult());
    assertEquals(Arrays.asList(1, 2, 3, 4, 5), mainSink.getSortedResult());
}
/**
 * Builds and executes the job named "Partitioning Program": a {@code TimeStampingSource}
 * feeding an {@code IdMapper}, keyed by field 0, into a {@code TimestampingSink}. The
 * environment uses the given parallelism, object reuse, a buffer timeout of 5 and
 * at-least-once checkpointing with an interval of 1000.
 *
 * @param parallelism parallelism set on the execution environment
 * @throws Exception if executing the job fails
 */
private static void runPartitioningProgram(int parallelism) throws Exception {
    final StreamExecutionEnvironment streamEnv =
        StreamExecutionEnvironment.getExecutionEnvironment();

    // Environment-level settings.
    streamEnv.setParallelism(parallelism);
    streamEnv.getConfig().enableObjectReuse();
    streamEnv.setBufferTimeout(5L);
    streamEnv.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);

    // Topology: source -> map -> keyBy(0) -> sink.
    streamEnv
        .addSource(new TimeStampingSource())
        .map(new IdMapper<Tuple2<Long, Long>>())
        .keyBy(0)
        .addSink(new TimestampingSink());

    streamEnv.execute("Partitioning Program");
}