@VisibleForTesting protected void replaceTransforms(Pipeline pipeline) { boolean streaming = options.isStreaming() || containsUnboundedPCollection(pipeline); // Ensure all outputs of all reads are consumed before potentially replacing any // Read PTransforms UnconsumedReads.ensureAllReadsConsumed(pipeline); pipeline.replaceAll(getOverrides(streaming)); }
@Override public ApexRunnerResult run(final Pipeline pipeline) { pipeline.replaceAll(getOverrides());
/**
 * Applies the {@link DirectRunner}'s default transform overrides to the given pipeline.
 *
 * <p>A throwaway runner is constructed from freshly created default options purely to
 * obtain its standard override list.
 *
 * @param p the pipeline whose transforms are replaced in place
 */
public static void performDirectOverrides(Pipeline p) {
  DirectOptions defaultOptions = PipelineOptionsFactory.create().as(DirectOptions.class);
  DirectRunner runner = DirectRunner.fromOptions(defaultOptions);
  p.replaceAll(runner.defaultTransformOverrides());
}
/**
 * Verifies that translation installs the full set of default Flink transform overrides,
 * for both the streaming and the batch translation path.
 */
@Test
public void shouldUseTransformOverrides() {
  // Exercise both translation modes.
  for (boolean streamingMode : new boolean[] {true, false}) {
    FlinkPipelineOptions pipelineOptions = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    pipelineOptions.setStreaming(streamingMode);
    pipelineOptions.setRunner(FlinkRunner.class);
    FlinkPipelineExecutionEnvironment environment =
        new FlinkPipelineExecutionEnvironment(pipelineOptions);
    Pipeline pipeline = Mockito.spy(Pipeline.create(pipelineOptions));

    environment.translate(pipeline);

    // Translation must hand the default override list to Pipeline#replaceAll.
    ArgumentCaptor<ImmutableList> overridesCaptor = ArgumentCaptor.forClass(ImmutableList.class);
    Mockito.verify(pipeline).replaceAll(overridesCaptor.capture());
    ImmutableList<PTransformOverride> appliedOverrides = overridesCaptor.getValue();
    assertThat(appliedOverrides.isEmpty(), is(false));
    assertThat(
        appliedOverrides.size(),
        is(FlinkTransformOverrides.getDefaultOverrides(pipelineOptions).size()));
  }
}
/**
 * Translates the Beam {@link org.apache.beam.sdk.values.PCollection} program into either a
 * {@link org.apache.flink.api.java.DataSet} (batch) or a
 * {@link org.apache.flink.streaming.api.datastream.DataStream} (streaming) program,
 * creating the matching execution environment and pipeline translator along the way.
 *
 * @param flinkRunner the runner, passed through to the streaming translator
 * @param pipeline the pipeline to translate
 */
public void translate(FlinkRunner flinkRunner, Pipeline pipeline) {
  // Reset both environments; exactly one is (re)created below.
  this.flinkBatchEnv = null;
  this.flinkStreamEnv = null;

  // Install the default transform overrides before any translation happens.
  pipeline.replaceAll(FlinkTransformOverrides.getDefaultOverrides(options.isStreaming()));

  // Walk the pipeline to detect whether streaming translation is required.
  PipelineTranslationOptimizer optimizer =
      new PipelineTranslationOptimizer(TranslationMode.BATCH, options);
  optimizer.translate(pipeline);
  TranslationMode detectedMode = optimizer.getTranslationMode();

  FlinkPipelineTranslator pipelineTranslator;
  if (detectedMode != TranslationMode.STREAMING) {
    this.flinkBatchEnv = createBatchExecutionEnvironment();
    pipelineTranslator = new FlinkBatchPipelineTranslator(flinkBatchEnv, options);
  } else {
    this.flinkStreamEnv = createStreamExecutionEnvironment();
    pipelineTranslator =
        new FlinkStreamingPipelineTranslator(flinkRunner, flinkStreamEnv, options);
  }
  pipelineTranslator.translate(pipeline);
}
@Override public PipelineResult run(Pipeline pipeline) { pipeline.replaceAll(ImmutableList.of(JavaReadViaImpulse.boundedOverride()));
pipeline.replaceAll(SparkTransformOverrides.getDefaultOverrides(mOptions.isStreaming()));
TranslationMode translationMode = optimizer.getTranslationMode(); pipeline.replaceAll( FlinkTransformOverrides.getDefaultOverrides(translationMode == TranslationMode.STREAMING));
/** * Depending on if the job is a Streaming or a Batch one, this method creates the necessary * execution environment and pipeline translator, and translates the {@link * org.apache.beam.sdk.values.PCollection} program into a {@link * org.apache.flink.api.java.DataSet} or {@link * org.apache.flink.streaming.api.datastream.DataStream} one. */ public void translate(Pipeline pipeline) { this.flinkBatchEnv = null; this.flinkStreamEnv = null; PipelineTranslationModeOptimizer optimizer = new PipelineTranslationModeOptimizer(options); optimizer.translate(pipeline); // Needs to be done before creating the Flink ExecutionEnvironments prepareFilesToStageForRemoteClusterExecution(options); FlinkPipelineTranslator translator; if (options.isStreaming()) { this.flinkStreamEnv = FlinkExecutionEnvironments.createStreamExecutionEnvironment( options, options.getFilesToStage()); translator = new FlinkStreamingPipelineTranslator(flinkStreamEnv, options); } else { this.flinkBatchEnv = FlinkExecutionEnvironments.createBatchExecutionEnvironment( options, options.getFilesToStage()); translator = new FlinkBatchPipelineTranslator(flinkBatchEnv, options); } pipeline.replaceAll(FlinkTransformOverrides.getDefaultOverrides(options)); translator.translate(pipeline); }
pipeline.replaceAll(Collections.singletonList(JavaReadViaImpulse.boundedOverride()));
@Override public DirectPipelineResult run(Pipeline pipeline) { pipeline.replaceAll(defaultTransformOverrides()); MetricsEnvironment.setMetricsSupported(true); try {
public void translate(Pipeline pipeline) { List<PTransformOverride> overrides = ImmutableList.<PTransformOverride>builder() .add( PTransformOverride.of( PTransformMatchers.classEqualTo(View.CreatePCollectionView.class), new CreateStreamingGearpumpView.Factory())) .add( PTransformOverride.of( PTransformMatchers.splittableParDo(), new SplittableParDo.OverrideFactory())) .add( PTransformOverride.of( PTransformMatchers.splittableProcessKeyedBounded(), new SplittableParDoNaiveBounded.OverrideFactory())) // TODO: [BEAM-5361] Support @RequiresStableInput on Gearpump runner .add( PTransformOverride.of( PTransformMatchers.requiresStableInputParDoMulti(), UnsupportedOverrideFactory.withMessage( "Gearpump runner currently doesn't support @RequiresStableInput annotation."))) .build(); pipeline.replaceAll(overrides); pipeline.traverseTopologically(this); }
@Test public void testGBK() throws Exception { Pipeline p = Pipeline.create(); PAssert.that( p.apply(Create.of(KV.of(42, 0), KV.of(42, 1), KV.of(42, 2))) // Will create one bundle for each value, since direct runner uses 1 bundle per key .apply(Reshuffle.viaRandomKey()) // Multiple bundles will emit values onto the same key 42. // They must be processed sequentially rather than in parallel, since // the trigger firing code expects to receive values sequentially for a key. .apply(GroupByKey.create())) .satisfies( input -> { KV<Integer, Iterable<Integer>> kv = Iterables.getOnlyElement(input); assertEquals(42, kv.getKey().intValue()); assertThat(kv.getValue(), containsInAnyOrder(0, 1, 2)); return null; }); p.replaceAll(Collections.singletonList(JavaReadViaImpulse.boundedOverride())); ReferenceRunner runner = ReferenceRunner.forInProcessPipeline( PipelineTranslation.toProto(p), PipelineOptionsTranslation.toProto(PipelineOptionsFactory.create())); runner.execute(); }
KV.of("ccccc", 4))); p.replaceAll( Arrays.asList( JavaReadViaImpulse.boundedOverride(),
p.replaceAll(Collections.singletonList(JavaReadViaImpulse.boundedOverride()));
KV.of("foo", ImmutableSet.of(3))); p.replaceAll(Collections.singletonList(JavaReadViaImpulse.boundedOverride()));
p.replaceAll(Collections.singletonList(JavaReadViaImpulse.boundedOverride()));