@Test
@Category(NeedsRunner.class)
public void testReadWatchForNewFiles() throws IOException, InterruptedException {
  // Directory that the windowed writer fills and the watching reader polls.
  final Path basePath = tempFolder.getRoot().toPath().resolve("readWatch");
  basePath.toFile().mkdir();

  // Writer side: emit 0..9 at a slow rate so elements land in several small
  // windows, each of which is flushed to its own file as soon as a pane fires.
  p.apply(GenerateSequence.from(0).to(10).withRate(1, Duration.millis(100)))
      .apply(
          Window.<Long>into(FixedWindows.of(Duration.millis(150)))
              .withAllowedLateness(Duration.ZERO)
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
              .discardingFiredPanes())
      .apply(ToString.elements())
      .apply(
          TextIO.write()
              .to(basePath.resolve("data").toString())
              .withNumShards(1)
              .withWindowedWrites());

  // Reader side: watch the directory for new files every 100ms and stop once
  // no new output has appeared for 3 seconds.
  PCollection<String> readLines =
      p.apply(
          TextIO.read()
              .from(basePath.resolve("*").toString())
              .watchForNewFiles(
                  Duration.millis(100),
                  Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3))));

  // Every written element must eventually be observed by the watching read.
  PAssert.that(readLines).containsInAnyOrder("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
  p.run();
}
}
@Test @Category(NeedsRunner.class) public void testWindowedWritesWithOnceTrigger() throws Throwable { // Tests for https://issues.apache.org/jira/browse/BEAM-3169 PCollection<String> data = p.apply(Create.of("0", "1", "2")) .apply( Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))) // According to this trigger, all data should be written. // However, the continuation of this trigger is elementCountAtLeast(1), // so with a buggy implementation that used a GBK before renaming files, // only 1 file would be renamed. .triggering(AfterPane.elementCountAtLeast(3)) .withAllowedLateness(Duration.standardMinutes(1)) .discardingFiredPanes()); PCollection<String> filenames = data.apply( TextIO.write() .to(new File(tempFolder.getRoot(), "windowed-writes").getAbsolutePath()) .withNumShards(2) .withWindowedWrites() .<Void>withOutputFilenames()) .getPerDestinationOutputFilenames() .apply(Values.create()); PAssert.that(filenames.apply(TextIO.readAll())).containsInAnyOrder("0", "1", "2"); p.run(); }
.apply(TextIO.write().to(options.getOutput()).withWindowedWrites().withNumShards(1));
"Write File(s)", TextIO.write() .withWindowedWrites() .withNumShards(options.getNumShards()) .to(
/**
 * Entry point: reads messages from a Kafka topic, windows them into fixed
 * 10-second windows with a processing-time trigger, and writes each pane to a
 * single windowed text file shard at the configured output location.
 *
 * <p>Fixes: C-style array declaration ({@code String args[]}) replaced with the
 * idiomatic {@code String[] args}, and the non-standard {@code final static}
 * modifier order replaced with the conventional {@code static} form.
 */
public static void main(String[] args) throws Exception {
  final Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      // Source: Kafka records keyed by Long with String payloads; metadata dropped.
      .apply(KafkaIO.<Long, String>read()
          .withBootstrapServers(options.getBootstrap())
          .withTopic(options.getTopic())
          .withKeyDeserializer(LongDeserializer.class)
          .withValueDeserializer(StringDeserializer.class)
          .withoutMetadata())
      .apply(Values.<String>create())
      // Fire 10s after the first element of each pane; discard fired panes so
      // each file contains only new data.
      .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10)))
          .triggering(Repeatedly.forever(
              AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.standardSeconds(10))))
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes())
      // Sink: one shard per window so each pane yields exactly one file.
      .apply(TextIO.write()
          .to(options.getOutput())
          .withWindowedWrites()
          .withNumShards(1));
  pipeline.run().waitUntilFinish();
}
}
/**
 * Entry point: consumes messages from an ActiveMQ queue via JMS, extracts each
 * message payload, windows the payloads into fixed 30-second windows, and
 * writes one file per window to HDFS.
 *
 * <p>Fixes: non-standard {@code final static} modifier order replaced with the
 * conventional {@code static} form.
 *
 * <p>NOTE(review): the broker URL, queue name, and output path are hard-coded;
 * consider promoting them to pipeline options.
 */
public static void main(String[] args) throws Exception {
  ActiveMQConnectionFactory connectionFactory =
      new ActiveMQConnectionFactory("tcp://localhost:61616");
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(JmsIO.read().withConnectionFactory(connectionFactory).withQueue("BEAM"))
      // Keep only the textual payload of each JMS record.
      .apply(ParDo.of(new DoFn<JmsRecord, String>() {
        @ProcessElement
        public void processElement(ProcessContext processContext) {
          JmsRecord element = processContext.element();
          processContext.output(element.getPayload());
        }
      }))
      .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(30))))
      // One shard per window so each window produces exactly one output file.
      .apply(TextIO.write()
          .to("hdfs://localhost/uc2")
          .withWindowedWrites()
          .withNumShards(1));
  pipeline.run();
}
}
@Test public void shouldRecognizeAndTranslateStreamingPipeline() { FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class); options.setRunner(TestFlinkRunner.class); options.setFlinkMaster("[auto]"); FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options); Pipeline pipeline = Pipeline.create(); pipeline .apply(GenerateSequence.from(0).withRate(1, Duration.standardSeconds(1))) .apply( ParDo.of( new DoFn<Long, String>() { @ProcessElement public void processElement(ProcessContext c) throws Exception { c.output(Long.toString(c.element())); } })) .apply(Window.into(FixedWindows.of(Duration.standardHours(1)))) .apply(TextIO.write().withNumShards(1).withWindowedWrites().to("/dummy/path")); flinkEnv.translate(pipeline); // no exception should be thrown }
/**
 * Entry point: reads up to 5 records from an MQTT topic, decodes each payload
 * to a String, windows the results into fixed 30-second windows, and writes one
 * file per window to HDFS.
 *
 * <p>Fixes: non-standard {@code final static} modifier order replaced with the
 * conventional {@code static} form.
 */
public static void main(String[] args) throws Exception {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      // Bounded read: stop after 5 records so the pipeline can terminate.
      .apply(MqttIO.read()
          .withConnectionConfiguration(
              MqttIO.ConnectionConfiguration.create("tcp://localhost:1883", "BEAM", "BEAM"))
          .withMaxNumRecords(5))
      .apply(ParDo.of(new DoFn<byte[], String>() {
        @ProcessElement
        public void processElement(ProcessContext processContext) {
          byte[] element = processContext.element();
          // NOTE(review): new String(byte[]) decodes with the platform default
          // charset; if the payloads are UTF-8, pass StandardCharsets.UTF_8
          // explicitly for deterministic behavior — confirm the producer's encoding.
          processContext.output(new String(element));
        }
      }))
      .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(30))))
      // One shard per window so each window produces exactly one output file.
      .apply(TextIO.write()
          .to("hdfs://localhost/uc2")
          .withWindowedWrites()
          .withNumShards(1));
  pipeline.run();
}
}
@Test public void shouldRecognizeAndTranslateStreamingPipeline() { FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class); options.setRunner(TestFlinkRunner.class); options.setFlinkMaster("[auto]"); FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options); Pipeline pipeline = Pipeline.create(); pipeline .apply(GenerateSequence.from(0).withRate(1, Duration.standardSeconds(1))) .apply( ParDo.of( new DoFn<Long, String>() { @ProcessElement public void processElement(ProcessContext c) throws Exception { c.output(Long.toString(c.element())); } })) .apply(Window.into(FixedWindows.of(Duration.standardHours(1)))) .apply(TextIO.write().withNumShards(1).withWindowedWrites().to("/dummy/path")); flinkEnv.translate(pipeline); // no exception should be thrown }
@Override
public PDone expand(final PCollection<String> input) {
  // Resolve the user-supplied prefix to a file resource; temp files are staged
  // in the same directory so the final rename stays on one filesystem.
  final ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  TextIO.Write writer =
      TextIO.write()
          .to(new PerWindowFiles(resource))
          .withTempDirectory(resource.getCurrentDirectory())
          .withWindowedWrites();
  // Shard count is optional; when unset, the runner picks it.
  if (numShards != null) {
    writer = writer.withNumShards(numShards);
  }
  return input.apply(writer);
}

/**
@Override
public PDone expand(PCollection<String> input) {
  // Turn the configured prefix into a file resource and write windowed output
  // next to it, staging temp files in the same directory.
  ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  TextIO.Write windowedWrite =
      TextIO.write()
          .to(new PerWindowFiles(resource))
          .withTempDirectory(resource.getCurrentDirectory())
          .withWindowedWrites();
  // Only pin the shard count when one was explicitly configured.
  return input.apply(
      numShards == null ? windowedWrite : windowedWrite.withNumShards(numShards));
}
@Override public PDone expand(PCollection<String> input) { // Verify that the input has a compatible window type. checkArgument( input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder()); ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); return input.apply( TextIO.write() .to(new PerWindowFiles(resource)) .withTempDirectory(resource.getCurrentDirectory()) .withWindowedWrites() .withNumShards(3)); } }