/**
 * Writes {@code elems} through {@link TextIO#write()} into a fresh temporary directory, runs the
 * pipeline, and hands the result to {@code assertOutputFiles} for verification.
 *
 * @param elems lines to write
 * @param header header passed to {@code withHeader}
 * @param footer footer passed to {@code withFooter}
 * @param numShards {@code 1} disables sharding; any value greater than {@code 1} requests that
 *     many shards using {@code ShardNameTemplate.INDEX_OF_MAX}; any other value leaves the write's
 *     sharding configuration untouched
 * @throws Exception if creating the temporary directory, running the pipeline, or checking the
 *     output fails
 */
private void runTestWrite(String[] elems, String header, String footer, int numShards)
    throws Exception {
  // Every invocation gets its own directory under the shared temp folder.
  Path scratchDir = Files.createTempDirectory(tempFolder.getRoot().toPath(), "testwrite");
  String targetPath = scratchDir.resolve("file.txt").toString();
  ResourceId baseFilename = FileBasedSink.convertToFileResourceIfPossible(targetPath);

  TextIO.TypedWrite<String, Void> write =
      TextIO.write()
          .to(baseFilename)
          .withHeader(header)
          .withFooter(footer)
          .withOutputFilenames();

  // Only explicit shard counts change the sharding setup.
  if (numShards > 1) {
    write = write.withNumShards(numShards).withShardNameTemplate(ShardNameTemplate.INDEX_OF_MAX);
  } else if (numShards == 1) {
    write = write.withoutSharding();
  }

  p.apply("CreateInput", Create.of(Arrays.asList(elems)).withCoder(StringUtf8Coder.of()))
      .apply(write);
  p.run();

  // Fall back to the default unwindowed template when the write carries no shard template.
  assertOutputFiles(
      elems,
      header,
      footer,
      numShards,
      baseFilename,
      firstNonNull(
          write.getShardTemplate(), DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE));
}
@Test @Category(NeedsRunner.class) public void testWindowedWritesWithOnceTrigger() throws Throwable { // Tests for https://issues.apache.org/jira/browse/BEAM-3169 PCollection<String> data = p.apply(Create.of("0", "1", "2")) .apply( Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))) // According to this trigger, all data should be written. // However, the continuation of this trigger is elementCountAtLeast(1), // so with a buggy implementation that used a GBK before renaming files, // only 1 file would be renamed. .triggering(AfterPane.elementCountAtLeast(3)) .withAllowedLateness(Duration.standardMinutes(1)) .discardingFiredPanes()); PCollection<String> filenames = data.apply( TextIO.write() .to(new File(tempFolder.getRoot(), "windowed-writes").getAbsolutePath()) .withNumShards(2) .withWindowedWrites() .<Void>withOutputFilenames()) .getPerDestinationOutputFilenames() .apply(Values.create()); PAssert.that(filenames.apply(TextIO.readAll())).containsInAnyOrder("0", "1", "2"); p.run(); }