/**
 * Expands this transform into a filepattern-creation step followed by a matching step.
 *
 * <p>The provider returned by {@code getFilepattern()} is materialized as a collection of strings
 * (coded with {@code StringUtf8Coder}); that collection is then passed to {@code matchAll()}
 * configured with {@code getConfiguration()}.
 *
 * @param input the pipeline start this transform is applied to
 * @return the metadata emitted by the {@code matchAll()} step
 */
@Override
public PCollection<MatchResult.Metadata> expand(PBegin input) {
  PCollection<String> filepatterns =
      input.apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of()));
  return filepatterns.apply("Via MatchAll", matchAll().withConfiguration(getConfiguration()));
}
}
p.apply(Create.of(basePath.resolve("*").toString())) .apply( FileIO.matchAll() .continuously( Duration.millis(100),
/**
 * Expands this transform into three chained steps: match, open, and read.
 *
 * <ol>
 *   <li>{@code FileIO.matchAll()} using {@code getMatchConfiguration()};
 *   <li>{@code FileIO.readMatches()} using {@code getCompression()} and with directory treatment
 *       set to {@code DirectoryTreatment.PROHIBIT};
 *   <li>{@code readFiles()} using {@code getDelimiter()}.
 * </ol>
 *
 * @param input the collection of strings handed to {@code FileIO.matchAll()}
 * @return the strings produced by the final {@code readFiles()} step
 */
@Override
public PCollection<String> expand(PCollection<String> input) {
  PCollection<String> lines =
      input
          .apply(FileIO.matchAll().withConfiguration(getMatchConfiguration()))
          .apply(
              FileIO.readMatches()
                  .withCompression(getCompression())
                  .withDirectoryTreatment(DirectoryTreatment.PROHIBIT))
          .apply(readFiles().withDelimiter(getDelimiter()));
  return lines;
}
/**
 * Runs the values of the incoming key/value pairs through {@code FileIO.matchAll()} with
 * {@code EmptyMatchTreatment.DISALLOW}.
 *
 * <p>The output of the match step is not used; the method only returns a {@code PDone} for the
 * input's pipeline.
 *
 * @param input key/value pairs whose string values are passed to the match step
 * @return {@code PDone} bound to the pipeline of {@code input}
 */
@Override
public PDone expand(PCollection<KV<DestinationT, String>> input) {
  PCollection<String> filenames = input.apply(Values.create());
  filenames.apply(FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW));
  return PDone.in(input.getPipeline());
}
}
/**
 * Expands this transform into match, open, and read-via-source steps.
 *
 * <p>{@code getSchema()} is null-checked first. The input then goes through
 * {@code FileIO.matchAll()} (using {@code getMatchConfiguration()}), through
 * {@code FileIO.readMatches()} with {@code DirectoryTreatment.PROHIBIT}, and finally through a
 * {@code ReadAllViaFileBasedSource} built from the desired bundle size, a {@code CreateSourceFn}
 * for the record class and schema string, and an {@code AvroCoder} for the record class and
 * schema.
 *
 * @param input the collection of strings handed to {@code FileIO.matchAll()}
 * @return the elements produced by the "Read all via FileBasedSource" step
 */
@Override
public PCollection<T> expand(PCollection<String> input) {
  checkNotNull(getSchema(), "schema");

  PCollection<T> records =
      input
          .apply(FileIO.matchAll().withConfiguration(getMatchConfiguration()))
          .apply(FileIO.readMatches().withDirectoryTreatment(DirectoryTreatment.PROHIBIT))
          .apply(
              "Read all via FileBasedSource",
              new ReadAllViaFileBasedSource<>(
                  getDesiredBundleSizeBytes(),
                  new CreateSourceFn<>(getRecordClass(), getSchema().toString()),
                  AvroCoder.of(getRecordClass(), getSchema())));
  return records;
}
/**
 * Writes four files into the temp folder (TINY and LARGE, each once with ZIP and once with
 * UNCOMPRESSED), matches them through two glob patterns, reads them with {@code AUTO}
 * compression and a desired bundle size of 10 bytes, and asserts that the output contains TINY
 * twice and LARGE twice in any order.
 */
@Test
@Category(NeedsRunner.class)
public void testReadFiles() throws IOException {
  Path root = tempFolder.getRoot().toPath();

  writeToFile(TINY, tempFolder, "readAllTiny1.zip", ZIP);
  writeToFile(TINY, tempFolder, "readAllTiny2.txt", UNCOMPRESSED);
  writeToFile(LARGE, tempFolder, "readAllLarge1.zip", ZIP);
  writeToFile(LARGE, tempFolder, "readAllLarge2.txt", UNCOMPRESSED);

  // One glob per data set; each is expected to match both the .zip and the .txt variant.
  String tinyGlob = root.resolve("readAllTiny*").toString();
  String largeGlob = root.resolve("readAllLarge*").toString();

  PCollection<String> lines =
      p.apply(Create.of(tinyGlob, largeGlob))
          .apply(FileIO.matchAll())
          .apply(FileIO.readMatches().withCompression(AUTO))
          .apply(TextIO.readFiles().withDesiredBundleSizeBytes(10));

  PAssert.that(lines).containsInAnyOrder(Iterables.concat(TINY, TINY, LARGE, LARGE));
  p.run();
}
/**
 * Applies {@code FileIO.matchAll()} with {@code EmptyMatchTreatment.DISALLOW} to a wildcard
 * pattern under the temp folder and expects the pipeline run to fail with a cause of type
 * {@code FileNotFoundException}.
 *
 * <p>NOTE(review): presumably the temp folder is empty at this point, so the wildcard matches
 * nothing; confirm against the fixture setup.
 */
@Test
@Category(NeedsRunner.class)
public void testMatchAllDisallowEmptyExplicit() throws IOException {
  String wildcardPattern = tmpFolder.getRoot().getAbsolutePath() + "/*";

  p.apply(Create.of(wildcardPattern))
      .apply(FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW));

  thrown.expectCause(isA(FileNotFoundException.class));
  p.run();
}
/**
 * Applies {@code FileIO.matchAll()} with {@code EmptyMatchTreatment.ALLOW_IF_WILDCARD} to a
 * path without wildcards ({@code "/blah"} under the temp folder) and expects the pipeline run to
 * fail with a cause of type {@code FileNotFoundException}.
 *
 * <p>NOTE(review): presumably no file named {@code blah} exists in the temp folder; confirm
 * against the fixture setup.
 */
@Test
@Category(NeedsRunner.class)
public void testMatchAllDisallowEmptyNonWildcard() throws IOException {
  String literalPath = tmpFolder.getRoot().getAbsolutePath() + "/blah";

  p.apply(Create.of(literalPath))
      .apply(FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD));

  thrown.expectCause(isA(FileNotFoundException.class));
  p.run();
}
/**
 * Expands this transform into match, open, and parse-via-source steps.
 *
 * <p>The output coder is obtained from {@code Parse.inferCoder} using {@code getCoder()},
 * {@code getParseFn()} and the pipeline's coder registry. A {@code CreateParseSourceFn} built
 * from the parse function and that coder supplies the sources for
 * {@code ReadAllViaFileBasedSource}, which runs after {@code FileIO.matchAll()} (using
 * {@code getMatchConfiguration()}) and {@code FileIO.readMatches()} with
 * {@code DirectoryTreatment.PROHIBIT}.
 *
 * @param input the collection of strings handed to {@code FileIO.matchAll()}
 * @return the elements produced by the "Parse all via FileBasedSource" step
 */
@Override
public PCollection<T> expand(PCollection<String> input) {
  Coder<T> outputCoder =
      Parse.inferCoder(getCoder(), getParseFn(), input.getPipeline().getCoderRegistry());
  SerializableFunction<GenericRecord, T> parser = getParseFn();
  SerializableFunction<String, FileBasedSource<T>> sourceFactory =
      new CreateParseSourceFn<>(parser, outputCoder);

  PCollection<T> parsed =
      input
          .apply(FileIO.matchAll().withConfiguration(getMatchConfiguration()))
          .apply(FileIO.readMatches().withDirectoryTreatment(DirectoryTreatment.PROHIBIT))
          .apply(
              "Parse all via FileBasedSource",
              new ReadAllViaFileBasedSource<>(
                  getDesiredBundleSizeBytes(), sourceFactory, outputCoder));
  return parsed;
}
PAssert.that( p.apply("Create existing", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/*")) .apply("MatchAll existing", FileIO.matchAll())) .containsInAnyOrder(metadata(firstPath, firstSize), metadata(secondPath, secondSize)); .apply( "MatchAll non-existing ALLOW", FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW))) .containsInAnyOrder(); .apply( "MatchAll non-existing ALLOW_IF_WILDCARD", FileIO.matchAll() .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD))) .containsInAnyOrder(); "Create non-existing wildcard + default", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/blah*")) .apply("MatchAll non-existing default", FileIO.matchAll())) .containsInAnyOrder();
.getPerDestinationOutputFilenames() .apply(Values.create()) .apply(FileIO.matchAll()) .apply(FileIO.readMatches()) .apply(