@Override public PCollection<T> expand(PBegin input) { checkNotNull(getFilepattern(), "filepattern"); checkNotNull(getSchema(), "schema"); if (getMatchConfiguration().getWatchInterval() == null && !getHintMatchesManyFiles()) { return input.apply( "Read", org.apache.beam.sdk.io.Read.from( createSource( getFilepattern(), getMatchConfiguration().getEmptyMatchTreatment(), getRecordClass(), getSchema()))); } // All other cases go through ReadAll. ReadAll<T> readAll = (getRecordClass() == GenericRecord.class) ? (ReadAll<T>) readAllGenericRecords(getSchema()) : readAll(getRecordClass()); readAll = readAll.withMatchConfiguration(getMatchConfiguration()); return input .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of())) .apply("Via ReadAll", readAll); }
/**
 * Matches every incoming filepattern and emits the metadata of the matched files.
 *
 * <p>Without a configured watch interval each filepattern is matched by a {@code ParDo} over
 * {@code MatchFn}, constructed with the configured empty-match treatment. With a watch
 * interval, {@code Watch.growthOf} polls using {@code MatchPollFn} at that interval, with the
 * configured per-input termination condition, and only the values of its output are kept. In
 * both cases the result is passed through {@code Reshuffle.viaRandomKey()} before being
 * returned.
 *
 * @param input the filepatterns to match
 * @return the reshuffled metadata of the matched files
 */
@Override
public PCollection<MatchResult.Metadata> expand(PCollection<String> input) {
  boolean watchesForNewFiles = getConfiguration().getWatchInterval() != null;

  PCollection<MatchResult.Metadata> matches;
  if (watchesForNewFiles) {
    matches =
        input
            .apply(
                "Continuously match filepatterns",
                Watch.growthOf(
                        Contextful.of(new MatchPollFn(), Requirements.empty()),
                        new ExtractFilenameFn())
                    .withPollInterval(getConfiguration().getWatchInterval())
                    .withTerminationPerInput(getConfiguration().getWatchTerminationCondition()))
            // Keep only the value half of what Watch emits.
            .apply(Values.create());
  } else {
    matches =
        input.apply(
            "Match filepatterns",
            ParDo.of(new MatchFn(getConfiguration().getEmptyMatchTreatment())));
  }

  return matches.apply(Reshuffle.viaRandomKey());
}
/**
 * Builds the source used to read text for this transform.
 *
 * <p>A {@code TextSource} is created from the filepattern, the empty-match treatment of the
 * match configuration and the delimiter, then wrapped by {@code CompressedSource.from} with
 * this transform's compression setting applied.
 *
 * @return the compression-aware source over the configured filepattern
 */
protected FileBasedSource<String> getSource() {
  TextSource textSource =
      new TextSource(
          getFilepattern(),
          getMatchConfiguration().getEmptyMatchTreatment(),
          getDelimiter());
  return CompressedSource.from(textSource).withCompression(getCompression());
}
/**
 * Registers this configuration's settings as display data.
 *
 * <p>The empty-match treatment is always added, rendered through {@code toString()}. The
 * watch interval is registered with {@code addIfNotNull}.
 *
 * @param builder the display-data builder to populate
 */
@Override
public void populateDisplayData(DisplayData.Builder builder) {
  // Continue on whatever builder add(...) returns, exactly as the fluent chain would.
  DisplayData.Builder afterTreatment =
      builder.add(
          DisplayData.item("emptyMatchTreatment", getEmptyMatchTreatment().toString())
              .withLabel("Treatment of filepatterns that match no files"));

  afterTreatment.addIfNotNull(
      DisplayData.item("watchForNewFilesInterval", getWatchInterval())
          .withLabel("Interval to watch for new files"));
}
}