/**
 * Registers this transform's configuration as display data.
 *
 * <p>After delegating to the superclass, the following entries are contributed:
 *
 * <ul>
 *   <li>{@code compressionType} - always added, from {@code getCompression().toString()};
 *   <li>{@code filePattern} - added only when not null;
 *   <li>{@code matchConfiguration} - included as nested display data;
 *   <li>{@code delimiter} - added only when a custom delimiter has been set.
 * </ul>
 *
 * @param builder the display data builder to populate
 */
@Override
public void populateDisplayData(DisplayData.Builder builder) {
  super.populateDisplayData(builder);
  builder
      .add(
          DisplayData.item("compressionType", getCompression().toString())
              .withLabel("Compression Type"))
      .addIfNotNull(DisplayData.item("filePattern", getFilepattern()).withLabel("File Pattern"))
      .include("matchConfiguration", getMatchConfiguration())
      .addIfNotNull(
          // Arrays.toString(null) returns the non-null string "null", which would make
          // addIfNotNull always register a bogus delimiter entry. Pass a real null through
          // instead so the item is skipped when no custom delimiter is configured.
          DisplayData.item(
                  "delimiter", getDelimiter() == null ? null : Arrays.toString(getDelimiter()))
              .withLabel("Custom delimiter to split records"));
}
} // closes the enclosing class body; its opening is not part of this line
@Override public PCollection<String> expand(PBegin input) { checkNotNull(getFilepattern(), "need to set the filepattern of a TextIO.Read transform"); if (getMatchConfiguration().getWatchInterval() == null && !getHintMatchesManyFiles()) { return input.apply("Read", org.apache.beam.sdk.io.Read.from(getSource())); } // All other cases go through ReadAll. return input .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of())) .apply( "Via ReadAll", readAll() .withCompression(getCompression()) .withMatchConfiguration(getMatchConfiguration()) .withDelimiter(getDelimiter())); }
/**
 * Applies {@link MatchConfiguration#withEmptyMatchTreatment} to this transform's match
 * configuration and passes the result to {@code withMatchConfiguration}.
 *
 * @param treatment the empty-match treatment to set on the match configuration
 * @return the {@code Read} returned by {@code withMatchConfiguration}
 */
public Read withEmptyMatchTreatment(EmptyMatchTreatment treatment) {
  return this.withMatchConfiguration(
      this.getMatchConfiguration().withEmptyMatchTreatment(treatment));
}
/**
 * Applies {@link MatchConfiguration#continuously} to this transform's match configuration,
 * using the given poll interval and termination condition, and passes the result to
 * {@code withMatchConfiguration}.
 *
 * <p>This works only in runners supporting {@link Kind#SPLITTABLE_DO_FN}.
 *
 * @param pollInterval the interval forwarded to {@code continuously}
 * @param terminationCondition the termination condition forwarded to {@code continuously}
 * @return the {@code Read} returned by {@code withMatchConfiguration}
 */
@Experimental(Kind.SPLITTABLE_DO_FN)
public Read watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return this.withMatchConfiguration(
      this.getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
/**
 * Builds the source used to read this transform's files.
 *
 * <p>A {@code TextSource} is created from the filepattern, the match configuration's
 * empty-match treatment and the delimiter. It is then wrapped with
 * {@code CompressedSource.from} and configured with this transform's compression.
 *
 * @return the compressed, text-based source for this read
 */
protected FileBasedSource<String> getSource() {
  TextSource textSource =
      new TextSource(
          getFilepattern(), getMatchConfiguration().getEmptyMatchTreatment(), getDelimiter());
  return CompressedSource.from(textSource).withCompression(getCompression());
}