PCollection<String> data = pipeline.apply("ReadFromGDELTFile", TextIO.read().from(options.getInput()));
@Test
public void testInitialSplitGzipModeTxt() throws Exception {
  PipelineOptions options = TestPipeline.testingPipelineOptions();
  long desiredBundleSize = 1000;
  File largeTxt = writeToFile(LARGE, tempFolder, "large.txt", UNCOMPRESSED);
  // Sanity check: file is at least 2 bundles long.
  assertThat(largeTxt.length(), greaterThan(2 * desiredBundleSize));
  FileBasedSource<String> source =
      TextIO.read().from(largeTxt.getPath()).withCompression(GZIP).getSource();
  List<? extends FileBasedSource<String>> splits = source.split(desiredBundleSize, options);
  // Exactly 1 split, even though splittable text file, since using GZIP mode.
  assertThat(splits, hasSize(equalTo(1)));
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
}
"ReadFromSource", TextIO.read() .from(options.getInputFilePattern()) .watchForNewFiles(DEFAULT_POLL_INTERVAL, Growth.never()))
.apply("ReadFromHDFS", TextIO.read().from(options.getInput().toString()));
p.apply(TextIO.read().from(options.getInputFile()))
    .apply(ParDo.of(new ExtractHashtags()))
    .apply(Window.into(windowFn))
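// ExtractHashtags and windowFn are referenced above but not defined in the
// snippet; the following is a plausible sketch only. It assumes hashtags are
// whitespace-delimited tokens starting with '#', and uses one-minute fixed
// windows purely as an example windowFn.
static class ExtractHashtags extends DoFn<String, String> {
  @ProcessElement
  public void processElement(@Element String line, OutputReceiver<String> out) {
    for (String token : line.split("\\s+")) {
      if (token.startsWith("#")) {
        out.output(token);
      }
    }
  }
}

WindowFn<Object, IntervalWindow> windowFn = FixedWindows.of(Duration.standardMinutes(1));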
.apply("Read from source", TextIO.read().from(options.getInputFilePattern())) .apply( TransformTextViaJavascript.newBuilder()
p.apply("ReadMyFile", TextIO.read().from(inputFile.getPath())) .apply(sample) .apply(Flatten.iterables())
p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
if (null != options.getSitesFilepath()) {
  requests =
      p.apply("ReadSites", TextIO.read().from(options.getSitesFilepath()))
          .apply(new SitesToShards.SitesToStreamVariantsShardsTransform(prototype));
} else {
.apply(TextIO.read().from(options.getInput()))
p.apply(TextIO.read().from(inputFilePath))
p.apply("ReadLines", TextIO.read().from(options.getInput())) .apply("ParseVariantIds", ParDo.of(new DoFn<String, String>() { @ProcessElement
p.apply("ReadLines", TextIO.read().from(options.getInput())) .apply("ParseVariantIds", ParDo.of(new DoFn<String, String>() { @ProcessElement
pipeline
    .apply(TextIO.read().from(options.getInputFile()))
p.apply("ReadMyFile", TextIO.read().from(options.getInputFile())) .apply("TransformParsingsToBigtable", ParDo.of(MUTATION_TRANSFORM)) .apply("WriteToBigtable", CloudBigtableIO.writeToTable(config));
.apply("ReadFromGDELTFile", TextIO.read().from(options.getInput())) .apply("TakeASample", Sample.<String>any(10)); read.apply(ParDo.of(new DoFn<String, Void>() {
  return pipeline
      .apply(read)
      .apply(MapElements.into(TypeDescriptor.of(String.class)).via(KV::getValue));
} else {
  return pipeline.apply(TextIO.read().from(path));