/**
 * Creates a {@code Read} transform that will read from an {@code HDFSFileSource} with the given
 * file name or pattern ("glob") using the given Hadoop
 * {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}, with key-value types specified
 * by the given key class and value class.
 */
public static <K, V, T extends FileInputFormat<K, V>> Read.Bounded<KV<K, V>> readFrom(
    String filepattern, Class<T> formatClass, Class<K> keyClass, Class<V> valueClass) {
  return Read.from(from(filepattern, formatClass, keyClass, valueClass));
}
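A hedged usage sketch of the factory above (the path is illustrative, not from the original): reading newline-delimited text through Hadoop's TextInputFormat yields LongWritable/Text pairs.

// Illustrative only: any FileInputFormat with matching key/value classes works here.
PCollection<KV<LongWritable, Text>> lines =
    pipeline.apply(
        HDFSFileSource.readFrom(
            "hdfs://namenode/path/to/data/*", // hypothetical path
            TextInputFormat.class,
            LongWritable.class,
            Text.class));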
@Override
public PCollection<T> expand(PBegin input) {
  getConfiguration().validate();
  return input.apply(org.apache.beam.sdk.io.Read.from(createSource()));
}
@Override
public PCollection<String> expand(PBegin input) {
  ConnectionConfiguration connectionConfiguration = getConnectionConfiguration();
  checkState(connectionConfiguration != null, "withConnectionConfiguration() is required");
  return input.apply(
      org.apache.beam.sdk.io.Read.from(new BoundedElasticsearchSource(this, null, null, null)));
}
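A hedged usage sketch (endpoint, index, and type names are illustrative): the expand() above only succeeds once a connection configuration has been supplied, e.g.

// Illustrative values only.
PCollection<String> documents =
    pipeline.apply(
        ElasticsearchIO.read()
            .withConnectionConfiguration(
                ElasticsearchIO.ConnectionConfiguration.create(
                    new String[] {"http://localhost:9200"}, "my-index", "my-type")));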
@Override
public PCollection<HCatRecord> expand(PBegin input) {
  checkArgument(getTable() != null, "withTable() is required");
  checkArgument(getConfigProperties() != null, "withConfigProperties() is required");
  return input.apply(org.apache.beam.sdk.io.Read.from(new BoundedHCatalogSource(this)));
}
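Similarly, a hedged sketch of satisfying the two checkArgument preconditions above (the metastore URI and table name are illustrative):

// Illustrative values only.
Map<String, String> configProperties =
    ImmutableMap.of("hive.metastore.uris", "thrift://metastore-host:9083");
PCollection<HCatRecord> records =
    pipeline.apply(
        HCatalogIO.read()
            .withConfigProperties(configProperties)
            .withTable("my_table"));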
@Test
public void failsWhenCustomUnboundedSourceIsNotSerializable() {
  thrown.expect(IllegalArgumentException.class);
  Read.from(new NotSerializableUnboundedSource());
}
@Test
public void failsWhenCustomBoundedSourceIsNotSerializable() {
  thrown.expect(IllegalArgumentException.class);
  Read.from(new NotSerializableBoundedSource());
}
@Override
public PCollection<Row> expand(PBegin input) {
  getBigtableConfig().validate();
  BigtableSource source =
      new BigtableSource(getBigtableConfig(), getRowFilter(), getKeyRanges(), null);
  return input.getPipeline().apply(org.apache.beam.sdk.io.Read.from(source));
}
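A hedged sketch of building the transform whose expand() appears above (project, instance, and table IDs are illustrative):

// Illustrative values only; Row here is com.google.bigtable.v2.Row.
PCollection<Row> rows =
    pipeline.apply(
        BigtableIO.read()
            .withProjectId("my-project")
            .withInstanceId("my-instance")
            .withTableId("my-table"));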
@Before
public void setup() {
  source = CountingSource.unboundedWithTimestampFn(new LongToInstantFn());
  longs = p.apply(Read.from(source));
  options = PipelineOptionsFactory.create();
  context = mock(EvaluationContext.class);
  factory = new UnboundedReadEvaluatorFactory(context, options);
  output = bundleFactory.createBundle(longs);
  graph = DirectGraphs.getGraph(p);
  when(context.createBundle(longs)).thenReturn(output);
}
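LongToInstantFn is referenced above but not shown; a plausible sketch, assuming it simply interprets each emitted long as an epoch-millisecond timestamp (an assumption, not the original definition):

// Assumed implementation: treat the counter value as epoch milliseconds.
private static class LongToInstantFn implements SerializableFunction<Long, Instant> {
  @Override
  public Instant apply(Long input) {
    return new Instant(input);
  }
}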
@Before
public void setup() {
  MockitoAnnotations.initMocks(this);
  source = CountingSource.upTo(10L);
  longs = p.apply(Read.from(source));
  options = PipelineOptionsFactory.create();
  factory =
      new BoundedReadEvaluatorFactory(
          context, options, Long.MAX_VALUE /* minimum size for dynamic splits */);
  bundleFactory = ImmutableListBundleFactory.create();
  longsProducer = DirectGraphs.getProducer(longs);
}
@Test
public void matcherProducesUnconsumedValueUnboundedRead() {
  Unbounded<Long> transform = Read.from(CountingSource.unbounded());
  pipeline.apply(transform);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  validateConsumed();
}
@Test
public void matcherProducesUnconsumedValueBoundedRead() {
  Bounded<Long> transform = Read.from(CountingSource.upTo(20L));
  pipeline.apply(transform);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  validateConsumed();
}
@Test
public void testForwardsDisplayData() {
  TestCountingSource src =
      new TestCountingSource(1234) {
        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo", "bar"));
        }
      };
  BoundedReadFromUnboundedSource<KV<Integer, Integer>> read =
      Read.from(src).withMaxNumRecords(5);
  assertThat(DisplayData.from(read), includesDisplayDataFor("source", src));
}
@Override
public PCollection<IndexedRecord> read(PBegin in) {
  LazyAvroCoder<IndexedRecord> lac = LazyAvroCoder.of();
  ExcelHdfsFileSource source =
      ExcelHdfsFileSource.of(
          doAs, path, lac, limit, encoding, sheetName, header, footer, excelFormat.name());
  source.getExtraHadoopConfiguration().addFrom(getExtraHadoopConfiguration());
  source.setLimit(limit);
  PCollection<KV<Void, IndexedRecord>> pc1 =
      in.apply(Read.from(source)).setCoder(source.getDefaultOutputCoder());
  PCollection<IndexedRecord> pc2 = pc1.apply(Values.<IndexedRecord>create());
  return pc2;
}
public static void main(String[] args) {
  Pipeline p = initializePipeline(args);
  KafkaOptions options = getOptions(p);
  FlinkKafkaConsumer08<MyType> kafkaConsumer =
      new FlinkKafkaConsumer08<>(
          options.getKafkaAvroTopic(),
          new AvroSerializationDeserializationSchema<>(MyType.class),
          getKafkaProps(options));
  p.apply(Read.from(UnboundedFlinkSource.of(kafkaConsumer)))
      .setCoder(AvroCoder.of(MyType.class))
      .apply(ParDo.of(new PrintFn<>()));
  p.run();
}
@Test
@Category(NeedsRunner.class)
public void testBoundedSource() {
  long numElements = 1000;
  PCollection<Long> input = p.apply(Read.from(CountingSource.upTo(numElements)));
  addCountingAsserts(input, numElements);
  p.run();
}
@Test
@Category(NeedsRunner.class)
public void testDataflowFile() throws IOException {
  List<String> data = createStringDataset(3, 50);
  String fileName = "file";
  File file = createFileWithData(fileName, data);
  TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 64, null);
  PCollection<String> output = p.apply("ReadFileData", Read.from(source));
  PAssert.that(output).containsInAnyOrder(data);
  p.run();
}
@Test
@Category(NeedsRunner.class)
public void testEmptyBoundedSource() {
  PCollection<Long> input = p.apply(Read.from(CountingSource.upTo(0)));
  PAssert.that(input).empty();
  p.run();
}
@Test
@Category(NeedsRunner.class)
public void testUnboundedSource() {
  long numElements = 1000;
  PCollection<Long> input =
      p.apply(Read.from(CountingSource.unbounded()).withMaxNumRecords(numElements));
  addCountingAsserts(input, numElements);
  p.run();
}
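As an aside, withMaxNumRecords is not the only way to bound an unbounded read; a hedged variant using withMaxReadTime (the duration is illustrative):

// Bounds the same unbounded source by elapsed wall time instead of record count.
PCollection<Long> timeBounded =
    p.apply(Read.from(CountingSource.unbounded())
        .withMaxReadTime(Duration.standardSeconds(10)));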
@Test
public void splitsInputs() {
  Pipeline p = getPipeline();
  PCollection<Long> longs = p.apply(Read.from(MustSplitSource.of(CountingSource.upTo(3))));
  PAssert.that(longs).containsInAnyOrder(0L, 1L, 2L);
  p.run();
}
@Test
public void retainOnlyPrimitivesWithOnlyPrimitivesUnchanged() {
  Pipeline p = Pipeline.create();
  p.apply("Read", Read.from(CountingSource.unbounded()))
      .apply(
          "multi-do",
          ParDo.of(new TestFn()).withOutputTags(new TupleTag<>(), TupleTagList.empty()));
  Components originalComponents = PipelineTranslation.toProto(p).getComponents();
  Collection<String> primitiveComponents =
      QueryablePipeline.getPrimitiveTransformIds(originalComponents);
  assertThat(primitiveComponents, equalTo(originalComponents.getTransformsMap().keySet()));
}