/**
 * Creates a {@code Read} transform reading an {@code HDFSFileSource} for the given file name or
 * pattern ("glob"), using the supplied Hadoop
 * {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} with the given key and value
 * classes.
 *
 * @param filepattern file name or glob pattern to read
 * @param formatClass Hadoop input format used to parse the files
 * @param keyClass class of the record keys
 * @param valueClass class of the record values
 * @return a bounded read producing {@code KV<K, V>} records
 */
public static <K, V, T extends FileInputFormat<K, V>> Read.Bounded<KV<K, V>> readFrom(
    String filepattern, Class<T> formatClass, Class<K> keyClass, Class<V> valueClass) {
  // Delegate source construction to the sibling from(...) factory, then wrap it as a Read.
  return Read.from(from(filepattern, formatClass, keyClass, valueClass));
}
/**
 * Builds a bounded {@code Read} over an {@code HDFSFileSource} identified by a file name or
 * glob pattern, parsed with the given Hadoop
 * {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} and typed by the provided key
 * and value classes.
 *
 * @param filepattern file name or glob pattern to read
 * @param formatClass Hadoop input format for the underlying files
 * @param keyClass record key class
 * @param valueClass record value class
 * @return a {@code Read.Bounded} of {@code KV<K, V>}
 */
public static <K, V, T extends FileInputFormat<K, V>> Read.Bounded<KV<K, V>> readFrom(
    String filepattern, Class<T> formatClass, Class<K> keyClass, Class<V> valueClass) {
  // The inner from(...) factory creates the source; Read.from turns it into a transform.
  return Read.from(from(filepattern, formatClass, keyClass, valueClass));
}
/**
 * Creates a bounded read over a synthetic source configured by {@code options}.
 *
 * @param options synthetic source configuration; must not be null
 * @return a bounded read producing {@code KV<byte[], byte[]>} records
 */
public static Read.Bounded<KV<byte[], byte[]>> readFrom(SyntheticSourceOptions options) {
  checkNotNull(options, "Input synthetic source options should not be null.");
  SyntheticBoundedSource syntheticSource = new SyntheticBoundedSource(options);
  return Read.from(syntheticSource);
}
// Expands this transform: validates the user-supplied configuration, then reads from the
// source produced by createSource(). NOTE(review): createSource() is defined elsewhere —
// presumably it returns a Beam Source yielding T; confirm against its declaration.
@Override
public PCollection<T> expand(PBegin input) {
  // Fail fast on an invalid configuration before any source is constructed.
  getConfiguration().validate();
  return input.apply(org.apache.beam.sdk.io.Read.from(createSource()));
}
}
/**
 * Expands this transform. Only bounded sources are supported: the source is wrapped in a
 * {@code BoundedSourceWrapper} and read via the standard bounded {@code Read} transform.
 *
 * @throws UnsupportedOperationException if the underlying source is unbounded
 */
@Override
public PCollection<T> expand(PBegin input) {
  // Guard clause: reject unbounded sources up front.
  if (!source.isBounded()) {
    throw new UnsupportedOperationException("Unbounded is not supported for now.");
  }
  org.apache.beam.sdk.io.Read.Bounded<T> boundedRead =
      org.apache.beam.sdk.io.Read.from(new BoundedSourceWrapper<>(source.asBounded(), this));
  return input.apply(boundedRead);
}
// Expands this transform by wrapping the source in a BoundedToUnboundedSourceAdapter so it
// is executed through the unbounded read code path.
@Override
public PCollection<T> expand(PBegin input) {
  return input.getPipeline().apply(Read.from(new BoundedToUnboundedSourceAdapter<>(source)));
}
/**
 * Expands this transform into a bounded read over Elasticsearch.
 *
 * @throws IllegalStateException if no connection configuration was supplied
 */
@Override
public PCollection<String> expand(PBegin input) {
  ConnectionConfiguration config = getConnectionConfiguration();
  checkState(config != null, "withConnectionConfiguration() is required");
  // The three nulls leave shard/slice/query scoping unset, matching the default source setup.
  BoundedElasticsearchSource elasticsearchSource =
      new BoundedElasticsearchSource(this, null, null, null);
  return input.apply(org.apache.beam.sdk.io.Read.from(elasticsearchSource));
}
/** A serializable custom bounded source must be accepted by {@code Read.from}. */
@Test
public void succeedsWhenCustomBoundedSourceIsSerializable() {
  SerializableBoundedSource boundedSource = new SerializableBoundedSource();
  // Should not throw: the source is serializable.
  Read.from(boundedSource);
}
/**
 * Expands this transform into a bounded read over an HCatalog table.
 *
 * @throws IllegalArgumentException if the table or the config properties were not supplied
 */
@Override
public PCollection<HCatRecord> expand(PBegin input) {
  checkArgument(getTable() != null, "withTable() is required");
  checkArgument(getConfigProperties() != null, "withConfigProperties() is required");
  BoundedHCatalogSource hcatalogSource = new BoundedHCatalogSource(this);
  return input.apply(org.apache.beam.sdk.io.Read.from(hcatalogSource));
}
/** A non-serializable custom unbounded source must be rejected by {@code Read.from}. */
@Test
public void failsWhenCustomUnboundedSourceIsNotSerializable() {
  // The expectation must be registered before the call that is supposed to throw.
  thrown.expect(IllegalArgumentException.class);
  NotSerializableUnboundedSource unboundedSource = new NotSerializableUnboundedSource();
  Read.from(unboundedSource);
}
/** A non-serializable custom bounded source must be rejected by {@code Read.from}. */
@Test
public void failsWhenCustomBoundedSourceIsNotSerializable() {
  // Register the expected exception before triggering it.
  thrown.expect(IllegalArgumentException.class);
  NotSerializableBoundedSource boundedSource = new NotSerializableBoundedSource();
  Read.from(boundedSource);
}
/**
 * Expands this transform: reads messages from Pub/Sub and attaches a stats-recording step.
 */
@Override
public PCollection<PubsubMessage> expand(PBegin input) {
  // The stats DoFn tracks the configured subscription/topic and attribute names.
  StatsFn statsFn =
      new StatsFn(pubsubFactory, subscription, topic, timestampAttribute, idAttribute);
  return input
      .getPipeline()
      .begin()
      .apply(Read.from(new PubsubSource(this)))
      .apply("PubsubUnboundedSource.Stats", ParDo.of(statsFn));
}
/**
 * Builds the import pipeline: reads the sequence files matching the source pattern (in
 * shuffled order), converts each HBase result to Bigtable mutations, and writes them out.
 *
 * @param opts import options supplying the source pattern, sink, and pipeline tweaks
 * @return the fully wired (but not yet run) pipeline
 */
@VisibleForTesting
static Pipeline buildPipeline(ImportOptions opts) {
  Pipeline p = Pipeline.create(Utils.tweakOptions(opts));
  p.apply(
          "Read Sequence File",
          Read.from(new ShuffledSource<>(createSource(opts.getSourcePattern()))))
      .apply("Create Mutations", ParDo.of(new HBaseResultToMutationFn()))
      .apply("Write to Bigtable", createSink(opts));
  return p;
}
/**
 * Expands this transform into a bounded read over Bigtable rows, scoped by the configured
 * row filter and key ranges.
 */
@Override
public PCollection<Row> expand(PBegin input) {
  // Validate the Bigtable configuration before building the source.
  getBigtableConfig().validate();
  BigtableSource bigtableSource =
      new BigtableSource(getBigtableConfig(), getRowFilter(), getKeyRanges(), null);
  return input.getPipeline().apply(org.apache.beam.sdk.io.Read.from(bigtableSource));
}
/**
 * Expands this transform into a bounded read over a MongoDB collection.
 *
 * @throws IllegalArgumentException if the URI, database, or collection was not supplied
 */
@Override
public PCollection<Document> expand(PBegin input) {
  // Checks stay in this order so the first missing setting is the one reported.
  checkArgument(uri() != null, "withUri() is required");
  checkArgument(database() != null, "withDatabase() is required");
  checkArgument(collection() != null, "withCollection() is required");
  BoundedMongoDbSource mongoSource = new BoundedMongoDbSource(this);
  return input.apply(org.apache.beam.sdk.io.Read.from(mongoSource));
}
// Expands this transform: builds a FixedFlowInputBoundedSource from the component's
// properties (schema, fixed values, row count) and reads it as a bounded PCollection.
@Override
public PCollection<IndexedRecord> expand(PBegin begin) {
  return begin.apply(
      Read.from(
          new FixedFlowInputBoundedSource()
              .withSchema(properties.schemaFlow.schema.getValue())
              .withValues(properties.values.getValue())
              .withNbRows(properties.nbRows.getValue())));
}
}
// Expands this transform: configures a FixedFlowInputBoundedSource with the component's
// schema, fixed values, and row count, then reads it as a bounded PCollection.
@Override
public PCollection<IndexedRecord> expand(PBegin begin) {
  return begin.apply(
      Read.from(
          new FixedFlowInputBoundedSource()
              .withSchema(properties.schemaFlow.schema.getValue())
              .withValues(properties.values.getValue())
              .withNbRows(properties.nbRows.getValue())));
}
}
// Test fixture: wires an unbounded CountingSource through the pipeline and mocks the
// EvaluationContext so UnboundedReadEvaluatorFactory can be exercised in isolation.
// NOTE(review): statement order matters — `longs` must exist before `output` and the
// stubbing below are created.
@Before
public void setup() {
  source = CountingSource.unboundedWithTimestampFn(new LongToInstantFn());
  longs = p.apply(Read.from(source));
  options = PipelineOptionsFactory.create();
  context = mock(EvaluationContext.class);
  factory = new UnboundedReadEvaluatorFactory(context, options);
  output = bundleFactory.createBundle(longs);
  graph = DirectGraphs.getGraph(p);
  // Any bundle the evaluator asks the context to create for `longs` is the one we inspect.
  when(context.createBundle(longs)).thenReturn(output);
}
/** Round-trips an unbounded Read through its proto representation and back. */
@Test
public void testToFromProtoUnbounded() throws Exception {
  // Only meaningful when the parameterized source is unbounded.
  assumeThat(source, instanceOf(UnboundedSource.class));
  UnboundedSource<?, ?> unboundedSource = (UnboundedSource<?, ?>) this.source;
  Read.Unbounded<?> readTransform = Read.from(unboundedSource);
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
  ReadPayload readPayload = ReadTranslation.toProto(readTransform, sdkComponents);
  assertThat(readPayload.getIsBounded(), equalTo(RunnerApi.IsBounded.Enum.UNBOUNDED));
  UnboundedSource<?, ?> roundTripped = ReadTranslation.unboundedSourceFromProto(readPayload);
  assertThat(roundTripped, equalTo(source));
}
@Test public void testToFromProtoBounded() throws Exception { // TODO: Split into two tests. assumeThat(source, instanceOf(BoundedSource.class)); BoundedSource<?> boundedSource = (BoundedSource<?>) this.source; Read.Bounded<?> boundedRead = Read.from(boundedSource); SdkComponents components = SdkComponents.create(); components.registerEnvironment(Environments.createDockerEnvironment("java")); ReadPayload payload = ReadTranslation.toProto(boundedRead, components); assertThat(payload.getIsBounded(), equalTo(RunnerApi.IsBounded.Enum.BOUNDED)); BoundedSource<?> deserializedSource = ReadTranslation.boundedSourceFromProto(payload); assertThat(deserializedSource, equalTo(source)); }