/**
 * Runs this {@link TestPipeline} using its own configured options, unwrapping any {@code
 * AssertionError} that is raised during testing so test failures surface directly.
 *
 * @return the result of running the pipeline
 */
@Override
public PipelineResult run() {
  // Delegate to the options-taking overload with this pipeline's own options.
  return run(getOptions());
}
/**
 * Builds a test {@link HadoopInputFormatBoundedSource} for the given InputFormat and coders,
 * then returns the bounded sources produced by splitting it with the test pipeline's options.
 */
private <K, V> List<BoundedSource<KV<K, V>>> getBoundedSourceList(
    Class<?> inputFormatClass,
    Class<K> inputFormatKeyClass,
    Class<V> inputFormatValueClass,
    Coder<K> keyCoder,
    Coder<V> valueCoder)
    throws Exception {
  // A desired bundle size of 0 lets the source choose its own split granularity.
  return getTestHIFSource(
          inputFormatClass, inputFormatKeyClass, inputFormatValueClass, keyCoder, valueCoder)
      .split(0, p.getOptions());
}
}
/**
 * Creates a test {@link HadoopInputFormatBoundedSource} from the supplied InputFormat class and
 * coders, and splits it into a list of bounded sources using the test pipeline's options.
 */
private <K, V> List<BoundedSource<KV<K, V>>> getBoundedSourceList(
    Class<?> inputFormatClass,
    Class<K> inputFormatKeyClass,
    Class<V> inputFormatValueClass,
    Coder<K> keyCoder,
    Coder<V> valueCoder)
    throws Exception {
  HadoopInputFormatBoundedSource<K, V> source =
      getTestHIFSource(
          inputFormatClass, inputFormatKeyClass, inputFormatValueClass, keyCoder, valueCoder);
  // Passing 0 as the desired bundle size lets the source decide how to split itself.
  return source.split(0, p.getOptions());
}
}
/** Resets the test metrics sink and registers it on the pipeline's options before each test. */
@Before
public void init() {
  TestMetricsSink.clear();
  // Route metrics pushed by the pipeline into the in-memory test sink.
  pipeline.getOptions().setMetricsSink(TestMetricsSink.class);
}
/** This test validates that reader and its parent source reads the same records. */ @Test public void testReaderAndParentSourceReadsSameData() throws Exception { InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>( serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit(mockInputSplit)); BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions()); SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(reader, p.getOptions()); }
/** This test validates that reader and its parent source reads the same records. */ @Test public void testReaderAndParentSourceReadsSameData() throws Exception { InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>( serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit(mockInputSplit)); BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions()); SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(reader, p.getOptions()); }
/** Validation must fail when stable unique names are required and two transforms collide. */
@Test
public void testStableUniqueNameError() {
  // Register the expected failure message up front; the second unnamed Create below
  // collides with the first, so validate() must reject the pipeline.
  thrown.expectMessage("do not have stable unique names");
  pipeline.getOptions().setStableUniqueNames(CheckEnabled.ERROR);
  pipeline.apply(Create.of(5, 6, 7));
  pipeline.apply(Create.of(5, 6, 7));
  ((Pipeline) pipeline).validate(pipeline.getOptions());
}
/** * This test validates behavior of {@link HadoopInputFormatBoundedSource} if RecordReader object * creation fails. */ @Test public void testReadIfCreateRecordReaderFails() throws Exception { thrown.expect(Exception.class); thrown.expectMessage("Exception in creating RecordReader"); InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class); Mockito.when( mockInputFormat.createRecordReader( Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class))) .thenThrow(new IOException("Exception in creating RecordReader")); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>( serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit()); boundedSource.setInputFormatObj(mockInputFormat); SourceTestUtils.readFromSource(boundedSource, p.getOptions()); }
/** With WARNING enabled, colliding transform names validate successfully but log a warning. */
@Test
public void testStableUniqueNameWarning() {
  pipeline.getOptions().setStableUniqueNames(CheckEnabled.WARNING);
  pipeline.enableAbandonedNodeEnforcement(false);
  // Two unnamed Create transforms produce colliding names.
  pipeline.apply(Create.of(5, 6, 7));
  pipeline.apply(Create.of(5, 6, 7));
  ((Pipeline) pipeline).validate(pipeline.getOptions());
  logged.verifyWarn("do not have stable unique names");
}
/** With the check OFF, colliding transform names validate cleanly and log nothing. */
@Test
public void testStableUniqueNameOff() {
  pipeline.getOptions().setStableUniqueNames(CheckEnabled.OFF);
  pipeline.enableAbandonedNodeEnforcement(false);
  // Two unnamed Create transforms produce colliding names.
  pipeline.apply(Create.of(5, 6, 7));
  pipeline.apply(Create.of(5, 6, 7));
  ((Pipeline) pipeline).validate(pipeline.getOptions());
  logged.verifyNotLogged("do not have stable unique names");
}
/** The app name derived from the test method should include class and method names. */
@Test
public void testCreationOfPipelineOptionsFromReallyVerboselyNamedTestCase() throws Exception {
  String appName = pipeline.getOptions().as(ApplicationNameOptions.class).getAppName();
  assertThat(
      appName,
      startsWith(
          "TestPipelineTest$TestPipelineCreationTest"
              + "-testCreationOfPipelineOptionsFromReallyVerboselyNamedTestCase"));
}
/** An option value whose toString throws must fail the run with a display-data error. */
@Test
@Category(NeedsRunner.class)
public void pipelineOptionsDisplayDataExceptionShouldFail() {
  // Serializes fine via @JsonValue, but blows up when display data calls toString().
  Object faultyValue =
      new Object() {
        @JsonValue
        public int getValue() {
          return 42;
        }

        @Override
        public String toString() {
          throw new RuntimeException("oh noes!!");
        }
      };
  p.getOptions().as(ObjectPipelineOptions.class).setValue(faultyValue);
  p.apply(Create.of(1, 2, 3));
  expectedException.expectMessage(
      ProxyInvocationHandler.PipelineOptionsDisplayData.class.getName());
  expectedException.expectMessage("oh noes!!");
  p.run();
}
/** Tests that a checkpoint can still be finalized after its reader has been closed. */
@Test
public void closeWithActiveCheckpoints() throws Exception {
  setupOneMessage();
  PubsubReader pubsubReader = primSource.createReader(p.getOptions(), null);
  pubsubReader.start();
  PubsubCheckpoint mark = pubsubReader.getCheckpointMark();
  // Close first, then finalize — finalization must succeed regardless.
  pubsubReader.close();
  mark.finalizeCheckpoint();
}
}
@Category({ValidatesRunner.class, UsesAttemptedMetrics.class, UsesCounterMetrics.class}) @Test public void test() throws Exception { pipeline .apply( // Use maxReadTime to force unbounded mode. GenerateSequence.from(0).to(NUM_ELEMENTS).withMaxReadTime(Duration.standardDays(1))) .apply(ParDo.of(new CountingDoFn())); pipeline.run(); // give metrics pusher time to push Thread.sleep((pipeline.getOptions().getMetricsPushPeriod() + 1L) * 1000); assertThat(TestMetricsSink.getCounterValue(), is(NUM_ELEMENTS)); }
/** Splits an unbounded counting source and verifies all splits together read every element. */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testUnboundedSourceSplits() throws Exception {
  long total = 1000;
  int splitCount = 10;
  UnboundedSource<Long, ?> unbounded = CountingSource.unbounded();
  List<? extends UnboundedSource<Long, ?>> shards = unbounded.split(splitCount, p.getOptions());
  assertEquals("Expected exact splitting", splitCount, shards.size());

  // With an even split, reading perShard records from each shard covers the whole range.
  long perShard = total / splitCount;
  assertEquals("Expected even splits", total, perShard * splitCount);
  PCollectionList<Long> readParts = PCollectionList.empty(p);
  for (int idx = 0; idx < shards.size(); ++idx) {
    readParts =
        readParts.and(
            p.apply("split" + idx, Read.from(shards.get(idx)).withMaxNumRecords(perShard)));
  }
  PCollection<Long> flattened = readParts.apply(Flatten.pCollections());
  addCountingAsserts(flattened, total);
  p.run();
}
/**
 * Validates that {@link HadoopInputFormatBoundedSource#createReader(PipelineOptions)} fails with
 * an {@link IOException} when {@link HadoopInputFormatBoundedSource#split(long, PipelineOptions)}
 * has not been called first.
 */
@Test
public void testCreateReaderIfSplitNotCalled() throws Exception {
  HadoopInputFormatBoundedSource<Text, Employee> unsplitSource =
      getTestHIFSource(
          EmployeeInputFormat.class,
          Text.class,
          Employee.class,
          WritableCoder.of(Text.class),
          AvroCoder.of(Employee.class));
  thrown.expect(IOException.class);
  thrown.expectMessage("Cannot create reader as source is not split yet.");
  // Creating a reader without splitting first must be rejected.
  unsplitSource.createReader(p.getOptions());
}
/**
 * Validates that {@link HadoopInputFormatBoundedSource#createReader(PipelineOptions)} throws an
 * {@link IOException} if {@link HadoopInputFormatBoundedSource#split(long, PipelineOptions)} was
 * never invoked on the source.
 */
@Test
public void testCreateReaderIfSplitNotCalled() throws Exception {
  HadoopInputFormatBoundedSource<Text, Employee> source =
      getTestHIFSource(
          EmployeeInputFormat.class,
          Text.class,
          Employee.class,
          WritableCoder.of(Text.class),
          AvroCoder.of(Employee.class));
  thrown.expect(IOException.class);
  thrown.expectMessage("Cannot create reader as source is not split yet.");
  // Asking for a reader before splitting must be rejected.
  source.createReader(p.getOptions());
}
/** A basic smoke test that ensures there is no crash at pipeline construction time. */
@Test
public void testMinimalWordCount() throws Exception {
  // Mock out GCS so construction does not touch the network.
  p.getOptions().as(GcsOptions.class).setGcsUtil(buildMockGcsUtil());

  p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
      .apply(
          FlatMapElements.into(TypeDescriptors.strings())
              .via((String line) -> Arrays.asList(line.split("[^a-zA-Z']+"))))
      .apply(Filter.by((String token) -> !token.isEmpty()))
      .apply(Count.perElement())
      .apply(
          MapElements.into(TypeDescriptors.strings())
              .via((KV<String, Long> counted) -> counted.getKey() + ": " + counted.getValue()))
      .apply(TextIO.write().to("gs://your-output-bucket/and-output-prefix"));
}
@Test public void readOneMessage() throws IOException { setupOneMessage(); PubsubReader reader = primSource.createReader(p.getOptions(), null); // Read one message. assertTrue(reader.start()); assertEquals(DATA, data(reader.getCurrent())); assertFalse(reader.advance()); // ACK the message. PubsubCheckpoint checkpoint = reader.getCheckpointMark(); checkpoint.finalizeCheckpoint(); reader.close(); }
/** A DoFn taking a PipelineOptions parameter should see the options set before run(). */
@Test
@Category(ValidatesRunner.class)
public void testPipelineOptionsParameter() {
  String expectedOption = "not fake anymore";
  // Options are read at execution time, so setting them before run() is sufficient.
  pipeline.getOptions().as(MyOptions.class).setFakeOption(expectedOption);
  PCollection<String> output =
      pipeline
          .apply(Create.of(1))
          .apply(
              ParDo.of(
                  new DoFn<Integer, String>() {
                    @ProcessElement
                    public void process(OutputReceiver<String> r, PipelineOptions options) {
                      r.output(options.as(MyOptions.class).getFakeOption());
                    }
                  }));
  PAssert.that(output).containsInAnyOrder("not fake anymore");
  pipeline.run();
}
}