/**
 * Pulls elements out of a {@link Source.Reader}, stopping once {@code n} elements were collected.
 *
 * <p>Passing {@code Integer.MAX_VALUE} as {@code n} means "drain whatever is left"; for any other
 * {@code n} the reader is asserted to still have an element on every iteration.
 *
 * @param reader the reader to pull elements from
 * @param n the number of elements to read, or {@code Integer.MAX_VALUE} to read all remaining ones
 * @param started whether the reader has already been started; if not, the first element is
 *     obtained via {@code start()} rather than {@code advance()}
 * @return the elements read, in the order the reader produced them
 * @throws IOException if the reader fails
 */
private static <T> List<T> readNItemsFromReader(Source.Reader<T> reader, int n, boolean started)
    throws IOException {
  final boolean readAll = (n == Integer.MAX_VALUE);
  List<T> items = new ArrayList<>();
  for (int count = 0; count < n; count++) {
    boolean hasElement;
    if (count == 0 && !started) {
      // Only the very first pull on an unstarted reader goes through start().
      hasElement = reader.start();
    } else {
      hasElement = reader.advance();
    }
    if (!readAll) {
      assertTrue(hasElement);
    }
    if (!hasElement) {
      break;
    }
    items.add(reader.getCurrent());
  }
  return items;
}
/**
 * Marks this instance as closed and closes the underlying reader.
 *
 * <p>The {@code closed} flag is set before the reader is closed, so it is set even if closing
 * the reader fails. An {@link IOException} from the reader is rethrown wrapped in a
 * {@link RuntimeException}.
 */
private void close() {
  closed = true;
  try {
    reader.close();
  } catch (final IOException closeFailure) {
    throw new RuntimeException(closeFailure);
  }
}
@Test public void testReadXMLInvalidRecordClassWithCustomEventHandler() throws IOException { File file = tempFolder.newFile("trainXMLSmall"); Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8)); ValidationEventHandler validationEventHandler = event -> { throw new RuntimeException("MyCustomValidationEventHandler failure mesage"); }; BoundedSource<WrongTrainType> source = XmlIO.<WrongTrainType>read() .from(file.toPath().toString()) .withRootElement("trains") .withRecordElement("train") .withRecordClass(WrongTrainType.class) .withValidationEventHandler(validationEventHandler) .createSource(); exception.expect(RuntimeException.class); // JAXB internationalizes the error message. So this is all we can match for. exception.expectMessage("MyCustomValidationEventHandler failure mesage"); try (Reader<WrongTrainType> reader = source.createReader(null)) { List<WrongTrainType> results = new ArrayList<>(); for (boolean available = reader.start(); available; available = reader.advance()) { WrongTrainType train = reader.getCurrent(); results.add(train); } } }
@Test public void testCloseUnstartedFilePatternReader() throws IOException { PipelineOptions options = PipelineOptionsFactory.create(); List<KV<IntWritable, Text>> data1 = createRandomRecords(3, 10, 0); File file1 = createFileWithData("file1", data1); List<KV<IntWritable, Text>> data2 = createRandomRecords(3, 10, 10); createFileWithData("file2", data2); List<KV<IntWritable, Text>> data3 = createRandomRecords(3, 10, 20); createFileWithData("file3", data3); List<KV<IntWritable, Text>> data4 = createRandomRecords(3, 10, 30); createFileWithData("otherfile", data4); HDFSFileSource<KV<IntWritable, Text>, IntWritable, Text> source = HDFSFileSource.from( new File(file1.getParent(), "file*").toString(), SequenceFileInputFormat.class, IntWritable.class, Text.class); Source.Reader<KV<IntWritable, Text>> reader = source.createReader(options); // Closing an unstarted FilePatternReader should not throw an exception. try { reader.close(); } catch (Exception e) { fail("Closing an unstarted FilePatternReader should not throw an exception"); } }
@Test public void testCloseUnstartedFilePatternReader() throws IOException { PipelineOptions options = PipelineOptionsFactory.create(); List<String> data1 = createStringDataset(3, 50); File file1 = createFileWithData("file1", data1); List<String> data2 = createStringDataset(3, 50); createFileWithData("file2", data2); List<String> data3 = createStringDataset(3, 50); createFileWithData("file3", data3); List<String> data4 = createStringDataset(3, 50); createFileWithData("otherfile", data4); TestFileBasedSource source = new TestFileBasedSource(new File(file1.getParent(), "file*").getPath(), 64, null); Reader<String> reader = source.createReader(options); // Closing an unstarted FilePatternReader should not throw an exception. try { reader.close(); } catch (Exception e) { throw new AssertionError( "Closing an unstarted FilePatternReader should not throw an exception", e); } }
/**
 * Attempts to load the next element into {@code next}.
 *
 * <p>Runs inside the scope returned by {@code MetricsEnvironment.scopedMetricsContainer}. If this
 * instance is already closed nothing is read. If {@code seekNext()} reports no further element,
 * {@code close()} is called.
 *
 * @return {@code SUCCESSFULLY_OBTAINED_NEXT} if {@code next} was populated, otherwise
 *     {@code FAILED_TO_OBTAIN_NEXT}
 * @throws RuntimeException wrapping any exception raised while reading
 */
private boolean tryProduceNext() {
  try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metricsContainer)) {
    if (closed) {
      return FAILED_TO_OBTAIN_NEXT;
    }

    checkState(next == null, "unexpected non-null value for next");

    if (!seekNext()) {
      // Nothing left to read: release the reader.
      close();
      return FAILED_TO_OBTAIN_NEXT;
    }

    next =
        WindowedValue.timestampedValueInGlobalWindow(
            reader.getCurrent(), reader.getCurrentTimestamp());
    return SUCCESSFULLY_OBTAINED_NEXT;
  } catch (final Exception failure) {
    throw new RuntimeException("Failed to read data.", failure);
  }
}
/**
 * Returns the reader's current element as a message and then advances the reader.
 *
 * <p>If {@code available} is false no element is read and {@code null} is returned; the reader is
 * still advanced so that {@code available} reflects the next call.
 *
 * @return a message wrapping the current element and its timestamp, or {@code null} if no element
 *     was available
 * @throws RuntimeException wrapping any exception raised while reading or advancing; a failure of
 *     the subsequent {@code close()} is attached to it as a suppressed exception
 */
@Override
public Message read() {
  Message message = null;
  try {
    if (available) {
      T data = reader.getCurrent();
      org.joda.time.Instant timestamp = reader.getCurrentTimestamp();
      message =
          new DefaultMessage(
              WindowedValue.timestampedValueInGlobalWindow(data, timestamp),
              timestamp.getMillis());
    }
    available = reader.advance();
  } catch (Exception e) {
    RuntimeException failure = new RuntimeException(e);
    try {
      close();
    } catch (RuntimeException closeFailure) {
      // Do not let a cleanup failure hide the read failure that triggered the cleanup.
      failure.addSuppressed(closeFailure);
    }
    throw failure;
  }
  return message;
}
/**
 * Calls {@code reader.start()} and returns its result.
 *
 * <p>When {@code enableMetrics} is set, the call runs inside a try-with-resources block on the
 * {@link Closeable} returned by {@code MetricsEnvironment.scopedMetricsContainer} for the metrics
 * container of {@code stepName}, and {@code container.updateMetrics()} is called before the
 * result is returned. Otherwise the reader is started directly.
 *
 * @param reader the reader to start
 * @return the value returned by {@code reader.start()}
 * @throws IOException declared for the reader call and for closing the scoped resource
 */
public boolean invokeStart(ReaderT reader) throws IOException { if (enableMetrics) { try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(container.getMetricsContainer(stepName))) { boolean result = reader.start(); container.updateMetrics(); return result; } } else { return reader.start(); } } public boolean invokeAdvance(ReaderT reader) throws IOException {
/**
 * Calls {@code reader.advance()} and returns its result.
 *
 * <p>With metrics enabled, the call runs inside the scope returned by
 * {@code MetricsEnvironment.scopedMetricsContainer} for this step's metrics container, and
 * {@code container.updateMetrics(stepName)} is called before returning.
 *
 * @param reader the reader to advance
 * @return the value returned by {@code reader.advance()}
 * @throws IOException declared for the reader call and for closing the scoped resource
 */
public boolean invokeAdvance(ReaderT reader) throws IOException {
  if (!enableMetrics) {
    return reader.advance();
  }
  try (Closeable ignored =
      MetricsEnvironment.scopedMetricsContainer(container.getMetricsContainer(stepName))) {
    final boolean advanced = reader.advance();
    container.updateMetrics(stepName);
    return advanced;
  }
}
}
/**
 * Advances the given reader and reports whether it produced another element.
 *
 * <p>If {@code enableMetrics} is set, {@code reader.advance()} is executed within the scoped
 * metrics container of {@code stepName}, followed by {@code container.updateMetrics(stepName)}.
 * If it is not set, the reader is advanced directly.
 *
 * @param reader the reader to advance
 * @return the value returned by {@code reader.advance()}
 * @throws IOException declared for the reader call and for closing the scoped resource
 */
public boolean invokeAdvance(ReaderT reader) throws IOException {
  if (!enableMetrics) {
    return reader.advance();
  }
  try (Closeable ignored =
      MetricsEnvironment.scopedMetricsContainer(container.getMetricsContainer(stepName))) {
    final boolean hasNext = reader.advance();
    container.updateMetrics(stepName);
    return hasNext;
  }
}
}
/**
 * Creates the reader from the deserialized pipeline options and starts it.
 *
 * <p>The result of {@code reader.start()} is stored in {@code available}.
 *
 * @param context the task context (not used by this method)
 * @param startTime the start time (not used by this method)
 * @throws RuntimeException wrapping any exception raised while creating or starting the reader;
 *     a failure of the subsequent {@code close()} is attached to it as a suppressed exception
 */
@Override
public void open(TaskContext context, Instant startTime) {
  try {
    PipelineOptions options = serializedOptions.get();
    this.reader = createReader(options);
    this.available = reader.start();
  } catch (Exception e) {
    RuntimeException failure = new RuntimeException(e);
    try {
      close();
    } catch (RuntimeException closeFailure) {
      // Do not let a cleanup failure hide the failure that triggered the cleanup.
      failure.addSuppressed(closeFailure);
    }
    throw failure;
  }
}
/**
 * Calls {@code reader.start()} and returns its result.
 *
 * <p>With metrics enabled, the call runs inside the scope returned by
 * {@code MetricsEnvironment.scopedMetricsContainer} for this step's metrics container, and
 * {@code container.updateMetrics(stepName)} is called before returning.
 *
 * @param reader the reader to start
 * @return the value returned by {@code reader.start()}
 * @throws IOException declared for the reader call and for closing the scoped resource
 */
public boolean invokeStart(ReaderT reader) throws IOException {
  if (!enableMetrics) {
    return reader.start();
  }
  try (Closeable ignored =
      MetricsEnvironment.scopedMetricsContainer(container.getMetricsContainer(stepName))) {
    final boolean started = reader.start();
    container.updateMetrics(stepName);
    return started;
  }
}
/**
 * Starts the given reader and reports whether it produced a first element.
 *
 * <p>If {@code enableMetrics} is set, {@code reader.start()} is executed within the scoped
 * metrics container of {@code stepName}, followed by {@code container.updateMetrics(stepName)}.
 * If it is not set, the reader is started directly.
 *
 * @param reader the reader to start
 * @return the value returned by {@code reader.start()}
 * @throws IOException declared for the reader call and for closing the scoped resource
 */
public boolean invokeStart(ReaderT reader) throws IOException {
  if (!enableMetrics) {
    return reader.start();
  }
  try (Closeable ignored =
      MetricsEnvironment.scopedMetricsContainer(container.getMetricsContainer(stepName))) {
    final boolean hasFirst = reader.start();
    container.updateMetrics(stepName);
    return hasFirst;
  }
}
/**
 * Closes the reader carried by a removal notification.
 *
 * @param notification the removal notification whose value is the reader to close
 * @throws RuntimeException wrapping an {@link IOException} raised while closing the reader
 */
@Override
public void onRemoval(
    final RemovalNotification<MicrobatchSource<?, ?>, Source.Reader<?>> notification) {
  final Source.Reader<?> removedReader = notification.getValue();
  try {
    removedReader.close();
  } catch (final IOException closeFailure) {
    throw new RuntimeException(closeFailure);
  }
}
}
/**
 * Starts the given reader and collects every element it yields.
 *
 * <p>The reader is not closed by this method.
 *
 * @param reader an unstarted reader
 * @return all elements in the order the reader produced them
 * @throws IOException if the reader fails
 */
private <T> List<T> readEverythingFromReader(Reader<T> reader) throws IOException {
  List<T> collected = new ArrayList<>();
  boolean hasCurrent = reader.start();
  while (hasCurrent) {
    collected.add(reader.getCurrent());
    hasCurrent = reader.advance();
  }
  return collected;
}
/**
 * Closes the underlying reader if one has been created.
 *
 * @throws RuntimeException wrapping an {@link IOException} raised while closing the reader
 */
@Override
public void close() {
  if (reader == null) {
    // No reader was assigned, so there is nothing to release.
    return;
  }
  try {
    reader.close();
  } catch (IOException closeFailure) {
    throw new RuntimeException(closeFailure);
  }
}
/**
 * Calls {@code reader.advance()} and returns its result.
 *
 * <p>With metrics enabled, the call runs inside the scope returned by
 * {@code MetricsEnvironment.scopedMetricsContainer} for this step's metrics container, and
 * {@code container.updateMetrics()} is called before returning.
 *
 * @param reader the reader to advance
 * @return the value returned by {@code reader.advance()}
 * @throws IOException declared for the reader call and for closing the scoped resource
 */
public boolean invokeAdvance(ReaderT reader) throws IOException {
  if (!enableMetrics) {
    return reader.advance();
  }
  try (Closeable ignored =
      MetricsEnvironment.scopedMetricsContainer(container.getMetricsContainer(stepName))) {
    final boolean advanced = reader.advance();
    container.updateMetrics();
    return advanced;
  }
}
}