/**
 * Builds a {@code CreateSource} over 25 pseudo-random integers, encoded with
 * {@code VarIntCoder}, and runs {@code SourceTestUtils.assertSplitAtFractionExhaustive} on it.
 *
 * <p>The generator is created with a fixed seed so that every run sees the same elements and a
 * failure can be reproduced.
 */
@Test
public void testSourceSplitAtFraction() throws Exception {
  final int elementCount = 25;
  // Fixed seed: an unseeded Random would produce different input on every run.
  Random random = new Random(0L);
  List<Integer> elements = new ArrayList<>(elementCount);
  for (int i = 0; i < elementCount; i++) {
    elements.add(random.nextInt());
  }

  CreateSource<Integer> source = CreateSource.fromIterable(elements, VarIntCoder.of());
  SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
}
}
/**
 * Runs {@code SourceTestUtils.assertSplitAtFractionExhaustive} on a {@code TextSource} prepared
 * from the UTF-8 bytes of {@code line}.
 */
@Test
public void testSplittingSource() throws Exception {
  byte[] contents = line.getBytes(UTF_8);
  TextSource textSource = prepareSource(contents);

  SourceTestUtils.assertSplitAtFractionExhaustive(textSource, PipelineOptionsFactory.create());
}
/**
 * Runs {@code SourceTestUtils.assertSplitAtFractionExhaustive} on sources that use the two-byte
 * delimiter {@code |*}.
 *
 * <p>The inputs are every prefix/suffix combination of {@code ""}, {@code "|"}, {@code "*"} and
 * {@code "|*"} wrapped around a fixed body, so whole and partial delimiters appear at both ends
 * of the data.
 */
@Test
public void testSplittingSourceWithCustomDelimiter() throws Exception {
  final String infix = "first|*second|*|*third";
  final String[] affixes = {"", "|", "*", "|*"};

  // Prefix-major order, 16 inputs in total.
  for (String prefix : affixes) {
    for (String suffix : affixes) {
      String contents = prefix + infix + suffix;
      SourceTestUtils.assertSplitAtFractionExhaustive(
          TextIOReadTest.prepareSource(
              tempFolder, contents.getBytes(UTF_8), new byte[] {'|', '*'}),
          PipelineOptionsFactory.create());
    }
  }
}
/**
 * Runs {@code assertSplitAtFractionExhaustive} on a {@code CoarseRangeSource} constructed with
 * the arguments {@code (13, 35, 1, 10)}.
 */
@Test
public void testSplitAtFractionExhaustive() throws Exception {
  final PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  final CoarseRangeSource rangeSource = new CoarseRangeSource(13, 35, 1, 10);

  assertSplitAtFractionExhaustive(rangeSource, pipelineOptions);
}
@Test public void testSplitAtFractionExhaustive() throws Exception { PipelineOptions options = PipelineOptionsFactory.create(); // Smaller file for exhaustive testing. File file = createFileWithData("file", createStringDataset(3, 20)); Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath()); TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null); assertSplitAtFractionExhaustive(source, options); }
/**
 * Writes {@code trainXMLWithAllFeaturesSingleByte} to a temporary file as UTF-8 and runs
 * {@code assertSplitAtFractionExhaustive} on an {@code XmlIO} source that reads {@code train}
 * records under the {@code trains} root element.
 */
@Test
public void testSplitAtFractionExhaustiveSingleByte() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  File xmlFile = tempFolder.newFile("trainXMLSmall");
  byte[] xmlBytes = trainXMLWithAllFeaturesSingleByte.getBytes(StandardCharsets.UTF_8);
  Files.write(xmlFile.toPath(), xmlBytes);

  String inputPath = xmlFile.toPath().toString();
  BoundedSource<Train> xmlSource =
      XmlIO.<Train>read()
          .from(inputPath)
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .createSource();

  assertSplitAtFractionExhaustive(xmlSource, pipelineOptions);
}
private void testSplitAtFractionP(long splitPointFrequency) throws Exception { PipelineOptions options = PipelineOptionsFactory.create(); testSourceOptions.splitPointFrequencyRecords = splitPointFrequency; SyntheticBoundedSource source = new SyntheticBoundedSource(testSourceOptions); SourceTestUtils.assertSplitAtFractionExhaustive(source, options); // Can't split if already consumed. SourceTestUtils.assertSplitAtFractionFails(source, 5, 0.3, options); SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.3, options); }
/**
 * Multi-byte counterpart of the single-byte exhaustive split test: writes
 * {@code trainXMLWithAllFeaturesMultiByte} to a temporary file as UTF-8 and runs
 * {@code assertSplitAtFractionExhaustive} on an {@code XmlIO} source whose root and record
 * element names are non-ASCII.
 *
 * <p>Currently disabled; see the {@code @Ignore} reason.
 */
@Test
@Ignore(
    "Multi-byte characters in XML are not supported because the parser "
        + "currently does not correctly report byte offsets")
public void testSplitAtFractionExhaustiveMultiByte() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  File xmlFile = tempFolder.newFile("trainXMLSmall");
  byte[] xmlBytes = trainXMLWithAllFeaturesMultiByte.getBytes(StandardCharsets.UTF_8);
  Files.write(xmlFile.toPath(), xmlBytes);

  String inputPath = xmlFile.toPath().toString();
  BoundedSource<Train> xmlSource =
      XmlIO.<Train>read()
          .from(inputPath)
          .withRootElement("දුම්රියන්")
          .withRecordElement("දුම්රිය")
          .withRecordClass(Train.class)
          .createSource();

  assertSplitAtFractionExhaustive(xmlSource, pipelineOptions);
}
}
/**
 * Checks that a {@code CompressedSource} created with {@code CompressedSource.from} alone (no
 * explicit decompression mode) over a {@code .bin} file reports itself as splittable, then runs
 * {@code SourceTestUtils.assertSplitAtFractionExhaustive} on it.
 */
@Test
public void testUncompressedFileWithAutoIsSplittable() throws Exception {
  final String baseName = "test-input";
  File inputFile = tmpFolder.newFile(baseName + ".bin");
  byte[] payload = generateInput(10);
  Files.write(payload, inputFile);

  ByteSource rawSource = new ByteSource(inputFile.getPath(), 1);
  CompressedSource<Byte> compressedSource = CompressedSource.from(rawSource);

  assertTrue(compressedSource.isSplittable());
  SourceTestUtils.assertSplitAtFractionExhaustive(
      compressedSource, PipelineOptionsFactory.create());
}
/**
 * Exhaustively tests dynamic work rebalancing.
 *
 * <p>Populates a table through {@code makeTableData}, registers sample row keys on the fake
 * service, and runs {@code assertSplitAtFractionExhaustive} on a {@code BigtableSource} that
 * spans the range returned by {@code service.getTableRange}.
 */
@Test
public void testReadingSplitAtFractionExhaustive() throws Exception {
  final String tableId = "TEST-FEW-ROWS-SPLIT-EXHAUSTIVE-TABLE";

  makeTableData(tableId, /* numRows= */ 10);
  service.setupSampleRowKeys(tableId, /* numSamples= */ 1, /* bytesPerRow= */ 1L);

  BigtableSource bigtableSource =
      new BigtableSource(
          config.withTableId(ValueProvider.StaticValueProvider.of(tableId)),
          null,
          Arrays.asList(service.getTableRange(tableId)),
          null);

  assertSplitAtFractionExhaustive(bigtableSource, null);
}
/**
 * Checks that a {@code CompressedSource} explicitly configured with
 * {@code CompressionMode.UNCOMPRESSED} reports itself as splittable, then runs
 * {@code SourceTestUtils.assertSplitAtFractionExhaustive} on it.
 */
@Test
public void testUncompressedFileWithUncompressedIsSplittable() throws Exception {
  final String baseName = "test-input";
  File inputFile = tmpFolder.newFile(baseName + ".bin");
  byte[] payload = generateInput(10);
  Files.write(payload, inputFile);

  ByteSource rawSource = new ByteSource(inputFile.getPath(), 1);
  CompressedSource<Byte> compressedSource =
      CompressedSource.from(rawSource).withDecompression(CompressionMode.UNCOMPRESSED);

  assertTrue(compressedSource.isSplittable());
  SourceTestUtils.assertSplitAtFractionExhaustive(
      compressedSource, PipelineOptionsFactory.create());
}
@Test public void testSplitAtFractionExhaustive() throws Exception { // A small-sized input is sufficient, because the test verifies that splitting is non-vacuous. List<FixedRecord> expected = createFixedRecords(20); String filename = generateTestFile( "tmp.avro", expected, SyncBehavior.SYNC_REGULAR, 5, AvroCoder.of(FixedRecord.class), DataFileConstants.NULL_CODEC); AvroSource<FixedRecord> source = AvroSource.from(filename).withSchema(FixedRecord.class); SourceTestUtils.assertSplitAtFractionExhaustive(source, null); }