/** Constructing a {@code NumberedShardedFile} from a {@code null} path must be rejected. */
@Test
public void testPreconditionFilePathIsNull() {
  final String expectedFragment = "Expected valid file path, but received";

  // Register the expectation first; the constructor call below is what should throw.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(containsString(expectedFragment));

  new NumberedShardedFile(null);
}
/** Constructing a {@code NumberedShardedFile} from an empty path must be rejected. */
@Test
public void testPreconditionFilePathIsEmpty() {
  final String expectedFragment = "Expected valid file path, but received";

  // Register the expectation first; the constructor call below is what should throw.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(containsString(expectedFragment));

  new NumberedShardedFile("");
}
/** A single shard with empty content is expected to produce an empty read result. */
@Test
public void testReadEmpty() throws Exception {
  File onlyShard = tmpFolder.newFile("result-000-of-001");
  Files.write("", onlyShard, StandardCharsets.UTF_8);

  assertThat(new NumberedShardedFile(filePattern).readFilesWithRetries(), empty());
}
/** * Constructor using a custom shard template. * * @param checksum expected checksum string used to verify file content. * @param filePath path of files that's to be verified. * @param shardTemplate template of shard name to parse out the total number of shards which is * used in I/O retry to avoid inconsistency of filesystem. Customized template should assign * name "numshards" to capturing group - total shard number. */ public FileChecksumMatcher(String checksum, String filePath, Pattern shardTemplate) { checkArgument( !Strings.isNullOrEmpty(checksum), "Expected valid checksum, but received %s", checksum); checkArgument( !Strings.isNullOrEmpty(filePath), "Expected valid file path, but received %s", filePath); checkNotNull( shardTemplate, "Expected non-null shard pattern. " + "Please call the other constructor to use default pattern: %s", DEFAULT_SHARD_TEMPLATE); this.expectedChecksum = checksum; this.shardedFile = new NumberedShardedFile(filePath, shardTemplate); }
@Test public void testReadCustomTemplate() throws Exception { String contents1 = "To be or not to be, ", contents2 = "it is not a question."; // Customized template: resultSSS-totalNNN File tmpFile1 = tmpFolder.newFile("result0-total2"); File tmpFile2 = tmpFolder.newFile("result1-total2"); Files.write(contents1, tmpFile1, StandardCharsets.UTF_8); Files.write(contents2, tmpFile2, StandardCharsets.UTF_8); Pattern customizedTemplate = Pattern.compile("(?x) result (?<shardnum>\\d+) - total (?<numshards>\\d+)"); NumberedShardedFile shardedFile = new NumberedShardedFile(filePattern, customizedTemplate); assertThat(shardedFile.readFilesWithRetries(), containsInAnyOrder(contents1, contents2)); }
/**
 * This test creates no shard files, so reading with retries is expected to give up with an
 * {@code IOException} that mentions the retry limit.
 */
@Test
public void testReadWithRetriesFailsWhenOutputDirEmpty() throws Exception {
  NumberedShardedFile shardedFile = new NumberedShardedFile(filePattern);
  String retryFailure =
      "Unable to read file(s) after retrying " + NumberedShardedFile.MAX_READ_RETRIES;

  thrown.expect(IOException.class);
  thrown.expectMessage(containsString(retryFailure));

  shardedFile.readFilesWithRetries(fastClock, backOff);
}
/**
 * Stubs {@code readLines} on a spy to always throw {@code IOException} and expects the retrying
 * read to fail with a message that mentions the retry limit.
 */
@Test
public void testReadWithRetriesFailsSinceFilesystemError() throws Exception {
  File shard = tmpFolder.newFile();
  Files.write("Test for file checksum verifier.", shard, StandardCharsets.UTF_8);

  // Spy on a real instance so only readLines is replaced.
  NumberedShardedFile failingFile = spy(new NumberedShardedFile(filePattern));
  doThrow(IOException.class).when(failingFile).readLines(anyCollection());

  String retryFailure =
      "Unable to read file(s) after retrying " + NumberedShardedFile.MAX_READ_RETRIES;
  thrown.expect(IOException.class);
  thrown.expectMessage(containsString(retryFailure));

  failingFile.readFilesWithRetries(fastClock, backOff);
}
/**
 * Supplies the shard template {@code incorrect-template} and expects the retrying read to fail
 * with an {@code IOException} that mentions the retry limit.
 */
@Test
public void testReadWithRetriesFailsWhenTemplateIncorrect() throws Exception {
  File shard = tmpFolder.newFile();
  Files.write("Test for file checksum verifier.", shard, StandardCharsets.UTF_8);

  Pattern badTemplate = Pattern.compile("incorrect-template");
  NumberedShardedFile shardedFile = new NumberedShardedFile(filePattern, badTemplate);

  String retryFailure =
      "Unable to read file(s) after retrying " + NumberedShardedFile.MAX_READ_RETRIES;
  thrown.expect(IOException.class);
  thrown.expectMessage(containsString(retryFailure));

  shardedFile.readFilesWithRetries(fastClock, backOff);
}
/**
 * Creates {@code result-000-of-001} plus an extra {@code tmp-result-000-of-001} file and expects
 * the retrying read to fail with an {@code IOException} that mentions the retry limit.
 */
@Test
public void testReadWithRetriesFailsWhenRedundantFileLoaded() throws Exception {
  tmpFolder.newFile("result-000-of-001");
  tmpFolder.newFile("tmp-result-000-of-001");

  NumberedShardedFile shardedFile = new NumberedShardedFile(filePattern);

  String retryFailure =
      "Unable to read file(s) after retrying " + NumberedShardedFile.MAX_READ_RETRIES;
  thrown.expect(IOException.class);
  thrown.expectMessage(containsString(retryFailure));

  shardedFile.readFilesWithRetries(fastClock, backOff);
}
}
/**
 * Declares an external text table with the {@code lines} format, inserts two rows through SQL,
 * runs the pipeline, and checks the files written under the table location.
 */
@Test
public void testWriteLines() throws Exception {
  File destinationFile = new File(tempFolder.getRoot(), "lines-outputs");
  String outputPath = destinationFile.getAbsolutePath();

  BeamSqlEnv env = BeamSqlEnv.inMemory(new TextTableProvider());
  String createTable =
      String.format(
          "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"lines\"}'",
          SQL_LINES_SCHEMA, outputPath);
  env.executeDdl(createTable);

  BeamSqlRelUtils.toPCollection(
      pipeline, env.parseQuery("INSERT INTO test VALUES ('hello'), ('goodbye')"));
  pipeline.run();

  // The trailing "*" turns the table location into a pattern over the files written there.
  NumberedShardedFile writtenFiles = new NumberedShardedFile(outputPath + "*");
  assertThat(
      writtenFiles.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
      containsInAnyOrder("hello", "goodbye"));
}
@Test public void testWriteCsv() throws Exception { File destinationFile = new File(tempFolder.getRoot(), "csv-outputs"); BeamSqlEnv env = BeamSqlEnv.inMemory(new TextTableProvider()); // NumberedShardedFile env.executeDdl( String.format( "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"csv\"}'", SQL_CSV_SCHEMA, destinationFile.getAbsolutePath())); BeamSqlRelUtils.toPCollection( pipeline, env.parseQuery("INSERT INTO test VALUES ('hello', 42), ('goodbye', 13)")); pipeline.run(); assertThat( new NumberedShardedFile(destinationFile.getAbsolutePath() + "*") .readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF), containsInAnyOrder("hello,42", "goodbye,13")); } }
/**
 * Writes two shard files plus an unrelated {@code tmp} file, points {@code filePattern} at
 * {@code result-*}, and expects only the two shard contents to be read.
 */
@Test
public void testReadMultipleShards() throws Exception {
  String firstText = "To be or not to be, ";
  String secondText = "it is not a question.";
  String unrelatedText = "should not be included";

  File firstShard = tmpFolder.newFile("result-000-of-002");
  File secondShard = tmpFolder.newFile("result-001-of-002");
  File unrelatedFile = tmpFolder.newFile("tmp");
  Files.write(firstText, firstShard, StandardCharsets.UTF_8);
  Files.write(secondText, secondShard, StandardCharsets.UTF_8);
  Files.write(unrelatedText, unrelatedFile, StandardCharsets.UTF_8);

  // Overwrite the pattern so it is resolved against the temp folder root for this test.
  filePattern =
      LocalResources.fromFile(tmpFolder.getRoot(), true)
          .resolve("result-*", StandardResolveOptions.RESOLVE_FILE)
          .toString();

  assertThat(
      new NumberedShardedFile(filePattern).readFilesWithRetries(),
      containsInAnyOrder(firstText, secondText));
}
new IntervalWindow(windowStart, windowStart.plus(Duration.standardMinutes(10)))); expectedOutputFiles.add( new NumberedShardedFile( output .getCurrentDirectory()