/**
 * Collects the MD5 hash of every requested GCS object.
 *
 * <p>Any {@link IOException}, whether raised by the lookup call itself or carried by an
 * individual lookup result, is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param gcsUtil - Used to retrieve the files.
 * @param gcsPaths - List of paths of the files.
 * @return the MD5 hashes, in the order in which the lookup results are iterated.
 */
public static List<String> getGcsFileChecksums(GcsUtil gcsUtil, List<GcsPath> gcsPaths) {
  List<String> md5Hashes = new ArrayList<>();
  try {
    for (StorageObjectOrIOException lookup : gcsUtil.getObjects(gcsPaths)) {
      IOException failure = lookup.ioException();
      if (failure == null) {
        md5Hashes.add(lookup.storageObject().getMd5Hash());
      } else {
        // Abort on the first failed lookup; the catch below wraps it.
        throw failure;
      }
    }
  } catch (IOException cause) {
    throw new RuntimeException(cause);
  }
  return md5Hashes;
}
} // presumably closes the enclosing class, whose header is outside this view
/**
 * Returns one file size per given path, in the order of the results returned by {@code
 * getObjects}.
 *
 * <p>Each lookup result is converted with {@code toFileSize}; per the original documentation a
 * {@link FileNotFoundException} is thrown when a resource does not exist.
 */
@VisibleForTesting
List<Long> fileSizes(List<GcsPath> paths) throws IOException {
  ImmutableList.Builder<Long> sizes = ImmutableList.builder();
  for (StorageObjectOrIOException lookup : getObjects(paths)) {
    sizes.add(toFileSize(lookup));
  }
  return sizes.build();
}
/**
 * Looks up the given {@link GcsPath GcsPaths} and converts each lookup result into a {@link
 * MatchResult}.
 *
 * <p>One {@link MatchResult} is produced per lookup result, so the returned list has as many
 * entries as there are given {@link GcsPath GcsPaths}. Each {@link MatchResult} contains one
 * {@link Metadata}.
 */
@VisibleForTesting
List<MatchResult> matchNonGlobs(List<GcsPath> gcsPaths) throws IOException {
  ImmutableList.Builder<MatchResult> matches = ImmutableList.builder();
  for (StorageObjectOrIOException lookup : options.getGcsUtil().getObjects(gcsPaths)) {
    matches.add(toMatchResult(lookup));
  }
  return matches.build();
}
/**
 * Staging a file must not upload anything when the object lookup already reports a storage
 * object built from the local file's length: the only mock interaction is the lookup.
 */
@Test
public void testPackageUploadIsSkippedWhenFileAlreadyExists() throws Exception {
  File localFile = makeFileWithContents("file.txt", "This is a test!");
  StorageObjectOrIOException existingObject =
      StorageObjectOrIOException.create(createStorageObject(STAGING_PATH, localFile.length()));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(existingObject));

  defaultPackageUtil.stageClasspathElements(
      ImmutableList.of(localFile.getAbsolutePath()), STAGING_PATH, createOptions);

  // Only the lookup happened; no create() call was made.
  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verifyNoMoreInteractions(mockGcsUtil);
}
/**
 * When every create() call throws an {@link IOException}, staging must end in a {@link
 * RuntimeException}, after exactly five create() attempts and a single lookup.
 */
@Test(expected = RuntimeException.class)
public void testPackageUploadFailsWhenIOExceptionThrown() throws Exception {
  File localFile = makeFileWithContents("file.txt", "This is a test!");
  StorageObjectOrIOException missingObject =
      StorageObjectOrIOException.create(new FileNotFoundException("some/path"));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(missingObject));
  when(mockGcsUtil.create(any(GcsPath.class), anyString()))
      .thenThrow(new IOException("Fake Exception: Upload error"));

  try (PackageUtil packageUtil =
      PackageUtil.withExecutorService(MoreExecutors.newDirectExecutorService())) {
    packageUtil.stageClasspathElements(
        ImmutableList.of(localFile.getAbsolutePath()),
        STAGING_PATH,
        fastNanoClockAndSleeper,
        createOptions);
  } finally {
    // Runs even though staging throws, so the interaction counts are still checked.
    verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
    verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
    verifyNoMoreInteractions(mockGcsUtil);
  }
}
@Test public void testPackageUploadEventuallySucceeds() throws Exception { Pipe pipe = Pipe.open(); File tmpFile = makeFileWithContents("file.txt", "This is a test!"); when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))) .thenReturn( ImmutableList.of( StorageObjectOrIOException.create(new FileNotFoundException("some/path")))); when(mockGcsUtil.create(any(GcsPath.class), anyString())) .thenThrow(new IOException("Fake Exception: 410 Gone")) // First attempt fails .thenReturn(pipe.sink()); // second attempt succeeds try (PackageUtil directPackageUtil = PackageUtil.withExecutorService(MoreExecutors.newDirectExecutorService())) { directPackageUtil.stageClasspathElements( ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, fastNanoClockAndSleeper, createOptions); } finally { verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class)); verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString()); verifyNoMoreInteractions(mockGcsUtil); } }
/**
 * When the lookup reports a storage object built with {@code Long.MAX_VALUE} (which cannot match
 * the local directory package), staging must still perform exactly one create() call.
 */
@Test
public void testPackageUploadIsNotSkippedWhenSizesAreDifferent() throws Exception {
  Pipe uploadPipe = Pipe.open();

  // Local layout: folder/{file.txt, empty_directory/, directory/file.txt}
  File rootDirectory = tmpFolder.newFolder("folder");
  tmpFolder.newFolder("folder", "empty_directory");
  tmpFolder.newFolder("folder", "directory");
  makeFileWithContents("folder/file.txt", "This is a test!");
  makeFileWithContents("folder/directory/file.txt", "This is also a test!");

  StorageObjectOrIOException mismatchedObject =
      StorageObjectOrIOException.create(createStorageObject(STAGING_PATH, Long.MAX_VALUE));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(mismatchedObject));
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  defaultPackageUtil.stageClasspathElements(
      ImmutableList.of(rootDirectory.getAbsolutePath()), STAGING_PATH, createOptions);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);
}
/**
 * A classpath element given as {@code name=path} must be staged under the explicit name, while
 * its location is still derived from the underlying file name plus a hash.
 */
@Test
public void testPackageUploadWithExplicitPackageName() throws Exception {
  Pipe uploadPipe = Pipe.open();
  File localFile = makeFileWithContents("file.txt", "This is a test!");
  final String alias = "alias.txt";
  StorageObjectOrIOException missingObject =
      StorageObjectOrIOException.create(new FileNotFoundException("some/path"));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(missingObject));
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  String classpathEntry = alias + "=" + localFile.getAbsolutePath();
  List<DataflowPackage> stagedPackages =
      defaultPackageUtil.stageClasspathElements(
          ImmutableList.of(classpathEntry), STAGING_PATH, createOptions);
  DataflowPackage stagedPackage = Iterables.getOnlyElement(stagedPackages);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  assertThat(stagedPackage.getName(), equalTo(alias));
  assertThat(
      stagedPackage.getLocation(),
      RegexMatcher.matches(STAGING_PATH + "file-" + HASH_PATTERN + ".txt"));
}
/**
 * Staging a directory must upload a single zip stream whose entries cover the nested file, the
 * empty sub-directory and the top-level file.
 */
@Test
public void testPackageUploadWithDirectorySucceeds() throws Exception {
  Pipe uploadPipe = Pipe.open();

  // Local layout: folder/{file.txt, empty_directory/, directory/file.txt}
  File rootDirectory = tmpFolder.newFolder("folder");
  tmpFolder.newFolder("folder", "empty_directory");
  tmpFolder.newFolder("folder", "directory");
  makeFileWithContents("folder/file.txt", "This is a test!");
  makeFileWithContents("folder/directory/file.txt", "This is also a test!");

  StorageObjectOrIOException missingObject =
      StorageObjectOrIOException.create(new FileNotFoundException("some/path"));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(missingObject));
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  defaultPackageUtil.stageClasspathElements(
      ImmutableList.of(rootDirectory.getAbsolutePath()), STAGING_PATH, createOptions);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  // Read back what was written to the pipe and collect the zip entry names.
  List<String> entryNames = new ArrayList<>();
  try (ZipInputStream uploadedZip =
      new ZipInputStream(Channels.newInputStream(uploadPipe.source()))) {
    ZipEntry current;
    while ((current = uploadedZip.getNextEntry()) != null) {
      entryNames.add(current.getName());
    }
  }

  assertThat(
      entryNames, containsInAnyOrder("directory/file.txt", "empty_directory/", "file.txt"));
}
@Test public void testPackageUploadFailsWithPermissionsErrorGivesDetailedMessage() throws Exception { File tmpFile = makeFileWithContents("file.txt", "This is a test!"); when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))) .thenReturn( ImmutableList.of( + "login'"))); } finally { verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class)); verify(mockGcsUtil).create(any(GcsPath.class), anyString()); verifyNoMoreInteractions(mockGcsUtil);
/**
 * Staging a classpath of 1005 named elements (all pointing at the same file) must log a warning
 * that mentions the element count.
 */
@Test
public void testPackageUploadWithLargeClasspathLogsWarning() throws Exception {
  File localFile = makeFileWithContents("file.txt", "This is a test!");
  StorageObjectOrIOException existingObject =
      StorageObjectOrIOException.create(createStorageObject(STAGING_PATH, localFile.length()));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(existingObject));

  // Build "element0=<path>", "element1=<path>", ... up to element1004.
  List<String> manyElements = Lists.newLinkedList();
  int index = 0;
  while (index < 1005) {
    manyElements.add("element" + index + '=' + localFile.getAbsolutePath());
    index++;
  }

  defaultPackageUtil.stageClasspathElements(manyElements, STAGING_PATH, createOptions);

  logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
}
/**
 * Staging an empty directory must upload one {@code folder-<hash>.jar} package whose zip stream
 * contains no entries.
 */
@Test
public void testPackageUploadWithEmptyDirectorySucceeds() throws Exception {
  Pipe uploadPipe = Pipe.open();
  File emptyDirectory = tmpFolder.newFolder("folder");
  StorageObjectOrIOException missingObject =
      StorageObjectOrIOException.create(new FileNotFoundException("some/path"));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(missingObject));
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  List<DataflowPackage> stagedPackages =
      defaultPackageUtil.stageClasspathElements(
          ImmutableList.of(emptyDirectory.getAbsolutePath()), STAGING_PATH, createOptions);
  DataflowPackage stagedPackage = Iterables.getOnlyElement(stagedPackages);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  assertThat(stagedPackage.getName(), RegexMatcher.matches("folder-" + HASH_PATTERN + ".jar"));
  assertThat(stagedPackage.getLocation(), equalTo(STAGING_PATH + stagedPackage.getName()));

  // The uploaded archive must be empty.
  try (ZipInputStream uploadedZip =
      new ZipInputStream(Channels.newInputStream(uploadPipe.source()))) {
    assertNull(uploadedZip.getNextEntry());
  }
}
/**
 * Staging a single file must upload one {@code file-<hash>.txt} package whose uploaded bytes
 * read back as the original contents.
 */
@Test
public void testPackageUploadWithFileSucceeds() throws Exception {
  Pipe uploadPipe = Pipe.open();
  String contents = "This is a test!";
  File localFile = makeFileWithContents("file.txt", contents);
  StorageObjectOrIOException missingObject =
      StorageObjectOrIOException.create(new FileNotFoundException("some/path"));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(ImmutableList.of(missingObject));
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  List<DataflowPackage> stagedPackages =
      defaultPackageUtil.stageClasspathElements(
          ImmutableList.of(localFile.getAbsolutePath()), STAGING_PATH, createOptions);
  DataflowPackage stagedPackage = Iterables.getOnlyElement(stagedPackages);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  assertThat(stagedPackage.getName(), RegexMatcher.matches("file-" + HASH_PATTERN + ".txt"));
  assertThat(stagedPackage.getLocation(), equalTo(STAGING_PATH + stagedPackage.getName()));

  // Read the first line of what was written to the pipe.
  LineReader uploadedLines =
      new LineReader(Channels.newReader(uploadPipe.source(), StandardCharsets.UTF_8.name()));
  assertThat(uploadedLines.readLine(), equalTo(contents));
}
when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))) .thenAnswer( invocationOnMock -> {
@Test public void testStagingPreservesClasspath() throws Exception { File smallFile = makeFileWithContents("small.txt", "small"); File largeFile = makeFileWithContents("large.txt", "large contents"); when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))) .thenReturn( ImmutableList.of( StorageObjectOrIOException.create(new FileNotFoundException("some/path")))); when(mockGcsUtil.create(any(GcsPath.class), anyString())) .thenAnswer(invocation -> Pipe.open().sink()); List<DataflowPackage> targets = defaultPackageUtil.stageClasspathElements( ImmutableList.of(smallFile.getAbsolutePath(), largeFile.getAbsolutePath()), STAGING_PATH, createOptions); // Verify that the packages are returned small, then large, matching input order even though // the large file would be uploaded first. assertThat(targets.get(0).getName(), startsWith("small")); assertThat(targets.get(1).getName(), startsWith("large")); }
GcsPath.fromUri("gs://testbucket/testdirectory/file4name")); when(mockGcsUtil.getObjects(eq(gcsPaths))).thenReturn(items); List<MatchResult> matchResults = gcsFileSystem.matchNonGlobs(gcsPaths);
GcsPath.fromUri("gs://testbucket/testdirectory/otherfile")); when(mockGcsUtil.getObjects(eq(gcsPaths))) .thenReturn( ImmutableList.of(
when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))) .thenReturn( ImmutableList.of(