/**
 * Writes the {@link CopyableFile} carried by the given {@link FileAwareInputStream} to its staging path.
 *
 * <p>When an encryption config is set, the file's destination is first extended with
 * {@code "." + <encryption type>}. The staging path is then resolved, the parent directory is created, and
 * the actual copy is delegated to the four-argument {@code writeImpl} overload. The written-files counter
 * is incremented once the delegate returns.
 *
 * @param fileAwareInputStream record holding the file metadata and the stream to copy
 * @throws IOException if this writer has already been handed a file, or if the underlying calls fail
 */
@Override
public final void writeImpl(FileAwareInputStream fileAwareInputStream) throws IOException {
  final CopyableFile fileToCopy = fileAwareInputStream.getFile();

  // Reflect the encryption type in the destination name before the staging path is derived.
  if (encryptionConfig != null) {
    fileToCopy.setDestination(
        PathUtils.addExtension(
            fileToCopy.getDestination(),
            "." + EncryptionConfigParser.getEncryptionType(encryptionConfig)));
  }

  final Path stagingPath = getStagingFilePath(fileToCopy);

  // A writer instance handles exactly one file; a second call is rejected.
  if (this.actualProcessedCopyableFile.isPresent()) {
    throw new IOException(this.getClass().getCanonicalName() + " can only process one file.");
  }
  this.actualProcessedCopyableFile = Optional.of(fileToCopy);

  final Path stagingDir = stagingPath.getParent();
  this.fs.mkdirs(stagingDir);

  writeImpl(fileAwareInputStream.getInputStream(), stagingPath, fileToCopy, fileAwareInputStream);
  this.filesWritten.incrementAndGet();
}
/**
 * Checks that the extractor returns exactly one record for the test file: the record's origin path matches
 * the input file, its stream content is {@code "first"}, and a second {@code readRecord} call returns
 * {@code null}.
 */
@Test
public void testReadRecord() throws Exception {
  CopyableFile file = getTestCopyableFile("inputStreamExtractorTest/first.txt");
  FileAwareInputStreamExtractor extractor =
      new FileAwareInputStreamExtractor(FileSystem.getLocal(new Configuration()), file);

  FileAwareInputStream fileAwareInputStream = extractor.readRecord(null);
  Assert.assertEquals(fileAwareInputStream.getFile().getOrigin().getPath(), file.getOrigin().getPath());

  // Close the record's stream once read, and decode with an explicit charset so the assertion does not
  // depend on the platform default encoding.
  try (java.io.InputStream content = fileAwareInputStream.getInputStream()) {
    Assert.assertEquals(IOUtils.toString(content, java.nio.charset.StandardCharsets.UTF_8), "first");
  }

  // Only one record is expected per extractor.
  Assert.assertNull(extractor.readRecord(null));
}
/**
 * Runs the file at {@code filePath} through {@link UnGzipConverter} and compares the trimmed text obtained
 * from {@code readGzipStreamAsString} on the converted stream with {@code expectedText}.
 *
 * @param filePath classpath-relative path of the test resource
 * @param expectedText text the converted stream is expected to yield (compared after trimming)
 */
@Test(dataProvider = "fileDataProvider")
public void testGz(final String filePath, final String expectedText) throws Exception {
  UnGzipConverter converter = new UnGzipConverter();
  FileSystem fs = FileSystem.getLocal(new Configuration());

  // Fail with a clear assertion instead of a NullPointerException when the resource is missing.
  java.net.URL resource = getClass().getClassLoader().getResource(filePath);
  Assert.assertNotNull(resource);
  String fullPath = resource.getFile();

  // try-with-resources so the opened file is released even when an assertion fails.
  try (java.io.InputStream rawInput = fs.open(new Path(fullPath))) {
    FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder()
        .file(CopyableFileUtils.getTestCopyableFile(filePath))
        .inputStream(rawInput)
        .build();

    Iterable<FileAwareInputStream> iterable =
        converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState());

    // The converter must emit a record; check before dereferencing it.
    FileAwareInputStream converted = Iterables.getFirst(iterable, null);
    Assert.assertNotNull(converted);

    String actual = readGzipStreamAsString(converted.getInputStream());
    Assert.assertEquals(actual.trim(), expectedText);
  }
}
/**
 * Applies the transformation in {@link #inputStreamTransformation} to the {@link InputStream} in the
 * {@link FileAwareInputStream}.
 *
 * <p>{@code modifyExtensionAtDestination} is invoked on the record's file before the stream is transformed.
 * The result is a copy of the incoming record (built via {@code toBuilder()}) that carries the transformed
 * stream, wrapped in a {@link SingleRecordIterable}.
 *
 * @throws DataConversionException wrapping any {@link RuntimeException} raised while transforming the
 *         stream or rebuilding the record
 */
@Override
public Iterable<FileAwareInputStream> convertRecord(String outputSchema,
    FileAwareInputStream fileAwareInputStream, WorkUnitState workUnit) throws DataConversionException {
  modifyExtensionAtDestination(fileAwareInputStream.getFile());

  try {
    InputStream transformed = inputStreamTransformation().apply(fileAwareInputStream.getInputStream());
    FileAwareInputStream converted = fileAwareInputStream.toBuilder().inputStream(transformed).build();
    return new SingleRecordIterable<>(converted);
  } catch (RuntimeException cause) {
    throw new DataConversionException(cause);
  }
}
/**
 * Checks that {@link UnGzipConverter} strips both the {@code .gzip} and the {@code .gz} extension from the
 * destination file name and that the converted stream yields {@code "helloworld\n"}.
 */
@Test
public void testExtensionStripping() throws DataConversionException, IOException {
  List<String> helloWorldFiles = ImmutableList.of("helloworld.txt.gzip", "helloworld.txt.gz");
  UnGzipConverter converter = new UnGzipConverter();
  FileSystem fs = FileSystem.getLocal(new Configuration());

  for (String fileName : helloWorldFiles) {
    String filePath = "unGzipConverterTest/" + fileName;

    // Fail with a clear assertion instead of a NullPointerException when the resource is missing.
    java.net.URL resource = getClass().getClassLoader().getResource(filePath);
    Assert.assertNotNull(resource);
    String fullPath = resource.getFile();

    // One stream is opened per iteration; close each so a failing assertion does not leak it.
    try (java.io.InputStream rawInput = fs.open(new Path(fullPath))) {
      FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder()
          .file(CopyableFileUtils.getTestCopyableFile(filePath, "/tmp/" + fileName, null, null))
          .inputStream(rawInput)
          .build();

      Iterable<FileAwareInputStream> iterable =
          converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState());
      FileAwareInputStream out = iterable.iterator().next();

      Assert.assertEquals(out.getFile().getDestination().getName(), "helloworld.txt");

      String contents = IOUtils.toString(out.getInputStream(), StandardCharsets.UTF_8);
      Assert.assertEquals(contents, "helloworld\n");
    }
  }
}
/**
 * Disabled test: initialises a {@link DecryptConverter} with an encrypted passphrase, converts the
 * {@code decrypt-test.txt.gpg} test resource, and expects the resulting stream to contain
 * {@code "123456789"}.
 */
@Test(enabled = false)
public void testConvertGpgRecord() throws Exception {
  final String expectedFileContents = "123456789";
  final String passphrase = "12";
  WorkUnitState workUnitState = new WorkUnitState();

  // try-with-resources guarantees the converter is closed even if deleteMasterPwdFile() throws;
  // the master password file is still removed first, as before.
  try (DecryptConverter converter = new DecryptConverter()) {
    try {
      setEncryptedPassphrase(passphrase, workUnitState);
      converter.init(workUnitState);

      FileSystem fs = FileSystem.getLocal(new Configuration());

      URL url = getClass().getClassLoader().getResource("decryptConverterTest/decrypt-test.txt.gpg");
      Assert.assertNotNull(url);
      String gpgFilePath = url.getFile();

      try (FSDataInputStream gpgFileInput = fs.open(new Path(gpgFilePath))) {
        FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder()
            .file(CopyableFileUtils.getTestCopyableFile())
            .inputStream(gpgFileInput)
            .build();

        Iterable<FileAwareInputStream> iterable =
            converter.convertRecord("outputSchema", fileAwareInputStream, workUnitState);
        fileAwareInputStream = Iterables.getFirst(iterable, null);
        Assert.assertNotNull(fileAwareInputStream);

        String actual = IOUtils.toString(fileAwareInputStream.getInputStream(), Charsets.UTF_8);
        Assert.assertEquals(actual, expectedFileContents);
      }
    } finally {
      deleteMasterPwdFile();
    }
  }
}
/**
 * Disabled test: configures the {@code insecure_shift} algorithm on the job state, converts the
 * {@code decrypt-test.txt.insecure_shift} test resource with a {@link DecryptConverter}, and expects the
 * output stream to contain {@code "2345678"} and the destination file name to be {@code decrypt-test.txt}.
 */
@Test(enabled = false)
public void testConvertDifferentEncryption() throws IOException, DataConversionException {
  final String expectedFileContents = "2345678";

  WorkUnitState workUnitState = new WorkUnitState();
  String algorithmProperty = "converter.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY;
  workUnitState.getJobState().setProp(algorithmProperty, "insecure_shift");

  try (DecryptConverter converter = new DecryptConverter()) {
    converter.init(workUnitState);

    FileSystem localFs = FileSystem.getLocal(new Configuration());

    URL resource =
        getClass().getClassLoader().getResource("decryptConverterTest/decrypt-test.txt.insecure_shift");
    Assert.assertNotNull(resource);
    String resourcePath = resource.getFile();

    try (FSDataInputStream encryptedInput = localFs.open(new Path(resourcePath))) {
      FileAwareInputStream record = FileAwareInputStream.builder()
          .file(CopyableFileUtils.getTestCopyableFile())
          .inputStream(encryptedInput)
          .build();
      // Give the destination the encrypted extension so the final name assertion can check it was removed.
      record.getFile().setDestination(new Path("file:///tmp/decrypt-test.txt.insecure_shift"));

      Iterable<FileAwareInputStream> converted =
          converter.convertRecord("outputSchema", record, workUnitState);
      FileAwareInputStream decryptedStream = Iterables.getFirst(converted, null);
      Assert.assertNotNull(decryptedStream);

      String decryptedText = IOUtils.toString(decryptedStream.getInputStream(), Charsets.UTF_8);
      Assert.assertEquals(decryptedText, expectedFileContents);
      Assert.assertEquals(decryptedStream.getFile().getDestination().getName(), "decrypt-test.txt");
    }
  }
}
/**
 * Copies the file described by {@code fileAwareInputStream} into its staging location.
 *
 * <p>Steps, in order: append {@code "." + <encryption type>} to the destination when an encryption config
 * is present; resolve the staging path; reject the call if a file was already processed; record the file
 * as processed; create the staging parent directory; delegate the copy to the four-argument
 * {@code writeImpl} overload; bump the written-files counter.
 *
 * @param fileAwareInputStream the record providing both the {@link CopyableFile} and its input stream
 * @throws IOException if a file was already processed by this instance, or if an underlying call fails
 */
@Override
public final void writeImpl(FileAwareInputStream fileAwareInputStream) throws IOException {
  final CopyableFile sourceFile = fileAwareInputStream.getFile();

  final boolean encryptionEnabled = encryptionConfig != null;
  if (encryptionEnabled) {
    // The destination name carries the encryption type as an extra extension.
    sourceFile.setDestination(
        PathUtils.addExtension(
            sourceFile.getDestination(),
            "." + EncryptionConfigParser.getEncryptionType(encryptionConfig)));
  }

  final Path stagingTarget = getStagingFilePath(sourceFile);

  if (this.actualProcessedCopyableFile.isPresent()) {
    // Only a single file may ever pass through one writer instance.
    throw new IOException(this.getClass().getCanonicalName() + " can only process one file.");
  }
  this.actualProcessedCopyableFile = Optional.of(sourceFile);

  this.fs.mkdirs(stagingTarget.getParent());
  writeImpl(fileAwareInputStream.getInputStream(), stagingTarget, sourceFile, fileAwareInputStream);

  this.filesWritten.incrementAndGet();
}
/**
 * Applies the transformation in {@link #inputStreamTransformation} to the {@link InputStream} in the
 * {@link FileAwareInputStream}.
 *
 * <p>Before transforming, {@code modifyExtensionAtDestination} is called with the record's file. The
 * returned iterable holds a single record: a rebuilt copy of the input whose stream is the transformed one.
 *
 * @throws DataConversionException if a {@link RuntimeException} occurs while applying the transformation
 *         or building the new record; the original exception is kept as the cause
 */
@Override
public Iterable<FileAwareInputStream> convertRecord(String outputSchema,
    FileAwareInputStream fileAwareInputStream, WorkUnitState workUnit) throws DataConversionException {
  modifyExtensionAtDestination(fileAwareInputStream.getFile());

  final FileAwareInputStream rebuilt;
  try {
    final InputStream wrappedStream =
        inputStreamTransformation().apply(fileAwareInputStream.getInputStream());
    rebuilt = fileAwareInputStream.toBuilder().inputStream(wrappedStream).build();
    return new SingleRecordIterable<>(rebuilt);
  } catch (RuntimeException failure) {
    throw new DataConversionException(failure);
  }
}