/**
 * Writes a small payload through a {@link FileAwareInputStreamDataWriter} configured with the
 * {@code insecure_shift} writer encryption algorithm and checks that the committed file name ends with the
 * algorithm name and that its bytes equal the input bytes each incremented by one (modulo 256).
 *
 * @throws Exception if setting up, writing, committing or reading back the file fails
 */
@Test
public void testWriteWithEncryption() throws Exception {
  byte[] streamString = "testEncryptedContents".getBytes("UTF-8");
  // The bytes the test expects to find on disk: every input byte plus one, modulo 256.
  byte[] expectedContents = new byte[streamString.length];
  for (int i = 0; i < streamString.length; i++) {
    expectedContents[i] = (byte)((streamString[i] + 1) % 256);
  }

  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));

  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
  state.setProp("writer.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY, "insecure_shift");
  CopySource.serializeCopyEntity(state, cf);
  CopySource.serializeCopyableDataset(state, metadata);

  FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
  FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder().file(cf)
      .inputStream(StreamUtils.convertStream(new ByteArrayInputStream(streamString))).build();
  dataWriter.write(fileAwareInputStream);
  dataWriter.commit();

  Path writtenFilePath = new Path(
      new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()),
      cf.getDestination());
  Assert.assertTrue(writtenFilePath.getName().endsWith("insecure_shift"),
      "Expected encryption name to be appended to destination");

  // Close the verification stream even when the assertion fails, so the test does not leak a file handle.
  try (FileInputStream writtenFileStream = new FileInputStream(writtenFilePath.toString())) {
    Assert.assertEquals(IOUtils.toByteArray(writtenFileStream), expectedContents);
  }
}
/**
 * Returns the single {@link FileAwareInputStream} record for {@code this.file} on the first call and
 * {@code null} on every later call.
 *
 * <p>When the file status describes a directory, the record carries {@link EmptyInputStream#instance}. Otherwise
 * the file is opened on the file system resolved from the origin path; if the work unit is a split work unit and
 * a split is present, the stream is positioned at the split's low position. The stream is then wrapped in a
 * {@link MeteredInputStream}.</p>
 *
 * @param reuse not used by this implementation
 * @return the record on the first call, {@code null} afterwards
 * @throws DataRecordException declared by the overridden method
 * @throws IOException if resolving the file system, opening the file or seeking within it fails
 */
@Override
public FileAwareInputStream readRecord(@Deprecated FileAwareInputStream reuse)
    throws DataRecordException, IOException {
  if (this.recordRead) {
    return null;
  }

  Configuration conf =
      this.state == null ? HadoopUtils.newConfiguration() : HadoopUtils.getConfFromState(this.state);
  FileSystem fsFromFile = this.file.getOrigin().getPath().getFileSystem(conf);
  this.recordRead = true;

  FileAwareInputStream.FileAwareInputStreamBuilder builder = FileAwareInputStream.builder().file(this.file);
  if (this.file.getFileStatus().isDirectory()) {
    return builder.inputStream(EmptyInputStream.instance).build();
  }

  FSDataInputStream dataInputStream = fsFromFile.open(this.file.getFileStatus().getPath());
  try {
    if (this.state != null && DistcpFileSplitter.isSplitWorkUnit(this.state)) {
      Optional<DistcpFileSplitter.Split> split = DistcpFileSplitter.getSplit(this.state);
      builder.split(split);
      if (split.isPresent()) {
        dataInputStream.seek(split.get().getLowPosition());
      }
    }
    builder.inputStream(MeteredInputStream.builder().in(dataInputStream).build());
    return builder.build();
  } catch (IOException | RuntimeException e) {
    // On this path the caller never receives the stream, so it must be closed here or it leaks.
    try {
      dataInputStream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
/**
 * Returns the single {@link FileAwareInputStream} record for {@code this.file} on the first call and
 * {@code null} on every later call.
 *
 * <p>When the file status describes a directory, the record carries {@link EmptyInputStream#instance}. Otherwise
 * the file is opened on the file system resolved from the origin path; if the work unit is a split work unit and
 * a split is present, the stream is positioned at the split's low position. The stream is then wrapped in a
 * {@link MeteredInputStream}.</p>
 *
 * @param reuse not used by this implementation
 * @return the record on the first call, {@code null} afterwards
 * @throws DataRecordException declared by the overridden method
 * @throws IOException if resolving the file system, opening the file or seeking within it fails
 */
@Override
public FileAwareInputStream readRecord(@Deprecated FileAwareInputStream reuse)
    throws DataRecordException, IOException {
  if (this.recordRead) {
    return null;
  }

  Configuration conf =
      this.state == null ? HadoopUtils.newConfiguration() : HadoopUtils.getConfFromState(this.state);
  FileSystem fsFromFile = this.file.getOrigin().getPath().getFileSystem(conf);
  this.recordRead = true;

  FileAwareInputStream.FileAwareInputStreamBuilder builder = FileAwareInputStream.builder().file(this.file);
  if (this.file.getFileStatus().isDirectory()) {
    return builder.inputStream(EmptyInputStream.instance).build();
  }

  FSDataInputStream dataInputStream = fsFromFile.open(this.file.getFileStatus().getPath());
  try {
    if (this.state != null && DistcpFileSplitter.isSplitWorkUnit(this.state)) {
      Optional<DistcpFileSplitter.Split> split = DistcpFileSplitter.getSplit(this.state);
      builder.split(split);
      if (split.isPresent()) {
        dataInputStream.seek(split.get().getLowPosition());
      }
    }
    builder.inputStream(MeteredInputStream.builder().in(dataInputStream).build());
    return builder.build();
  } catch (IOException | RuntimeException e) {
    // On this path the caller never receives the stream, so it must be closed here or it leaks.
    try {
      dataInputStream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
/**
 * Replaces the {@link InputStream} of the incoming {@link FileAwareInputStream} with the result of applying
 * {@link #inputStreamTransformation} to it, and returns the rebuilt record as the only element of the result.
 *
 * <p>The record's file is passed to {@code modifyExtensionAtDestination} before the stream is transformed.</p>
 *
 * @throws DataConversionException wrapping any {@link RuntimeException} raised while transforming the stream or
 *         rebuilding the record
 */
@Override
public Iterable<FileAwareInputStream> convertRecord(String outputSchema,
    FileAwareInputStream fileAwareInputStream, WorkUnitState workUnit) throws DataConversionException {
  modifyExtensionAtDestination(fileAwareInputStream.getFile());

  try {
    InputStream transformed = inputStreamTransformation().apply(fileAwareInputStream.getInputStream());
    FileAwareInputStream converted = fileAwareInputStream.toBuilder()
        .inputStream(transformed)
        .build();
    return new SingleRecordIterable<>(converted);
  } catch (RuntimeException cause) {
    throw new DataConversionException(cause);
  }
}
// Build a FileAwareInputStream for the copyable file cf whose payload is the in-memory bytes of streamString
// (passed through StreamUtils.convertStream), then hand it to the writer and commit the write.
FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder().file(cf) .inputStream(StreamUtils.convertStream(new ByteArrayInputStream(streamString))).build(); dataWriter.write(fileAwareInputStream); dataWriter.commit();
// Pair the copyable file cf with a stream over the bytes in streamString (converted by
// StreamUtils.convertStream), write that record with dataWriter, and commit it.
FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder().file(cf) .inputStream(StreamUtils.convertStream(new ByteArrayInputStream(streamString))).build(); dataWriter.write(fileAwareInputStream); dataWriter.commit();
// Turn streamString into an FSDataInputStream and position it at the split's low position before writing.
// The record is built with the copyable file cf, the positioned stream and the split wrapped in an Optional;
// it is then written with dataWriter and committed.
FSDataInputStream dataInputStream = StreamUtils.convertStream(IOUtils.toInputStream(streamString)); dataInputStream.seek(split.getLowPosition()); FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder().file(cf) .inputStream(dataInputStream) .split(Optional.of(split)) .build(); dataWriter.write(fileAwareInputStream); dataWriter.commit();
/**
 * Disabled test: feeds the classpath resource {@code decryptConverterTest/decrypt-test.txt.gpg} through a
 * {@link DecryptConverter} initialised with an encrypted passphrase and expects the converted stream to read
 * {@code 123456789}.
 *
 * <p>The master password file is deleted and the converter closed in a {@code finally} block.</p>
 */
@Test(enabled = false)
public void testConvertGpgRecord() throws Exception {
  final String decryptedContents = "123456789";
  final String gpgPassphrase = "12";

  DecryptConverter converter = new DecryptConverter();
  WorkUnitState workUnitState = new WorkUnitState();

  try {
    setEncryptedPassphrase(gpgPassphrase, workUnitState);
    converter.init(workUnitState);

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    URL resource = getClass().getClassLoader().getResource("decryptConverterTest/decrypt-test.txt.gpg");
    Assert.assertNotNull(resource);

    try (FSDataInputStream encryptedInput = localFs.open(new Path(resource.getFile()))) {
      FileAwareInputStream encryptedRecord = FileAwareInputStream.builder()
          .file(CopyableFileUtils.getTestCopyableFile())
          .inputStream(encryptedInput)
          .build();

      FileAwareInputStream decryptedRecord =
          Iterables.getFirst(converter.convertRecord("outputSchema", encryptedRecord, workUnitState), null);
      Assert.assertNotNull(decryptedRecord);

      Assert.assertEquals(IOUtils.toString(decryptedRecord.getInputStream(), Charsets.UTF_8), decryptedContents);
    }
  } finally {
    deleteMasterPwdFile();
    converter.close();
  }
}
/**
 * Disabled test: configures the {@code insecure_shift} converter encryption algorithm on the job state, runs the
 * classpath resource {@code decryptConverterTest/decrypt-test.txt.insecure_shift} through a
 * {@link DecryptConverter}, and expects both the decrypted contents {@code 2345678} and a destination file name
 * of {@code decrypt-test.txt}.
 */
@Test(enabled = false)
public void testConvertDifferentEncryption() throws IOException, DataConversionException {
  final String decryptedContents = "2345678";

  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.getJobState()
      .setProp("converter.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY, "insecure_shift");

  try (DecryptConverter converter = new DecryptConverter()) {
    converter.init(workUnitState);

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    URL resource =
        getClass().getClassLoader().getResource("decryptConverterTest/decrypt-test.txt.insecure_shift");
    Assert.assertNotNull(resource);

    try (FSDataInputStream encryptedInput = localFs.open(new Path(resource.getFile()))) {
      FileAwareInputStream encryptedRecord = FileAwareInputStream.builder()
          .file(CopyableFileUtils.getTestCopyableFile())
          .inputStream(encryptedInput)
          .build();
      // The destination carries the algorithm name so the test can check what the converter does with it.
      encryptedRecord.getFile().setDestination(new Path("file:///tmp/decrypt-test.txt.insecure_shift"));

      FileAwareInputStream decryptedRecord =
          Iterables.getFirst(converter.convertRecord("outputSchema", encryptedRecord, workUnitState), null);
      Assert.assertNotNull(decryptedRecord);

      Assert.assertEquals(IOUtils.toString(decryptedRecord.getInputStream(), Charsets.UTF_8), decryptedContents);
      Assert.assertEquals(decryptedRecord.getFile().getDestination().getName(), "decrypt-test.txt");
    }
  }
}
/**
 * Writes a small text payload through a {@link FileAwareInputStreamDataWriter} and checks that the committed
 * file under the writer output directory contains exactly that text.
 *
 * @throws Exception if setting up, writing, committing or reading back the file fails
 */
@Test
public void testWrite() throws Exception {
  String streamString = "testContents";

  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));

  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
  CopySource.serializeCopyEntity(state, cf);
  CopySource.serializeCopyableDataset(state, metadata);

  FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
  // Encode and decode with an explicit charset so the round trip does not depend on the platform default.
  FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder().file(cf)
      .inputStream(StreamUtils.convertStream(IOUtils.toInputStream(streamString, "UTF-8"))).build();
  dataWriter.write(fileAwareInputStream);
  dataWriter.commit();

  Path writtenFilePath = new Path(
      new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()),
      cf.getDestination());

  // Close the verification stream even when the assertion fails, so the test does not leak a file handle.
  try (FileInputStream writtenFileStream = new FileInputStream(writtenFilePath.toString())) {
    Assert.assertEquals(IOUtils.toString(writtenFileStream, "UTF-8"), streamString);
  }
}
/**
 * Locates the compressed test file {@code filePath} on the classpath, opens it on the local file system, and
 * returns the first {@link FileAwareInputStream} produced by running it through an {@link UnGzipConverter}.
 *
 * <p>The accompanying {@link CopyableFile} is created with a destination of {@code newFileName} under
 * {@code testTempPath} and with owner and group taken from the status of {@code testTempPath}.</p>
 *
 * @param filePath classpath-relative location of the compressed test file
 * @param newFileName file name used for the destination under {@code testTempPath}
 * @return the first converted record, or {@code null} if the converter produced none
 */
private FileAwareInputStream getCompressedInputStream(final String filePath, final String newFileName)
    throws Exception {
  UnGzipConverter unGzipConverter = new UnGzipConverter();
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  String resourcePath = getClass().getClassLoader().getResource(filePath).getFile();

  FileStatus tempDirStatus = localFs.getFileStatus(testTempPath);
  FsPermission allowEverything = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
  OwnerAndPermission ownerAndPermission =
      new OwnerAndPermission(tempDirStatus.getOwner(), tempDirStatus.getGroup(), allowEverything);

  String destination = new Path(testTempPath, newFileName).toString();
  CopyableFile copyableFile =
      CopyableFileUtils.getTestCopyableFile(filePath, destination, newFileName, ownerAndPermission);

  FileAwareInputStream compressedRecord = FileAwareInputStream.builder()
      .file(copyableFile)
      .inputStream(localFs.open(new Path(resourcePath)))
      .build();

  Iterable<FileAwareInputStream> converted =
      unGzipConverter.convertRecord("outputSchema", compressedRecord, new WorkUnitState());
  return Iterables.getFirst(converted, null);
}
/**
 * Runs {@code helloworld.txt.gzip} and {@code helloworld.txt.gz} from the {@code unGzipConverterTest} classpath
 * directory through an {@link UnGzipConverter} and checks, for each, that the destination file name becomes
 * {@code helloworld.txt} and that the converted stream reads {@code helloworld} followed by a newline.
 */
@Test
public void testExtensionStripping() throws DataConversionException, IOException {
  List<String> compressedFileNames = ImmutableList.of("helloworld.txt.gzip", "helloworld.txt.gz");
  UnGzipConverter unGzipConverter = new UnGzipConverter();
  FileSystem localFs = FileSystem.getLocal(new Configuration());

  for (String compressedFileName : compressedFileNames) {
    String resourceName = "unGzipConverterTest/" + compressedFileName;
    String resourcePath = getClass().getClassLoader().getResource(resourceName).getFile();

    FileAwareInputStream compressedRecord = FileAwareInputStream.builder()
        .file(CopyableFileUtils.getTestCopyableFile(resourceName, "/tmp/" + compressedFileName, null, null))
        .inputStream(localFs.open(new Path(resourcePath)))
        .build();

    FileAwareInputStream uncompressedRecord = unGzipConverter
        .convertRecord("outputSchema", compressedRecord, new WorkUnitState())
        .iterator()
        .next();

    Assert.assertEquals(uncompressedRecord.getFile().getDestination().getName(), "helloworld.txt");
    Assert.assertEquals(IOUtils.toString(uncompressedRecord.getInputStream(), StandardCharsets.UTF_8),
        "helloworld\n");
  }
}
/**
 * Replaces the {@link InputStream} of the incoming {@link FileAwareInputStream} with the result of applying
 * {@link #inputStreamTransformation} to it, and returns the rebuilt record as the only element of the result.
 *
 * <p>The record's file is passed to {@code modifyExtensionAtDestination} before the stream is transformed.</p>
 *
 * @throws DataConversionException wrapping any {@link RuntimeException} raised while transforming the stream or
 *         rebuilding the record
 */
@Override
public Iterable<FileAwareInputStream> convertRecord(String outputSchema,
    FileAwareInputStream fileAwareInputStream, WorkUnitState workUnit) throws DataConversionException {
  modifyExtensionAtDestination(fileAwareInputStream.getFile());

  try {
    InputStream transformed = inputStreamTransformation().apply(fileAwareInputStream.getInputStream());
    FileAwareInputStream converted = fileAwareInputStream.toBuilder()
        .inputStream(transformed)
        .build();
    return new SingleRecordIterable<>(converted);
  } catch (RuntimeException cause) {
    throw new DataConversionException(cause);
  }
}
/**
 * Data-driven test: opens the classpath resource {@code filePath} on the local file system, converts it with an
 * {@link UnGzipConverter}, reads the first converted stream via {@code readGzipStreamAsString}, and compares the
 * trimmed result with {@code expectedText}.
 *
 * @param filePath classpath-relative location of the test file
 * @param expectedText text the trimmed converted contents must equal
 */
@Test(dataProvider = "fileDataProvider")
public void testGz(final String filePath, final String expectedText) throws Exception {
  UnGzipConverter unGzipConverter = new UnGzipConverter();
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  String resourcePath = getClass().getClassLoader().getResource(filePath).getFile();

  FileAwareInputStream compressedRecord = FileAwareInputStream.builder()
      .file(CopyableFileUtils.getTestCopyableFile(filePath))
      .inputStream(localFs.open(new Path(resourcePath)))
      .build();

  Iterable<FileAwareInputStream> converted =
      unGzipConverter.convertRecord("outputSchema", compressedRecord, new WorkUnitState());
  FileAwareInputStream firstConverted = Iterables.getFirst(converted, null);

  String actualText = readGzipStreamAsString(firstConverted.getInputStream());
  Assert.assertEquals(actualText.trim(), expectedText);
}