/**
 * Writes the file carried by {@code fileAwareInputStream} to its staging location.
 *
 * <p>An instance handles a single file only: once a file has been recorded in
 * {@code actualProcessedCopyableFile}, every further call is rejected. When {@code encryptionConfig}
 * is set, the encryption type is appended as an extension to the file's destination before the
 * staging path is computed.
 *
 * @param fileAwareInputStream the record holding the {@link CopyableFile} and the stream to write
 * @throws IOException if this writer has already processed a file, or if creating the staging
 *         directory or the delegated write fails
 */
@Override
public final void writeImpl(FileAwareInputStream fileAwareInputStream) throws IOException {
  // Reject a second file up front, before its destination is mutated or a staging path is derived,
  // so that a rejected record is left untouched.
  if (this.actualProcessedCopyableFile.isPresent()) {
    throw new IOException(this.getClass().getCanonicalName() + " can only process one file.");
  }

  CopyableFile copyableFile = fileAwareInputStream.getFile();
  if (encryptionConfig != null) {
    // The destination name advertises the encryption applied to the written bytes.
    copyableFile.setDestination(PathUtils.addExtension(copyableFile.getDestination(),
        "." + EncryptionConfigParser.getEncryptionType(encryptionConfig)));
  }

  Path stagingFile = getStagingFilePath(copyableFile);
  this.actualProcessedCopyableFile = Optional.of(copyableFile);
  this.fs.mkdirs(stagingFile.getParent());
  writeImpl(fileAwareInputStream.getInputStream(), stagingFile, copyableFile, fileAwareInputStream);
  this.filesWritten.incrementAndGet();
}
/**
 * Verifies that the extractor returns one record for the test file, with the expected origin path
 * and contents, and that a second read returns {@code null}.
 */
@Test
public void testReadRecord() throws Exception {
  CopyableFile file = getTestCopyableFile("inputStreamExtractorTest/first.txt");
  FileAwareInputStreamExtractor extractor =
      new FileAwareInputStreamExtractor(FileSystem.getLocal(new Configuration()), file);

  FileAwareInputStream fileAwareInputStream = extractor.readRecord(null);
  Assert.assertEquals(fileAwareInputStream.getFile().getOrigin().getPath(), file.getOrigin().getPath());

  // Close the record's stream when the test is done, and decode with an explicit charset so the
  // comparison does not depend on the platform default.
  try (java.io.InputStream recordStream = fileAwareInputStream.getInputStream()) {
    Assert.assertEquals(IOUtils.toString(recordStream, java.nio.charset.StandardCharsets.UTF_8), "first");
    Assert.assertNull(extractor.readRecord(null));
  }
}
/**
 * Produces a single converted record whose {@link InputStream} is the result of passing the
 * incoming record's stream through {@link #inputStreamTransformation}.
 *
 * <p>The destination extension of the record's file is adjusted first, via
 * {@code modifyExtensionAtDestination}. {@code outputSchema} and {@code workUnit} are not read here.
 *
 * @throws DataConversionException wrapping any {@link RuntimeException} raised while transforming
 *         the stream or rebuilding the record
 */
@Override
public Iterable<FileAwareInputStream> convertRecord(String outputSchema, FileAwareInputStream fileAwareInputStream,
    WorkUnitState workUnit) throws DataConversionException {
  modifyExtensionAtDestination(fileAwareInputStream.getFile());

  try {
    InputStream transformed = inputStreamTransformation().apply(fileAwareInputStream.getInputStream());
    // Copy the record, swapping in only the transformed stream.
    FileAwareInputStream converted = fileAwareInputStream.toBuilder().inputStream(transformed).build();
    return new SingleRecordIterable<>(converted);
  } catch (RuntimeException cause) {
    throw new DataConversionException(cause);
  }
}
/**
 * Verifies that {@link UnGzipConverter} removes both the {@code .gzip} and the {@code .gz}
 * extension from the destination file name and yields the uncompressed contents.
 */
@Test
public void testExtensionStripping() throws DataConversionException, IOException {
  List<String> helloWorldFiles = ImmutableList.of("helloworld.txt.gzip", "helloworld.txt.gz");
  UnGzipConverter converter = new UnGzipConverter();
  FileSystem fs = FileSystem.getLocal(new Configuration());

  for (String fileName : helloWorldFiles) {
    String filePath = "unGzipConverterTest/" + fileName;

    // Fail with a clear assertion, not a NullPointerException, when the fixture is missing.
    java.net.URL resource = getClass().getClassLoader().getResource(filePath);
    Assert.assertNotNull(resource);
    String fullPath = resource.getFile();

    // Close the opened file on every iteration, even if an assertion fails.
    try (java.io.InputStream compressedInput = fs.open(new Path(fullPath))) {
      FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder()
          .file(CopyableFileUtils.getTestCopyableFile(filePath, "/tmp/" + fileName, null, null))
          .inputStream(compressedInput).build();

      Iterable<FileAwareInputStream> iterable =
          converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState());
      FileAwareInputStream out = iterable.iterator().next();

      Assert.assertEquals(out.getFile().getDestination().getName(), "helloworld.txt");
      String contents = IOUtils.toString(out.getInputStream(), StandardCharsets.UTF_8);
      Assert.assertEquals(contents, "helloworld\n");
    }
  }
}
/**
 * Runs {@link DecryptConverter}, configured with the {@code insecure_shift} algorithm, over an
 * encrypted test resource and checks both the decrypted contents and the destination file name
 * of the converted record. The test is currently switched off through {@code enabled = false}.
 */
@Test(enabled = false)
public void testConvertDifferentEncryption() throws IOException, DataConversionException {
  final String expectedFileContents = "2345678";

  WorkUnitState workUnitState = new WorkUnitState();
  String algorithmKey = "converter.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY;
  workUnitState.getJobState().setProp(algorithmKey, "insecure_shift");

  try (DecryptConverter converter = new DecryptConverter()) {
    converter.init(workUnitState);

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    URL encryptedResource =
        getClass().getClassLoader().getResource("decryptConverterTest/decrypt-test.txt.insecure_shift");
    Assert.assertNotNull(encryptedResource);
    Path encryptedPath = new Path(encryptedResource.getFile());

    try (FSDataInputStream encryptedInput = localFs.open(encryptedPath)) {
      FileAwareInputStream encryptedRecord = FileAwareInputStream.builder()
          .file(CopyableFileUtils.getTestCopyableFile())
          .inputStream(encryptedInput)
          .build();
      encryptedRecord.getFile().setDestination(new Path("file:///tmp/decrypt-test.txt.insecure_shift"));

      Iterable<FileAwareInputStream> converted =
          converter.convertRecord("outputSchema", encryptedRecord, workUnitState);
      FileAwareInputStream decryptedStream = Iterables.getFirst(converted, null);
      Assert.assertNotNull(decryptedStream);

      String decryptedContents = IOUtils.toString(decryptedStream.getInputStream(), Charsets.UTF_8);
      Assert.assertEquals(decryptedContents, expectedFileContents);
      Assert.assertEquals(decryptedStream.getFile().getDestination().getName(), "decrypt-test.txt");
    }
  }
}
@Test(dataProvider = "testFileDataProvider") public void testWrite(final String filePath, final String newFileName, final String expectedText) throws Exception { String expectedFileContents = "text"; String fileNameInArchive = "text.txt"; WorkUnitState state = TestUtils.createTestWorkUnitState(); state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString()); state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString()); state.setProp(ConfigurationKeys.WRITER_FILE_PATH, "writer_file_path_" + RandomStringUtils.randomAlphabetic(5)); CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source"))); CopySource.serializeCopyableDataset(state, metadata); FileAwareInputStream fileAwareInputStream = getCompressedInputStream(filePath, newFileName); CopySource.serializeCopyEntity(state, fileAwareInputStream.getFile()); TarArchiveInputStreamDataWriter dataWriter = new TarArchiveInputStreamDataWriter(state, 1, 0); dataWriter.write(fileAwareInputStream); dataWriter.commit(); // the archive file contains file test.txt Path unArchivedFilePath = new Path(fileAwareInputStream.getFile().getDestination(), fileNameInArchive); // Path at which the writer writes text.txt Path taskOutputFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), fileAwareInputStream.getFile().getDatasetAndPartition(metadata).identifier()), PathUtils.withoutLeadingSeparator(unArchivedFilePath)); Assert.assertEquals(IOUtils.toString(new FileInputStream(taskOutputFilePath.toString())).trim(), expectedFileContents); }
/**
 * Writes the file carried by {@code fileAwareInputStream} to its staging location.
 *
 * <p>An instance handles a single file only: once a file has been recorded in
 * {@code actualProcessedCopyableFile}, every further call is rejected. When {@code encryptionConfig}
 * is set, the encryption type is appended as an extension to the file's destination before the
 * staging path is computed.
 *
 * @param fileAwareInputStream the record holding the {@link CopyableFile} and the stream to write
 * @throws IOException if this writer has already processed a file, or if creating the staging
 *         directory or the delegated write fails
 */
@Override
public final void writeImpl(FileAwareInputStream fileAwareInputStream) throws IOException {
  // Reject a second file up front, before its destination is mutated or a staging path is derived,
  // so that a rejected record is left untouched.
  if (this.actualProcessedCopyableFile.isPresent()) {
    throw new IOException(this.getClass().getCanonicalName() + " can only process one file.");
  }

  CopyableFile copyableFile = fileAwareInputStream.getFile();
  if (encryptionConfig != null) {
    // The destination name advertises the encryption applied to the written bytes.
    copyableFile.setDestination(PathUtils.addExtension(copyableFile.getDestination(),
        "." + EncryptionConfigParser.getEncryptionType(encryptionConfig)));
  }

  Path stagingFile = getStagingFilePath(copyableFile);
  this.actualProcessedCopyableFile = Optional.of(copyableFile);
  this.fs.mkdirs(stagingFile.getParent());
  writeImpl(fileAwareInputStream.getInputStream(), stagingFile, copyableFile, fileAwareInputStream);
  this.filesWritten.incrementAndGet();
}
/**
 * Produces a single converted record whose {@link InputStream} is the result of passing the
 * incoming record's stream through {@link #inputStreamTransformation}.
 *
 * <p>The destination extension of the record's file is adjusted first, via
 * {@code modifyExtensionAtDestination}. {@code outputSchema} and {@code workUnit} are not read here.
 *
 * @throws DataConversionException wrapping any {@link RuntimeException} raised while transforming
 *         the stream or rebuilding the record
 */
@Override
public Iterable<FileAwareInputStream> convertRecord(String outputSchema, FileAwareInputStream fileAwareInputStream,
    WorkUnitState workUnit) throws DataConversionException {
  modifyExtensionAtDestination(fileAwareInputStream.getFile());

  try {
    InputStream transformed = inputStreamTransformation().apply(fileAwareInputStream.getInputStream());
    // Copy the record, swapping in only the transformed stream.
    FileAwareInputStream converted = fileAwareInputStream.toBuilder().inputStream(transformed).build();
    return new SingleRecordIterable<>(converted);
  } catch (RuntimeException cause) {
    throw new DataConversionException(cause);
  }
}