// Location this fileSet is published from: the writer output dir with the partition identifier as child path.
Path datasetWriterOutputPath = new Path(this.writerOutputDir, datasetAndPartition.identifier());
// Each message below is prefixed with the partition identifier in square brackets.
log.info(String.format("[%s] Publishing fileSet from %s for dataset %s", datasetAndPartition.identifier(), datasetWriterOutputPath, metadata.getDatasetURN()));
// Report how many pre- and post-publish steps are present.
log.info(String.format("[%s] Found %d prePublish steps and %d postPublish steps.", datasetAndPartition.identifier(), prePublish.size(), postPublish.size()));
// NOTE(review): in this excerpt the message is logged unconditionally; presumably the full method guards it
// with a "no copyable files" check — confirm against the enclosing method.
log.info(String.format("[%s] No copyable files in dataset. Proceeding to postpublish steps.", datasetAndPartition.identifier()));
/**
 * Publishes every file set contained in the supplied work unit states.
 *
 * <p>The states are grouped per {@link CopyEntity.DatasetAndPartition} by {@code groupByFileSet}, and each group
 * is passed to {@code publishFileSet}. When publishing a group throws, a failure event is submitted, the error is
 * logged, and the loop continues with the next group.
 *
 * @param states work unit states to publish
 * @throws IOException if at least one file set failed to publish
 */
@Override
public void publishData(Collection<? extends WorkUnitState> states) throws IOException {
  // Work unit states keyed by the dataset/partition they belong to. Per the original note, this grouping is what
  // allows all states of a dataset to be marked COMMITTED after a successful publish (presumably done inside
  // publishFileSet, which is not visible here).
  Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> statesByFileSet = groupByFileSet(states);

  int failedFileSets = 0;
  for (CopyEntity.DatasetAndPartition fileSet : statesByFileSet.keySet()) {
    try {
      this.publishFileSet(fileSet, statesByFileSet.get(fileSet));
    } catch (Throwable failure) {
      // Record the failure but keep going so the remaining file sets still get a chance to publish.
      CopyEventSubmitterHelper.submitFailedDatasetPublish(this.eventSubmitter, fileSet);
      log.error("Failed to publish " + fileSet.getDataset().getDatasetURN(), failure);
      failedFileSets++;
    }
  }

  if (failedFileSets > 0) {
    throw new IOException("Not all datasets published successfully");
  }
}
// Location this fileSet is published from: the writer output dir with the partition identifier as child path.
Path datasetWriterOutputPath = new Path(this.writerOutputDir, datasetAndPartition.identifier());
// Each message below is prefixed with the partition identifier in square brackets.
log.info(String.format("[%s] Publishing fileSet from %s for dataset %s", datasetAndPartition.identifier(), datasetWriterOutputPath, metadata.getDatasetURN()));
// Report how many pre- and post-publish steps are present.
log.info(String.format("[%s] Found %d prePublish steps and %d postPublish steps.", datasetAndPartition.identifier(), prePublish.size(), postPublish.size()));
// NOTE(review): in this excerpt the message is logged unconditionally; presumably the full method guards it
// with a "no copyable files" check — confirm against the enclosing method.
log.info(String.format("[%s] No copyable files in dataset. Proceeding to postpublish steps.", datasetAndPartition.identifier()));
// Attach a custom tag to the work unit: the dataset URN under the dataset-root metadata name.
GobblinMetrics.addCustomTagToState(workUnit, new Tag<>(CopyEventSubmitterHelper.DATASET_ROOT_METADATA_NAME, this.copyableDataset.datasetURN()));
// NOTE: this key receives datasetAndPartition.toString(), not the plain dataset URN used for the SLA key below.
workUnit.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetAndPartition.toString());
// SLA event properties: the dataset URN and, as the partition, the copy entity's file set.
workUnit.setProp(SlaEventKeys.DATASET_URN_KEY, this.copyableDataset.datasetURN());
workUnit.setProp(SlaEventKeys.PARTITION_KEY, copyEntity.getFileSet());
/**
 * Publishes every file set contained in the supplied work unit states.
 *
 * <p>The states are grouped per {@link CopyEntity.DatasetAndPartition} by {@code groupByFileSet}, and each group
 * is passed to {@code publishFileSet}. When publishing a group throws, a failure event is submitted, the error is
 * logged, and the loop continues with the next group.
 *
 * @param states work unit states to publish
 * @throws IOException if at least one file set failed to publish
 */
@Override
public void publishData(Collection<? extends WorkUnitState> states) throws IOException {
  // Work unit states keyed by the dataset/partition they belong to. Per the original note, this grouping is what
  // allows all states of a dataset to be marked COMMITTED after a successful publish (presumably done inside
  // publishFileSet, which is not visible here).
  Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> statesByFileSet = groupByFileSet(states);

  int failedFileSets = 0;
  for (CopyEntity.DatasetAndPartition fileSet : statesByFileSet.keySet()) {
    try {
      this.publishFileSet(fileSet, statesByFileSet.get(fileSet));
    } catch (Throwable failure) {
      // Record the failure but keep going so the remaining file sets still get a chance to publish.
      CopyEventSubmitterHelper.submitFailedDatasetPublish(this.eventSubmitter, fileSet);
      log.error("Failed to publish " + fileSet.getDataset().getDatasetURN(), failure);
      failedFileSets++;
    }
  }

  if (failedFileSets > 0) {
    throw new IOException("Not all datasets published successfully");
  }
}
/**
 * Resolves the output root of a dataset partition.
 *
 * @param outputDir parent output directory
 * @param datasetAndPartition dataset/partition whose {@code identifier()} is used as the child path
 * @return {@code outputDir} with the partition identifier appended as a child path
 */
protected static Path getPartitionOutputRoot(Path outputDir,
    CopyEntity.DatasetAndPartition datasetAndPartition) {
  final Path partitionRoot = new Path(outputDir, datasetAndPartition.identifier());
  return partitionRoot;
}
/**
 * Submits a {@code DATASET_PUBLISHED_FAILED_EVENT_NAME} event for the given dataset.
 *
 * <p>The event carries a single metadata entry: the dataset URN under {@code DATASET_ROOT_METADATA_NAME}.
 *
 * @param eventSubmitter submitter used to emit the event
 * @param datasetAndPartition dataset/partition whose dataset URN is reported
 */
static void submitFailedDatasetPublish(EventSubmitter eventSubmitter,
    CopyEntity.DatasetAndPartition datasetAndPartition) {
  String failedDatasetUrn = datasetAndPartition.getDataset().getDatasetURN();
  eventSubmitter.submit(DATASET_PUBLISHED_FAILED_EVENT_NAME,
      ImmutableMap.of(DATASET_ROOT_METADATA_NAME, failedDatasetUrn));
}
/**
 * Builds and submits a {@code DATASET_PUBLISHED_EVENT_NAME} SLA event for the given dataset partition.
 *
 * @param eventSubmitter submitter the SLA event is sent through
 * @param datasetAndPartition supplies the dataset URN and the partition of the event
 * @param originTimestamp origin timestamp recorded on the event
 * @param upstreamTimestamp upstream timestamp recorded on the event
 * @param additionalMetadata extra metadata entries attached to the event
 */
static void submitSuccessfulDatasetPublish(EventSubmitter eventSubmitter,
    CopyEntity.DatasetAndPartition datasetAndPartition, String originTimestamp, String upstreamTimestamp,
    Map<String, String> additionalMetadata) {
  SlaEventSubmitter.builder()
      .eventSubmitter(eventSubmitter)
      .eventName(DATASET_PUBLISHED_EVENT_NAME)
      .datasetUrn(datasetAndPartition.getDataset().getDatasetURN())
      .partition(datasetAndPartition.getPartition())
      .originTimestamp(originTimestamp)
      .upstreamTimestamp(upstreamTimestamp)
      .additionalMetadata(additionalMetadata)
      .build()
      .submit();
}
/**
 * Builds the {@link DatasetAndPartition} describing where this {@link CopyEntity} belongs: the supplied dataset
 * metadata paired with this entity's file set.
 *
 * @param metadata {@link CopyableDatasetMetadata} of the dataset that owns this {@link CopyEntity}.
 * @return a newly created {@link DatasetAndPartition} for {@code metadata} and {@code getFileSet()}.
 */
public DatasetAndPartition getDatasetAndPartition(CopyableDatasetMetadata metadata) {
  final DatasetAndPartition owner = new DatasetAndPartition(metadata, getFileSet());
  return owner;
}
cf.getDatasetAndPartition(metadata).identifier()), cf.getDestination()); Assert.assertTrue(writtenFilePath.getName().endsWith("gpg"), "Expected encryption name to be appended to destination");
cf.getDatasetAndPartition(metadata).identifier()), cf.getDestination()); Assert.assertTrue(writtenFilePath.getName().endsWith("gpg"), "Expected encryption name to be appended to destination");
// Attach a custom tag to the work unit: the dataset URN under the dataset-root metadata name.
GobblinMetrics.addCustomTagToState(workUnit, new Tag<>(CopyEventSubmitterHelper.DATASET_ROOT_METADATA_NAME, this.copyableDataset.datasetURN()));
// NOTE: this key receives datasetAndPartition.toString(), not the plain dataset URN used for the SLA key below.
workUnit.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetAndPartition.toString());
// SLA event properties: the dataset URN and, as the partition, the copy entity's file set.
workUnit.setProp(SlaEventKeys.DATASET_URN_KEY, this.copyableDataset.datasetURN());
workUnit.setProp(SlaEventKeys.PARTITION_KEY, copyEntity.getFileSet());
// Commit the writer before inspecting its output.
dataWriter.commit();
// Rebuild the written file's path from the writer output dir, the dataset-and-partition identifier and the
// copyable file's destination.
Path writtenFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()), cf.getDestination());
// The file is expected to hold only the first splitLen characters of the source string.
// NOTE(review): the FileInputStream opened here is never closed.
Assert.assertEquals(IOUtils.toString(new FileInputStream(writtenFilePath.toString())), streamString.substring(0, (int) splitLen));
/**
 * Writes a small payload through {@link FileAwareInputStreamDataWriter} with the {@code insecure_shift}
 * encryption algorithm configured, then checks the committed file.
 *
 * <p>The test expects the written file name to end with {@code insecure_shift} and every written byte to equal
 * the corresponding input byte plus one, modulo 256.
 *
 * @throws Exception if setup, writing, committing or reading back the file fails
 */
@Test
public void testWriteWithEncryption() throws Exception {
  byte[] streamString = "testEncryptedContents".getBytes("UTF-8");

  // Expected file contents: each input byte shifted up by one (mod 256).
  byte[] expectedContents = new byte[streamString.length];
  for (int i = 0; i < streamString.length; i++) {
    expectedContents[i] = (byte) ((streamString[i] + 1) % 256);
  }

  // Reuse owner and group of the temp dir, with all permissions, for the test copyable file.
  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);

  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));

  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
  state.setProp("writer.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY, "insecure_shift");
  CopySource.serializeCopyEntity(state, cf);
  CopySource.serializeCopyableDataset(state, metadata);

  FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
  FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder().file(cf)
      .inputStream(StreamUtils.convertStream(new ByteArrayInputStream(streamString))).build();
  dataWriter.write(fileAwareInputStream);
  dataWriter.commit();

  Path writtenFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR),
      cf.getDatasetAndPartition(metadata).identifier()), cf.getDestination());
  Assert.assertTrue(writtenFilePath.getName().endsWith("insecure_shift"),
      "Expected encryption name to be appended to destination");

  // Close the stream after reading so the test does not leak a file handle.
  try (FileInputStream writtenFile = new FileInputStream(writtenFilePath.toString())) {
    Assert.assertEquals(IOUtils.toByteArray(writtenFile), expectedContents);
  }
}
@Test(dataProvider = "testFileDataProvider") public void testWrite(final String filePath, final String newFileName, final String expectedText) throws Exception { String expectedFileContents = "text"; String fileNameInArchive = "text.txt"; WorkUnitState state = TestUtils.createTestWorkUnitState(); state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString()); state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString()); state.setProp(ConfigurationKeys.WRITER_FILE_PATH, "writer_file_path_" + RandomStringUtils.randomAlphabetic(5)); CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source"))); CopySource.serializeCopyableDataset(state, metadata); FileAwareInputStream fileAwareInputStream = getCompressedInputStream(filePath, newFileName); CopySource.serializeCopyEntity(state, fileAwareInputStream.getFile()); TarArchiveInputStreamDataWriter dataWriter = new TarArchiveInputStreamDataWriter(state, 1, 0); dataWriter.write(fileAwareInputStream); dataWriter.commit(); // the archive file contains file test.txt Path unArchivedFilePath = new Path(fileAwareInputStream.getFile().getDestination(), fileNameInArchive); // Path at which the writer writes text.txt Path taskOutputFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), fileAwareInputStream.getFile().getDatasetAndPartition(metadata).identifier()), PathUtils.withoutLeadingSeparator(unArchivedFilePath)); Assert.assertEquals(IOUtils.toString(new FileInputStream(taskOutputFilePath.toString())).trim(), expectedFileContents); }
/**
 * Writes a small string through {@link FileAwareInputStreamDataWriter} and checks that the committed file,
 * located under the writer output dir and the dataset-and-partition identifier, contains exactly that string.
 *
 * @throws Exception if setup, writing, committing or reading back the file fails
 */
@Test
public void testWrite() throws Exception {
  String streamString = "testContents";

  // Reuse owner and group of the temp dir, with all permissions, for the test copyable file.
  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);

  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));

  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
  CopySource.serializeCopyEntity(state, cf);
  CopySource.serializeCopyableDataset(state, metadata);

  FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
  FileAwareInputStream fileAwareInputStream = FileAwareInputStream.builder().file(cf)
      .inputStream(StreamUtils.convertStream(IOUtils.toInputStream(streamString))).build();
  dataWriter.write(fileAwareInputStream);
  dataWriter.commit();

  Path writtenFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR),
      cf.getDatasetAndPartition(metadata).identifier()), cf.getDestination());

  // Close the stream after reading so the test does not leak a file handle.
  try (FileInputStream writtenFile = new FileInputStream(writtenFilePath.toString())) {
    Assert.assertEquals(IOUtils.toString(writtenFile), streamString);
  }
}
/**
 * Resolves the output root of a dataset partition.
 *
 * @param outputDir parent output directory
 * @param datasetAndPartition dataset/partition whose {@code identifier()} is used as the child path
 * @return {@code outputDir} with the partition identifier appended as a child path
 */
protected static Path getPartitionOutputRoot(Path outputDir,
    CopyEntity.DatasetAndPartition datasetAndPartition) {
  final Path partitionRoot = new Path(outputDir, datasetAndPartition.identifier());
  return partitionRoot;
}
/**
 * Submits a {@code DATASET_PUBLISHED_FAILED_EVENT_NAME} event for the given dataset.
 *
 * <p>The event carries a single metadata entry: the dataset URN under {@code DATASET_ROOT_METADATA_NAME}.
 *
 * @param eventSubmitter submitter used to emit the event
 * @param datasetAndPartition dataset/partition whose dataset URN is reported
 */
static void submitFailedDatasetPublish(EventSubmitter eventSubmitter,
    CopyEntity.DatasetAndPartition datasetAndPartition) {
  String failedDatasetUrn = datasetAndPartition.getDataset().getDatasetURN();
  eventSubmitter.submit(DATASET_PUBLISHED_FAILED_EVENT_NAME,
      ImmutableMap.of(DATASET_ROOT_METADATA_NAME, failedDatasetUrn));
}
private void createDatasetFiles() throws IOException { // Create writer output files Path datasetWriterOutputPath = new Path(writerOutputPath, copyEntity.getDatasetAndPartition(this.metadata).identifier()); Path outputPathWithCurrentDirectory = new Path(datasetWriterOutputPath, PathUtils.withoutLeadingSeparator(this.targetPath)); for (String path : relativeFilePaths) { Path pathToCreate = new Path(outputPathWithCurrentDirectory, path); fs.mkdirs(pathToCreate.getParent()); fs.create(pathToCreate); } }
/**
 * Builds the {@link DatasetAndPartition} describing where this {@link CopyEntity} belongs: the supplied dataset
 * metadata paired with this entity's file set.
 *
 * @param metadata {@link CopyableDatasetMetadata} of the dataset that owns this {@link CopyEntity}.
 * @return a newly created {@link DatasetAndPartition} for {@code metadata} and {@code getFileSet()}.
 */
public DatasetAndPartition getDatasetAndPartition(CopyableDatasetMetadata metadata) {
  final DatasetAndPartition owner = new DatasetAndPartition(metadata, getFileSet());
  return owner;
}