/**
 * Checks that a {@link CopyableDatasetMetadata} built from a {@link CopyableDataset} keeps the dataset URN after
 * being serialized and deserialized again.
 */
@Test
public void testSerializeDeserialize() throws Exception {
  CopyableDataset copyableDataset = new TestCopyableDataset();
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(copyableDataset);

  String serialized = metadata.serialize();
  CopyableDatasetMetadata deserialized = CopyableDatasetMetadata.deserialize(serialized);

  // Actual value first, expected value second, so a failure message labels the two values correctly.
  Assert.assertEquals(deserialized.getDatasetURN(), copyableDataset.datasetURN());
}
/**
 * Rebuilds a {@link CopyableDatasetMetadata} from its serialized string form.
 *
 * <p>The input is first passed through {@code getSerializedWithNewPackage} and the result is parsed with
 * {@code GSON}.
 *
 * @param serialized the serialized {@link CopyableDatasetMetadata} string
 * @return a new instance of {@link CopyableDatasetMetadata}
 */
public static CopyableDatasetMetadata deserialize(String serialized) {
  String normalized = getSerializedWithNewPackage(serialized);
  return GSON.fromJson(normalized, CopyableDatasetMetadata.class);
}
/**
 * Computes the identifier of this {@link DatasetAndPartition}: the hex encoding of the {@code DigestUtils.sha}
 * digest of the dataset's string form concatenated with the partition.
 *
 * @return a unique string identifier for this {@link DatasetAndPartition}.
 */
@SuppressWarnings("deprecation")
public String identifier() {
  String digestInput = this.dataset.toString() + this.partition;
  byte[] digest = DigestUtils.sha(digestInput);
  return Hex.encodeHexString(digest);
}
}
/**
 * Checks that two {@link CopyableDatasetMetadata} instances deserialized from the same string collapse into a
 * single key when used as keys of a {@link Multimap}.
 */
@Test
public void testHashCode() throws Exception {
  CopyableDataset copyableDataset = new TestCopyableDataset();
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(copyableDataset);

  String serialized = metadata.serialize();
  CopyableDatasetMetadata deserialized = CopyableDatasetMetadata.deserialize(serialized);
  CopyableDatasetMetadata deserialized2 = CopyableDatasetMetadata.deserialize(serialized);

  Multimap<CopyableDatasetMetadata, WorkUnitState> datasetRoots = ArrayListMultimap.create();
  datasetRoots.put(deserialized, new WorkUnitState());
  datasetRoots.put(deserialized2, new WorkUnitState());

  // Both puts must land under the same key.
  Assert.assertEquals(datasetRoots.keySet().size(), 1);
}
/**
 * Publishes a single successful work unit through a {@link DeletingCopyDataPublisher} and checks that the test
 * file exists before publishing and no longer exists afterwards.
 */
@Test
public void testDeleteOnSource() throws Exception {
  State jobState = getTestState("testDeleteOnSource");
  Path methodTempDir = new Path(testClassTempPath, "testDeleteOnSource");
  Path sourceFile = new Path(methodTempDir, "test.txt");

  DeletingCopyDataPublisher publisher = closer.register(new DeletingCopyDataPublisher(jobState));

  File taskOutputDir =
      new File(methodTempDir.toString(), "task-output/jobid/1f042f494d1fe2198e0e71a17faa233f33b5099b");
  taskOutputDir.mkdirs();
  taskOutputDir.deleteOnExit();

  WorkUnitState workUnitState = new WorkUnitState();
  CopyableDatasetMetadata datasetMetadata =
      new CopyableDatasetMetadata(new TestCopyableDataset(new Path("origin")));
  CopyEntity copyEntity = CopyableFileUtils.createTestCopyableFile(sourceFile.toString());

  CopySource.serializeCopyableDataset(workUnitState, datasetMetadata);
  CopySource.serializeCopyEntity(workUnitState, copyEntity);

  // The file must be present before the publisher runs ...
  Assert.assertTrue(fs.exists(sourceFile));

  workUnitState.setWorkingState(WorkingState.SUCCESSFUL);
  publisher.publishData(ImmutableList.of(workUnitState));

  // ... and gone once the successful work unit has been published.
  Assert.assertFalse(fs.exists(sourceFile));
}
.deserialize(datasetWorkUnitStates.iterator().next().getProp(CopySource.SERIALIZED_COPYABLE_DATASET)); Path datasetWriterOutputPath = new Path(this.writerOutputDir, datasetAndPartition.identifier()); datasetWriterOutputPath, metadata.getDatasetURN()));
/**
 * Submits a {@code DATASET_PUBLISHED_FAILED_EVENT_NAME} event whose only metadata entry maps
 * {@code DATASET_ROOT_METADATA_NAME} to the URN of the dataset that failed to publish.
 *
 * @param eventSubmitter submitter used to emit the event
 * @param datasetAndPartition the dataset/partition pair whose dataset URN is reported
 */
static void submitFailedDatasetPublish(EventSubmitter eventSubmitter,
    CopyEntity.DatasetAndPartition datasetAndPartition) {
  String datasetUrn = datasetAndPartition.getDataset().getDatasetURN();
  eventSubmitter.submit(DATASET_PUBLISHED_FAILED_EVENT_NAME,
      ImmutableMap.of(DATASET_ROOT_METADATA_NAME, datasetUrn));
}
/**
 * Reads the property stored at {@link #SERIALIZED_COPYABLE_DATASET} from the given {@link State} and deserializes
 * it into a {@link CopyableDatasetMetadata}.
 *
 * @param state the {@link State} holding the serialized metadata
 * @return the deserialized {@link CopyableDatasetMetadata}
 */
public static CopyableDatasetMetadata deserializeCopyableDataset(State state) {
  String serializedMetadata = state.getProp(SERIALIZED_COPYABLE_DATASET);
  return CopyableDatasetMetadata.deserialize(serializedMetadata);
}
}
/**
 * Serializes the given {@link CopyableDatasetMetadata} and stores the result in the {@link State} under
 * {@link #SERIALIZED_COPYABLE_DATASET}.
 *
 * @param state the {@link State} that receives the serialized metadata
 * @param copyableDataset the metadata to serialize
 */
public static void serializeCopyableDataset(State state, CopyableDatasetMetadata copyableDataset) {
  String serializedMetadata = copyableDataset.serialize();
  state.setProp(SERIALIZED_COPYABLE_DATASET, serializedMetadata);
}
public TestDatasetManager(Path testMethodTempPath, State state, String datasetTargetPath, List<String> relativeFilePaths) throws IOException { this.fs = FileSystem.getLocal(new Configuration()); this.copyableDataset = new TestCopyableDataset(new Path("origin")); this.metadata = new CopyableDatasetMetadata(this.copyableDataset); this.relativeFilePaths = relativeFilePaths; this.writerOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)); this.targetPath = new Path(testMethodTempPath, datasetTargetPath); FileStatus file = new FileStatus(0, false, 0, 0, 0, new Path("/file")); FileSystem fs = FileSystem.getLocal(new Configuration()); this.copyEntity = CopyableFile.fromOriginAndDestination(fs, file, new Path("/destination"), CopyConfiguration.builder(fs, state.getProperties()).preserve(PreserveAttributes.fromMnemonicString("")) .build()).build(); fs.mkdirs(testMethodTempPath); log.info("Created a temp directory for test at " + testMethodTempPath); }
.deserialize(datasetWorkUnitStates.iterator().next().getProp(CopySource.SERIALIZED_COPYABLE_DATASET)); Path datasetWriterOutputPath = new Path(this.writerOutputDir, datasetAndPartition.identifier()); datasetWriterOutputPath, metadata.getDatasetURN()));
/**
 * Groups the given work unit states by file set and publishes each file set in turn. A failure while publishing
 * one file set is reported through {@link CopyEventSubmitterHelper} and logged, and the remaining file sets are
 * still attempted.
 *
 * @param states the work unit states to publish
 * @throws IOException if at least one file set failed to publish
 */
@Override
public void publishData(Collection<? extends WorkUnitState> states) throws IOException {
  /*
   * This mapping is used to set WorkingState of all {@link WorkUnitState}s to {@link
   * WorkUnitState.WorkingState#COMMITTED} after a {@link CopyableDataset} is successfully published
   */
  Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> workUnitsByFileSet = groupByFileSet(states);

  boolean anyPublishFailed = false;
  for (CopyEntity.DatasetAndPartition fileSet : workUnitsByFileSet.keySet()) {
    try {
      this.publishFileSet(fileSet, workUnitsByFileSet.get(fileSet));
    } catch (Throwable t) {
      // Record the failure and keep going so the other file sets still get a chance to publish.
      CopyEventSubmitterHelper.submitFailedDatasetPublish(this.eventSubmitter, fileSet);
      log.error("Failed to publish " + fileSet.getDataset().getDatasetURN(), t);
      anyPublishFailed = true;
    }
  }

  if (anyPublishFailed) {
    throw new IOException("Not all datasets published successfully");
  }
}
/**
 * Create a {@link Multimap} that maps each {@link CopyEntity.DatasetAndPartition} to all {@link WorkUnitState}s
 * that belong to it. For every state, the {@link CopyEntity} and the {@link CopyableDatasetMetadata} are
 * deserialized from the state, and the entity's {@link CopyEntity.DatasetAndPartition} for that metadata is used
 * as the key.
 *
 * @param states the work unit states to group
 * @return a multimap from dataset/partition to the work unit states that belong to it
 */
private static Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> groupByFileSet(
    Collection<? extends WorkUnitState> states) {
  Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> datasetRoots = ArrayListMultimap.create();

  for (WorkUnitState workUnitState : states) {
    CopyEntity file = CopySource.deserializeCopyEntity(workUnitState);
    // Use the CopySource helper for the metadata too, mirroring how the copy entity is read just above.
    CopyableDatasetMetadata metadata = CopySource.deserializeCopyableDataset(workUnitState);
    datasetRoots.put(file.getDatasetAndPartition(metadata), workUnitState);
  }

  return datasetRoots;
}
/**
 * Serializes the given {@link CopyableDatasetMetadata} and stores the result in the {@link State} under
 * {@link #SERIALIZED_COPYABLE_DATASET}.
 *
 * @param state the {@link State} that receives the serialized metadata
 * @param copyableDataset the metadata to serialize
 */
public static void serializeCopyableDataset(State state, CopyableDatasetMetadata copyableDataset) {
  String serializedMetadata = copyableDataset.serialize();
  state.setProp(SERIALIZED_COPYABLE_DATASET, serializedMetadata);
}
/**
 * Builds a mocked file of the given length and block size, wraps a test {@link CopyableFile} in a spy that
 * reports that file, prepares a {@link WorkUnit} carrying the max split size, writer output dir, guid and the
 * serialized copy entity and dataset metadata, and returns whatever {@link DistcpFileSplitter#splitFile}
 * produces for them.
 *
 * @param fs file system whose {@code getUri()} is stubbed here; expected to be a mock
 * @param fileLen length reported by the mocked {@link FileStatus}
 * @param blockSize block size reported by the mocked {@link FileStatus} and by the spied file
 * @param maxSplitSize value stored under {@link DistcpFileSplitter#MAX_SPLIT_SIZE_KEY}
 * @return the work units returned by the splitter
 */
private Collection<WorkUnit> createMockSplitWorkUnits(FileSystem fs, long fileLen, long blockSize,
    long maxSplitSize) throws Exception {
  FileStatus mockStatus = mock(FileStatus.class);
  when(mockStatus.getLen()).thenReturn(fileLen);
  when(mockStatus.getBlockSize()).thenReturn(blockSize);

  URI testUri = new URI("hdfs", "dummyhost", "/test", "test");
  Path testPath = new Path(testUri);
  when(fs.getUri()).thenReturn(testUri);

  CopyableDatasetMetadata datasetMetadata = new CopyableDatasetMetadata(new TestCopyableDataset(testPath));

  CopyableFile copyableFile = CopyableFileUtils.getTestCopyableFile();
  CopyableFile spiedFile = spy(copyableFile);
  doReturn(mockStatus).when(spiedFile).getFileStatus();
  doReturn(blockSize).when(spiedFile).getBlockSize(any(FileSystem.class));
  doReturn(testPath).when(spiedFile).getDestination();

  WorkUnit workUnit = WorkUnit.createEmpty();
  workUnit.setProp(DistcpFileSplitter.MAX_SPLIT_SIZE_KEY, maxSplitSize);
  workUnit.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0),
      testPath.toString());
  CopySource.setWorkUnitGuid(workUnit, Guid.fromStrings(workUnit.toString()));
  // The un-spied file is what gets serialized into the work unit; the spy is only handed to the splitter.
  CopySource.serializeCopyEntity(workUnit, copyableFile);
  CopySource.serializeCopyableDataset(workUnit, datasetMetadata);

  return DistcpFileSplitter.splitFile(spiedFile, workUnit, fs);
}
/**
 * Builds an {@link SlaEventSubmitter} for a {@code DATASET_PUBLISHED_EVENT_NAME} event describing the given
 * dataset/partition and submits it.
 *
 * @param eventSubmitter submitter the SLA event is sent through
 * @param datasetAndPartition supplies the dataset URN and partition of the event
 * @param originTimestamp origin timestamp attached to the event
 * @param upstreamTimestamp upstream timestamp attached to the event
 * @param additionalMetadata extra metadata attached to the event
 */
static void submitSuccessfulDatasetPublish(EventSubmitter eventSubmitter,
    CopyEntity.DatasetAndPartition datasetAndPartition, String originTimestamp, String upstreamTimestamp,
    Map<String, String> additionalMetadata) {
  String datasetUrn = datasetAndPartition.getDataset().getDatasetURN();

  SlaEventSubmitter.builder()
      .eventSubmitter(eventSubmitter)
      .eventName(DATASET_PUBLISHED_EVENT_NAME)
      .datasetUrn(datasetUrn)
      .partition(datasetAndPartition.getPartition())
      .originTimestamp(originTimestamp)
      .upstreamTimestamp(upstreamTimestamp)
      .additionalMetadata(additionalMetadata)
      .build()
      .submit();
}
this.outputDir = getOutputDir(state); this.copyableDatasetMetadata = CopyableDatasetMetadata.deserialize(state.getProp(CopySource.SERIALIZED_COPYABLE_DATASET)); this.recoveryHelper = new RecoveryHelper(this.fs, state); this.actualProcessedCopyableFile = Optional.absent();
/**
 * Computes the identifier of this {@link DatasetAndPartition}: the hex encoding of the {@code DigestUtils.sha}
 * digest of the dataset's string form concatenated with the partition.
 *
 * @return a unique string identifier for this {@link DatasetAndPartition}.
 */
@SuppressWarnings("deprecation")
public String identifier() {
  String digestInput = this.dataset.toString() + this.partition;
  byte[] digest = DigestUtils.sha(digestInput);
  return Hex.encodeHexString(digest);
}
}
/**
 * Rebuilds a {@link CopyableDatasetMetadata} from its serialized string form.
 *
 * <p>The input is first passed through {@code getSerializedWithNewPackage} and the result is parsed with
 * {@code GSON}.
 *
 * @param serialized the serialized {@link CopyableDatasetMetadata} string
 * @return a new instance of {@link CopyableDatasetMetadata}
 */
public static CopyableDatasetMetadata deserialize(String serialized) {
  String normalized = getSerializedWithNewPackage(serialized);
  return GSON.fromJson(normalized, CopyableDatasetMetadata.class);
}
@Test(dataProvider = "testFileDataProvider") public void testWrite(final String filePath, final String newFileName, final String expectedText) throws Exception { String expectedFileContents = "text"; String fileNameInArchive = "text.txt"; WorkUnitState state = TestUtils.createTestWorkUnitState(); state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString()); state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString()); state.setProp(ConfigurationKeys.WRITER_FILE_PATH, "writer_file_path_" + RandomStringUtils.randomAlphabetic(5)); CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source"))); CopySource.serializeCopyableDataset(state, metadata); FileAwareInputStream fileAwareInputStream = getCompressedInputStream(filePath, newFileName); CopySource.serializeCopyEntity(state, fileAwareInputStream.getFile()); TarArchiveInputStreamDataWriter dataWriter = new TarArchiveInputStreamDataWriter(state, 1, 0); dataWriter.write(fileAwareInputStream); dataWriter.commit(); // the archive file contains file test.txt Path unArchivedFilePath = new Path(fileAwareInputStream.getFile().getDestination(), fileNameInArchive); // Path at which the writer writes text.txt Path taskOutputFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), fileAwareInputStream.getFile().getDatasetAndPartition(metadata).identifier()), PathUtils.withoutLeadingSeparator(unArchivedFilePath)); Assert.assertEquals(IOUtils.toString(new FileInputStream(taskOutputFilePath.toString())).trim(), expectedFileContents); }