/**
 * Reads the single serialized {@link CopyEntity} stored in a {@link State} under
 * {@link #SERIALIZED_COPYABLE_FILE} and deserializes it.
 *
 * @param state the {@link State} holding the serialized entity
 * @return the {@link CopyEntity} produced by {@link CopyEntity#deserialize(String)} for the stored value
 */
public static CopyEntity deserializeCopyEntity(State state) {
  String serializedEntity = state.getProp(SERIALIZED_COPYABLE_FILE);
  return CopyEntity.deserialize(serializedEntity);
}
/**
 * Groups {@link WorkUnitState}s by the {@link CopyEntity.DatasetAndPartition} they belong to.
 *
 * <p>For every state, the {@link CopyEntity} and the {@link CopyableDatasetMetadata} are deserialized from the
 * state itself, and the pair they resolve to becomes the key under which the state is stored. The resulting
 * mapping is used to set the WorkingState of all {@link WorkUnitState}s of a {@link CopyableDataset} to
 * {@link WorkUnitState.WorkingState#COMMITTED} after that dataset is successfully published.
 *
 * @param states the work unit states to group
 * @return a {@link Multimap} from dataset/partition to the states belonging to it
 */
private static Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> groupByFileSet(
    Collection<? extends WorkUnitState> states) {
  Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> statesByFileSet = ArrayListMultimap.create();
  for (WorkUnitState state : states) {
    CopyEntity entity = CopySource.deserializeCopyEntity(state);
    CopyableDatasetMetadata metadata =
        CopyableDatasetMetadata.deserialize(state.getProp(CopySource.SERIALIZED_COPYABLE_DATASET));
    statesByFileSet.put(entity.getDatasetAndPartition(metadata), state);
  }
  return statesByFileSet;
}
/**
 * Returns the serialized form of this object, i.e. the result of {@code serialize(this)}.
 */
@Override
public String toString() {
  final String serializedForm = serialize(this);
  return serializedForm;
}
/**
 * Round-trips a {@link CopyableFile} whose first two constructor arguments are {@code null} and whose ancestor
 * {@link OwnerAndPermission} has a {@code null} first argument, and checks that the deserialized entity equals
 * the original.
 */
@Test
public void testSerializeDeserialzeNulls() throws Exception {
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission("owner", "group", FsPermission.getDefault());
  OwnerAndPermission ancestorOwnerAndPermission = new OwnerAndPermission(null, "group2", FsPermission.getDefault());
  byte[] checksum = "checksum".getBytes();

  CopyableFile original = new CopyableFile(null, null, ownerAndPermission,
      Lists.newArrayList(ancestorOwnerAndPermission), checksum, PreserveAttributes.fromMnemonicString(""), "", 0, 0,
      Maps.<String, String>newHashMap(), "");

  CopyEntity roundTripped = CopyEntity.deserialize(CopyEntity.serialize(original));

  Assert.assertEquals(roundTripped, original);
}
// Resolve the dataset/partition pair this copy entity belongs to, given the dataset metadata.
CopyEntity.DatasetAndPartition datasetAndPartition = copyEntity.getDatasetAndPartition(metadata);
// The general dataset URN key receives the string form of the dataset/partition pair...
workUnit.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetAndPartition.toString());
// ...whereas the SLA event keys receive the dataset URN and the entity's file set as separate properties.
workUnit.setProp(SlaEventKeys.DATASET_URN_KEY, this.copyableDataset.datasetURN());
workUnit.setProp(SlaEventKeys.PARTITION_KEY, copyEntity.getFileSet());
// Weight and watermark of the work unit are filled in by the dedicated helpers.
setWorkUnitWeight(workUnit, copyEntity, minWorkUnitWeight);
setWorkUnitWatermark(workUnit, watermarkGenerator, copyEntity);
/**
 * Rebuilds a {@link CopyEntity} from its serialized string form.
 *
 * <p>The input is first passed through {@code getSerializedWithNewPackage} and the result is parsed with
 * {@code GSON}.
 *
 * @param serialized the serialized {@link CopyEntity}
 * @return a new instance of {@link CopyEntity}
 */
public static CopyEntity deserialize(String serialized) {
  String withNewPackage = getSerializedWithNewPackage(serialized);
  return GSON.fromJson(withNewPackage, CopyEntity.class);
}
/**
 * Builds the {@link DatasetAndPartition} identifying the dataset and file set of this {@link CopyEntity}.
 *
 * @param metadata {@link CopyableDatasetMetadata} of the dataset this {@link CopyEntity} belongs to
 * @return a new {@link DatasetAndPartition} combining {@code metadata} with this entity's file set
 */
public DatasetAndPartition getDatasetAndPartition(CopyableDatasetMetadata metadata) {
  String fileSetName = getFileSet();
  return new DatasetAndPartition(metadata, fileSetName);
}
/**
 * Describes what this copy entity will do; used for simulate runs.
 *
 * @return the explanation, which here is simply {@link #toString()}
 */
public String explain() {
  return this.toString();
}
/**
 * Computes the string form of the guid obtained by combining the guid of a {@link CopyEntity} with the guid
 * stored in a {@link State}.
 *
 * @param state the {@link State} expected to carry a work unit guid
 * @param file the {@link CopyEntity} whose guid is combined with the state's guid
 * @return the combined guid as a string
 * @throws IOException if {@code state} does not contain a guid
 */
private static String computeGuid(State state, CopyEntity file) throws IOException {
  Optional<Guid> workUnitGuid = CopySource.getWorkUnitGuid(state);
  if (!workUnitGuid.isPresent()) {
    throw new IOException("State does not contain a guid.");
  }
  return Guid.combine(file.guid(), workUnitGuid.get()).toString();
}
}
try {
  // Deserialize the copy entity carried by the work unit and log its explanation.
  CopyEntity copyEntity = deserializeCopyEntity(workUnit);
  log.info(copyEntity.explain());
} catch (Exception e) {
  // Any exception from the try block (including one thrown by explain()) is reported with this message.
  // NOTE(review): the caught exception is not passed to the logger, so its cause is not recorded — confirm
  // this is intended.
  log.info("Cannot deserialize CopyEntity from wu : {}", workUnit.toString());
/**
 * Round-trips a list of three test copyable files through {@code serializeList} / {@code deserializeList}
 * and checks that the result equals the original list.
 */
@Test
public void testSerializeDeserialzeList() throws Exception {
  List<CopyEntity> original = ImmutableList.<CopyEntity>of(
      CopyableFileUtils.getTestCopyableFile(),
      CopyableFileUtils.getTestCopyableFile(),
      CopyableFileUtils.getTestCopyableFile());

  List<CopyEntity> roundTripped = CopyEntity.deserializeList(CopyEntity.serializeList(original));

  Assert.assertEquals(roundTripped, original);
}
/**
 * Round-trips a fully populated {@link CopyableFile}, including a partition descriptor set as its destination
 * data, and checks that the deserialized entity equals the original.
 */
@Test
public void testSerializeDeserialze() throws Exception {
  FileStatus fileStatus = new FileStatus(10, false, 12, 100, 12345, new Path("/path"));
  Path destination = new Path("/destination");
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission("owner", "group", FsPermission.getDefault());
  OwnerAndPermission ancestorOwnerAndPermission =
      new OwnerAndPermission("owner2", "group2", FsPermission.getDefault());
  byte[] checksum = "checksum".getBytes();

  CopyableFile original = new CopyableFile(fileStatus, destination, ownerAndPermission,
      Lists.newArrayList(ancestorOwnerAndPermission), checksum, PreserveAttributes.fromMnemonicString(""), "", 0, 0,
      Maps.<String, String>newHashMap(), "");

  // Attach a hive partition descriptor as the destination data before serializing.
  DatasetDescriptor hiveTable = new DatasetDescriptor("hive", "db.table");
  original.setDestinationData(new PartitionDescriptor("datepartition=2018/09/05", hiveTable));

  CopyEntity roundTripped = CopyEntity.deserialize(CopyEntity.serialize(original));

  Assert.assertEquals(roundTripped, original);
}
// Resolve the dataset/partition pair this copy entity belongs to, given the dataset metadata.
CopyEntity.DatasetAndPartition datasetAndPartition = copyEntity.getDatasetAndPartition(metadata);
// The general dataset URN key receives the string form of the dataset/partition pair...
workUnit.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetAndPartition.toString());
// ...whereas the SLA event keys receive the dataset URN and the entity's file set as separate properties.
workUnit.setProp(SlaEventKeys.DATASET_URN_KEY, this.copyableDataset.datasetURN());
workUnit.setProp(SlaEventKeys.PARTITION_KEY, copyEntity.getFileSet());
// Weight and watermark of the work unit are filled in by the dedicated helpers.
setWorkUnitWeight(workUnit, copyEntity, minWorkUnitWeight);
setWorkUnitWatermark(workUnit, watermarkGenerator, copyEntity);
/**
 * Rebuilds a {@link List} of {@link CopyEntity}s from its serialized string form.
 * Counterpart of {@link #serializeList(List)}.
 *
 * <p>The input is first passed through {@code getSerializedWithNewPackage} and the result is parsed with
 * {@code GSON} using a {@code List<CopyEntity>} type token.
 *
 * @param serialized the serialized list
 * @return a new {@link List} of {@link CopyEntity}s
 */
public static List<CopyEntity> deserializeList(String serialized) {
  String withNewPackage = getSerializedWithNewPackage(serialized);
  return GSON.fromJson(withNewPackage, new TypeToken<List<CopyEntity>>() {}.getType());
}
/**
 * Partitions copy entities by their file set name and exposes the partitions as {@link FileSet}s.
 *
 * <p>One {@link FileSet.Builder} is created per distinct file set name; each entity is added to the builder of
 * its file set. The builders are only built when the returned iterator is consumed.
 *
 * @param dataset the {@link Dataset} handed to every {@link FileSet.Builder}
 * @param files the copy entities to partition
 * @return an iterator over one {@link FileSet} per distinct file set name
 */
private static Iterator<FileSet<CopyEntity>> partitionCopyableFiles(Dataset dataset,
    Collection<? extends CopyEntity> files) {
  Map<String, FileSet.Builder<CopyEntity>> buildersByFileSet = Maps.newHashMap();
  for (CopyEntity entity : files) {
    String fileSetName = entity.getFileSet();
    FileSet.Builder<CopyEntity> builder = buildersByFileSet.get(fileSetName);
    if (builder == null) {
      // First entity seen for this file set: register a fresh builder.
      builder = new FileSet.Builder<>(fileSetName, dataset);
      buildersByFileSet.put(fileSetName, builder);
    }
    builder.add(entity);
  }

  Function<FileSet.Builder<CopyEntity>, FileSet<CopyEntity>> buildFileSet =
      new Function<FileSet.Builder<CopyEntity>, FileSet<CopyEntity>>() {
        @Nullable
        @Override
        public FileSet<CopyEntity> apply(@Nonnull FileSet.Builder<CopyEntity> input) {
          return input.build();
        }
      };
  return Iterators.transform(buildersByFileSet.values().iterator(), buildFileSet);
}
}
/**
 * Derives the {@link Guid} of this object from its string representation.
 *
 * @return the result of {@code Guid.fromStrings} applied to {@link #toString()}
 * @throws IOException declared by this method's signature
 */
@Override
public Guid guid() throws IOException {
  String stringForm = toString();
  return Guid.fromStrings(stringForm);
}
/**
 * Computes the string form of the guid obtained by combining the guid of a {@link CopyEntity} with the guid
 * stored in a {@link State}.
 *
 * @param state the {@link State} expected to carry a work unit guid
 * @param file the {@link CopyEntity} whose guid is combined with the state's guid
 * @return the combined guid as a string
 * @throws IOException if {@code state} does not contain a guid
 */
private static String computeGuid(State state, CopyEntity file) throws IOException {
  Optional<Guid> workUnitGuid = CopySource.getWorkUnitGuid(state);
  if (!workUnitGuid.isPresent()) {
    throw new IOException("State does not contain a guid.");
  }
  return Guid.combine(file.guid(), workUnitGuid.get()).toString();
}
}
try {
  // Deserialize the copy entity carried by the work unit and log its explanation.
  CopyEntity copyEntity = deserializeCopyEntity(workUnit);
  log.info(copyEntity.explain());
} catch (Exception e) {
  // Any exception from the try block (including one thrown by explain()) is reported with this message.
  // NOTE(review): the caught exception is not passed to the logger, so its cause is not recorded — confirm
  // this is intended.
  log.info("Cannot deserialize CopyEntity from wu : {}", workUnit.toString());
/**
 * Stores a single {@link CopyEntity} in a {@link State}.
 *
 * <p>The serialized entity is written under {@link #SERIALIZED_COPYABLE_FILE}, and the name of the entity's
 * runtime class is written under {@code COPY_ENTITY_CLASS}.
 *
 * @param state the {@link State} to write into
 * @param copyEntity the {@link CopyEntity} to serialize
 */
public static void serializeCopyEntity(State state, CopyEntity copyEntity) {
  String serializedEntity = CopyEntity.serialize(copyEntity);
  state.setProp(SERIALIZED_COPYABLE_FILE, serializedEntity);

  String entityClassName = copyEntity.getClass().getName();
  state.setProp(COPY_ENTITY_CLASS, entityClassName);
}
private void createDatasetFiles() throws IOException { // Create writer output files Path datasetWriterOutputPath = new Path(writerOutputPath, copyEntity.getDatasetAndPartition(this.metadata).identifier()); Path outputPathWithCurrentDirectory = new Path(datasetWriterOutputPath, PathUtils.withoutLeadingSeparator(this.targetPath)); for (String path : relativeFilePaths) { Path pathToCreate = new Path(outputPathWithCurrentDirectory, path); fs.mkdirs(pathToCreate.getParent()); fs.create(pathToCreate); } }