e.getPropertyValue(DuplicateEdgeGroupProperty.class); if (duplicateEdgeGroupProperty.isPresent()) { final String duplicateGroupId = duplicateEdgeGroupProperty.get().getGroupId(); edgeGroupToIrEdge.computeIfAbsent(duplicateGroupId, k -> new ArrayList<>()).add(e); final DuplicateEdgeGroupPropertyValue duplicateEdgeGroupProperty = e.getPropertyValue(DuplicateEdgeGroupProperty.class).get(); if (firstDuplicateEdgeValue.isRepresentativeEdgeDecided()) { duplicateEdgeGroupProperty.setRepresentativeEdgeId(firstDuplicateEdgeValue.getRepresentativeEdgeId()); } else { duplicateEdgeGroupProperty.setRepresentativeEdgeId(firstEdge.getId()); duplicateEdgeGroupProperty.setGroupSize(edges.size()); }); });
/**
 * Builds the wildcard block id for the given producer task index.
 * See {@link RuntimeIdManager#generateBlockIdWildcard(String, int)} for information on block wildcards.
 *
 * When the edge carries a {@link DuplicateEdgeGroupProperty} whose group size is larger than one,
 * the id of the group's representative edge is used; otherwise the id of this edge is used.
 *
 * @param producerTaskIndex to use.
 * @return wildcard block id that corresponds to "ANY" task attempt of the task index.
 */
private String generateWildCardBlockId(final int producerTaskIndex) {
  final Optional<DuplicateEdgeGroupPropertyValue> dupGroup =
      runtimeEdge.getPropertyValue(DuplicateEdgeGroupProperty.class);

  // Only a group with more than one member redirects to the representative edge.
  final boolean useRepresentative = dupGroup.isPresent() && dupGroup.get().getGroupSize() > 1;
  final String edgeIdForBlock = useRepresentative
      ? dupGroup.get().getRepresentativeEdgeId()
      : runtimeEdge.getId();

  return RuntimeIdManager.generateBlockIdWildcard(edgeIdForBlock, producerTaskIndex);
}
/**
 * Marks duplicate edges with DuplicateEdgeGroupProperty.
 *
 * Every edge held in {@code nonIterativeIncomingEdges} is given a property value whose group id
 * is the current {@code duplicateEdgeGroupId}; the counter is advanced after each edge.
 */
public void markDuplicateEdges() {
  // The vertex key is not needed, only the edges grouped under it.
  nonIterativeIncomingEdges.values().forEach(edgesIntoVertex -> edgesIntoVertex.forEach(edge -> {
    final DuplicateEdgeGroupPropertyValue groupValue =
        new DuplicateEdgeGroupPropertyValue(String.valueOf(duplicateEdgeGroupId));
    edge.setProperty(DuplicateEdgeGroupProperty.of(groupValue));
    duplicateEdgeGroupId++;
  }));
}
/**
 * Writes, onto every edge that carries a {@link DuplicateEdgeGroupProperty}, the number of edges
 * in the DAG that share its group id.
 *
 * @param dag the DAG whose edges are inspected; their property values are updated in place.
 * @return the DAG instance that was passed in.
 */
@Override
public DAG<IRVertex, IREdge> apply(final DAG<IRVertex, IREdge> dag) {
  final HashMap<String, Integer> edgeCountByGroup = new HashMap<>();

  // Pass 1: tally how many edges belong to each duplicate group.
  dag.topologicalDo(vertex -> dag.getIncomingEdgesOf(vertex).forEach(edge -> {
    final Optional<DuplicateEdgeGroupPropertyValue> groupValue =
        edge.getPropertyValue(DuplicateEdgeGroupProperty.class);
    groupValue.ifPresent(value -> edgeCountByGroup.merge(value.getGroupId(), 1, Integer::sum));
  }));

  // Pass 2: store the tallied size on each member edge.
  dag.topologicalDo(vertex -> dag.getIncomingEdgesOf(vertex).forEach(edge -> {
    final Optional<DuplicateEdgeGroupPropertyValue> groupValue =
        edge.getPropertyValue(DuplicateEdgeGroupProperty.class);
    if (!groupValue.isPresent()) {
      return;
    }
    final Integer groupSize = edgeCountByGroup.get(groupValue.get().getGroupId());
    if (groupSize != null) {
      groupValue.get().setGroupSize(groupSize);
    }
  }));

  return dag;
}
}
cachedEdge.getPropertyValue(DuplicateEdgeGroupProperty.class) .orElseThrow(() -> new PlanAppenderException("Cached edge does not have duplicated edge group property.")); duplicateEdgeGroupPropertyValue.setGroupSize(duplicateEdgeGroupPropertyValue.getGroupSize() + 1); newEdge.getExecutionProperties().put(DuplicateEdgeGroupProperty.of(duplicateEdgeGroupPropertyValue)); } else {
physicalStageEdge.getPropertyValue(DuplicateEdgeGroupProperty.class); final String representativeEdgeId = dupProp.isPresent() ? dupProp.get().getRepresentativeEdgeId() : physicalStageEdge.getId();
/**
 * Get the expected number of data read according to the communication pattern of the edge and
 * the parallelism of destination vertex.
 *
 * The per-block read count is 1 for a OneToOne edge and the destination parallelism otherwise;
 * it is multiplied by the duplicate edge group size when the edge has that property.
 *
 * @return the expected number of data read.
 */
private int getExpectedRead() {
  final Optional<DuplicateEdgeGroupPropertyValue> dupGroup =
      runtimeEdge.getPropertyValue(DuplicateEdgeGroupProperty.class);
  final int groupMultiplier;
  if (dupGroup.isPresent()) {
    groupMultiplier = dupGroup.get().getGroupSize();
  } else {
    groupMultiplier = 1;
  }

  final boolean oneToOne = CommunicationPatternProperty.Value.OneToOne.equals(
      runtimeEdge.getPropertyValue(CommunicationPatternProperty.class)
          .orElseThrow(() -> new RuntimeException("No communication pattern on this edge.")));

  final int readsPerBlock;
  if (oneToOne) {
    readsPerBlock = 1;
  } else {
    readsPerBlock = dstIrVertex.getPropertyValue(ParallelismProperty.class)
        .orElseThrow(() -> new RuntimeException("No parallelism property on the destination vertex."));
  }

  return readsPerBlock * groupMultiplier;
}
}
/**
 * Writes, onto every edge that carries a {@link DuplicateEdgeGroupProperty}, the number of edges
 * in the DAG that share its group id.
 *
 * @param dag the DAG whose edges are inspected; their property values are updated in place.
 * @return the DAG instance that was passed in.
 */
@Override
public DAG<IRVertex, IREdge> apply(final DAG<IRVertex, IREdge> dag) {
  final HashMap<String, Integer> edgeCountByGroup = new HashMap<>();

  // Pass 1: tally how many edges belong to each duplicate group.
  dag.topologicalDo(vertex -> dag.getIncomingEdgesOf(vertex).forEach(edge -> {
    final Optional<DuplicateEdgeGroupPropertyValue> groupValue =
        edge.getPropertyValue(DuplicateEdgeGroupProperty.class);
    groupValue.ifPresent(value -> edgeCountByGroup.merge(value.getGroupId(), 1, Integer::sum));
  }));

  // Pass 2: store the tallied size on each member edge.
  dag.topologicalDo(vertex -> dag.getIncomingEdgesOf(vertex).forEach(edge -> {
    final Optional<DuplicateEdgeGroupPropertyValue> groupValue =
        edge.getPropertyValue(DuplicateEdgeGroupProperty.class);
    if (!groupValue.isPresent()) {
      return;
    }
    final Integer groupSize = edgeCountByGroup.get(groupValue.get().getGroupId());
    if (groupSize != null) {
      groupValue.get().setGroupSize(groupSize);
    }
  }));

  return dag;
}
}
cachedEdge.getPropertyValue(DuplicateEdgeGroupProperty.class) .orElseThrow(() -> new PlanAppenderException("Cached edge does not have duplicated edge group property.")); duplicateEdgeGroupPropertyValue.setGroupSize(duplicateEdgeGroupPropertyValue.getGroupSize() + 1); newEdge.getExecutionProperties().put(DuplicateEdgeGroupProperty.of(duplicateEdgeGroupPropertyValue)); } else {
physicalStageEdge.getPropertyValue(DuplicateEdgeGroupProperty.class); final String representativeEdgeId = dupProp.isPresent() ? dupProp.get().getRepresentativeEdgeId() : physicalStageEdge.getId();
/**
 * Get the expected number of data read according to the communication pattern of the edge and
 * the parallelism of destination vertex.
 *
 * The per-block read count is 1 for a OneToOne edge and the destination parallelism otherwise;
 * it is multiplied by the duplicate edge group size when the edge has that property.
 *
 * @return the expected number of data read.
 */
private int getExpectedRead() {
  final Optional<DuplicateEdgeGroupPropertyValue> dupGroup =
      runtimeEdge.getPropertyValue(DuplicateEdgeGroupProperty.class);
  final int groupMultiplier;
  if (dupGroup.isPresent()) {
    groupMultiplier = dupGroup.get().getGroupSize();
  } else {
    groupMultiplier = 1;
  }

  final boolean oneToOne = CommunicationPatternProperty.Value.OneToOne.equals(
      runtimeEdge.getPropertyValue(CommunicationPatternProperty.class)
          .orElseThrow(() -> new RuntimeException("No communication pattern on this edge.")));

  final int readsPerBlock;
  if (oneToOne) {
    readsPerBlock = 1;
  } else {
    readsPerBlock = dstIrVertex.getPropertyValue(ParallelismProperty.class)
        .orElseThrow(() -> new RuntimeException("No parallelism property on the destination vertex."));
  }

  return readsPerBlock * groupMultiplier;
}
}
e.getPropertyValue(DuplicateEdgeGroupProperty.class); if (duplicateEdgeGroupProperty.isPresent()) { final String duplicateGroupId = duplicateEdgeGroupProperty.get().getGroupId(); edgeGroupToIrEdge.computeIfAbsent(duplicateGroupId, k -> new ArrayList<>()).add(e); final DuplicateEdgeGroupPropertyValue duplicateEdgeGroupProperty = e.getPropertyValue(DuplicateEdgeGroupProperty.class).get(); if (firstDuplicateEdgeValue.isRepresentativeEdgeDecided()) { duplicateEdgeGroupProperty.setRepresentativeEdgeId(firstDuplicateEdgeValue.getRepresentativeEdgeId()); } else { duplicateEdgeGroupProperty.setRepresentativeEdgeId(firstEdge.getId()); duplicateEdgeGroupProperty.setGroupSize(edges.size()); }); });
/**
 * Builds the wildcard block id for the given producer task index.
 * See {@link RuntimeIdManager#generateBlockIdWildcard(String, int)} for information on block wildcards.
 *
 * When the edge carries a {@link DuplicateEdgeGroupProperty} whose group size is larger than one,
 * the id of the group's representative edge is used; otherwise the id of this edge is used.
 *
 * @param producerTaskIndex to use.
 * @return wildcard block id that corresponds to "ANY" task attempt of the task index.
 */
private String generateWildCardBlockId(final int producerTaskIndex) {
  final Optional<DuplicateEdgeGroupPropertyValue> dupGroup =
      runtimeEdge.getPropertyValue(DuplicateEdgeGroupProperty.class);

  // Only a group with more than one member redirects to the representative edge.
  final boolean useRepresentative = dupGroup.isPresent() && dupGroup.get().getGroupSize() > 1;
  final String edgeIdForBlock = useRepresentative
      ? dupGroup.get().getRepresentativeEdgeId()
      : runtimeEdge.getId();

  return RuntimeIdManager.generateBlockIdWildcard(edgeIdForBlock, producerTaskIndex);
}
/**
 * Marks duplicate edges with DuplicateEdgeGroupProperty.
 *
 * Every edge held in {@code nonIterativeIncomingEdges} is given a property value whose group id
 * is the current {@code duplicateEdgeGroupId}; the counter is advanced after each edge.
 */
public void markDuplicateEdges() {
  // The vertex key is not needed, only the edges grouped under it.
  nonIterativeIncomingEdges.values().forEach(edgesIntoVertex -> edgesIntoVertex.forEach(edge -> {
    final DuplicateEdgeGroupPropertyValue groupValue =
        new DuplicateEdgeGroupPropertyValue(String.valueOf(duplicateEdgeGroupId));
    edge.setProperty(DuplicateEdgeGroupProperty.of(groupValue));
    duplicateEdgeGroupId++;
  }));
}
/** * Constructor. * * @param srcTaskId the id of the source task. * @param dstIrVertex the destination IR vertex. * @param runtimeEdge the {@link RuntimeEdge}. * @param blockManagerWorker the {@link BlockManagerWorker}. */ BlockOutputWriter(final String srcTaskId, final IRVertex dstIrVertex, final RuntimeEdge<?> runtimeEdge, final BlockManagerWorker blockManagerWorker) { this.runtimeEdge = runtimeEdge; this.dstIrVertex = dstIrVertex; this.partitioner = Partitioner.getPartitioner(runtimeEdge); this.blockManagerWorker = blockManagerWorker; this.blockStoreValue = runtimeEdge.getPropertyValue(DataStoreProperty.class) .orElseThrow(() -> new RuntimeException("No data store property on the edge")); blockToWrite = blockManagerWorker.createBlock( RuntimeIdManager.generateBlockId(runtimeEdge.getId(), srcTaskId), blockStoreValue); final Optional<DuplicateEdgeGroupPropertyValue> duplicateDataProperty = runtimeEdge.getPropertyValue(DuplicateEdgeGroupProperty.class); nonDummyBlock = !duplicateDataProperty.isPresent() || duplicateDataProperty.get().getRepresentativeEdgeId().equals(runtimeEdge.getId()) || duplicateDataProperty.get().getGroupSize() <= 1; }
/** * Constructor. * * @param hashRangeMultiplier the {@link org.apache.nemo.conf.JobConf.HashRangeMultiplier}. * @param srcTaskId the id of the source task. * @param dstIrVertex the destination IR vertex. * @param runtimeEdge the {@link RuntimeEdge}. * @param blockManagerWorker the {@link BlockManagerWorker}. */ BlockOutputWriter(final int hashRangeMultiplier, final String srcTaskId, final IRVertex dstIrVertex, final RuntimeEdge<?> runtimeEdge, final BlockManagerWorker blockManagerWorker) { this.runtimeEdge = runtimeEdge; this.dstIrVertex = dstIrVertex; this.partitioner = OutputWriter.getPartitioner(runtimeEdge, hashRangeMultiplier); this.blockManagerWorker = blockManagerWorker; this.blockStoreValue = runtimeEdge.getPropertyValue(DataStoreProperty.class) .orElseThrow(() -> new RuntimeException("No data store property on the edge")); blockToWrite = blockManagerWorker.createBlock( RuntimeIdManager.generateBlockId(runtimeEdge.getId(), srcTaskId), blockStoreValue); final Optional<DuplicateEdgeGroupPropertyValue> duplicateDataProperty = runtimeEdge.getPropertyValue(DuplicateEdgeGroupProperty.class); nonDummyBlock = !duplicateDataProperty.isPresent() || duplicateDataProperty.get().getRepresentativeEdgeId().equals(runtimeEdge.getId()) || duplicateDataProperty.get().getGroupSize() <= 1; }