private void addMergeOperation(Set<String> stageInputs, Map<String, Operation> processedOperations) { Set<String> sortedInputs = new TreeSet<>(stageInputs); String mergeOperationName = prefixedOperationName(Joiner.on(SEPARATOR).join(sortedInputs), "merge"); String mergeDescription = "Merged stages: " + Joiner.on(",").join(sortedInputs); if (processedOperations.containsKey(mergeOperationName)) { // it is possible that same stages act as an input to multiple stages. // we should still only add single merge operation for them return; } List<InputField> inputFields = new ArrayList<>(); for (String inputStage : sortedInputs) { List<String> parentStages = findParentStages(inputStage); for (String parentStage : parentStages) { Map<String, String> fieldOrigins = stageOutputsWithOrigins.get(parentStage); for (Map.Entry<String, String> fieldOrigin : fieldOrigins.entrySet()) { inputFields.add(InputField.of(fieldOrigin.getValue(), fieldOrigin.getKey())); } } } Set<String> outputs = new LinkedHashSet<>(); for (InputField inputField : inputFields) { outputs.add(inputField.getName()); } TransformOperation merge = new TransformOperation(mergeOperationName, mergeDescription, inputFields, new ArrayList<>(outputs)); processedOperations.put(merge.getName(), merge); }
/** * Helper method to compute the outgoing connections * @param currentOperation current operation which needs to evaluated * @param visitedOperations a {@link Set} containing all the operations which has been processed so * far. */ private void computeOutgoing(Operation currentOperation, Set<Operation> visitedOperations) { // mark this operation if not already done if (!visitedOperations.add(currentOperation)) { return; } // base condition: if the current operation is write we have reached the end if (currentOperation.getType() == OperationType.WRITE) { return; } // if this is a transform operation then traverse down to all the outgoing operation from this operation // expanding further the traversal and exploring the operations if (currentOperation.getType() == OperationType.TRANSFORM) { TransformOperation transform = (TransformOperation) currentOperation; Set<Operation> operations = operationOutgoingConnections.get(transform.getName()); for (Operation operation : operations) { computeOutgoing(operation, visitedOperations); } } }
/** * Helper method to compute the outgoing connections * @param currentOperation current operation which needs to evaluated * @param visitedOperations a {@link Set} containing all the operations which has been processed so * far. */ private void computeOutgoing(Operation currentOperation, Set<Operation> visitedOperations) { // mark this operation if not already done if (!visitedOperations.add(currentOperation)) { return; } // base condition: if the current operation is write we have reached the end if (currentOperation.getType() == OperationType.WRITE) { return; } // if this is a transform operation then traverse down to all the outgoing operation from this operation // expanding further the traversal and exploring the operations if (currentOperation.getType() == OperationType.TRANSFORM) { TransformOperation transform = (TransformOperation) currentOperation; Set<Operation> operations = operationOutgoingConnections.get(transform.getName()); for (Operation operation : operations) { computeOutgoing(operation, visitedOperations); } } }