/**
 * Collects, per destination {@link EndPoint}, the names of the fields written to it.
 *
 * <p>If {@code writeOperations} is still {@code null}, {@code computeAndValidateFieldLineageInfo} is invoked
 * on the operations first (presumably this is what populates {@code writeOperations}).
 *
 * @return a map from each destination endpoint to the names of the input fields of the write operations
 * targeting it; an endpoint whose write operations have no inputs maps to an empty set
 */
private Map<EndPoint, Set<String>> computeDestinationFields() {
  if (writeOperations == null) {
    computeAndValidateFieldLineageInfo(this.operations);
  }

  Map<EndPoint, Set<String>> fieldsByEndPoint = new HashMap<>();
  for (WriteOperation writeOp : this.writeOperations) {
    // register the endpoint up front so it is present even when the write has no inputs
    Set<String> names = fieldsByEndPoint.computeIfAbsent(writeOp.getDestination(), endPoint -> new HashSet<>());
    writeOp.getInputs().forEach(input -> names.add(input.getName()));
  }
  return fieldsByEndPoint;
}
case WRITE: WriteOperation write = (WriteOperation) operation; EndPoint destination = write.getDestination(); if (destination == null) { throw new IllegalArgumentException(String.format("Destination endpoint cannot be null for the write " + "operation '%s'.", write.getName())); origins = write.getInputs().stream().map(InputField::getOrigin).collect(Collectors.toSet());
FieldWriteOperation write = (FieldWriteOperation) fieldOperation; inputFields = createInputFields(write.getInputFields(), stageName, processedOperations); newOperation = new WriteOperation(newOperationName, write.getDescription(), write.getSink(), inputFields); break;
/** * Checks whether the given field is used in the next operations or not * * @param nextOperation the next operation which should either be a {@link TransformOperation} or {@link * WriteOperation} * @param inputField the field whose usage needs to be checked * @return true if the field is used in the nextOperation */ private boolean containsInputField(Operation nextOperation, InputField inputField) { Set<InputField> inputFields = new HashSet<>(); if (OperationType.WRITE == nextOperation.getType()) { WriteOperation nextWrite = (WriteOperation) nextOperation; inputFields = new HashSet<>(nextWrite.getInputs()); } else if (OperationType.TRANSFORM == nextOperation.getType()) { TransformOperation nextTransform = (TransformOperation) nextOperation; inputFields = new HashSet<>(nextTransform.getInputs()); } // if the next operation inputFields does contains the given fieldName return true return inputFields.contains(inputField); }
/**
 * Rebuilds the {@code sources} and {@code destinations} sets from {@code operations}.
 *
 * <p>Both sets are replaced with fresh empty sets, then the source of every read operation is added to
 * {@code sources} and the destination of every write operation is added to {@code destinations}.
 * Operations of any other type are ignored.
 */
private void populateSourcesAndDestinations() {
  sources = new HashSet<>();
  destinations = new HashSet<>();
  for (Operation op : operations) {
    OperationType type = op.getType();
    if (OperationType.READ == type) {
      sources.add(((ReadOperation) op).getSource());
      continue;
    }
    if (OperationType.WRITE == type) {
      destinations.add(((WriteOperation) op).getDestination());
    }
  }
}
WriteOperation write = new WriteOperation("write_op", "writing data to file", EndPoint.of("myns", "another_file"), writeInput);
/** * Checks whether the given field is used in the next operations or not * * @param nextOperation the next operation which should either be a {@link TransformOperation} or {@link * WriteOperation} * @param inputField the field whose usage needs to be checked * @return true if the field is used in the nextOperation */ private boolean containsInputField(Operation nextOperation, InputField inputField) { Set<InputField> inputFields = new HashSet<>(); if (OperationType.WRITE == nextOperation.getType()) { WriteOperation nextWrite = (WriteOperation) nextOperation; inputFields = new HashSet<>(nextWrite.getInputs()); } else if (OperationType.TRANSFORM == nextOperation.getType()) { TransformOperation nextTransform = (TransformOperation) nextOperation; inputFields = new HashSet<>(nextTransform.getInputs()); } // if the next operation inputFields does contains the given fieldName return true return inputFields.contains(inputField); }
/**
 * Rebuilds the {@code sources} and {@code destinations} sets from {@code operations}.
 *
 * <p>Both sets are replaced with fresh empty sets, then the source of every read operation is added to
 * {@code sources} and the destination of every write operation is added to {@code destinations}.
 * Operations of any other type are ignored.
 */
private void populateSourcesAndDestinations() {
  sources = new HashSet<>();
  destinations = new HashSet<>();
  for (Operation op : operations) {
    OperationType type = op.getType();
    if (OperationType.READ == type) {
      sources.add(((ReadOperation) op).getSource());
      continue;
    }
    if (OperationType.WRITE == type) {
      destinations.add(((WriteOperation) op).getDestination());
    }
  }
}
/**
 * Topological sorting must fail with {@link IllegalArgumentException} when the operations form a cycle:
 * here "parse" takes the "name" field of "normalize" as input while "normalize" takes the "name" field
 * of "parse" as input.
 */
@Test(expected = IllegalArgumentException.class)
public void testCycle() {
  ReadOperation read = new ReadOperation("read", "read", EndPoint.of("ns", "file1"), "offset", "body");

  // parse <-> normalize depend on each other's "name" field
  List<InputField> parseInputs = Arrays.asList(InputField.of("read", "body"), InputField.of("normalize", "name"));
  TransformOperation parse = new TransformOperation("parse", "parse", parseInputs, "name", "address");

  List<InputField> normalizeInputs = Collections.singletonList(InputField.of("parse", "name"));
  TransformOperation normalize = new TransformOperation("normalize", "normalize", normalizeInputs, "name");

  List<InputField> writeInputs = Arrays.asList(InputField.of("normalize", "name"), InputField.of("parse", "address"));
  WriteOperation write = new WriteOperation("write", "writing to another file", EndPoint.of("ns", "file2"),
                                            writeInputs);

  List<Operation> operations = new ArrayList<>(Arrays.asList(parse, read, normalize, write));
  FieldLineageInfo.getTopologicallySortedOperations(new HashSet<>(operations));
}
/**
 * Collects, per destination {@link EndPoint}, the names of the fields written to it.
 *
 * <p>If {@code writeOperations} is still {@code null}, {@code computeAndValidateFieldLineageInfo} is invoked
 * on the operations first (presumably this is what populates {@code writeOperations}).
 *
 * @return a map from each destination endpoint to the names of the input fields of the write operations
 * targeting it; an endpoint whose write operations have no inputs maps to an empty set
 */
private Map<EndPoint, Set<String>> computeDestinationFields() {
  if (writeOperations == null) {
    computeAndValidateFieldLineageInfo(this.operations);
  }

  Map<EndPoint, Set<String>> fieldsByEndPoint = new HashMap<>();
  for (WriteOperation writeOp : this.writeOperations) {
    // register the endpoint up front so it is present even when the write has no inputs
    Set<String> names = fieldsByEndPoint.computeIfAbsent(writeOp.getDestination(), endPoint -> new HashSet<>());
    writeOp.getInputs().forEach(input -> names.add(input.getName()));
  }
  return fieldsByEndPoint;
}
case WRITE: WriteOperation write = (WriteOperation) operation; EndPoint destination = write.getDestination(); if (destination == null) { throw new IllegalArgumentException(String.format("Destination endpoint cannot be null for the write " + "operation '%s'.", write.getName())); origins = write.getInputs().stream().map(InputField::getOrigin).collect(Collectors.toSet());
case WRITE: WriteOperation write = (WriteOperation) operation; inputFields.addAll(write.getInputs());
@Test public void testDisjointBranches() { // read1 -----> write1 // read2 -----> write2 ReadOperation read1 = new ReadOperation("read1", "read descr", EndPoint.of("ns", "input1"), "offset", "body"); WriteOperation write1 = new WriteOperation("write1", "write descr", EndPoint.of("ns", "output"), InputField.of("read1", "offset")); ReadOperation read2 = new ReadOperation("read2", "read descr", EndPoint.of("ns", "input2"), "offset", "body"); WriteOperation write2 = new WriteOperation("write2", "write descr", EndPoint.of("ns", "output"), InputField.of("read2", "offset")); Set<Operation> operations = new LinkedHashSet<>(); operations.add(write1); operations.add(write2); operations.add(read2); operations.add(read1); List<Operation> topologicallySortedOperations = FieldLineageInfo.getTopologicallySortedOperations(operations); assertBefore(topologicallySortedOperations, read1, write1); assertBefore(topologicallySortedOperations, read2, write2); }
/**
 * Builds the incoming summary by visiting every input field of every write operation.
 *
 * <p>If {@code writeOperations} is still {@code null}, {@code computeAndValidateFieldLineageInfo} is invoked
 * on the operations first. For each input of each write operation, {@code computeIncomingSummaryHelper} is
 * called with the destination field (the write's destination plus the input's name), the operation
 * registered in {@code operationsMap} under the input's origin, the write operation itself, and the
 * summary map being filled.
 *
 * @return the summary map handed to {@code computeIncomingSummaryHelper} for every written field
 */
private Map<EndPointField, Set<EndPointField>> computeIncomingSummary() {
  if (writeOperations == null) {
    computeAndValidateFieldLineageInfo(this.operations);
  }

  Map<EndPointField, Set<EndPointField>> incoming = new HashMap<>();
  for (WriteOperation writeOp : writeOperations) {
    for (InputField writtenField : writeOp.getInputs()) {
      EndPointField destinationField = new EndPointField(writeOp.getDestination(), writtenField.getName());
      computeIncomingSummaryHelper(destinationField, operationsMap.get(writtenField.getOrigin()), writeOp,
                                   incoming);
    }
  }
  return incoming;
}
case WRITE: WriteOperation write = (WriteOperation) operation; inputFields.addAll(write.getInputs());
Collections.singletonList(InputField.of("read", "body")), "name", "address"); WriteOperation write = new WriteOperation("write", "write data", EndPoint.of("ns", "endpoint2"), Arrays.asList(InputField.of("read", "offset"), InputField.of("parse", "name"), WriteOperation anotherWrite = new WriteOperation("write", "write data", EndPoint.of("myns", "endpoint2"), Arrays.asList(InputField.of("read", "offset"), InputField.of("parse", "name"),
/**
 * Builds the incoming summary by visiting every input field of every write operation.
 *
 * <p>If {@code writeOperations} is still {@code null}, {@code computeAndValidateFieldLineageInfo} is invoked
 * on the operations first. For each input of each write operation, {@code computeIncomingSummaryHelper} is
 * called with the destination field (the write's destination plus the input's name), the operation
 * registered in {@code operationsMap} under the input's origin, the write operation itself, and the
 * summary map being filled.
 *
 * @return the summary map handed to {@code computeIncomingSummaryHelper} for every written field
 */
private Map<EndPointField, Set<EndPointField>> computeIncomingSummary() {
  if (writeOperations == null) {
    computeAndValidateFieldLineageInfo(this.operations);
  }

  Map<EndPointField, Set<EndPointField>> incoming = new HashMap<>();
  for (WriteOperation writeOp : writeOperations) {
    for (InputField writtenField : writeOp.getInputs()) {
      EndPointField destinationField = new EndPointField(writeOp.getDestination(), writtenField.getName());
      computeIncomingSummaryHelper(destinationField, operationsMap.get(writtenField.getOrigin()), writeOp,
                                   incoming);
    }
  }
  return incoming;
}
if (OperationType.WRITE == previousOperation.getType()) { WriteOperation previousWrite = (WriteOperation) previousOperation; inputFields = new HashSet<>(previousWrite.getInputs()); } else if (OperationType.TRANSFORM == previousOperation.getType()) { TransformOperation previousTransform = (TransformOperation) previousOperation;
/**
 * Topological sorting must fail with {@link IllegalArgumentException} when the operations form a cycle
 * ("parse" and "normalize" consume each other's "name" field), even though some inputs additionally name
 * origins ("nop1", "nop2", "nop3") that are not part of the supplied operations.
 */
@Test(expected = IllegalArgumentException.class)
public void testCycleWithNonExistentOperationNames() {
  ReadOperation read = new ReadOperation("read", "read", EndPoint.of("ns", "file1"), "offset", "body");

  // parse <-> normalize depend on each other's "name" field; "nop*" origins do not exist
  List<InputField> parseInputs = Arrays.asList(InputField.of("read", "body"), InputField.of("normalize", "name"),
                                               InputField.of("nop1", "field1"));
  TransformOperation parse = new TransformOperation("parse", "parse", parseInputs, "name", "address");

  List<InputField> normalizeInputs = Arrays.asList(InputField.of("parse", "name"),
                                                   InputField.of("nop2", "field2"));
  TransformOperation normalize = new TransformOperation("normalize", "normalize", normalizeInputs, "name");

  List<InputField> writeInputs = Arrays.asList(InputField.of("normalize", "name"),
                                               InputField.of("parse", "address"),
                                               InputField.of("nop3", "field3"));
  WriteOperation write = new WriteOperation("write", "writing to another file", EndPoint.of("ns", "file2"),
                                            writeInputs);

  List<Operation> operations = new ArrayList<>(Arrays.asList(parse, read, normalize, write));
  FieldLineageInfo.getTopologicallySortedOperations(new HashSet<>(operations));
}
for (WriteOperation write : writeOperations) { if (!write.getDestination().equals(destinationField.getEndPoint())) { continue; write.getInputs().stream().filter(input -> input.getName().equals(destinationField.getField())) .collect(Collectors.toSet());