private void addMergeOperation(Set<String> stageInputs, Map<String, Operation> processedOperations) {
  Set<String> sortedInputs = new TreeSet<>(stageInputs);
  String mergeOperationName = prefixedOperationName(Joiner.on(SEPARATOR).join(sortedInputs), "merge");
  String mergeDescription = "Merged stages: " + Joiner.on(",").join(sortedInputs);
  if (processedOperations.containsKey(mergeOperationName)) {
    // It is possible that the same set of stages acts as an input to multiple stages.
    // We should still add only a single merge operation for them.
    return;
  }

  List<InputField> inputFields = new ArrayList<>();
  for (String inputStage : sortedInputs) {
    List<String> parentStages = findParentStages(inputStage);
    for (String parentStage : parentStages) {
      Map<String, String> fieldOrigins = stageOutputsWithOrigins.get(parentStage);
      for (Map.Entry<String, String> fieldOrigin : fieldOrigins.entrySet()) {
        inputFields.add(InputField.of(fieldOrigin.getValue(), fieldOrigin.getKey()));
      }
    }
  }

  Set<String> outputs = new LinkedHashSet<>();
  for (InputField inputField : inputFields) {
    outputs.add(inputField.getName());
  }

  TransformOperation merge = new TransformOperation(mergeOperationName, mergeDescription, inputFields,
                                                    new ArrayList<>(outputs));
  processedOperations.put(merge.getName(), merge);
}
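// A minimal sketch, assuming SEPARATOR is "." and that prefixedOperationName composes
// "<joined stage names><SEPARATOR>merge" (both are assumptions; the real constant and helper
// live elsewhere in this class). It illustrates why the TreeSet above matters: sorting the
// input stage names first makes the generated merge operation name deterministic, so the
// containsKey check can dedupe merges regardless of stage iteration order.
import com.google.common.base.Joiner;
import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

class MergeNamingSketch {
  private static final String SEPARATOR = "."; // assumed value, for illustration only

  static String mergeName(String... stageInputs) {
    Set<String> sortedInputs = new TreeSet<>(Arrays.asList(stageInputs));
    return Joiner.on(SEPARATOR).join(sortedInputs) + SEPARATOR + "merge";
  }

  public static void main(String[] args) {
    // Same stages, different order: identical name either way.
    System.out.println(mergeName("parse", "normalize")); // normalize.parse.merge
    System.out.println(mergeName("normalize", "parse")); // normalize.parse.merge
  }
}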
@Test(expected = IllegalArgumentException.class)
public void testSelfReferentialOperations() {
  // "parse" lists its own output field "name" as an input, forming a self-cycle.
  TransformOperation parse = new TransformOperation("parse", "parse",
                                                    Arrays.asList(InputField.of("read", "body"),
                                                                  InputField.of("parse", "name")),
                                                    "name", "address");
  FieldLineageInfo.getTopologicallySortedOperations(Collections.singleton(parse));
}
@Test(expected = IllegalArgumentException.class)
public void testCycle() {
  EndPoint readEndPoint = EndPoint.of("ns", "file1");
  EndPoint writeEndPoint = EndPoint.of("ns", "file2");
  ReadOperation read = new ReadOperation("read", "read", readEndPoint, "offset", "body");
  // "parse" reads "name" from "normalize" while "normalize" reads "name" from "parse",
  // forming a cycle between the two transforms.
  TransformOperation parse = new TransformOperation("parse", "parse",
                                                    Arrays.asList(InputField.of("read", "body"),
                                                                  InputField.of("normalize", "name")),
                                                    "name", "address");
  TransformOperation normalize = new TransformOperation("normalize", "normalize",
                                                        Collections.singletonList(InputField.of("parse", "name")),
                                                        "name");
  WriteOperation write = new WriteOperation("write", "writing to another file", writeEndPoint,
                                            Arrays.asList(InputField.of("normalize", "name"),
                                                          InputField.of("parse", "address")));
  List<Operation> operations = new ArrayList<>();
  operations.add(parse);
  operations.add(read);
  operations.add(normalize);
  operations.add(write);
  FieldLineageInfo.getTopologicallySortedOperations(new HashSet<>(operations));
}
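// For contrast with the cyclic set above, a hedged sketch of the same pipeline with the cycle
// removed: here "parse" consumes only the output of "read", so the operations form a DAG and
// getTopologicallySortedOperations should return a dependency order (read before parse,
// parse before normalize, normalize before write) instead of throwing.
ReadOperation acyclicRead = new ReadOperation("read", "read", EndPoint.of("ns", "file1"), "offset", "body");
TransformOperation acyclicParse = new TransformOperation("parse", "parse",
                                                         Collections.singletonList(InputField.of("read", "body")),
                                                         "name", "address");
TransformOperation acyclicNormalize = new TransformOperation("normalize", "normalize",
                                                             Collections.singletonList(InputField.of("parse", "name")),
                                                             "name");
WriteOperation acyclicWrite = new WriteOperation("write", "writing to another file", EndPoint.of("ns", "file2"),
                                                 Arrays.asList(InputField.of("normalize", "name"),
                                                               InputField.of("parse", "address")));
List<Operation> acyclicOperations = new ArrayList<>();
acyclicOperations.add(acyclicRead);
acyclicOperations.add(acyclicParse);
acyclicOperations.add(acyclicNormalize);
acyclicOperations.add(acyclicWrite);
List<Operation> sorted = FieldLineageInfo.getTopologicallySortedOperations(new HashSet<>(acyclicOperations));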
TransformOperation parse = new TransformOperation("parse", "parsing body", Collections.singletonList(InputField.of("read", "body")), "first_name", "last_name"); TransformOperation concat = new TransformOperation("concat", "concatinating the fields", Arrays.asList(InputField.of("parse", "first_name"), InputField.of("parse", "last_name")), "name");
List<InputField> inputFields = createInputFields(transform.getInputFields(), stageName, processedOperations);
newOperation = new TransformOperation(newOperationName, transform.getDescription(), inputFields,
                                      transform.getOutputFields());
currentOperationOutputs.addAll(transform.getOutputFields());
@Test(expected = IllegalArgumentException.class)
public void testCycleWithNonExistentOperationNames() {
  EndPoint readEndPoint = EndPoint.of("ns", "file1");
  EndPoint writeEndPoint = EndPoint.of("ns", "file2");
  ReadOperation read = new ReadOperation("read", "read", readEndPoint, "offset", "body");
  // In addition to the parse <-> normalize cycle, the inputs reference origins
  // ("nop1", "nop2", "nop3") that do not exist in the operation set.
  TransformOperation parse = new TransformOperation("parse", "parse",
                                                    Arrays.asList(InputField.of("read", "body"),
                                                                  InputField.of("normalize", "name"),
                                                                  InputField.of("nop1", "field1")),
                                                    "name", "address");
  TransformOperation normalize = new TransformOperation("normalize", "normalize",
                                                        Arrays.asList(InputField.of("parse", "name"),
                                                                      InputField.of("nop2", "field2")),
                                                        "name");
  WriteOperation write = new WriteOperation("write", "writing to another file", writeEndPoint,
                                            Arrays.asList(InputField.of("normalize", "name"),
                                                          InputField.of("parse", "address"),
                                                          InputField.of("nop3", "field3")));
  List<Operation> operations = new ArrayList<>();
  operations.add(parse);
  operations.add(read);
  operations.add(normalize);
  operations.add(write);
  FieldLineageInfo.getTopologicallySortedOperations(new HashSet<>(operations));
}
TransformOperation parse = new TransformOperation("parse", "parse descr", Collections.singletonList(InputField.of("read", "body")), "name", "address"); TransformOperation normalize = new TransformOperation("normalize", "normalize descr", Collections.singletonList(InputField.of("parse", "address")), "address");
TransformOperation parse = new TransformOperation("parse", "parse descr", Collections.singletonList(InputField.of("read", "body")), "name", "address"); TransformOperation normalize = new TransformOperation("normalize", "normalize descr", Collections.singletonList(InputField.of("parse", "address")), "address");
@Test
public void testValidOperations() {
  ReadOperation read = new ReadOperation("read", "some read", EndPoint.of("endpoint1"), "offset", "body");
  TransformOperation parse = new TransformOperation("parse", "parse body",
                                                    Collections.singletonList(InputField.of("read", "body")),
                                                    "name", "address");
@Test
public void testInvalidOperations() {
  ReadOperation read = new ReadOperation("read", "some read", EndPoint.of("endpoint1"), "offset", "body");
  TransformOperation parse = new TransformOperation("parse", "parse body",
                                                    Collections.singletonList(InputField.of("read", "body")),
                                                    "name", "address");
  TransformOperation invalidOrigin = new TransformOperation("anotherparse", "parse body",
                                                            Arrays.asList(InputField.of("invalid", "body"),
                                                                          InputField.of("anotherinvalid", "body")),
TransformOperation parse = new TransformOperation("parse", "parsing body", Collections.singletonList(InputField.of("read", "body")), "id", "name", "address", "zip");
ProgramRunId spark1Run1 = spark1.run(RunIds.generate(100));
ReadOperation read = new ReadOperation("read", "some read", EndPoint.of("ns", "endpoint1"), "offset", "body");
TransformOperation parse = new TransformOperation("parse", "parse body",
                                                  Collections.singletonList(InputField.of("read", "body")),
                                                  "name", "address");
operations2.add(read);
operations2.add(parse);
TransformOperation normalize = new TransformOperation("normalize", "normalize address",
                                                      Collections.singletonList(InputField.of("parse", "address")),
                                                      "address");
TransformOperation parse = new TransformOperation("parse", "parsing body", Collections.singletonList(InputField.of("pRead", "body")), "id", "name", "address"); TransformOperation codeGen = new TransformOperation("codeGen", "Generate secure code", Arrays.asList(InputField.of("parse", "id"), InputField.of("cRead", "id")), "id");
TransformOperation parse = new TransformOperation("parse", "parsing body", Collections.singletonList(InputField.of("read", "body")), "first_name", "last_name"); TransformOperation concat = new TransformOperation("concat", "concatinating the fields", Arrays.asList(InputField.of("parse", "first_name"), InputField.of("parse", "last_name")), "name");
TransformOperation merge = new TransformOperation("merge", "merging fields", Arrays.asList(InputField.of("read1", "offset"), InputField.of("read2", "offset"), InputField.of("read2", "body")), "offset", "body"); TransformOperation parse = new TransformOperation("parse", "parsing body", Collections.singletonList(InputField.of("merge", "body")), "name", "address");
  new HashSet<>(Arrays.asList(read, write))));
TransformOperation normalize = new TransformOperation("normalize", "normalizing offset",
                                                      Collections.singletonList(InputField.of("read", "offset")),
                                                      "offset");