@Override
public int hashCode() {
  return Objects.hash(super.hashCode(), inputFields, outputFields);
}
}
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o == null || getClass() != o.getClass()) {
    return false;
  }
  if (!super.equals(o)) {
    return false;
  }
  FieldTransformOperation that = (FieldTransformOperation) o;
  return Objects.equals(inputFields, that.inputFields)
    && Objects.equals(outputFields, that.outputFields);
}
for (FieldOperation fieldOperation : fieldOperations) {
  Operation newOperation = null;
  String newOperationName = prefixedOperationName(stageName, fieldOperation.getName());
  Set<String> currentOperationOutputs = new LinkedHashSet<>();
  switch (fieldOperation.getType()) {
    case READ:
      FieldReadOperation read = (FieldReadOperation) fieldOperation;
      newOperation = new ReadOperation(newOperationName, read.getDescription(), read.getSource(),
                                       read.getOutputFields());
      currentOperationOutputs.addAll(read.getOutputFields());
      break;
    case TRANSFORM:
      FieldTransformOperation transform = (FieldTransformOperation) fieldOperation;
      List<InputField> inputFields = createInputFields(transform.getInputFields(), stageName, processedOperations);
      newOperation = new TransformOperation(newOperationName, transform.getDescription(), inputFields,
                                            transform.getOutputFields());
      currentOperationOutputs.addAll(transform.getOutputFields());
      break;
    case WRITE:
      FieldWriteOperation write = (FieldWriteOperation) fieldOperation;
      inputFields = createInputFields(write.getInputFields(), stageName, processedOperations);
      newOperation = new WriteOperation(newOperationName, write.getDescription(), write.getSink(), inputFields);
      break;
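// The helper methods used above (prefixedOperationName, createInputFields) are not part of this
// excerpt. As an illustration only, a prefixing helper might look like the sketch below; the
// "<stage>.<operation>" naming is an assumption, not a confirmed convention of the platform.
private static String prefixedOperationName(String stageName, String operationName) {
  return stageName + "." + operationName;
}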
switch (pipelineOperation.getType()) {
  case READ:
    FieldReadOperation read = (FieldReadOperation) pipelineOperation;
    updateInvalidOutputs(Collections.emptyList(), unusedOutputs, redundantOutputs);
    validInputsSoFar.addAll(read.getOutputFields());
    for (String field : read.getOutputFields()) {
      List<String> origins = unusedOutputs.computeIfAbsent(field, k -> new ArrayList<>());
      origins.add(pipelineOperation.getName());
    }
    break;
  case TRANSFORM:
    FieldTransformOperation transform = (FieldTransformOperation) pipelineOperation;
    validateInputs(pipelineOperation.getName(), transform.getInputFields(), validInputsSoFar);
    updateInvalidOutputs(transform.getInputFields(), unusedOutputs, redundantOutputs);
    validInputsSoFar.addAll(transform.getOutputFields());
    for (String field : transform.getOutputFields()) {
      List<String> origins = unusedOutputs.computeIfAbsent(field, k -> new ArrayList<>());
      origins.add(pipelineOperation.getName());
    }
    break;
  case WRITE:
    FieldWriteOperation write = (FieldWriteOperation) pipelineOperation;
    validateInputs(pipelineOperation.getName(), write.getInputFields(), validInputsSoFar);
    updateInvalidOutputs(write.getInputFields(), unusedOutputs, redundantOutputs);
    break;
}
FieldOperation joinOperation = new FieldTransformOperation("Join", JOIN_OPERATION_DESCRIPTION, joinInputs,
                                                           new ArrayList<>(joinOutputs));
operations.add(joinOperation);

FieldOperation identity = new FieldTransformOperation(operationName, IDENTITY_OPERATION_DESCRIPTION,
                                                      Collections.singletonList(stagedInputField),
                                                      outputFieldInfo.name);

FieldOperation transform = new FieldTransformOperation(operationName, RENAME_OPERATION_DESCRIPTION,
                                                       Collections.singletonList(stagedInputField),
                                                       outputFieldInfo.name);
@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException {
  super.prepareRun(context);
  Schema schema = tableConfig.getSchema();
  if (schema != null && schema.getFields() != null) {
    FieldOperation operation = new FieldReadOperation("Read", "Read from Table dataset",
                                                      EndPoint.of(context.getNamespace(), tableConfig.getName()),
                                                      schema.getFields().stream().map(Schema.Field::getName)
                                                        .collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }
}
@Override
public void prepareRun(BatchSinkContext context) throws DatasetManagementException {
  super.prepareRun(context);
  String schemaString = tableSinkConfig.getSchemaStr();
  if (schemaString != null) {
    try {
      Schema schema = Schema.parseJson(schemaString);
      if (schema.getFields() != null) {
        FieldOperation operation = new FieldWriteOperation("Write", "Wrote to CDAP Table",
                                                           EndPoint.of(context.getNamespace(), tableSinkConfig.getName()),
                                                           schema.getFields().stream().map(Schema.Field::getName)
                                                             .collect(Collectors.toList()));
        context.record(Collections.singletonList(operation));
      }
    } catch (IOException e) {
      throw new IllegalStateException("Failed to parse schema.", e);
    }
  }
}
@Override
public void prepareRun(StageSubmitterContext context) throws Exception {
  super.prepareRun(context);
  List<String> inputFields = new ArrayList<>();
  List<String> outputFields = new ArrayList<>();
  Schema inputSchema = context.getInputSchema();
  if (SchemaValidator.canRecordLineage(inputSchema, "input")) {
    //noinspection ConstantConditions
    inputFields = inputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList());
  }
  Schema outputSchema = context.getOutputSchema();
  if (SchemaValidator.canRecordLineage(outputSchema, "output")) {
    //noinspection ConstantConditions
    outputFields = outputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList());
  }
  FieldOperation dataPrepOperation = new FieldTransformOperation("Python", config.script, inputFields, outputFields);
  context.record(Collections.singletonList(dataPrepOperation));
}
@Override
public int hashCode() {
  return Objects.hash(super.hashCode(), source, outputFields);
}
}
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o == null || getClass() != o.getClass()) {
    return false;
  }
  if (!super.equals(o)) {
    return false;
  }
  FieldWriteOperation that = (FieldWriteOperation) o;
  return Objects.equals(inputFields, that.inputFields)
    && Objects.equals(sink, that.sink);
}
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(inputFormatProvider);

  // Dataset must still be created if macros provided at configure time
  if (!context.datasetExists(config.getName())) {
    context.createDataset(config.getName(), PartitionedFileSet.class.getName(), datasetProperties);
  }

  PartitionedFileSet partitionedFileSet = context.getDataset(config.getName());
  SnapshotFileSet snapshotFileSet = new SnapshotFileSet(partitionedFileSet);

  Map<String, String> arguments = new HashMap<>(datasetProperties.getProperties());
  if (config.getFileProperties() != null) {
    arguments = GSON.fromJson(config.getFileProperties(), MAP_TYPE);
  }

  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String formatName = getInputFormatName();
    FieldOperation operation = new FieldReadOperation("Read",
                                                      String.format("Read from SnapshotFile source in %s format.", formatName),
                                                      EndPoint.of(context.getNamespace(), config.getName()),
                                                      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }

  context.setInput(Input.ofDataset(config.getName(), snapshotFileSet.getInputArguments(arguments)));
}
if (schema.getFields() != null) {
  FieldOperation operation = new FieldWriteOperation("Write", "Wrote to TPFS dataset",
                                                     EndPoint.of(context.getNamespace(), tpfsSinkConfig.name),
                                                     schema.getFields().stream().map(Schema.Field::getName)
                                                       .collect(Collectors.toList()));
  context.record(Collections.singletonList(operation));
}
@Override
public void prepareRun(StageSubmitterContext context) throws Exception {
  super.prepareRun(context);
  List<String> inputFields = new ArrayList<>();
  List<String> outputFields = new ArrayList<>();
  Schema inputSchema = context.getInputSchema();
  if (SchemaValidator.canRecordLineage(inputSchema, "input")) {
    //noinspection ConstantConditions
    inputFields = inputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList());
  }
  Schema outputSchema = context.getOutputSchema();
  if (SchemaValidator.canRecordLineage(outputSchema, "output")) {
    //noinspection ConstantConditions
    outputFields = outputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList());
  }
  FieldOperation dataPrepOperation = new FieldTransformOperation("JavaScript", config.script, inputFields, outputFields);
  context.record(Collections.singletonList(dataPrepOperation));
}
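// Minimal sketch (not from this codebase): the same record-at-prepareRun pattern applied to a
// hypothetical custom transform plugin. The operation name, description, and field names below
// are invented for illustration; only FieldTransformOperation and context.record(...) mirror the
// API usage shown in the excerpts above.
@Override
public void prepareRun(StageSubmitterContext context) throws Exception {
  super.prepareRun(context);
  FieldOperation rename = new FieldTransformOperation("Rename ts", "Renamed 'ts' to 'timestamp'.",
                                                      Collections.singletonList("ts"), "timestamp");
  context.record(Collections.singletonList(rename));
}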
@Override
public int hashCode() {
  return Objects.hash(super.hashCode(), inputFields, sink);
}
}
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o == null || getClass() != o.getClass()) {
    return false;
  }
  if (!super.equals(o)) {
    return false;
  }
  FieldReadOperation that = (FieldReadOperation) o;
  return Objects.equals(source, that.source)
    && Objects.equals(outputFields, that.outputFields);
}
@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException, InstantiationException {
  config.validate();
  InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(inputFormatProvider);

  // If macros provided at runtime, dataset still needs to be created
  if (!context.datasetExists(config.getName())) {
    String tpfsName = config.getName();
    context.createDataset(tpfsName, TimePartitionedFileSet.class.getName(), datasetProperties);
  }

  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String formatName = getInputFormatName();
    FieldOperation operation = new FieldReadOperation("Read",
                                                      String.format("Read from TimePartitionedFileSet in %s format.", formatName),
                                                      EndPoint.of(context.getNamespace(), config.getName()),
                                                      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }

  long duration = TimeParser.parseDuration(config.getDuration());
  long delay = Strings.isNullOrEmpty(config.getDelay()) ? 0 : TimeParser.parseDuration(config.getDelay());
  long endTime = context.getLogicalStartTime() - delay;
  long startTime = endTime - duration;

  Map<String, String> sourceArgs = Maps.newHashMap(datasetProperties.getProperties());
  TimePartitionedFileSetArguments.setInputStartTime(sourceArgs, startTime);
  TimePartitionedFileSetArguments.setInputEndTime(sourceArgs, endTime);
  context.setInput(Input.ofDataset(config.getName(), sourceArgs));
}
@Override
public void prepareRun(BatchAggregatorContext context) throws Exception {
  super.prepareRun(context);
  LinkedList<FieldOperation> fllOperations = new LinkedList<>();
  // in configurePipeline all the necessary checks have already been performed to set the output schema
  if (SchemaValidator.canRecordLineage(context.getOutputSchema(), "output")) {
    Schema inputSchema = context.getInputSchema();
    // for every function, record the field-level operation details
    for (GroupByConfig.FunctionInfo functionInfo : conf.getAggregates()) {
      Schema.Field outputSchemaField = getOutputSchemaField(functionInfo, inputSchema);
      String operationName = String.format("Group %s", functionInfo.getField());
      String description = String.format("Aggregate function applied: '%s'.", functionInfo.getFunction());
      FieldOperation operation = new FieldTransformOperation(operationName, description,
                                                             Collections.singletonList(functionInfo.getField()),
                                                             outputSchemaField.getName());
      fllOperations.add(operation);
    }
  }
  context.record(fllOperations);
}