@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException {
  super.prepareRun(context);
  Schema schema = tableConfig.getSchema();
  if (schema != null && schema.getFields() != null) {
    // Record field-level lineage: a single READ operation covering every field in the schema.
    FieldOperation operation = new FieldReadOperation("Read", "Read from Table dataset",
                                                      EndPoint.of(context.getNamespace(), tableConfig.getName()),
                                                      schema.getFields().stream()
                                                        .map(Schema.Field::getName)
                                                        .collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }
}
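For context, a minimal sketch of the operation this method records for a concrete schema; the "default" namespace, the "purchases" table name, and the field names are hypothetical:

// Hypothetical schema: a "purchases" Table with two fields.
Schema schema = Schema.recordOf("purchase",
                                Schema.Field.of("id", Schema.of(Schema.Type.LONG)),
                                Schema.Field.of("item", Schema.of(Schema.Type.STRING)));

// Equivalent to what prepareRun would record for this schema:
// a READ from default.purchases producing the output fields ["id", "item"].
FieldOperation operation = new FieldReadOperation("Read", "Read from Table dataset",
                                                  EndPoint.of("default", "purchases"),
                                                  schema.getFields().stream()
                                                    .map(Schema.Field::getName)
                                                    .collect(Collectors.toList()));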
case READ:
  FieldReadOperation read = (FieldReadOperation) fieldOperation;
  newOperation = new ReadOperation(newOperationName, read.getDescription(), read.getSource(),
                                   read.getOutputFields());
  currentOperationOutputs.addAll(read.getOutputFields());
  break;
case TRANSFORM:
FieldReadOperation read = (FieldReadOperation) pipelineOperation;
updateInvalidOutputs(Collections.emptyList(), unusedOutputs, redundantOutputs);
validInputsSoFar.addAll(read.getOutputFields());
for (String field : read.getOutputFields()) {
  // A read output is unused until some downstream operation consumes it;
  // remember which operation produced the field so it can be reported later.
  List<String> origins = unusedOutputs.computeIfAbsent(field, k -> new ArrayList<>());
  origins.add(pipelineOperation.getName());
}
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(inputFormatProvider);

  // The dataset must still be created at runtime if macros were provided at configure time.
  if (!context.datasetExists(config.getName())) {
    context.createDataset(config.getName(), PartitionedFileSet.class.getName(), datasetProperties);
  }

  PartitionedFileSet partitionedFileSet = context.getDataset(config.getName());
  SnapshotFileSet snapshotFileSet = new SnapshotFileSet(partitionedFileSet);

  // Explicit file properties, when set, replace the default dataset arguments entirely.
  Map<String, String> arguments = new HashMap<>(datasetProperties.getProperties());
  if (config.getFileProperties() != null) {
    arguments = GSON.fromJson(config.getFileProperties(), MAP_TYPE);
  }

  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String formatName = getInputFormatName();
    FieldOperation operation = new FieldReadOperation("Read",
      String.format("Read from SnapshotFile source in %s format.", formatName),
      EndPoint.of(context.getNamespace(), config.getName()),
      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }

  context.setInput(Input.ofDataset(config.getName(), snapshotFileSet.getInputArguments(arguments)));
}
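A sketch of the fileProperties override above, assuming MAP_TYPE is a Gson TypeToken for Map<String, String>; the property key and value here are hypothetical:

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;

// Hypothetical fileProperties JSON; when present it replaces the defaults
// derived from datasetProperties rather than being merged with them.
String fileProperties = "{\"mapreduce.input.fileinputformat.split.minsize\": \"1048576\"}";
Map<String, String> arguments = new Gson().fromJson(
  fileProperties, new TypeToken<Map<String, String>>() { }.getType());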
@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException, InstantiationException {
  config.validate();
  InputFormatProvider inputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);
  DatasetProperties datasetProperties = createProperties(inputFormatProvider);

  // If macros were provided, the dataset still needs to be created at runtime.
  if (!context.datasetExists(config.getName())) {
    String tpfsName = config.getName();
    context.createDataset(tpfsName, TimePartitionedFileSet.class.getName(), datasetProperties);
  }

  Schema schema = config.getSchema();
  if (schema.getFields() != null) {
    String formatName = getInputFormatName();
    FieldOperation operation = new FieldReadOperation("Read",
      String.format("Read from TimePartitionedFileSet in %s format.", formatName),
      EndPoint.of(context.getNamespace(), config.getName()),
      schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
    context.record(Collections.singletonList(operation));
  }

  // Read the time window ending `delay` before the logical start time and
  // spanning `duration` back from there.
  long duration = TimeParser.parseDuration(config.getDuration());
  long delay = Strings.isNullOrEmpty(config.getDelay()) ? 0 : TimeParser.parseDuration(config.getDelay());
  long endTime = context.getLogicalStartTime() - delay;
  long startTime = endTime - duration;

  Map<String, String> sourceArgs = Maps.newHashMap(datasetProperties.getProperties());
  TimePartitionedFileSetArguments.setInputStartTime(sourceArgs, startTime);
  TimePartitionedFileSetArguments.setInputEndTime(sourceArgs, endTime);
  context.setInput(Input.ofDataset(config.getName(), sourceArgs));
}
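To make the window arithmetic concrete, a hedged sketch assuming TimeParser.parseDuration returns milliseconds (consistent with its use against getLogicalStartTime() above); the duration, delay, and start-time values are hypothetical:

import java.util.concurrent.TimeUnit;

// Hypothetical run: duration = "1h", delay = "10m", logical start = 12:00 (epoch millis).
long logicalStartTime = 1_700_000_000_000L;
long duration = TimeUnit.HOURS.toMillis(1);    // parseDuration("1h"), assumed milliseconds
long delay = TimeUnit.MINUTES.toMillis(10);    // parseDuration("10m"), assumed milliseconds
long endTime = logicalStartTime - delay;       // 11:50
long startTime = endTime - duration;           // 10:50
// The source reads partitions whose creation time falls in [startTime, endTime),
// i.e. the hour of data ending ten minutes before the logical start time.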