@Override
protected void configure() {
  setName(phaseSpec.getPhaseName());
  setDescription(phaseSpec.getDescription());
  // Register the plugins at the program level so that the platform can fail the program
  // early if plugin requirements are not met.
  phaseSpec.getPhase().registerPlugins(getConfigurer());
  setMainClass(BatchSparkPipelineDriver.class);
  setExecutorResources(phaseSpec.getResources());
  setDriverResources(phaseSpec.getDriverResources());
  setClientResources(phaseSpec.getClientResources());
  // Add source, sink, and transform ids to the properties. These are needed at runtime
  // to instantiate the plugins.
  Map<String, String> properties = new HashMap<>();
  properties.put(Constants.PIPELINEID, GSON.toJson(phaseSpec, BatchPhaseSpec.class));
  setProperties(properties);
}
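The phase spec round-trips through the program properties as JSON: configure() serializes it under Constants.PIPELINEID, and the runtime driver deserializes it back. A minimal sketch of that round-trip, assuming a plain Gson instance (the real GSON constant is configured elsewhere with whatever type adapters BatchPhaseSpec requires):

import com.google.gson.Gson;

// Hedged sketch of the PIPELINEID round-trip; a plain Gson instance stands in for
// the project's configured GSON constant.
Gson gson = new Gson();
String serialized = gson.toJson(phaseSpec, BatchPhaseSpec.class);
BatchPhaseSpec restored = gson.fromJson(serialized, BatchPhaseSpec.class);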
numOfRecordsPreview = phaseSpec.getNumOfRecordsPreview();
PipelinePluginContext pluginContext = new PipelinePluginContext(sec.getPluginContext(), sec.getMetrics(),
                                                                phaseSpec.isStageLoggingEnabled(),
                                                                phaseSpec.isProcessTimingEnabled());
if (phaseSpec.pipelineContainsCondition()) {
  Iterator<StageSpec> iterator = phaseSpec.getPhase().iterator();
  while (iterator.hasNext()) {
    StageSpec spec = iterator.next();
    // ... (loop body elided in this fragment)
  }
}
PipelinePluginInstantiator pluginInstantiator =
  new PipelinePluginInstantiator(pluginContext, sec.getMetrics(), phaseSpec, new SingleConnectorFactory());
try {
  runPipeline(phaseSpec.getPhase(), BatchSource.PLUGIN_TYPE, sec, stagePartitions,
              pluginInstantiator, collectors);
} finally {
  updateWorkflowToken(sec.getWorkflowToken(), collectors);
}
BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
  sparkConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
}
final Map<String, Integer> stagePartitions = new HashMap<>();
PluginContext pluginContext = new SparkPipelinePluginContext(context, context.getMetrics(),
                                                             phaseSpec.isStageLoggingEnabled(),
                                                             phaseSpec.isProcessTimingEnabled());
PipelinePluginInstantiator pluginInstantiator =
  new PipelinePluginInstantiator(pluginContext, context.getMetrics(), phaseSpec, new SingleConnectorFactory());
final Admin admin = context.getAdmin();
PipelinePhase phase = phaseSpec.getPhase();
PipelinePhase phase = phaseSpec.getPhase();
StageSpec stageSpec = phase.iterator().next();
PluginContext pluginContext = new PipelinePluginContext(input, metrics,
                                                        phaseSpec.isStageLoggingEnabled(),
                                                        phaseSpec.isProcessTimingEnabled());
this.outputWriter = getSinkWriter(context, phaseSpec.getPhase(), hConf);
PipelinePhase phase = phaseSpec.getPhase();
Set<StageSpec> reducers = phase.getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
if (!reducers.isEmpty()) {
  // ... (branch body elided in this fragment)
}
transformExecutorFactory =
  new MapReduceTransformExecutorFactory<>(context, pluginInstantiator, metrics,
                                          new BasicArguments(context.getWorkflowToken(), runtimeArgs),
                                          sourceStage, phaseSpec.getNumOfRecordsPreview(),
                                          phaseSpec.pipelineContainsCondition());
this.transformExecutor = transformExecutorFactory.create(phase, outputWriter);
phaseSpec.getPhase().registerPlugins(getConfigurer());
PluginSpec pluginSpec = stageSpec.getPlugin();
PluginProperties pluginProperties = PluginProperties.builder().addAll(pluginSpec.getProperties()).build();
setName(phaseSpec.getPhaseName());
BatchPhaseSpec batchPhaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID),
                                              BatchPhaseSpec.class);
PipelinePluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(),
                                                                     batchPhaseSpec.isStageLoggingEnabled(),
                                                                     batchPhaseSpec.isProcessTimingEnabled());
// ... (start of statement truncated in this fragment)
    CONNECTOR_DATASETS_TYPE);
for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
  hConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
}
final PipelinePhase phase = phaseSpec.getPhase();
PipelinePluginInstantiator pluginInstantiator =
  new PipelinePluginInstantiator(context, mrMetrics, phaseSpec, new MultiConnectorFactory());
Set<StageSpec> reducers = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE,
                                                               BatchJoiner.PLUGIN_TYPE);
if (reducers.size() > 1) {
  // ... (branch body elided in this fragment)
}
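For illustration, a minimal sketch of what the loop above sets up on the Hadoop side: properties written to the job Configuration at initialize time can be read back wherever that Configuration is available (the key below is hypothetical):

import org.apache.hadoop.conf.Configuration;

// Hypothetical key; any property set on the job Configuration is visible again
// from the same Configuration in the tasks.
Configuration hConf = new Configuration();
hConf.set("pipeline.sample.property", "value");
String restoredProp = hConf.get("pipeline.sample.property");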
@Override protected void configure() { setName(phaseSpec.getPhaseName()); setDescription("CustomAction phase executor. " + phaseSpec.getPhaseName()); // add source, sink, transform ids to the properties. These are needed at runtime to instantiate the plugins Map<String, String> properties = new HashMap<>(); properties.put(Constants.PIPELINEID, GSON.toJson(phaseSpec)); setProperties(properties); }
Set<String> pluginTypes = batchPhaseSpec.getPhase().getPluginTypes();
if (pluginTypes.contains(Action.PLUGIN_TYPE)) {
  // ... (branch body elided in this fragment)
}
return new BatchPhaseSpec(programName, phase, spec.getResources(), spec.getDriverResources(),
                          spec.getClientResources(), spec.isStageLoggingEnabled(),
                          spec.isProcessTimingEnabled(), phaseConnectorDatasets,
                          spec.getNumOfRecordsPreview(), spec.getProperties(),
                          /* remaining arguments truncated in this fragment */);
@Override
public void run() throws Exception {
  CustomActionContext context = getContext();
  Map<String, String> properties = context.getSpecification().getProperties();
  BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
  PipelinePhase phase = phaseSpec.getPhase();
  StageSpec stageSpec = phase.iterator().next();
  PluginContext pluginContext = new PipelinePluginContext(context, metrics,
                                                          phaseSpec.isStageLoggingEnabled(),
                                                          phaseSpec.isProcessTimingEnabled());
  PipelineRuntime pipelineRuntime = new PipelineRuntime(context, metrics);
  Action action = pluginContext.newPluginInstance(stageSpec.getName(),
                                                  new DefaultMacroEvaluator(pipelineRuntime.getArguments(),
                                                                            context.getLogicalStartTime(),
                                                                            context, context.getNamespace()));
  ActionContext actionContext = new BasicActionContext(context, pipelineRuntime, stageSpec);
  // Skip the actual execution when the data tracer is enabled (preview mode).
  if (!context.getDataTracer(stageSpec.getName()).isEnabled()) {
    action.run(actionContext);
  }
  WorkflowToken token = context.getWorkflowToken();
  if (token == null) {
    throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
  }
  // Propagate arguments added by the action to the rest of the workflow via the token.
  for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
    token.put(entry.getKey(), entry.getValue());
  }
}
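The loop above publishes action-added arguments into the WorkflowToken. A hedged sketch of how a later workflow node could read one back, assuming the CDAP WorkflowToken API (the key is hypothetical, and get returns null for absent keys):

// WorkflowToken and Value come from the CDAP workflow API
// (io.cdap.cdap.api.workflow in recent CDAP, co.cask.cdap.api.workflow in older releases).
Value value = token.get("output.path");  // hypothetical key
String outputPath = value == null ? null : value.toString();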
@Override
protected void configure() {
  setName(phaseSpec.getPhaseName());
  setDescription("Condition phase executor. " + phaseSpec.getPhaseName());
  // Add source, sink, and transform ids to the properties. These are needed at runtime
  // to instantiate the plugins.
  Map<String, String> properties = new HashMap<>();
  properties.put(Constants.PIPELINEID, GSON.toJson(phaseSpec));
  setProperties(properties);
}
BatchPhaseSpec batchPhaseSpec = new BatchPhaseSpec(ETLMapReduce.NAME, pipeline,
                                                   config.getResources(),
                                                   config.getDriverResources(),
                                                   /* remaining arguments truncated */);
break;
case SPARK:
  batchPhaseSpec = new BatchPhaseSpec(ETLSpark.class.getSimpleName(), pipeline,
                                      config.getResources(), config.getDriverResources(),
                                      /* remaining arguments truncated */);
@Override
public void configure() {
  setName(phaseSpec.getPhaseName());
  setDescription("MapReduce phase executor. " + phaseSpec.getDescription());
  phaseSpec.getPhase().registerPlugins(getConfigurer());
  setMapperResources(phaseSpec.getResources());
  setReducerResources(phaseSpec.getResources());
  setDriverResources(phaseSpec.getDriverResources());
  Set<String> sources = phaseSpec.getPhase().getSources();
  if (sources.isEmpty()) {
    throw new IllegalArgumentException(String.format(
      "Pipeline phase '%s' must contain at least one source but it has no sources.",
      phaseSpec.getPhaseName()));
  }
  if (phaseSpec.getPhase().getSinks().isEmpty()) {
    throw new IllegalArgumentException(String.format(
      "Pipeline phase '%s' must contain at least one sink but does not have any.",
      phaseSpec.getPhaseName()));
  }
  Set<StageSpec> reducers = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE,
                                                                 BatchJoiner.PLUGIN_TYPE);
  if (reducers.size() > 1) {
    throw new IllegalArgumentException(String.format(
      "Pipeline phase '%s' cannot contain more than one reducer but it has reducers '%s'.",
      phaseSpec.getPhaseName(), Joiner.on(',').join(reducers)));
  }
}