/**
 * Creates a {@code MockRecordWriter} for the given data source task.
 *
 * <p>The writer at index 0 of the task's environment is handed to the superclass constructor.
 *
 * @param inputBase task whose environment supplies the writer passed to the superclass
 * @param outputClass record type token; it is not read anywhere in this constructor
 */
public MockRecordWriter(DataSourceTask<?> inputBase, Class<StreamRecord<Tuple1<Integer>>> outputClass) { super(inputBase.getEnvironment().getWriter(0)); }
initInputFormat(); LOG.debug(getLogString("Start registering input and output")); initOutputs(getUserCodeClassLoader()); } catch (Exception ex) { throw new RuntimeException("The initialization of the DataSource's outputs caused an error: " + LOG.debug(getLogString("Finished registering input and output")); LOG.debug(getLogString("Starting data source operator")); RuntimeContext ctx = createRuntimeContext(); LOG.debug(getLogString("Rich Source detected. Initializing runtime context.")); ((RichInputFormat) this.format).openInputFormat(); LOG.debug(getLogString("Rich Source detected. Opening the InputFormat.")); ExecutionConfig executionConfig = getExecutionConfig(); final Iterator<InputSplit> splitIterator = getInputSplits(); LOG.debug(getLogString("Opening input split " + split.toString())); LOG.debug(getLogString("Starting to read input from split " + split.toString())); LOG.debug(getLogString("Closing input split " + split.toString())); LOG.debug(getLogString("Rich Source detected. Closing the InputFormat."));
/**
 * Builds the runtime context handed to the user code of this source.
 *
 * <p>The operator metric group is registered under the source's own name. That name is the
 * first segment of the task name (segments are separated by {@code "->"}), trimmed, with a
 * leading {@code "CHAIN "} marker removed when present.
 *
 * <p>The marker is only stripped when the complete six-character prefix {@code "CHAIN "} is
 * present. Names that merely begin with the letters {@code CHAIN} are left untouched, and a
 * name that is exactly {@code "CHAIN"} no longer triggers a
 * {@link StringIndexOutOfBoundsException}.
 *
 * @return a new {@code DistributedRuntimeUDFContext} for this task
 */
public DistributedRuntimeUDFContext createRuntimeContext() {
    Environment env = getEnvironment();

    // Only the first segment of a "a -> b -> c" style task name identifies the source.
    String sourceName = env.getTaskInfo().getTaskName().split("->")[0].trim();

    // Test for and strip the very same prefix, so the check and the cut cannot disagree.
    final String chainPrefix = "CHAIN ";
    if (sourceName.startsWith(chainPrefix)) {
        sourceName = sourceName.substring(chainPrefix.length());
    }

    return new DistributedRuntimeUDFContext(
            env.getTaskInfo(),
            getUserCodeClassLoader(),
            getExecutionConfig(),
            env.getDistributedCacheEntries(),
            env.getAccumulatorRegistry().getUserMap(),
            env.getMetricGroup().getOrAddOperator(sourceName));
}
} // closes the enclosing scope whose opening lies outside this chunk
/**
 * Prepares the output side of this task.
 *
 * <p>Fresh, empty lists for the chained drivers and the eventual record writers are created and
 * passed, together with this task, its configuration, the execution config and the accumulator
 * registry of the environment, to {@code BatchTask.initOutputs}. The collector returned by that
 * call becomes this task's output.
 *
 * @param cl class loader forwarded to {@code BatchTask.initOutputs}
 * @throws Exception if the delegated output initialization fails
 */
private void initOutputs(ClassLoader cl) throws Exception {
    chainedTasks = new ArrayList<>();
    eventualOutputs = new ArrayList<>();

    output = BatchTask.initOutputs(
            this,
            cl,
            config,
            chainedTasks,
            eventualOutputs,
            getExecutionConfig(),
            getEnvironment().getAccumulatorRegistry());
}
/**
 * Composes a log line for this task.
 *
 * <p>The text is produced by the two-argument {@code getLogString} overload, which receives the
 * given message together with the task name read from this task's {@code TaskInfo}.
 *
 * @param message The main message for the log.
 * @return The string ready for logging.
 */
private String getLogString(String message) {
    final String taskName = this.getEnvironment().getTaskInfo().getTaskName();
    return getLogString(message, taskName);
}
ClassLoader userCodeClassLoader = getUserCodeClassLoader(); Configuration taskConf = getTaskConfiguration(); this.config = new TaskConfig(taskConf);
OperatorID operatorID = OperatorID.fromJobVertexID(getEnvironment().getJobVertexId()); split = provider.getNextInputSplit(operatorID, getUserCodeClassLoader()); } catch (InputSplitProviderException e) { throw new RuntimeException("Could not retrieve next input split.", e);
/**
 * Marks this task as canceled and writes a debug log entry about the cancellation.
 *
 * @throws Exception declared by the overridden signature
 */
@Override
public void cancel() throws Exception {
    taskCanceled = true;

    final String logLine = getLogString("Cancelling data source operator");
    LOG.debug(logLine);
}
/**
 * Reports whether another input split is available.
 *
 * <p>Once the provider has returned {@code null} the iterator is marked exhausted and every
 * later call returns {@code false} without asking the provider again. A split that was fetched
 * but not yet handed out is kept in {@code nextSplit}, so repeated calls do not request
 * additional splits.
 *
 * @return {@code true} if a split is buffered or could be fetched, {@code false} otherwise
 * @throws RuntimeException if the provider fails to deliver the next split
 */
@Override
public boolean hasNext() {
    if (exhausted) {
        return false;
    }

    if (nextSplit == null) {
        final InputSplit fetched;
        try {
            fetched = provider.getNextInputSplit(getUserCodeClassLoader());
        } catch (InputSplitProviderException e) {
            throw new RuntimeException("Could not retrieve next input split.", e);
        }

        if (fetched == null) {
            exhausted = true;
        } else {
            nextSplit = fetched;
        }
    }

    return nextSplit != null;
}
initInputFormat(); LOG.debug(getLogString("Start registering input and output")); initOutputs(getUserCodeClassLoader()); } catch (Exception ex) { throw new RuntimeException("The initialization of the DataSource's outputs caused an error: " + LOG.debug(getLogString("Finished registering input and output")); LOG.debug(getLogString("Starting data source operator")); RuntimeContext ctx = createRuntimeContext(); LOG.debug(getLogString("Rich Source detected. Initializing runtime context.")); ((RichInputFormat) this.format).openInputFormat(); LOG.debug(getLogString("Rich Source detected. Opening the InputFormat.")); ExecutionConfig executionConfig = getExecutionConfig(); final Iterator<InputSplit> splitIterator = getInputSplits(); LOG.debug(getLogString("Opening input split " + split.toString())); LOG.debug(getLogString("Starting to read input from split " + split.toString())); LOG.debug(getLogString("Closing input split " + split.toString())); LOG.debug(getLogString("Rich Source detected. Closing the InputFormat."));
/**
 * Builds the runtime context handed to the user code of this source.
 *
 * <p>The operator metric group is registered under the source's own name. That name is the
 * first segment of the task name (segments are separated by {@code "->"}), trimmed, with a
 * leading {@code "CHAIN "} marker removed when present.
 *
 * <p>The marker is only stripped when the complete six-character prefix {@code "CHAIN "} is
 * present. Names that merely begin with the letters {@code CHAIN} are left untouched, and a
 * name that is exactly {@code "CHAIN"} no longer triggers a
 * {@link StringIndexOutOfBoundsException}.
 *
 * @return a new {@code DistributedRuntimeUDFContext} for this task
 */
public DistributedRuntimeUDFContext createRuntimeContext() {
    Environment env = getEnvironment();

    // Only the first segment of a "a -> b -> c" style task name identifies the source.
    String sourceName = env.getTaskInfo().getTaskName().split("->")[0].trim();

    // Test for and strip the very same prefix, so the check and the cut cannot disagree.
    final String chainPrefix = "CHAIN ";
    if (sourceName.startsWith(chainPrefix)) {
        sourceName = sourceName.substring(chainPrefix.length());
    }

    return new DistributedRuntimeUDFContext(
            env.getTaskInfo(),
            getUserCodeClassLoader(),
            getExecutionConfig(),
            env.getDistributedCacheEntries(),
            env.getAccumulatorRegistry(),
            env.getMetricGroup().addOperator(sourceName));
}
} // closes the enclosing scope whose opening lies outside this chunk
/**
 * Prepares the output side of this task.
 *
 * <p>Fresh, empty lists for the chained drivers and the eventual record writers are created and
 * passed, together with this task, its configuration, the execution config and the user map of
 * the environment's accumulator registry, to {@code BatchTask.initOutputs}. The collector
 * returned by that call becomes this task's output.
 *
 * @param cl class loader forwarded to {@code BatchTask.initOutputs}
 * @throws Exception if the delegated output initialization fails
 */
private void initOutputs(ClassLoader cl) throws Exception {
    chainedTasks = new ArrayList<>();
    eventualOutputs = new ArrayList<>();

    output = BatchTask.initOutputs(
            this,
            cl,
            config,
            chainedTasks,
            eventualOutputs,
            getExecutionConfig(),
            getEnvironment().getAccumulatorRegistry().getUserMap());
}
/**
 * Composes a log line for this task.
 *
 * <p>The text is produced by the two-argument {@code getLogString} overload, which receives the
 * given message together with the task name read from this task's {@code TaskInfo}.
 *
 * @param message The main message for the log.
 * @return The string ready for logging.
 */
private String getLogString(String message) {
    final String taskName = this.getEnvironment().getTaskInfo().getTaskName();
    return getLogString(message, taskName);
}
ClassLoader userCodeClassLoader = getUserCodeClassLoader(); Configuration taskConf = getTaskConfiguration(); this.config = new TaskConfig(taskConf);
/**
 * Marks this task as canceled and writes a debug log entry about the cancellation.
 *
 * @throws Exception declared by the overridden signature
 */
@Override
public void cancel() throws Exception {
    taskCanceled = true;

    final String logLine = getLogString("Cancelling data source operator");
    LOG.debug(logLine);
}
/**
 * Reports whether another input split is available.
 *
 * <p>Once the provider has returned {@code null} the iterator is marked exhausted and every
 * later call returns {@code false} without asking the provider again. A split that was fetched
 * but not yet handed out is kept in {@code nextSplit}, so repeated calls do not request
 * additional splits.
 *
 * @return {@code true} if a split is buffered or could be fetched, {@code false} otherwise
 * @throws RuntimeException if the provider fails to deliver the next split
 */
@Override
public boolean hasNext() {
    if (exhausted) {
        return false;
    }

    if (nextSplit == null) {
        final InputSplit fetched;
        try {
            fetched = provider.getNextInputSplit(getUserCodeClassLoader());
        } catch (InputSplitProviderException e) {
            throw new RuntimeException("Could not retrieve next input split.", e);
        }

        if (fetched == null) {
            exhausted = true;
        } else {
            nextSplit = fetched;
        }
    }

    return nextSplit != null;
}
initInputFormat(); LOG.debug(getLogString("Start registering input and output")); initOutputs(getUserCodeClassLoader()); } catch (Exception ex) { throw new RuntimeException("The initialization of the DataSource's outputs caused an error: " + LOG.debug(getLogString("Finished registering input and output")); LOG.debug(getLogString("Starting data source operator")); RuntimeContext ctx = createRuntimeContext(); Counter completedSplitsCounter = ctx.getMetricGroup().counter("numSplitsProcessed"); ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseInputMetricsForTask(); LOG.debug(getLogString("Rich Source detected. Initializing runtime context.")); ((RichInputFormat) this.format).openInputFormat(); LOG.debug(getLogString("Rich Source detected. Opening the InputFormat.")); ExecutionConfig executionConfig = getExecutionConfig(); final Iterator<InputSplit> splitIterator = getInputSplits(); LOG.debug(getLogString("Opening input split " + split.toString())); LOG.debug(getLogString("Starting to read input from split " + split.toString())); LOG.debug(getLogString("Closing input split " + split.toString()));
/**
 * Builds the runtime context handed to the user code of this source.
 *
 * <p>The operator metric group is registered under the source's own name. That name is the
 * first segment of the task name (segments are separated by {@code "->"}), trimmed, with a
 * leading {@code "CHAIN "} marker removed when present.
 *
 * <p>The marker is only stripped when the complete six-character prefix {@code "CHAIN "} is
 * present. Names that merely begin with the letters {@code CHAIN} are left untouched, and a
 * name that is exactly {@code "CHAIN"} no longer triggers a
 * {@link StringIndexOutOfBoundsException}.
 *
 * @return a new {@code DistributedRuntimeUDFContext} for this task
 */
public DistributedRuntimeUDFContext createRuntimeContext() {
    Environment env = getEnvironment();

    // Only the first segment of a "a -> b -> c" style task name identifies the source.
    String sourceName = env.getTaskInfo().getTaskName().split("->")[0].trim();

    // Test for and strip the very same prefix, so the check and the cut cannot disagree.
    final String chainPrefix = "CHAIN ";
    if (sourceName.startsWith(chainPrefix)) {
        sourceName = sourceName.substring(chainPrefix.length());
    }

    return new DistributedRuntimeUDFContext(
            env.getTaskInfo(),
            getUserCodeClassLoader(),
            getExecutionConfig(),
            env.getDistributedCacheEntries(),
            env.getAccumulatorRegistry().getUserMap(),
            env.getMetricGroup().addOperator(sourceName));
}
} // closes the enclosing scope whose opening lies outside this chunk
/**
 * Prepares the output side of this task.
 *
 * <p>Fresh, empty lists for the chained drivers and the eventual record writers are created and
 * passed, together with this task, its configuration, the execution config and the user map of
 * the environment's accumulator registry, to {@code BatchTask.initOutputs}. The collector
 * returned by that call becomes this task's output.
 *
 * @param cl class loader forwarded to {@code BatchTask.initOutputs}
 * @throws Exception if the delegated output initialization fails
 */
private void initOutputs(ClassLoader cl) throws Exception {
    chainedTasks = new ArrayList<>();
    eventualOutputs = new ArrayList<>();

    output = BatchTask.initOutputs(
            this,
            cl,
            config,
            chainedTasks,
            eventualOutputs,
            getExecutionConfig(),
            getEnvironment().getAccumulatorRegistry().getUserMap());
}
/**
 * Composes a log line for this task.
 *
 * <p>The text is produced by the two-argument {@code getLogString} overload, which receives the
 * given message together with the task name read from this task's {@code TaskInfo}.
 *
 * @param message The main message for the log.
 * @return The string ready for logging.
 */
private String getLogString(String message) {
    final String taskName = this.getEnvironment().getTaskInfo().getTaskName();
    return getLogString(message, taskName);
}