/**
 * Hands this node to the given {@link Aggregator} as part of a traversal.
 *
 * @param accumulator the traversal state accumulated so far
 * @param aggregator  the visitor that folds this node into the state
 * @param <T>         type of the traversal state
 * @return the traversal state after visiting this node
 */
@Override
protected <T> T accept(T accumulator, Aggregator<T> aggregator) {
    return aggregator.aggregate(accumulator, this);
}
/**
 * Traverses this instance and all its not-yet-executed predecessors, marking every
 * visited node as executed. Equivalent to {@code traverse(accumulator, aggregator, true)}.
 *
 * @param accumulator state maintained over the traversal
 * @param aggregator  visits the traversed instances
 * @param <T>         type of the traversal state
 * @return the {@code accumulator} in its final state
 */
public <T> T traverseAndMark(T accumulator, Aggregator<T> aggregator) {
    final boolean markVisited = true;
    return this.traverse(accumulator, aggregator, markVisited);
}
/**
 * Models quasi-eager execution: all reachable {@link LazyExecutionLineageNode}s are marked as
 * executed and collected, but the output {@link ChannelInstance}s are <i>not</i> marked as
 * produced yet (the traversal starts from the operator node, not from the outputs).
 *
 * @param inputs          the input {@link ChannelInstance}s
 * @param outputs         the output {@link ChannelInstance}s
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return the collected {@link ExecutionLineageNode}s and {@link ChannelInstance}s
 */
static Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> modelQuasiEagerExecution(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        OptimizationContext.OperatorContext operatorContext) {
    // Create a lineage node for the operator execution and register its cost estimator.
    final ExecutionLineageNode operatorNode = new ExecutionLineageNode(operatorContext);
    operatorNode.addAtomicExecutionFromOperatorContext();
    // Wire inputs -> operator node -> outputs.
    LazyExecutionLineageNode.connectAll(inputs, operatorNode, outputs);
    // Traverse from the operator node itself, so the outputs stay unmarked.
    return operatorNode.collectAndMark();
}
/**
 * Reads the text file at {@code this.getInputUrl()} into a {@link JavaRDD} and feeds it to the
 * single output {@link RddChannel.Instance}. The "prepare" load is accounted eagerly (returned
 * collected), while the "main" load is attached lazily to the output's lineage.
 *
 * @param inputs          the input {@link ChannelInstance}s (none expected for a source)
 * @param outputs         the output {@link ChannelInstance}s (exactly one {@link RddChannel.Instance})
 * @param sparkExecutor   provides the {@link org.apache.spark.api.java.JavaSparkContext} and configuration
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return the eagerly executed {@link ExecutionLineageNode}s and produced {@link ChannelInstance}s
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    // Create the RDD over the input file and hand it to the output channel.
    final RddChannel.Instance outputChannel = (RddChannel.Instance) outputs[0];
    final JavaRDD<String> lines = sparkExecutor.sc.textFile(this.getInputUrl());
    this.name(lines);
    outputChannel.accept(lines, sparkExecutor);

    // Account the eager set-up cost ("prepare") separately from the lazily
    // evaluated read cost ("main").
    final ExecutionLineageNode prepareNode = new ExecutionLineageNode(operatorContext);
    prepareNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.spark.textfilesource.load.prepare", sparkExecutor.getConfiguration()
    ));
    final ExecutionLineageNode mainNode = new ExecutionLineageNode(operatorContext);
    mainNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.spark.textfilesource.load.main", sparkExecutor.getConfiguration()
    ));
    // The main load only materializes when the output RDD is actually consumed.
    outputChannel.getLineage().addPredecessor(mainNode);

    return prepareNode.collectAndMark();
}
/**
 * Models lazy execution: the lineage is wired up, but no {@link LazyExecutionLineageNode} is
 * marked as executed and nothing is collected.
 *
 * @param inputs          the input {@link ChannelInstance}s
 * @param outputs         the output {@link ChannelInstance}s
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return a {@link Tuple} of two empty {@link Collection}s (nothing is executed eagerly)
 */
static Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> modelLazyExecution(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        OptimizationContext.OperatorContext operatorContext) {
    // Build the lineage node for this operator and register its cost estimator.
    final ExecutionLineageNode operatorNode = new ExecutionLineageNode(operatorContext);
    operatorNode.addAtomicExecutionFromOperatorContext();
    // Wire inputs -> operator node -> outputs; traversal happens later, on demand.
    LazyExecutionLineageNode.connectAll(inputs, operatorNode, outputs);
    return new Tuple<>(Collections.emptyList(), Collections.emptyList());
}
/**
 * Wires the lineage: every non-{@code null} input becomes a predecessor of the
 * {@code executionLineageNode}, which in turn becomes a predecessor of every
 * non-{@code null} output.
 *
 * @param inputs               input {@link ChannelInstance}s
 * @param executionLineageNode in-between {@link ExecutionLineageNode}
 * @param outputs              output {@link ChannelInstance}s
 * @see #addPredecessor(LazyExecutionLineageNode)
 */
public static void connectAll(ChannelInstance[] inputs,
                              ExecutionLineageNode executionLineageNode,
                              ChannelInstance[] outputs) {
    for (ChannelInstance input : inputs) {
        if (input == null) continue;
        executionLineageNode.addPredecessor(input.getLineage());
    }
    for (ChannelInstance output : outputs) {
        if (output == null) continue;
        output.getLineage().addPredecessor(executionLineageNode);
    }
}
/**
 * Registers an {@link AtomicExecution} built from the {@link LoadProfileEstimator} of the
 * described {@link OptimizationContext.OperatorContext}.
 *
 * @return this instance
 */
public ExecutionLineageNode addAtomicExecutionFromOperatorContext() {
    final LoadProfileEstimator estimator = this.operatorContext.getLoadProfileEstimator();
    return this.add(estimator);
}
/** * Utility method to forward a {@link JavaChannelInstance} to another. * * @param input that should be forwarded * @param output to that should be forwarded */ static void forward(ChannelInstance input, ChannelInstance output) { // Do the forward. if (output instanceof CollectionChannel.Instance) { ((CollectionChannel.Instance) output).accept(((CollectionChannel.Instance) input).provideCollection()); } else if (output instanceof StreamChannel.Instance) { ((StreamChannel.Instance) output).accept(((JavaChannelInstance) input).provideStream()); } else { throw new RheemException(String.format("Cannot forward %s to %s.", input, output)); } // Manipulate the lineage. output.getLineage().addPredecessor(input.getLineage()); }
/**
 * Traverses this instance and, recursively, all its predecessors, skipping any node that is
 * already marked as executed. Predecessors are visited before this node; predecessors that
 * turn out to be executed after their visit are unlinked so they are never traversed again.
 *
 * @param accumulator state that is maintained over the traversal
 * @param aggregator  visits the traversed instances
 * @param isMark      whether traversed instances should be marked as executed
 * @param <T>         type of the traversal state
 * @return the {@code accumulator} in its final state
 */
public <T> T traverse(T accumulator, Aggregator<T> aggregator, boolean isMark) {
    if (this.isExecuted) {
        // Executed nodes are opaque: neither they nor their predecessors are visited.
        return accumulator;
    }
    final Iterator<LazyExecutionLineageNode> iterator = this.predecessors.iterator();
    while (iterator.hasNext()) {
        final LazyExecutionLineageNode predecessor = iterator.next();
        accumulator = predecessor.traverse(accumulator, aggregator, isMark);
        // Prune executed predecessors so future traversals do not revisit them.
        if (predecessor.isExecuted) {
            iterator.remove();
        }
    }
    accumulator = this.accept(accumulator, aggregator);
    if (isMark) {
        this.markAsExecuted();
    }
    return accumulator;
}
/**
 * Collects and marks all unmarked {@link LazyExecutionLineageNode}s reachable from this instance,
 * storing them into the two caller-provided collectors.
 *
 * @param executionLineageCollector receives the unmarked {@link ExecutionLineageNode}s
 * @param channelInstanceCollector  receives the {@link ChannelInstance}s of unmarked
 *                                  {@link LazyExecutionLineageNode}s
 * @return the two collectors, bundled as a {@link Tuple}
 */
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> collectAndMark(
        Collection<ExecutionLineageNode> executionLineageCollector,
        Collection<ChannelInstance> channelInstanceCollector
) {
    final Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> collectors =
            new Tuple<>(executionLineageCollector, channelInstanceCollector);
    return this.traverseAndMark(collectors, new CollectingAggregator());
}
/**
 * Folds a {@link ChannelLineageNode} into the accumulator by recording its
 * {@link ChannelInstance} in the second collector.
 *
 * @param accumulator the pair of collectors maintained over the traversal
 * @param node        the visited {@link ChannelLineageNode}
 * @return the same {@code accumulator}
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> aggregate(
        Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> accumulator,
        ChannelLineageNode node) {
    final Collection<ChannelInstance> channelInstances = accumulator.getField1();
    channelInstances.add(node.getChannelInstance());
    return accumulator;
}
/** * Creates a new instance and registers it with its {@link Executor}. * <p>Sets up a fresh {@link ChannelLineageNode} wrapping this instance and stores the producer's {@link OptimizationContext.OperatorContext}.</p> * * @param executor that maintains this instance * @param producerOperatorContext the {@link OptimizationContext.OperatorContext} for the producing * {@link ExecutionOperator} * @param producerOutputIndex the output index of the producer {@link ExecutionTask}; * NOTE(review): not stored by this constructor in the visible code — presumably consumed * elsewhere or unused; TODO confirm */ protected AbstractChannelInstance(Executor executor, OptimizationContext.OperatorContext producerOperatorContext, int producerOutputIndex) { super(executor); this.lineage = new ChannelLineageNode(this); this.producerOperatorContext = producerOperatorContext; }
/**
 * Collects and marks all unmarked {@link ExecutionLineageNode}s reachable from this instance,
 * using freshly created collectors.
 *
 * @return the collected {@link ExecutionLineageNode}s and produced {@link ChannelInstance}s
 */
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> collectAndMark() {
    final Collection<ExecutionLineageNode> executionLineageCollector = new LinkedList<>();
    final Collection<ChannelInstance> channelInstanceCollector = new LinkedList<>();
    return this.collectAndMark(executionLineageCollector, channelInstanceCollector);
}
/**
 * Marks this node as executed and flags its {@link ChannelInstance} as produced, releasing
 * the reference that was held while the instance was still pending.
 */
@Override
protected void markAsExecuted() {
    super.markAsExecuted();
    final ChannelInstance instance = this.channelInstance;
    // Production must happen exactly once.
    assert !instance.wasProduced();
    instance.markProduced();
    instance.noteDiscardedReference(false);
}
/**
 * Models eager execution: all reachable {@link LazyExecutionLineageNode}s are marked as executed
 * and collected. With outputs present, the traversal starts from the outputs (marking them as
 * produced); without outputs, it starts from the operator node itself.
 *
 * @param inputs          the input {@link ChannelInstance}s
 * @param outputs         the output {@link ChannelInstance}s
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return the collected {@link ExecutionLineageNode}s and produced {@link ChannelInstance}s
 */
static Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> modelEagerExecution(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        OptimizationContext.OperatorContext operatorContext) {
    final ExecutionLineageNode operatorNode = new ExecutionLineageNode(operatorContext);
    operatorNode.addAtomicExecutionFromOperatorContext();
    LazyExecutionLineageNode.connectAll(inputs, operatorNode, outputs);

    if (outputs.length == 0) {
        // No outputs to traverse from: collect starting at the operator node.
        return operatorNode.collectAndMark();
    }
    // Traverse from every output so they are marked as produced as well.
    final Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> collectors =
            new Tuple<>(new LinkedList<>(), new LinkedList<>());
    for (ChannelInstance output : outputs) {
        output.getLineage().collectAndMark(collectors.getField0(), collectors.getField1());
    }
    return collectors;
}
/**
 * Wraps the given {@link LoadProfileEstimator} into an {@link AtomicExecution} and adds it to
 * this instance. Short-cut for {@link #add(AtomicExecution)}.
 *
 * @param loadProfileEstimator for which the {@link AtomicExecution} should be added
 * @return this instance
 */
public ExecutionLineageNode add(LoadProfileEstimator loadProfileEstimator) {
    final AtomicExecution atomicExecution = new AtomicExecution(loadProfileEstimator);
    return this.add(atomicExecution);
}
/** * Utility method to forward a {@link RddChannel.Instance} to another. * * @param input that should be forwarded * @param output to that should be forwarded */ public void forward(ChannelInstance input, ChannelInstance output) { final RddChannel.Instance rddInput = (RddChannel.Instance) input; final RddChannel.Instance rddOutput = (RddChannel.Instance) output; // Do the forward. assert rddInput.getChannel().getDescriptor() == RddChannel.CACHED_DESCRIPTOR || rddOutput.getChannel().getDescriptor() != RddChannel.CACHED_DESCRIPTOR; rddOutput.accept(rddInput.provideRdd(), this); // Manipulate the lineage. output.getLineage().addPredecessor(input.getLineage()); }
/**
 * Lets the given {@link Aggregator} visit this node during a traversal.
 *
 * @param accumulator the state carried through the traversal
 * @param aggregator  the visitor folding nodes into the state
 * @param <T>         type of the traversal state
 * @return the state after this node has been visited
 */
@Override
protected <T> T accept(T accumulator, Aggregator<T> aggregator) {
    return aggregator.aggregate(accumulator, this);
}
/**
 * Traverses this instance and its not-yet-executed predecessors without marking any node.
 * Equivalent to {@code traverse(accumulator, aggregator, false)}.
 *
 * @param accumulator state maintained over the traversal
 * @param aggregator  visits the traversed instances
 * @param <T>         type of the traversal state
 * @return the {@code accumulator} in its final state
 */
public <T> T traverse(T accumulator, Aggregator<T> aggregator) {
    final boolean markVisited = false;
    return this.traverse(accumulator, aggregator, markVisited);
}
/** * Adds a predecessor. * * @param predecessor the predecessor */ public void addPredecessor(LazyExecutionLineageNode predecessor) { assert !this.predecessors.contains(predecessor) : String.format("Lineage predecessor %s is already present.", predecessor); this.predecessors.add(predecessor); // TODO: Pinning the input ChannelInstances down like this is not very elegant. // A better solution would be to incorporate all LazyExecutionLineageNodes into the // reference counting scheme. However, this would imply considerable effort to get it right. if (!this.isExecuted && predecessor instanceof ChannelLineageNode) { ChannelInstance channelInstance = ((ChannelLineageNode) predecessor).getChannelInstance(); this.pinnedDownChannelInstances.add(channelInstance); channelInstance.noteObtainedReference(); } }