/**
 * Creates a new instance and registers it with its {@link Executor}.
 *
 * @param executor                that maintains this instance
 * @param producerOperatorContext the {@link OptimizationContext.OperatorContext} for the producing
 *                                {@link ExecutionOperator}
 * @param producerOutputIndex     the output index of the producer {@link ExecutionTask};
 *                                NOTE(review): documented but unused in this constructor body —
 *                                presumably consumed by a subclass or kept for API symmetry; confirm
 */
protected AbstractChannelInstance(Executor executor,
                                  OptimizationContext.OperatorContext producerOperatorContext,
                                  int producerOutputIndex) {
    super(executor);
    // Every channel instance starts with its own lineage node wrapping it.
    this.lineage = new ChannelLineageNode(this);
    this.producerOperatorContext = producerOperatorContext;
}
/**
 * Folds the visited {@link ChannelLineageNode} into the {@code accumulator} by collecting its
 * {@link ChannelInstance} into the accumulator's second field.
 *
 * @param accumulator collects executed nodes and produced {@link ChannelInstance}s
 * @param node        the currently visited node
 * @return the (mutated) {@code accumulator}
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> aggregate(
        Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> accumulator,
        ChannelLineageNode node) {
    final ChannelInstance producedInstance = node.getChannelInstance();
    accumulator.getField1().add(producedInstance);
    return accumulator;
}
/**
 * Wires the given {@code executionLineageNode} in between the {@code inputs} and {@code outputs}:
 * every non-{@code null} input becomes a predecessor of the node, and the node in turn becomes a
 * predecessor of every non-{@code null} output.
 *
 * @param inputs               input {@link ChannelInstance}s (entries may be {@code null})
 * @param executionLineageNode in-between {@link ExecutionLineageNode}
 * @param outputs              output {@link ChannelInstance}s (entries may be {@code null})
 * @see #addPredecessor(LazyExecutionLineageNode)
 */
public static void connectAll(ChannelInstance[] inputs,
                              ExecutionLineageNode executionLineageNode,
                              ChannelInstance[] outputs) {
    for (final ChannelInstance input : inputs) {
        if (input == null) continue;
        executionLineageNode.addPredecessor(input.getLineage());
    }
    for (final ChannelInstance output : outputs) {
        if (output == null) continue;
        output.getLineage().addPredecessor(executionLineageNode);
    }
}
/**
 * Models eager execution by marking all {@link LazyExecutionLineageNode}s as executed and
 * collecting all marked ones.
 *
 * @param inputs          the input {@link ChannelInstance}s
 * @param outputs         the output {@link ChannelInstance}s
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return the executed {@link ExecutionLineageNode}s and produced {@link ChannelInstance}s
 */
static Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> modelEagerExecution(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        OptimizationContext.OperatorContext operatorContext) {
    final ExecutionLineageNode node = new ExecutionLineageNode(operatorContext);
    node.addAtomicExecutionFromOperatorContext();
    LazyExecutionLineageNode.connectAll(inputs, node, outputs);

    // Without outputs, collect directly from the execution node itself.
    if (outputs.length == 0) {
        return node.collectAndMark();
    }

    // Otherwise, collect transitively from each output's lineage into shared accumulators.
    final Collection<ExecutionLineageNode> executedNodes = new LinkedList<>();
    final Collection<ChannelInstance> producedInstances = new LinkedList<>();
    for (final ChannelInstance output : outputs) {
        output.getLineage().collectAndMark(executedNodes, producedInstances);
    }
    return new Tuple<>(executedNodes, producedInstances);
}
/** * Utility method to forward a {@link JavaChannelInstance} to another. * * @param input that should be forwarded * @param output to that should be forwarded */ static void forward(ChannelInstance input, ChannelInstance output) { // Do the forward. if (output instanceof CollectionChannel.Instance) { ((CollectionChannel.Instance) output).accept(((CollectionChannel.Instance) input).provideCollection()); } else if (output instanceof StreamChannel.Instance) { ((StreamChannel.Instance) output).accept(((JavaChannelInstance) input).provideStream()); } else { throw new RheemException(String.format("Cannot forward %s to %s.", input, output)); } // Manipulate the lineage. output.getLineage().addPredecessor(input.getLineage()); }
/**
 * Reads the text file behind {@code this.getInputUrl()} lazily line by line and feeds the lines
 * into the single output {@link StreamChannel.Instance}.
 *
 * @param inputs          input {@link ChannelInstance}s (none expected)
 * @param outputs         output {@link ChannelInstance}s (exactly one {@link StreamChannel.Instance})
 * @param javaExecutor    provides the {@link Configuration} for load estimation
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return the already executed "prepare" {@link ExecutionLineageNode}s and produced instances
 * @throws RheemException if the file system is inaccessible or opening the file fails
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    String url = this.getInputUrl().trim();
    FileSystem fs = FileSystems.getFileSystem(url).orElseThrow(
            () -> new RheemException(String.format("Cannot access file system of %s.", url))
    );

    try {
        final InputStream inputStream = fs.open(url);
        // Fix: specify UTF-8 explicitly. The charset-less InputStreamReader constructor uses the
        // platform-default charset, which makes the read content machine-dependent.
        Stream<String> lines = new BufferedReader(
                new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8)
        ).lines();
        // NOTE: the stream is lazy; the InputStream is intentionally left open here so the
        // downstream consumer of the StreamChannel can still pull lines later.
        ((StreamChannel.Instance) outputs[0]).accept(lines);
    } catch (IOException e) {
        throw new RheemException(String.format("Reading %s failed.", url), e);
    }

    // The "prepare" part (opening the file) is done eagerly; the "main" part (reading lines)
    // only happens when the output stream is consumed, so it is attached to the output lineage.
    ExecutionLineageNode prepareLineageNode = new ExecutionLineageNode(operatorContext);
    prepareLineageNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.java.textfilesource.load.prepare", javaExecutor.getConfiguration()
    ));
    ExecutionLineageNode mainLineageNode = new ExecutionLineageNode(operatorContext);
    mainLineageNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.java.textfilesource.load.main", javaExecutor.getConfiguration()
    ));

    outputs[0].getLineage().addPredecessor(mainLineageNode);
    return prepareLineageNode.collectAndMark();
}
/** * Adds a predecessor. * * @param predecessor the predecessor */ public void addPredecessor(LazyExecutionLineageNode predecessor) { assert !this.predecessors.contains(predecessor) : String.format("Lineage predecessor %s is already present.", predecessor); this.predecessors.add(predecessor); // TODO: Pinning the input ChannelInstances down like this is not very elegant. // A better solution would be to incorporate all LazyExecutionLineageNodes into the // reference counting scheme. However, this would imply considerable effort to get it right. if (!this.isExecuted && predecessor instanceof ChannelLineageNode) { ChannelInstance channelInstance = ((ChannelLineageNode) predecessor).getChannelInstance(); this.pinnedDownChannelInstances.add(channelInstance); channelInstance.noteObtainedReference(); } }
outputs[0].getLineage().addPredecessor(probingExecutionLineageNode);
/** * Utility method to forward a {@link RddChannel.Instance} to another. * * @param input that should be forwarded * @param output to that should be forwarded */ public void forward(ChannelInstance input, ChannelInstance output) { final RddChannel.Instance rddInput = (RddChannel.Instance) input; final RddChannel.Instance rddOutput = (RddChannel.Instance) output; // Do the forward. assert rddInput.getChannel().getDescriptor() == RddChannel.CACHED_DESCRIPTOR || rddOutput.getChannel().getDescriptor() != RddChannel.CACHED_DESCRIPTOR; rddOutput.accept(rddInput.provideRdd(), this); // Manipulate the lineage. output.getLineage().addPredecessor(input.getLineage()); }
output.getLineage().addPredecessor(probingExecutionLineageNode); return new Tuple<>(executionLineageNodes, producedChannelInstances);
"rheem.graphchi.pagerank.load.output", configuration )); outputChannelInstance.getLineage().addPredecessor(outputExecutionLineage);
/**
 * Loads the text file behind {@code this.getInputUrl()} as a Spark RDD of lines and feeds it
 * into the single output {@link RddChannel.Instance}.
 *
 * @param inputs          input {@link ChannelInstance}s (none expected)
 * @param outputs         output {@link ChannelInstance}s (exactly one {@link RddChannel.Instance})
 * @param sparkExecutor   provides the {@link SparkContext} and {@link Configuration}
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return the already executed "prepare" {@link ExecutionLineageNode}s and produced instances
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    // Create the (lazy) RDD and hand it to the output channel.
    final RddChannel.Instance outputChannel = (RddChannel.Instance) outputs[0];
    final JavaRDD<String> lines = sparkExecutor.sc.textFile(this.getInputUrl());
    this.name(lines);
    outputChannel.accept(lines, sparkExecutor);

    // The "prepare" part is done eagerly; the "main" part only happens when Spark evaluates the
    // RDD, so it is attached to the output lineage instead.
    final ExecutionLineageNode prepareNode = new ExecutionLineageNode(operatorContext);
    prepareNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.spark.textfilesource.load.prepare", sparkExecutor.getConfiguration()
    ));
    final ExecutionLineageNode mainNode = new ExecutionLineageNode(operatorContext);
    mainNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.spark.textfilesource.load.main", sparkExecutor.getConfiguration()
    ));

    outputChannel.getLineage().addPredecessor(mainNode);
    return prepareNode.collectAndMark();
}
outputs[0].getLineage().addPredecessor(probingExecutionLineageNode);