/**
 * Models quasi-eager execution: the {@link LazyExecutionLineageNode}s are marked as executed and the marked
 * ones are collected, although the output {@link ChannelInstance}s have not been produced yet.
 *
 * @param inputs          the input {@link ChannelInstance}s
 * @param outputs         the output {@link ChannelInstance}s
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return the {@link ExecutionLineageNode}s and {@link ChannelInstance}s gathered by
 * {@link ExecutionLineageNode#collectAndMark()} on the node created for {@code operatorContext}
 */
static Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> modelQuasiEagerExecution(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        OptimizationContext.OperatorContext operatorContext) {
    // A single lineage node represents the execution described by the operator context.
    final ExecutionLineageNode lineageNode =
            new ExecutionLineageNode(operatorContext).addAtomicExecutionFromOperatorContext();

    // Wire the node between the lineages of the inputs and the outputs.
    LazyExecutionLineageNode.connectAll(inputs, lineageNode, outputs);

    // Collection starts at the operator's own node, not at the lineage of the outputs.
    return lineageNode.collectAndMark();
}
/**
 * Registers an {@link AtomicExecution} on this instance, using the {@link LoadProfileEstimator} obtained from
 * the {@link OptimizationContext.OperatorContext} that this instance describes.
 *
 * @return this instance
 */
public ExecutionLineageNode addAtomicExecutionFromOperatorContext() {
    // Delegates to add(...) with whatever estimator the operator context currently reports.
    return add(operatorContext.getLoadProfileEstimator());
}
/** * Creates a new instance. * * @param measuredExecutionTime the time measured for the partial execution * @param lowerCost the lower possible costs for the new instance (excluding fix costs) * @param upperCost the upper possible costs for the new instance (excluding fix costs) * @param executionLineageNodes for all executed {@link ExecutionOperator}s * @param configuration the {@link Configuration} to re-estimate execution statistics */ public PartialExecution(long measuredExecutionTime, double lowerCost, double upperCost, Collection<ExecutionLineageNode> executionLineageNodes, Configuration configuration) { this.measuredExecutionTime = measuredExecutionTime; this.atomicExecutionGroups = executionLineageNodes.stream() .map(node -> new AtomicExecutionGroup( node.getOperatorContext(), ((ExecutionOperator) node.getOperatorContext().getOperator()).getPlatform(), configuration, node.getAtomicExecutions() )) .collect(Collectors.toList()); this.lowerCost = lowerCost; this.upperCost = upperCost; }
/**
 * Models lazy execution: the lineage is wired up, but no {@link LazyExecutionLineageNode} is marked and
 * nothing is collected.
 *
 * @param inputs          the input {@link ChannelInstance}s
 * @param outputs         the output {@link ChannelInstance}s
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return a {@link Tuple} of two empty collections (no {@link ExecutionLineageNode}s, no {@link ChannelInstance}s)
 */
static Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> modelLazyExecution(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        OptimizationContext.OperatorContext operatorContext) {
    // Create the node for this operator and hook it between the input and output lineages.
    final ExecutionLineageNode lineageNode =
            new ExecutionLineageNode(operatorContext).addAtomicExecutionFromOperatorContext();
    LazyExecutionLineageNode.connectAll(inputs, lineageNode, outputs);

    // Nothing is marked here, hence nothing is reported as executed or produced.
    return new Tuple<>(Collections.emptyList(), Collections.emptyList());
}
final CardinalityEstimate cardinalityEstimate1 = operatorContext.getInputCardinality(1); ExecutionLineageNode indexingExecutionLineageNode = new ExecutionLineageNode(operatorContext); indexingExecutionLineageNode.add(LoadProfileEstimators.createFromSpecification( "rheem.java.join.load.indexing", javaExecutor.getConfiguration() )); ExecutionLineageNode probingExecutionLineageNode = new ExecutionLineageNode(operatorContext); probingExecutionLineageNode.add(LoadProfileEstimators.createFromSpecification( "rheem.java.join.load.probing", javaExecutor.getConfiguration() )); probeTable.getOrDefault(keyExtractor1.apply(dataQuantum1), Collections.emptyList()).stream() .map(dataQuantum0 -> new Tuple2<>(dataQuantum0, dataQuantum1))); indexingExecutionLineageNode.addPredecessor(inputs[0].getLineage()); indexingExecutionLineageNode.collectAndMark(executionLineageNodes, producedChannelInstances); probingExecutionLineageNode.addPredecessor(inputs[1].getLineage()); } else { final int expectedNumElements = cardinalityEstimate1 == null ? probeTable.getOrDefault(keyExtractor0.apply(dataQuantum0), Collections.emptyList()).stream() .map(dataQuantum1 -> new Tuple2<>(dataQuantum0, dataQuantum1))); indexingExecutionLineageNode.addPredecessor(inputs[1].getLineage()); indexingExecutionLineageNode.collectAndMark(executionLineageNodes, producedChannelInstances); probingExecutionLineageNode.addPredecessor(inputs[0].getLineage());
assert outputs.length == this.getNumOutputs(); ExecutionLineageNode executionLineageNode = new ExecutionLineageNode(operatorContext); executionLineageNode.addAtomicExecutionFromOperatorContext(); executionLineageNode.addPredecessor(inputs[CONVERGENCE_INPUT_INDEX].getLineage()); endloop = stoppingCondition.test(convergenceCollection); input = (JavaChannelInstance) inputs[ITERATION_INPUT_INDEX]; return executionLineageNode.collectAndMark();
/**
 * Opens the file behind the trimmed input URL, feeds its lines as a {@link Stream} into the first output
 * {@link ChannelInstance} and registers two {@link ExecutionLineageNode}s: a "prepare" node that is collected
 * and marked right away, and a "main" node that is attached as predecessor of the output's lineage.
 *
 * @param inputs          the input {@link ChannelInstance}s; only their count is checked
 * @param outputs         the output {@link ChannelInstance}s; the first one must be a {@link StreamChannel.Instance}
 * @param javaExecutor    provides the configuration for the load profile estimators
 * @param operatorContext the {@link OptimizationContext.OperatorContext} of this execution
 * @return the result of {@link ExecutionLineageNode#collectAndMark()} on the "prepare" node
 * @throws RheemException if no file system can be found for the URL or if opening the file fails
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final String url = this.getInputUrl().trim();
    final FileSystem fileSystem = FileSystems.getFileSystem(url).orElseThrow(
            () -> new RheemException(String.format("Cannot access file system of %s.", url))
    );

    try {
        // NOTE(review): the reader is created without an explicit charset, so the platform default applies.
        // It is not closed here because the stream of lines is handed to the output channel for later use.
        final BufferedReader lineReader = new BufferedReader(new InputStreamReader(fileSystem.open(url)));
        ((StreamChannel.Instance) outputs[0]).accept(lineReader.lines());
    } catch (IOException e) {
        throw new RheemException(String.format("Reading %s failed.", url), e);
    }

    // The preparation work is reported as executed by this call ...
    final ExecutionLineageNode prepareNode = new ExecutionLineageNode(operatorContext).add(
            LoadProfileEstimators.createFromSpecification(
                    "rheem.java.textfilesource.load.prepare", javaExecutor.getConfiguration()
            )
    );

    // ... whereas the main work is only linked into the lineage of the output channel.
    final ExecutionLineageNode mainNode = new ExecutionLineageNode(operatorContext).add(
            LoadProfileEstimators.createFromSpecification(
                    "rheem.java.textfilesource.load.main", javaExecutor.getConfiguration()
            )
    );
    outputs[0].getLineage().addPredecessor(mainNode);

    return prepareNode.collectAndMark();
}
/**
 * Places the {@code executionLineageNode} between the given channels: the lineage of every non-{@code null}
 * input becomes a predecessor of the node, and the node becomes a predecessor of the lineage of every
 * non-{@code null} output. {@code null} array entries are skipped.
 *
 * @param inputs               input {@link ChannelInstance}s
 * @param executionLineageNode in-between {@link ExecutionLineageNode}
 * @param outputs              output {@link ChannelInstance}s
 * @see #addPredecessor(LazyExecutionLineageNode)
 */
public static void connectAll(ChannelInstance[] inputs,
                              ExecutionLineageNode executionLineageNode,
                              ChannelInstance[] outputs) {
    // inputs -> node
    for (ChannelInstance input : inputs) {
        if (input == null) {
            continue;
        }
        executionLineageNode.addPredecessor(input.getLineage());
    }

    // node -> outputs
    for (ChannelInstance output : outputs) {
        if (output == null) {
            continue;
        }
        output.getLineage().addPredecessor(executionLineageNode);
    }
}
/** * Creates a new instance according to the measurement data. * * @param measuredExecutionTime the measured execution time * @param executionLineageNodes the {@link ExecutionLineageNode}s reflecting what has been executed * @param configuration the execution {@link Configuration} * @return the new instance */ public static PartialExecution createFromMeasurement( long measuredExecutionTime, Collection<ExecutionLineageNode> executionLineageNodes, Configuration configuration) { // Calculate possible costs. double lowerCost = Double.POSITIVE_INFINITY, upperCost = Double.NEGATIVE_INFINITY; final Set<Platform> platforms = executionLineageNodes.stream() .map(node -> ((ExecutionOperator) node.getOperatorContext().getOperator()).getPlatform()) .collect(Collectors.toSet()); for (Platform platform : platforms) { final TimeToCostConverter timeToCostConverter = configuration.getTimeToCostConverterProvider().provideFor(platform); final ProbabilisticDoubleInterval costs = timeToCostConverter.convertWithoutFixCosts(TimeEstimate.ZERO.plus(measuredExecutionTime)); lowerCost = Math.min(lowerCost, costs.getLowerEstimate()); upperCost = Math.max(upperCost, costs.getUpperEstimate()); } return new PartialExecution(measuredExecutionTime, lowerCost, upperCost, executionLineageNodes, configuration); }
ExecutionLineageNode probingExecutionLineageNode = new ExecutionLineageNode(operatorContext); probingExecutionLineageNode.add(LoadProfileEstimators.createFromSpecification( "rheem.java.cartesian.load.probing", javaExecutor.getConfiguration() )); ExecutionLineageNode indexingExecutionLineageNode = new ExecutionLineageNode(operatorContext); indexingExecutionLineageNode.add(LoadProfileEstimators.createFromSpecification( "rheem.java.cartesian.load.indexing", javaExecutor.getConfiguration() )); materializedInput = inputs[0]; probingInput = inputs[1]; probingExecutionLineageNode.addPredecessor(materializedInput.getLineage()); materializedInput = inputs[1]; probingInput = inputs[0]; probingExecutionLineageNode.addPredecessor(materializedInput.getLineage()); materializedInput = inputs[0]; probingInput = inputs[1]; indexingExecutionLineageNode.addPredecessor(materializedInput.getLineage()); indexingExecutionLineageNode.collectAndMark(executionLineageNodes, producedChannelInstances); materializedInput = inputs[1]; probingInput = inputs[0]; indexingExecutionLineageNode.addPredecessor(materializedInput.getLineage()); indexingExecutionLineageNode.collectAndMark(executionLineageNodes, producedChannelInstances); probingExecutionLineageNode.addPredecessor(probingInput.getLineage()); output.getLineage().addPredecessor(probingExecutionLineageNode);
assert outputs.length == this.getNumOutputs(); ExecutionLineageNode executionLineageNode = new ExecutionLineageNode(operatorContext); executionLineageNode.addAtomicExecutionFromOperatorContext(); throw new RheemException(String.format("Could not evaluate stopping condition for %s.", this), e); executionLineageNode.addPredecessor(convergenceInput.getLineage()); break; default: return executionLineageNode.collectAndMark();
/**
 * Creates an RDD of text lines for the input URL, hands it to the first output {@link ChannelInstance} and
 * registers two {@link ExecutionLineageNode}s: a "prepare" node that is collected and marked right away,
 * and a "main" node that is attached as predecessor of the output's lineage.
 *
 * @param inputs          the input {@link ChannelInstance}s; only their count is checked
 * @param outputs         the output {@link ChannelInstance}s; the first one must be a {@link RddChannel.Instance}
 * @param sparkExecutor   provides the Spark context and the configuration for the load profile estimators
 * @param operatorContext the {@link OptimizationContext.OperatorContext} of this execution
 * @return the result of {@link ExecutionLineageNode#collectAndMark()} on the "prepare" node
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    // Set up the RDD, name it and pass it on to the output channel.
    final RddChannel.Instance rddOutput = (RddChannel.Instance) outputs[0];
    final JavaRDD<String> textLines = sparkExecutor.sc.textFile(this.getInputUrl());
    this.name(textLines);
    rddOutput.accept(textLines, sparkExecutor);

    // The preparation work is reported as executed by this call ...
    final ExecutionLineageNode prepareNode = new ExecutionLineageNode(operatorContext).add(
            LoadProfileEstimators.createFromSpecification(
                    "rheem.spark.textfilesource.load.prepare", sparkExecutor.getConfiguration()
            )
    );

    // ... whereas the main work is only linked into the lineage of the output channel.
    final ExecutionLineageNode mainNode = new ExecutionLineageNode(operatorContext).add(
            LoadProfileEstimators.createFromSpecification(
                    "rheem.spark.textfilesource.load.main", sparkExecutor.getConfiguration()
            )
    );
    rddOutput.getLineage().addPredecessor(mainNode);

    return prepareNode.collectAndMark();
}
final CardinalityEstimate cardinalityEstimate1 = operatorContext.getOutputCardinality(0); ExecutionLineageNode indexingExecutionLineageNode = new ExecutionLineageNode(operatorContext); indexingExecutionLineageNode.add(LoadProfileEstimators.createFromSpecification( "rheem.java.intersect.load.indexing", javaExecutor.getConfiguration() )); ExecutionLineageNode probingExecutionLineageNode = new ExecutionLineageNode(operatorContext); probingExecutionLineageNode.add(LoadProfileEstimators.createFromSpecification( "rheem.java.intersect.load.probing", javaExecutor.getConfiguration() )); candidateStream = ((JavaChannelInstance) inputs[0]).provideStream(); probingTable = this.createProbingTable(((JavaChannelInstance) inputs[1]).provideStream()); indexingExecutionLineageNode.addPredecessor(inputs[0].getLineage()); probingExecutionLineageNode.addPredecessor(inputs[1].getLineage()); } else { candidateStream = ((JavaChannelInstance) inputs[1]).provideStream(); probingTable = this.createProbingTable(((JavaChannelInstance) inputs[0]).provideStream()); indexingExecutionLineageNode.addPredecessor(inputs[1].getLineage()); probingExecutionLineageNode.addPredecessor(inputs[0].getLineage()); outputs[0].getLineage().addPredecessor(probingExecutionLineageNode); indexingExecutionLineageNode.collectAndMark(executionLineageNodes, producedChannelInstances); return new Tuple<>(executionLineageNodes, producedChannelInstances);
assert outputs.length == this.getNumOutputs(); final ExecutionLineageNode executionLineageNode = new ExecutionLineageNode(operatorContext); executionLineageNode.addAtomicExecutionFromOperatorContext(); executionLineageNode.addPredecessor(inputs[ITERATION_CONVERGENCE_INPUT_INDEX].getLineage()); break; default: return executionLineageNode.collectAndMark();
/**
 * Models eager execution by marking all {@link LazyExecutionLineageNode}s as executed and collecting all
 * marked ones.
 * <p>Collection starts from the lineage of each output {@link ChannelInstance}. {@code null} entries in
 * {@code outputs} are skipped, in line with {@link LazyExecutionLineageNode#connectAll}, which does not
 * connect them either. If there is no usable output at all (empty array or only {@code null} entries),
 * collection starts from the {@link ExecutionLineageNode} created for {@code operatorContext} instead.</p>
 *
 * @param inputs          the input {@link ChannelInstance}s
 * @param outputs         the output {@link ChannelInstance}s
 * @param operatorContext the executed {@link OptimizationContext.OperatorContext}
 * @return the collected {@link ExecutionLineageNode}s and {@link ChannelInstance}s
 */
static Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> modelEagerExecution(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        OptimizationContext.OperatorContext operatorContext) {
    final ExecutionLineageNode executionLineageNode = new ExecutionLineageNode(operatorContext);
    executionLineageNode.addAtomicExecutionFromOperatorContext();
    LazyExecutionLineageNode.connectAll(inputs, executionLineageNode, outputs);

    final Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> collectors =
            new Tuple<>(new LinkedList<>(), new LinkedList<>());
    boolean isAnyOutputCollected = false;
    for (ChannelInstance output : outputs) {
        // connectAll(...) did not wire null outputs, so there is no lineage to follow for them.
        if (output == null) {
            continue;
        }
        output.getLineage().collectAndMark(collectors.getField0(), collectors.getField1());
        isAnyOutputCollected = true;
    }
    if (isAnyOutputCollected) {
        return collectors;
    }

    // No output lineage leads to the node of this operator, so collect from the node directly.
    return executionLineageNode.collectAndMark();
}
/**
 * Wraps the given {@link LoadProfileEstimator} in a new {@link AtomicExecution} and adds it to this instance.
 * Convenience variant of {@link #add(AtomicExecution)}.
 *
 * @param loadProfileEstimator for which the {@link AtomicExecution} should be added
 * @return this instance
 */
public ExecutionLineageNode add(LoadProfileEstimator loadProfileEstimator) {
    final AtomicExecution atomicExecution = new AtomicExecution(loadProfileEstimator);
    return this.add(atomicExecution);
}
final ExecutionLineageNode mainExecutionLineage = new ExecutionLineageNode(operatorContext); mainExecutionLineage.add(LoadProfileEstimators.createFromSpecification( "rheem.graphchi.pagerank.load.main", configuration )); mainExecutionLineage.addPredecessor(inputFileChannelInstance.getLineage()); final ExecutionLineageNode outputExecutionLineage = new ExecutionLineageNode(operatorContext); outputExecutionLineage.add(LoadProfileEstimators.createFromSpecification( "rheem.graphchi.pagerank.load.output", configuration )); outputChannelInstance.getLineage().addPredecessor(outputExecutionLineage); return mainExecutionLineage.collectAndMark();
assert outputs.length == this.getNumOutputs(); ExecutionLineageNode executionLineageNode = new ExecutionLineageNode(operatorContext); executionLineageNode.addAtomicExecutionFromOperatorContext(); executionLineageNode.addPredecessor(inputs[ITERATION_CONVERGENCE_INPUT_INDEX].getLineage()); return executionLineageNode.collectAndMark();
assert outputs.length == this.getNumOutputs(); ExecutionLineageNode executionLineageNode = new ExecutionLineageNode(operatorContext); executionLineageNode.addAtomicExecutionFromOperatorContext(); return executionLineageNode.collectAndMark();
assert outputs.length == this.getNumOutputs(); final ExecutionLineageNode executionLineageNode = new ExecutionLineageNode(operatorContext); executionLineageNode.addAtomicExecutionFromOperatorContext(); return executionLineageNode.collectAndMark();