/** * Its crucial to pick the right parallelism. * <p> * totalSubPartitions : this is deemed safe limit, to be nice with Spark. inputParallelism : * typically number of input file splits * <p> * We pick the max such that, we are always safe, but go higher if say a there are a lot of input * files. (otherwise, we will fallback to number of partitions in input and end up with slow * performance) */ private int determineParallelism(int inputParallelism, int totalSubPartitions) { // If bloom index parallelism is set, use it to to check against the input parallelism and // take the max int indexParallelism = Math.max(inputParallelism, config.getBloomIndexParallelism()); int joinParallelism = Math.max(totalSubPartitions, indexParallelism); logger.info("InputParallelism: ${" + inputParallelism + "}, " + "IndexParallelism: ${" + config .getBloomIndexParallelism() + "}, " + "TotalSubParts: ${" + totalSubPartitions + "}, " + "Join Parallelism set to : " + joinParallelism); return joinParallelism; }
/** * Its crucial to pick the right parallelism. * <p> * totalSubPartitions : this is deemed safe limit, to be nice with Spark. inputParallelism : * typically number of input file splits * <p> * We pick the max such that, we are always safe, but go higher if say a there are a lot of input * files. (otherwise, we will fallback to number of partitions in input and end up with slow * performance) */ private int determineParallelism(int inputParallelism, int totalSubPartitions) { // If bloom index parallelism is set, use it to to check against the input parallelism and // take the max int indexParallelism = Math.max(inputParallelism, config.getBloomIndexParallelism()); int joinParallelism = Math.max(totalSubPartitions, indexParallelism); logger.info("InputParallelism: ${" + inputParallelism + "}, " + "IndexParallelism: ${" + config .getBloomIndexParallelism() + "}, " + "TotalSubParts: ${" + totalSubPartitions + "}, " + "Join Parallelism set to : " + joinParallelism); return joinParallelism; }