/**
 * Registers the two test tables: one plain 3-tuple data set and one derived
 * copy in which every string containing "world" is replaced by null.
 */
@Before
public void setupTables() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	tableEnv = TableEnvironment.getTableEnvironment(env, new TableConfig());

	DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);
	tableEnv.registerDataSet(TABLE_NAME, tuples);

	// Derive a second data set where the string field is nulled out whenever
	// it contains "world" (case-insensitive), to exercise null handling.
	MapOperator<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> tuplesWithNulls =
		tuples.map(new MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
			@Override
			public Tuple3<Integer, Long, String> map(Tuple3<Integer, Long, String> tuple) throws Exception {
				if (tuple.f2.toLowerCase().contains("world")) {
					tuple.f2 = null;
				}
				return tuple;
			}
		});
	tableEnv.registerDataSet(TABLE_WITH_NULLS_NAME, tuplesWithNulls);
}
/**
 * Verifies that emptying both the logical and physical optimizer rule sets via a
 * custom {@link CalciteConfig} makes plan translation fail with a TableException.
 */
@Test(expected = TableException.class)
public void testCustomCalciteConfig() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment batchTableEnv = TableEnvironment.getTableEnvironment(env, config());

	// Strip all optimizer rules so no physical plan can be produced.
	CalciteConfig calciteConfig = new CalciteConfigBuilder()
		.replaceLogicalOptRuleSet(RuleSets.ofList())
		.replacePhysicalOptRuleSet(RuleSets.ofList())
		.build();
	batchTableEnv.getConfig().setCalciteConfig(calciteConfig);

	DataSet<Tuple3<Integer, Long, String>> dataSet = CollectionDataSets.get3TupleDataSet(env);
	Table table = batchTableEnv.fromDataSet(dataSet);
	// Translation must fail here because the rule sets are empty.
	batchTableEnv.toDataSet(table, Row.class);
}
/** Returns the Configuration backing this environment's TableConfig. */
private Configuration getTableConf() {
	TableConfig tableConfig = tableEnv.getConfig();
	return tableConfig.getConf();
}
public void buildRUs(BatchExecNode<?> rootNode) { if (rootNode instanceof BatchExecSink<?>) { rootNode = (BatchExecNode<?>) ((BatchExecSink) rootNode).getInput(); } // not support subsectionOptimization or external shuffle temporarily if (tableEnv.getConfig().getSubsectionOptimization() || getTableConf().getBoolean(TableConfigOptions.SQL_EXEC_SORT_RANGE_ENABLED)) { supportRunningUnit = false; return; } RunningUnitGenerator visitor = new RunningUnitGenerator(getTableConf()); rootNode.accept(visitor); runningUnits = visitor.getRunningUnits(); for (NodeRunningUnit runningUnit : runningUnits) { for (BatchExecNode<?> node : runningUnit.getNodeSet()) { nodeRunningUnitMap.computeIfAbsent(node, k -> new LinkedHashSet<>()).add(runningUnit); } } buildNodeStagesMap(); }
/** Fills {@code resource} with the configured default cpu, heap and direct memory. */
private void setDefaultRes(NodeResource resource) {
	// Fetch the configuration once and reuse it for all three defaults.
	Configuration conf = tEnv.getConfig().getConf();
	resource.setCpu(NodeResourceUtil.getDefaultCpu(conf));
	resource.setHeapMem(NodeResourceUtil.getDefaultHeapMem(conf));
	resource.setDirectMem(NodeResourceUtil.getDefaultDirectMem(conf));
}
/**
 * Creates a partitioner that hashes rows on the given field indexes, using a
 * generated hash function for {@code info}'s row type.
 */
public BinaryHashPartitioner(BaseRowTypeInfo info, int[] hashFields) {
	this.genHashFunc = HashCodeGenerator.generateRowHash(
		CodeGeneratorContext.apply(new TableConfig(), false),
		TypeConverters.createInternalTypeFromTypeInfo(info),
		"HashPartitioner",
		hashFields);
	// Resolve the hashed field indexes to their names (used e.g. for display).
	String[] allFieldNames = info.getFieldNames();
	this.hashFieldNames = new String[hashFields.length];
	int pos = 0;
	for (int fieldIndex : hashFields) {
		hashFieldNames[pos++] = allFieldNames[fieldIndex];
	}
}
/**
 * Runs managed-memory calculation over all batch sink nodes. Chooses the
 * statistics-based calculator when infer mode is ALL, otherwise the
 * config-based one, and applies it to every sink's sub-DAG.
 *
 * @param sinkNodes root sink nodes; each must be a BatchExecNode
 * @param context   processing context supplying the table environment
 * @return the (unmodified) list of sink nodes
 */
@Override
public List<ExecNode<?, ?>> process(List<ExecNode<?, ?>> sinkNodes, DAGProcessContext context) {
	sinkNodes.forEach(s -> Preconditions.checkArgument(s instanceof BatchExecNode));
	tEnv = context.getTableEnvironment();
	// FIX: read the configuration once instead of re-fetching it in every branch.
	Configuration tableConf = tEnv.getConfig().getConf();
	NodeResourceUtil.InferMode inferMode = NodeResourceUtil.getInferMode(tableConf);
	BatchExecNodeVisitor managedVisitor;
	// FIX: enum comparison with == (null-safe, idiomatic) instead of equals().
	if (inferMode == NodeResourceUtil.InferMode.ALL) {
		managedVisitor = new BatchManagedMemCalculatorOnStatistics(tableConf);
	} else {
		managedVisitor = new BatchManagedMemCalculatorOnConfig(tableConf);
	}
	// FIX: wildcard cast instead of a raw BatchExecNode type.
	sinkNodes.forEach(s -> ((BatchExecNode<?>) s).accept(managedVisitor));
	return sinkNodes;
}
}
private void calculateTableSourceScan(CommonScan tableSourceScan) { // user may have set resource for source transformation. StreamTransformation transformation = tableSourceScan.getSourceTransformation(tEnv.execEnv()); ResourceSpec sourceRes = transformation.getMinResources(); if (sourceRes == ResourceSpec.DEFAULT || sourceRes == null) { int heap = NodeResourceUtil.getSourceMem(tEnv.getConfig().getConf()); int direct = NodeResourceUtil.getSourceDirectMem(tEnv.getConfig().getConf()); sourceRes = NodeResourceUtil.getResourceSpec(tEnv.getConfig().getConf(), heap, direct); } calculateCommonScan(tableSourceScan, sourceRes); }
/**
 * Applies the source resource spec to the scan, adding the default resource
 * spec for the internal conversion step only when one is actually needed.
 */
private void calculateCommonScan(CommonScan commonScan, ResourceSpec sourceRes) {
	ResourceSpec conversionRes = commonScan.needInternalConversion()
		? NodeResourceUtil.getDefaultResourceSpec(tEnv.getConfig().getConf())
		: ResourceSpec.DEFAULT;
	commonScan.setResForSourceAndConversion(sourceRes, conversionRes);
}
/**
 * Calculates parallelism for every shuffle stage and, when a total running-unit
 * CPU limit is configured (&gt; 0), adjusts the parallelisms down to respect it.
 */
protected void calculateOnShuffleStages(Map<ExecNode<?, ?>, ShuffleStage> nodeShuffleStageMap, DAGProcessContext context) {
	Configuration tableConf = tEnv.getConfig().getConf();
	NodeResourceUtil.InferMode inferMode = NodeResourceUtil.getInferMode(tableConf);
	getShuffleStageParallelismCalculator(tableConf, inferMode).calculate(nodeShuffleStageMap.values());
	// FIX: primitive double — the boxed Double forced needless boxing plus an
	// auto-unboxing on every comparison; getDouble already returns a primitive.
	double cpuLimit = tableConf.getDouble(TableConfigOptions.SQL_RESOURCE_RUNNING_UNIT_CPU_TOTAL);
	if (cpuLimit > 0) {
		Map<BatchExecNode<?>, Set<NodeRunningUnit>> nodeRunningUnitMap = context.getRunningUnitMap();
		BatchParallelismAdjuster.adjustParallelism(cpuLimit, nodeRunningUnitMap, nodeShuffleStageMap);
	}
}
/** Assigns the configured default operator parallelism to {@code execNode}. */
private void calculateDefault(ExecNode<?, ?> execNode) {
	Configuration conf = tEnv.getConfig().getConf();
	int envParallelism = tEnv.execEnv().getParallelism();
	int defaultParallelism = NodeResourceUtil.getOperatorDefaultParallelism(conf, envParallelism);
	execNode.getResource().setParallelism(defaultParallelism);
}
/**
 * Sets the scan's parallelism: a positive max parallelism on the source
 * transformation wins; otherwise the configured source parallelism is used.
 */
private void calculateTableSourceScan(StreamExecTableSourceScan tableSourceScan) {
	StreamTransformation transformation = tableSourceScan.getSourceTransformation(tEnv.execEnv());
	int maxParallelism = transformation.getMaxParallelism();
	if (maxParallelism > 0) {
		// A transformation-level max parallelism overrides the configured value.
		tableSourceScan.getResource().setParallelism(maxParallelism);
	} else {
		int configParallelism = NodeResourceUtil.getSourceParallelism(
			tEnv.getConfig().getConf(), tEnv.execEnv().getParallelism());
		tableSourceScan.getResource().setParallelism(configParallelism);
	}
}