public long getLongVar(ConfVars var) {
  return getLongVar(this, var);
}
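This one-liner is the instance-side convenience overload: it simply delegates to the static HiveConf.getLongVar(Configuration, ConfVars), which is what the call sites below use when they only hold a plain Hadoop Configuration. A minimal sketch of both call styles follows; the class name GetLongVarExample is hypothetical, and HIVE_EXEC_COPYFILE_MAXSIZE is chosen purely as an illustrative variable (it appears in the CopyUtils snippet below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;

public class GetLongVarExample {
  public static void main(String[] args) {
    // Instance form: reads the long value from this HiveConf, falling back to the
    // variable's compiled-in default when it is unset.
    HiveConf hiveConf = new HiveConf();
    long viaInstance = hiveConf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE);

    // Static form: usable with any Hadoop Configuration, e.g. inside an operator
    // that only receives a Configuration in initializeOp()/initializeMapOperator().
    Configuration conf = new Configuration();
    long viaStatic = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE);

    System.out.println(viaInstance + " " + viaStatic);
  }
}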
@Override
public boolean checkQuerySize(long querySize, HiveConf hiveConf) {
  long minSize = hiveConf.getLongVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER_COMPACT_MINSIZE);
  long maxSize = hiveConf.getLongVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER_COMPACT_MAXSIZE);
  // A negative max is a sentinel meaning "no upper bound".
  if (maxSize < 0) {
    maxSize = Long.MAX_VALUE;
  }
  return (querySize > minSize && querySize < maxSize);
}
public CopyUtils(String distCpDoAsUser, HiveConf hiveConf) {
  this.hiveConf = hiveConf;
  maxNumberOfFiles = hiveConf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXNUMFILES);
  maxCopyFileSize = hiveConf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE);
  hiveInTest = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST);
  this.copyAsUser = distCpDoAsUser;
}
public synchronized RootAllocator getRootAllocator(Configuration conf) {
  if (rootAllocator == null) {
    final long limit = HiveConf.getLongVar(conf, HIVE_ARROW_ROOT_ALLOCATOR_LIMIT);
    rootAllocator = new RootAllocator(limit);
  }
  return rootAllocator;
}
public void initializeMapOperator(Configuration hconf) throws HiveException {
  super.initializeMapOperator(hconf);
  cntr = 1;
  logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
  for (Entry<Operator<?>, StructObjectInspector> entry : childrenOpToOI.entrySet()) {
    Operator<?> child = entry.getKey();
    child.initialize(hconf, new ObjectInspector[] {entry.getValue()});
  }
}
private QueryResultsCache(HiveConf configuration) throws IOException {
  this.conf = configuration;

  // Set up cache directory
  Path rootCacheDir = new Path(conf.getVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_DIRECTORY));
  LOG.info("Initializing query results cache at {}", rootCacheDir);
  Utilities.ensurePathIsWritable(rootCacheDir, conf);

  String currentCacheDirName = "results-" + UUID.randomUUID().toString();
  cacheDirPath = new Path(rootCacheDir, currentCacheDirName);
  FileSystem fs = cacheDirPath.getFileSystem(conf);
  FsPermission fsPermission = new FsPermission("700");
  fs.mkdirs(cacheDirPath, fsPermission);

  // Create non-existent path for 0-row results
  zeroRowsPath = new Path(cacheDirPath, "dummy_zero_rows");

  // Results cache directory should be cleaned up at process termination.
  fs.deleteOnExit(cacheDirPath);

  maxCacheSize = conf.getLongVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_MAX_SIZE);
  maxEntrySize = conf.getLongVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_MAX_ENTRY_SIZE);
  maxEntryLifetime = conf.getTimeVar(
      HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_MAX_ENTRY_LIFETIME, TimeUnit.MILLISECONDS);

  LOG.info("Query results cache: cacheDirectory {}, maxCacheSize {}, maxEntrySize {}, maxEntryLifetime {}",
      cacheDirPath, maxCacheSize, maxEntrySize, maxEntryLifetime);
}
public MemoryCalculator(PhysicalContext pctx) {
  this.pctx = pctx;
  this.totalAvailableMemory = HiveConf.getLongVar(pctx.conf,
      HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  this.minimumHashTableSize = HiveConf.getIntVar(pctx.conf,
      HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS)
      * HiveConf.getIntVar(pctx.conf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE);
  this.inflationFactor = HiveConf.getFloatVar(pctx.conf,
      HiveConf.ConfVars.HIVE_HASH_TABLE_INFLATION_FACTOR);
}
@Override
public void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  MAX_SIZE = HiveConf.getLongVar(hconf, ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE);
  serializer = (Serializer) ReflectionUtils.newInstance(
      conf.getTable().getDeserializerClass(), null);
  initDataBuffer(false);
}
private boolean checkShuffleSizeForLargeTable(JoinOperator joinOp, int position,
    OptimizeTezProcContext context) {
  long max = HiveConf.getLongVar(context.parseContext.getConf(),
      HiveConf.ConfVars.HIVECONVERTJOINMAXSHUFFLESIZE);
  if (max < 1) {
    // Max is disabled, we can safely return false
    return false;
  }
  // Evaluate
  ReduceSinkOperator rsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(position);
  Statistics inputStats = rsOp.getStatistics();
  long inputSize = computeOnlineDataSize(inputStats);
  LOG.debug("Estimated size for input {}: {}; Max size for DPHJ conversion: {}",
      position, inputSize, max);
  if (inputSize > max) {
    LOG.debug("Size of input is greater than the max; we do not convert to DPHJ");
    return false;
  }
  return true;
}
public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) {
  if (ctrs == null) {
    // hadoop might return null if it cannot locate the job.
    // we may still be able to retrieve the job status - so ignore
    return false;
  }
  // check for number of created files
  Counters.Counter cntr = ctrs.findCounter(
      HiveConf.getVar(job, ConfVars.HIVECOUNTERGROUP),
      Operator.HIVE_COUNTER_CREATED_FILES);
  long numFiles = cntr != null ? cntr.getValue() : 0;
  long upperLimit = HiveConf.getLongVar(job, HiveConf.ConfVars.MAXCREATEDFILES);
  if (numFiles > upperLimit) {
    errMsg.append("total number of created files now is " + numFiles + ", which exceeds ")
        .append(upperLimit);
    return true;
  }
  return this.callBackObj.checkFatalErrors(ctrs, errMsg);
}
public Serializer(Configuration conf, String attemptId, List<TypeInfo> typeInfos,
    List<String> fieldNames) {
  this.fieldTypeInfos = typeInfos;
  this.fieldNames = fieldNames;
  long childAllocatorLimit = HiveConf.getLongVar(conf, HIVE_ARROW_BATCH_ALLOCATOR_LIMIT);
  // Use the per-task allocator for accounting only; no need to reserve per-task memory
  long childAllocatorReservation = 0L;
  // Break out accounting of direct memory per task, so we can verify that no memory is
  // leaked when the task completes
  allocator = RootAllocatorFactory.INSTANCE.getRootAllocator(conf).newChildAllocator(
      attemptId, childAllocatorReservation, childAllocatorLimit);
  rootVector = StructVector.empty(null, allocator);
  // These last fields are unused in non-serde usage
  vectorizedRowBatch = null;
  vectorAssignRow = null;
  MAX_BUFFERED_ROWS = 0;
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
    Object... nodeOutputs) throws SemanticException {
  OptimizeTezProcContext context = (OptimizeTezProcContext) procContext;
  AppMasterEventOperator event = (AppMasterEventOperator) nd;
  AppMasterEventDesc desc = event.getConf();
  if (desc.getStatistics().getDataSize() > context.conf
      .getLongVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE)
      && (context.pruningOpsRemovedByPriorOpt.isEmpty()
          || !context.pruningOpsRemovedByPriorOpt.contains(event))) {
    context.pruningOpsRemovedByPriorOpt.add(event);
    GenTezUtils.removeBranch(event);
    // at this point we've found the fork in the op pipeline that has the pruning as a child plan.
    LOG.info("Disabling dynamic pruning for: "
        + ((DynamicPruningEventDesc) desc).getTableScan().getName()
        + ". Expected data size is too big: " + desc.getStatistics().getDataSize());
  }
  return false;
}
private static RelOptPlanner createPlanner(HiveConf conf, Set<RelNode> corrScalarRexSQWithAgg,
    Set<RelNode> scalarAggNoGbyNoWin) {
  final Double maxSplitSize = (double) HiveConf.getLongVar(conf,
      HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
  final Double maxMemory = (double) HiveConf.getLongVar(conf,
      HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
  HiveRulesRegistry registry = new HiveRulesRegistry();
  Properties calciteConfigProperties = new Properties();
  calciteConfigProperties.setProperty(
      CalciteConnectionProperty.TIME_ZONE.camelName(),
      conf.getLocalTimeZone().getId());
  calciteConfigProperties.setProperty(
      CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(),
      Boolean.FALSE.toString());
  CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties);
  boolean isCorrelatedColumns = HiveConf.getBoolVar(conf,
      HiveConf.ConfVars.HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS);
  boolean heuristicMaterializationStrategy = HiveConf.getVar(conf,
      HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY).equals("heuristic");
  HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig,
      corrScalarRexSQWithAgg, scalarAggNoGbyNoWin,
      new HiveConfPlannerContext(isCorrelatedColumns, heuristicMaterializationStrategy));
  return HiveVolcanoPlanner.createPlanner(confContext);
}
private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception {
  if (limit > 0) {
    if (data.hasOnlyPruningFilter()) {
      /* partitioned table + query has only pruning filters */
      return true;
    } else if (!data.isPartitioned() && !data.isFiltered()) {
      /* unpartitioned table + no filters */
      return true;
    }
    /* fall through */
  }
  long threshold = HiveConf.getLongVar(pctx.getConf(),
      HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
  if (threshold < 0) {
    return true;
  }
  Operator child = data.scanOp.getChildOperators().get(0);
  if (child instanceof SelectOperator) {
    // select *, constants, and casts can be allowed without a threshold check
    if (checkExpressions((SelectOperator) child)) {
      return true;
    }
  }
  return data.isDataLengthWithInThreshold(pctx, threshold);
}
public RelMetadataProvider getMetadataProvider() {
  // Create cost metadata provider
  final HiveCostModel cm;
  if (HiveConf.getVar(this.hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
      && HiveConf.getBoolVar(this.hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) {
    cm = HiveOnTezCostModel.getCostModel(hiveConf);
  } else {
    cm = HiveDefaultCostModel.getCostModel();
  }

  // Get max split size for HiveRelMdParallelism
  final Double maxSplitSize = (double) HiveConf.getLongVar(this.hiveConf,
      HiveConf.ConfVars.MAPREDMAXSPLITSIZE);

  // Return MD provider
  return ChainedRelMetadataProvider.of(ImmutableList.of(
      HiveRelMdDistinctRowCount.SOURCE,
      new HiveRelMdCost(cm).getMetadataProvider(),
      HiveRelMdSelectivity.SOURCE,
      HiveRelMdRowCount.SOURCE,
      HiveRelMdUniqueKeys.SOURCE,
      HiveRelMdColumnUniqueness.SOURCE,
      HiveRelMdSize.SOURCE,
      HiveRelMdMemory.SOURCE,
      new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(),
      HiveRelMdDistribution.SOURCE,
      HiveRelMdCollation.SOURCE,
      HiveRelMdPredicates.SOURCE,
      DefaultRelMetadataProvider.INSTANCE));
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
    Object... nodeOutputs) throws SemanticException {
  OptimizeSparkProcContext context = (OptimizeSparkProcContext) procContext;
  SparkPartitionPruningSinkOperator op = (SparkPartitionPruningSinkOperator) nd;
  SparkPartitionPruningSinkDesc desc = op.getConf();
  if (desc.getStatistics().getDataSize() > context.getConf()
      .getLongVar(ConfVars.SPARK_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE)) {
    OperatorUtils.removeBranch(op);
    // at this point we've found the fork in the op pipeline that has the pruning as a child plan.
    LOG.info("Disabling dynamic pruning for: " + desc.getTableScan().getName()
        + ". Expected data size is too big: " + desc.getStatistics().getDataSize());
  }
  return false;
}