/**
 * Make a best guess at the number of reducers.
 */
private static int getNumberOfReducers(MapredWork mrwork, HiveConf conf) {
  if (mrwork.getReduceWork() == null) {
    return 0;
  }

  if (mrwork.getReduceWork().getNumReduceTasks() >= 0) {
    return mrwork.getReduceWork().getNumReduceTasks();
  }

  return conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
}
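// A minimal usage sketch of the fallback order above: an explicit count in the
// plan wins, otherwise the Hadoop-wide default applies. The driver class below
// is hypothetical (not part of Hive); only calls visible in the method itself
// are assumed.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;

/** Hypothetical driver illustrating getNumberOfReducers; not part of Hive. */
class ReducerCountDemo {
  static void demo(HiveConf conf) {
    MapredWork work = new MapredWork();
    // 1) No ReduceWork attached -> map-only plan:
    //    getNumberOfReducers(work, conf) == 0

    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(4); // explicit count in the plan wins
    work.setReduceWork(rWork);
    // 2) getNumberOfReducers(work, conf) == 4

    rWork.setNumReduceTasks(-1); // unset -> fall back to the Hadoop default
    // 3) getNumberOfReducers(work, conf)
    //      == conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS)
  }
}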
@Override
public boolean hasReduce() {
  MapredWork w = getWork();
  return w.getReduceWork() != null;
}
public static void setMapRedWork(Configuration conf, MapredWork w, Path hiveScratchDir) {
  String useName = conf.get(INPUT_NAME);
  if (useName == null) {
    useName = "mapreduce:" + hiveScratchDir;
  }
  conf.set(INPUT_NAME, useName);
  setMapWork(conf, w.getMapWork(), hiveScratchDir, true);
  if (w.getReduceWork() != null) {
    conf.set(INPUT_NAME, useName);
    setReduceWork(conf, w.getReduceWork(), hiveScratchDir, true);
  }
}
private SMBMapJoinOperator getSMBMapJoinOp(MapredWork work) throws SemanticException {
  if (work != null && work.getReduceWork() != null) {
    Operator<? extends OperatorDesc> reducer = work.getReduceWork().getReducer();
    // Search each map-side operator tree for an SMB map join feeding this reducer.
    for (Operator<? extends OperatorDesc> op : work.getMapWork().getAliasToWork().values()) {
      SMBMapJoinOperator smbMapJoinOp = getSMBMapJoinOp(op, reducer);
      if (smbMapJoinOp != null) {
        return smbMapJoinOp;
      }
    }
  }
  return null;
}
@Override
public Operator<? extends OperatorDesc> getReducer(MapWork mapWork) {
  if (getWork().getMapWork() == mapWork) {
    return getWork().getReduceWork() == null ? null : getWork().getReduceWork().getReducer();
  }
  return null;
}
/**
 * Initialize the current union plan.
 *
 * @param op
 *          the reduce sink operator encountered
 * @param currUnionOp
 *          the union operator being processed
 * @param opProcCtx
 *          processing context
 * @param unionTask
 *          the task that owns the union plan
 */
public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp,
    GenMRProcContext opProcCtx, Task<? extends Serializable> unionTask) throws SemanticException {
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);

  MapredWork plan = (MapredWork) unionTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
      opProcCtx.getOpTaskMap();
  opTaskMap.put(reducer, unionTask);

  plan.setReduceWork(new ReduceWork());
  plan.getReduceWork().setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());

  if (needsTagging(plan.getReduceWork())) {
    plan.getReduceWork().setNeedsTagging(true);
  }

  initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
}
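// For context, needsTagging decides whether reduce-side rows must carry an
// input tag so the reducer can tell its input sides apart. Below is a hedged
// paraphrase of that helper (the exact body may differ between Hive versions;
// it is not part of this section's source).

// Hedged paraphrase: tagging is only required when the reducer has to
// distinguish multiple input sides, i.e. a join (or a demux operator in
// correlation-optimized plans).
private static boolean needsTagging(ReduceWork rWork) {
  return rWork != null
      && (rWork.getReducer() instanceof JoinOperator
          || rWork.getReducer() instanceof DemuxOperator);
}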
private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticException {
  ReduceWork rWrk = mrWrk.getReduceWork();
  if (rWrk == null) {
    return;
  }
  Operator<? extends OperatorDesc> reducer = rWrk.getReducer();
  if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) {
    BaseWork parentWork = mrWrk.getMapWork();
    checkForCrossProduct(taskName, reducer, new ExtractReduceSinkInfo(null).analyze(parentWork));
  }
}
private JoinOperator getJoinOp(MapRedTask task) throws SemanticException {
  MapWork mWork = task.getWork().getMapWork();
  ReduceWork rWork = task.getWork().getReduceWork();
  if (rWork == null) {
    return null;
  }
  Operator<? extends OperatorDesc> reducerOp = rWork.getReducer();
  if (reducerOp instanceof JoinOperator) {
    // Check whether any map-side operator prevents the map-join conversion.
    Map<String, Operator<? extends OperatorDesc>> aliasToWork = mWork.getAliasToWork();
    for (Operator<? extends OperatorDesc> op : aliasToWork.values()) {
      if (!checkOperatorOKMapJoinConversion(op)) {
        return null;
      }
    }
    return (JoinOperator) reducerOp;
  } else {
    return null;
  }
}
private MapRedTask convertSMBTaskToMapJoinTask(MapredWork origWork, int bigTablePosition,
    SMBMapJoinOperator smbJoinOp) throws SemanticException {
  // Deep copy a new mapred work.
  MapredWork newWork = SerializationUtilities.clonePlan(origWork);
  // Create a mapred task for this work.
  MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork);
  // Generate the map join operator; the map join has already been checked.
  MapJoinOperator newMapJoinOp = getMapJoinOperator(newTask, newWork, smbJoinOp, bigTablePosition);

  // The reducer needs to be restored. Consider a query like:
  //   select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
  // The reducer contains a group-by, which needs to be restored.
  ReduceWork rWork = newWork.getReduceWork();

  // Create the local work for this plan.
  MapJoinProcessor.genLocalWorkForMapJoin(newWork, newMapJoinOp, bigTablePosition);

  // Restore the reducer.
  newWork.setReduceWork(rWork);
  return newTask;
}
private void checkMapJoins(MapRedTask mrTsk) throws SemanticException {
  MapredWork mrWrk = mrTsk.getWork();
  MapWork mapWork = mrWrk.getMapWork();
  List<String> warnings = new MapJoinCheck(mrTsk.toString()).analyze(mapWork);
  if (!warnings.isEmpty()) {
    for (String w : warnings) {
      warn(w);
    }
  }
  ReduceWork redWork = mrWrk.getReduceWork();
  if (redWork != null) {
    warnings = new MapJoinCheck(mrTsk.toString()).analyze(redWork);
    if (!warnings.isEmpty()) {
      for (String w : warnings) {
        warn(w);
      }
    }
  }
}
/**
 * Handle a cRS encountered in the pOP(parentTask with RS)-cRS-cOP(no task) case:
 * create a new child task for cRS-cOP and link the two tasks through a temporary
 * file (the parent plan ends in pOP-FS; the child reads it back via TS-cRS-cOP).
 *
 * @param cRS
 *          the reduce sink operator encountered
 * @param opProcCtx
 *          processing context
 */
static void splitPlan(ReduceSinkOperator cRS, GenMRProcContext opProcCtx) throws SemanticException {
  // Generate a new task.
  ParseContext parseCtx = opProcCtx.getParseCtx();
  Task<? extends Serializable> parentTask = opProcCtx.getCurrTask();

  MapredWork childPlan = getMapRedWork(parseCtx);
  Task<? extends Serializable> childTask = TaskFactory.get(childPlan);
  Operator<? extends OperatorDesc> reducer = cRS.getChildOperators().get(0);

  // Add the reducer.
  ReduceWork rWork = new ReduceWork();
  childPlan.setReduceWork(rWork);
  rWork.setReducer(reducer);
  ReduceSinkDesc desc = cRS.getConf();
  rWork.setNumReduceTasks(desc.getNumReducers());

  opProcCtx.getOpTaskMap().put(reducer, childTask);

  splitTasks(cRS, parentTask, childTask, opProcCtx);
}
private static boolean isMergeRequiredForMr(HiveConf hconf,
    FileSinkOperator fsOp, Task<? extends Serializable> currTask) {
  if (fsOp.getConf().isLinkedFileSink()) {
    // If the user has HIVEMERGEMAPREDFILES set to false, the assumption was
    // that the number of reducers is small, so the number of files is small
    // anyway. However, with this optimization we may increase the number of
    // files considerably, so merge aggressively.
    return (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) ||
        hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES));
  }

  // Separate configuration parameters control merging for a map-only job
  // and for a map-reduce job.
  if (currTask.getWork() instanceof MapredWork) {
    ReduceWork reduceWork = ((MapredWork) currTask.getWork()).getReduceWork();
    boolean mergeMapOnly = hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null;
    boolean mergeMapRed = hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && reduceWork != null;
    if (mergeMapOnly || mergeMapRed) {
      return true;
    }
  }
  return false;
}
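// The non-linked-sink branch above boils down to matching the plan shape
// against one of two flags. A hedged restatement as a pure helper follows;
// the helper itself is hypothetical (not part of Hive), and only the two
// ConfVars referenced in the method are assumed.

/** Hypothetical helper restating the non-linked-sink decision above. */
static boolean mergeFlagFor(HiveConf hconf, boolean hasReduceWork) {
  return hasReduceWork
      ? hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES) // hive.merge.mapredfiles
      : hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES);   // hive.merge.mapfiles
}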