/**
 * Builds a {@link MapredWork} whose map and reduce sides are both
 * recovered from the supplied configuration.
 *
 * @param conf configuration holding the serialized map and reduce plans
 * @return a freshly constructed MapredWork populated from {@code conf}
 */
public static MapredWork getMapRedWork(Configuration conf) {
  MapredWork work = new MapredWork();
  work.setMapWork(getMapWork(conf));
  work.setReduceWork(getReduceWork(conf));
  return work;
}
/**
 * Assembles the full map-reduce plan for this configuration: the map work
 * and the reduce work are each deserialized from {@code conf} and attached
 * to a new {@link MapredWork}.
 *
 * @param conf source configuration for both plan halves
 * @return the combined MapredWork
 */
public static MapredWork getMapRedWork(Configuration conf) {
  MapredWork result = new MapredWork();
  result.setMapWork(getMapWork(conf));
  result.setReduceWork(getReduceWork(conf));
  return result;
}
// Detach the reduce side from the plan. NOTE(review): presumably cleared so
// downstream processing treats this as a map-only plan, with the original
// ReduceWork restored by the caller afterwards — confirm against the caller.
newWork.setReduceWork(null);
private MapRedTask convertSMBTaskToMapJoinTask(MapredWork origWork, int bigTablePosition, SMBMapJoinOperator smbJoinOp) throws SemanticException { // deep copy a new mapred work MapredWork newWork = SerializationUtilities.clonePlan(origWork); // create a mapred task for this work MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork); // generate the map join operator; already checked the map join MapJoinOperator newMapJoinOp = getMapJoinOperator(newTask, newWork, smbJoinOp, bigTablePosition); // The reducer needs to be restored - Consider a query like: // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key; // The reducer contains a groupby, which needs to be restored. ReduceWork rWork = newWork.getReduceWork(); // create the local work for this plan MapJoinProcessor.genLocalWorkForMapJoin(newWork, newMapJoinOp, bigTablePosition); // restore the reducer newWork.setReduceWork(rWork); return newTask; }
// Clear the plan's reduce side. NOTE(review): appears intended to make the
// plan map-only for the next processing step; the caller seems responsible
// for restoring the original ReduceWork — verify in context.
newWork.setReduceWork(null);
/** * Met cRS in pOP(parentTask with RS)-cRS-cOP(noTask) case * Create new child task for cRS-cOP and link two tasks by temporary file : pOP-FS / TS-cRS-cOP * * @param cRS * the reduce sink operator encountered * @param opProcCtx * processing context */ static void splitPlan(ReduceSinkOperator cRS, GenMRProcContext opProcCtx) throws SemanticException { // Generate a new task ParseContext parseCtx = opProcCtx.getParseCtx(); Task<? extends Serializable> parentTask = opProcCtx.getCurrTask(); MapredWork childPlan = getMapRedWork(parseCtx); Task<? extends Serializable> childTask = TaskFactory.get(childPlan); Operator<? extends OperatorDesc> reducer = cRS.getChildOperators().get(0); // Add the reducer ReduceWork rWork = new ReduceWork(); childPlan.setReduceWork(rWork); rWork.setReducer(reducer); ReduceSinkDesc desc = cRS.getConf(); childPlan.getReduceWork().setNumReduceTasks(new Integer(desc.getNumReducers())); opProcCtx.getOpTaskMap().put(reducer, childTask); splitTasks(cRS, parentTask, childTask, opProcCtx); }
/**
 * Converts a sort-merge-bucket (SMB) join task into a map-join task.
 * Works on a deep copy of {@code origWork}, swaps the SMB join operator for
 * a map join, generates the local (small-table) work, and restores the
 * original reduce side afterwards.
 *
 * @param origWork         the plan containing the SMB join (left untouched)
 * @param bigTablePosition position of the big (streamed) table in the join
 * @param smbJoinOp        the SMB map-join operator to convert
 * @return a new MapRedTask whose plan performs a map join
 * @throws UnsupportedEncodingException declared for downstream plan utilities
 * @throws SemanticException if the map-join rewrite fails
 */
private MapRedTask convertSMBTaskToMapJoinTask(MapredWork origWork,
    int bigTablePosition,
    SMBMapJoinOperator smbJoinOp)
    throws UnsupportedEncodingException, SemanticException {
  // deep copy a new mapred work
  MapredWork newWork = SerializationUtilities.clonePlan(origWork);
  // create a mapred task for this work
  MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
      .getParseContext().getConf());
  // generate the map join operator; already checked the map join
  MapJoinOperator newMapJoinOp =
      getMapJoinOperator(newTask, newWork, smbJoinOp, bigTablePosition);
  // The reducer needs to be restored - Consider a query like:
  // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
  // The reducer contains a groupby, which needs to be restored.
  ReduceWork rWork = newWork.getReduceWork();
  // create the local work for this plan
  MapJoinProcessor.genLocalWorkForMapJoin(newWork, newMapJoinOp, bigTablePosition);
  // restore the reducer
  newWork.setReduceWork(rWork);
  return newTask;
}
/** * Met cRS in pOP(parentTask with RS)-cRS-cOP(noTask) case * Create new child task for cRS-cOP and link two tasks by temporary file : pOP-FS / TS-cRS-cOP * * @param cRS * the reduce sink operator encountered * @param opProcCtx * processing context */ static void splitPlan(ReduceSinkOperator cRS, GenMRProcContext opProcCtx) throws SemanticException { // Generate a new task ParseContext parseCtx = opProcCtx.getParseCtx(); Task<? extends Serializable> parentTask = opProcCtx.getCurrTask(); MapredWork childPlan = getMapRedWork(parseCtx); Task<? extends Serializable> childTask = TaskFactory.get(childPlan, parseCtx .getConf()); Operator<? extends OperatorDesc> reducer = cRS.getChildOperators().get(0); // Add the reducer ReduceWork rWork = new ReduceWork(); childPlan.setReduceWork(rWork); rWork.setReducer(reducer); ReduceSinkDesc desc = cRS.getConf(); childPlan.getReduceWork().setNumReduceTasks(new Integer(desc.getNumReducers())); opProcCtx.getOpTaskMap().put(reducer, childTask); splitTasks(cRS, parentTask, childTask, opProcCtx); }
/**
 * Initialize the current union plan.
 *
 * @param op          the reduce sink operator encountered
 * @param currUnionOp the union operator being processed
 * @param opProcCtx   processing context
 * @param unionTask   the task the union plan belongs to
 * @throws SemanticException if union plan initialization fails
 */
public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp,
    GenMRProcContext opProcCtx,
    Task<? extends Serializable> unionTask) throws SemanticException {
  // The reducer is the sole child of the reduce sink.
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);

  MapredWork plan = (MapredWork) unionTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
      opProcCtx.getOpTaskMap();
  opTaskMap.put(reducer, unionTask);

  ReduceWork rWork = new ReduceWork();
  plan.setReduceWork(rWork);
  // Fix: the original called setReducer(reducer) twice; once is sufficient.
  rWork.setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  rWork.setNumReduceTasks(desc.getNumReducers());

  if (needsTagging(rWork)) {
    rWork.setNeedsTagging(true);
  }

  initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
}
/**
 * Builds test plan #1: the map side feeds "key"/"value" through a single
 * ReduceSink; the reduce side selects the value column and writes it to
 * mapredplan1.out via a FileSink.
 */
@SuppressWarnings("unchecked")
private void populateMapRedPlan1(Table src) throws SemanticException {
  // Two synthetic output column names: _col0, _col1.
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  // map-side work
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils
      .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")),
      Utilities.makeList(getStringColumn("value")), outputColumns, true,
      -1, 1, -1, AcidUtils.Operation.NOT_ACID));

  addMapWork(mr, src, "a", op1);
  // Single-reducer reduce work; key/value serialization is taken from the sink.
  ReduceWork rWork = new ReduceWork();
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  mr.setReduceWork(rWork);

  // reduce side work
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(tmpdir + File.separator + "mapredplan1.out"),
      Utilities.defaultTd, false));

  // Select only the VALUE._col1 column on the reduce side.
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
  List<String> colNames = new ArrayList<String>();
  colNames.add(HiveConf.getColumnInternalName(2));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3);

  rWork.setReducer(op2);
}
// Attach a fresh ReduceWork to the plan, make the given operator its reducer,
// and grab the reduce sink's descriptor for subsequent configuration.
plan.setReduceWork(new ReduceWork());
plan.getReduceWork().setReducer(reducer);
ReduceSinkDesc desc = op.getConf();
/**
 * Initialize the current union plan.
 *
 * @param op          the reduce sink operator encountered
 * @param currUnionOp the union operator being processed
 * @param opProcCtx   processing context
 * @param unionTask   the task the union plan belongs to
 * @throws SemanticException if union plan initialization fails
 */
public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp,
    GenMRProcContext opProcCtx,
    Task<? extends Serializable> unionTask) throws SemanticException {
  // The reducer is the sole child of the reduce sink.
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);

  MapredWork plan = (MapredWork) unionTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
      opProcCtx.getOpTaskMap();
  opTaskMap.put(reducer, unionTask);

  ReduceWork rWork = new ReduceWork();
  plan.setReduceWork(rWork);
  // Fix: the original called setReducer(reducer) twice; once is sufficient.
  rWork.setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  rWork.setNumReduceTasks(desc.getNumReducers());

  if (needsTagging(rWork)) {
    rWork.setNeedsTagging(true);
  }

  initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
}
// Install a new ReduceWork on the plan with the given operator as reducer;
// desc carries the reduce sink's settings for the statements that follow.
plan.setReduceWork(new ReduceWork());
plan.getReduceWork().setReducer(reducer);
ReduceSinkDesc desc = op.getConf();
/**
 * Builds test plan #2: the map side sinks both "key" and "value"; the reduce
 * side filters rows (key "0") and selects key/value into mapredplan2.out.
 */
@SuppressWarnings("unchecked")
private void populateMapRedPlan2(Table src) throws Exception {
  // Two synthetic output column names: _col0, _col1.
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  // map-side work
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils
      .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")),
      Utilities
      .makeList(getStringColumn("key"), getStringColumn("value")),
      outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));

  addMapWork(mr, src, "a", op1);
  // Single-reducer reduce work; serialization layout comes from the sink.
  ReduceWork rWork = new ReduceWork();
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  mr.setReduceWork(rWork);

  // reduce side work
  Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(tmpdir + File.separator + "mapredplan2.out"),
      Utilities.defaultTd, false));

  // Filter on the test predicate for "0" before the file sink.
  Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);

  // Project KEY.reducesinkkey0 and VALUE._col1.
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);

  rWork.setReducer(op2);
}
/**
 * Builds test plan #5: a map-side Select feeds a ReduceSink keyed on
 * column "0"; the reduce side projects key/value into mapredplan5.out.
 */
@SuppressWarnings("unchecked")
private void populateMapRedPlan5(Table src) throws SemanticException {
  // map-side work
  // Two synthetic output column names: _col0, _col1.
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  Operator<ReduceSinkDesc> op0 = OperatorFactory.get(ctx, PlanUtils
      .getReduceSinkDesc(Utilities.makeList(getStringColumn("0")), Utilities
      .makeList(getStringColumn("0"), getStringColumn("1")),
      outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));

  // Select key/value before the sink, so the sink's inputs are named "0"/"1".
  Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(Utilities
      .makeList(getStringColumn("key"), getStringColumn("value")),
      outputColumns), op0);

  addMapWork(mr, src, "a", op4);
  // Single-reducer reduce work; serialization layout comes from the sink.
  ReduceWork rWork = new ReduceWork();
  mr.setReduceWork(rWork);
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op0.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());

  // reduce side work
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(tmpdir + File.separator + "mapredplan5.out"),
      Utilities.defaultTd, false));

  // Project KEY.reducesinkkey0 and VALUE._col1.
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);

  rWork.setReducer(op2);
}
// Copy the value serialization layout from the sink's conf, force a single
// reducer, and install the reduce work on the plan.
rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
rWork.setNumReduceTasks(Integer.valueOf(1));
mr.setReduceWork(rWork);
// Install the reduce work on the plan, force a single reducer, and copy the
// key serialization layout from the sink's conf.
mr.setReduceWork(rWork);
rWork.setNumReduceTasks(Integer.valueOf(1));
rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
// Register one value descriptor per input tag (op1 then op2).
// NOTE(review): two tags suggests a two-input (join-style) reduce — confirm
// against the surrounding plan builder.
rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
mr.setReduceWork(rWork);
rWork.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo());
/**
 * Reconstructs a {@link MapredWork} from a configuration by recovering its
 * map work and reduce work separately and combining them.
 *
 * @param conf configuration carrying both serialized plan halves
 * @return the reconstructed MapredWork
 */
public static MapredWork getMapRedWork(Configuration conf) {
  MapredWork mapredWork = new MapredWork();
  mapredWork.setMapWork(getMapWork(conf));
  mapredWork.setReduceWork(getReduceWork(conf));
  return mapredWork;
}
/**
 * Converts a sort-merge-bucket (SMB) join task into a map-join task.
 * Works on a deep copy of {@code origWork}, swaps the SMB join operator for
 * a map join, generates the local (small-table) work, and restores the
 * original reduce side afterwards.
 *
 * @param origWork         the plan containing the SMB join (left untouched)
 * @param bigTablePosition position of the big (streamed) table in the join
 * @param smbJoinOp        the SMB map-join operator to convert
 * @return a new MapRedTask whose plan performs a map join
 * @throws UnsupportedEncodingException declared for downstream plan utilities
 * @throws SemanticException if the map-join rewrite fails
 */
private MapRedTask convertSMBTaskToMapJoinTask(MapredWork origWork,
    int bigTablePosition,
    SMBMapJoinOperator smbJoinOp)
    throws UnsupportedEncodingException, SemanticException {
  // deep copy a new mapred work
  MapredWork newWork = Utilities.clonePlan(origWork);
  // create a mapred task for this work
  MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
      .getParseContext().getConf());
  // generate the map join operator; already checked the map join
  MapJoinOperator newMapJoinOp =
      getMapJoinOperator(newTask, newWork, smbJoinOp, bigTablePosition);
  // The reducer needs to be restored - Consider a query like:
  // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
  // The reducer contains a groupby, which needs to be restored.
  ReduceWork rWork = newWork.getReduceWork();
  // create the local work for this plan
  MapJoinProcessor.genLocalWorkForMapJoin(newWork, newMapJoinOp, bigTablePosition);
  // restore the reducer
  newWork.setReduceWork(rWork);
  return newTask;
}