/** * Set key and value descriptor * @param work RedueWork * @param rs ReduceSinkOperator */ public static void setKeyAndValueDesc(ReduceWork work, ReduceSinkOperator rs) { work.setKeyDesc(rs.getConf().getKeySerializeInfo()); int tag = Math.max(0, rs.getConf().getTag()); List<TableDesc> tagToSchema = work.getTagToValueDesc(); while (tag + 1 > tagToSchema.size()) { tagToSchema.add(null); } tagToSchema.set(tag, rs.getConf().getValueSerializeInfo()); }
/** * Set key and value descriptor * @param work RedueWork * @param rs ReduceSinkOperator */ public static void setKeyAndValueDesc(ReduceWork work, ReduceSinkOperator rs) { work.setKeyDesc(rs.getConf().getKeySerializeInfo()); int tag = Math.max(0, rs.getConf().getTag()); List<TableDesc> tagToSchema = work.getTagToValueDesc(); while (tag + 1 > tagToSchema.size()) { tagToSchema.add(null); } tagToSchema.set(tag, rs.getConf().getValueSerializeInfo()); }
@SuppressWarnings("unchecked") private void populateMapRedPlan1(Table src) throws SemanticException { ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); } // map-side work Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities.makeList(getStringColumn("value")), outputColumns, true, -1, 1, -1, AcidUtils.Operation.NOT_ACID)); addMapWork(mr, src, "a", op1); ReduceWork rWork = new ReduceWork(); rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); mr.setReduceWork(rWork); // reduce side work Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan1.out"), Utilities.defaultTd, false)); List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>(); cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1))); List<String> colNames = new ArrayList<String>(); colNames.add(HiveConf.getColumnInternalName(2)); Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3); rWork.setReducer(op2); }
@SuppressWarnings("unchecked") private void populateMapRedPlan2(Table src) throws Exception { ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); } // map-side work Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities .makeList(getStringColumn("key"), getStringColumn("value")), outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID)); addMapWork(mr, src, "a", op1); ReduceWork rWork = new ReduceWork(); rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); mr.setReduceWork(rWork); // reduce side work Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan2.out"), Utilities.defaultTd, false)); Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4); List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>(); cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0)); cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1))); Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3); rWork.setReducer(op2); }
@SuppressWarnings("unchecked") private void populateMapRedPlan5(Table src) throws SemanticException { // map-side work ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); } Operator<ReduceSinkDesc> op0 = OperatorFactory.get(ctx, PlanUtils .getReduceSinkDesc(Utilities.makeList(getStringColumn("0")), Utilities .makeList(getStringColumn("0"), getStringColumn("1")), outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID)); Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(Utilities .makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0); addMapWork(mr, src, "a", op4); ReduceWork rWork = new ReduceWork(); mr.setReduceWork(rWork); rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setKeyDesc(op0.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo()); // reduce side work Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan5.out"), Utilities.defaultTd, false)); List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>(); cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0)); cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1))); Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3); rWork.setReducer(op2); }
mr.setReduceWork(rWork); rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); rWork.setNumReduceTasks(Integer.valueOf(1));
rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setNeedsTagging(true); rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
/** * Set key and value descriptor * @param work RedueWork * @param rs ReduceSinkOperator */ public static void setKeyAndValueDesc(ReduceWork work, ReduceSinkOperator rs) { work.setKeyDesc(rs.getConf().getKeySerializeInfo()); int tag = Math.max(0, rs.getConf().getTag()); List<TableDesc> tagToSchema = work.getTagToValueDesc(); while (tag + 1 > tagToSchema.size()) { tagToSchema.add(null); } tagToSchema.set(tag, rs.getConf().getValueSerializeInfo()); }