/**
 * Builds a fresh, empty {@link MapredWork} plan whose map side is configured
 * from the supplied configuration. The plan will not contain the
 * name-to-split-sample information held in the parse context.
 *
 * @param conf configuration consulted for
 *          {@code HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS}
 * @return the new plan
 */
public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
  final MapredWork plan = new MapredWork();
  final MapWork mapSide = plan.getMapWork();

  // Carry the "mapper cannot span partitions" switch over from the configuration.
  mapSide.setMapperCannotSpanPartns(
      conf.getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS));

  // Start every lookup table empty; LinkedHashMap keeps insertion order.
  mapSide.setPathToAliases(new LinkedHashMap<Path, ArrayList<String>>());
  mapSide.setPathToPartitionInfo(new LinkedHashMap<Path, PartitionDesc>());
  mapSide.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());

  return plan;
}
/**
 * Loads the plan stored in the job configuration and forwards the output-spec
 * check to every {@link FileSinkOperator} it contains.
 *
 * @param ignored file system handle, passed through to each file sink
 * @param job job configuration the plan is read from
 * @throws IOException declared by the overridden method; may be raised by a sink's check
 */
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
  final MapredWork plan = Utilities.getMapRedWork(job);
  for (Operator<?> candidate : plan.getAllOperators()) {
    // Only file sinks know how to validate their output location.
    if (!(candidate instanceof FileSinkOperator)) {
      continue;
    }
    ((FileSinkOperator) candidate).checkOutputSpecs(ignored, job);
  }
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Assembles a {@link MapredWork} from the map work and reduce work obtained
 * for the given configuration.
 *
 * @param conf configuration handed to {@code getMapWork} and {@code getReduceWork}
 * @return a new plan holding both halves
 */
public static MapredWork getMapRedWork(Configuration conf) {
  final MapredWork combined = new MapredWork();

  // Map side first, then reduce side (same lookup order as before).
  combined.setMapWork(getMapWork(conf));
  combined.setReduceWork(getReduceWork(conf));

  return combined;
}
/**
 * Stores both halves of a map-reduce plan through {@code setMapWork} and
 * {@code setReduceWork}, making sure {@code INPUT_NAME} is set beforehand.
 *
 * @param conf configuration to update
 * @param w plan whose map work (and reduce work, when present) is stored
 * @param hiveScratchDir scratch directory; also used to derive a default input name
 */
public static void setMapRedWork(Configuration conf, MapredWork w, Path hiveScratchDir) {
  // Keep an input name that is already configured; otherwise derive one from the scratch dir.
  final String configuredName = conf.get(INPUT_NAME);
  final String useName =
      (configuredName != null) ? configuredName : "mapreduce:" + hiveScratchDir;

  conf.set(INPUT_NAME, useName);
  setMapWork(conf, w.getMapWork(), hiveScratchDir, true);

  if (w.getReduceWork() == null) {
    return;
  }

  // NOTE(review): the name is written again before the reduce side is stored,
  // presumably because setMapWork may alter it - confirm.
  conf.set(INPUT_NAME, useName);
  setReduceWork(conf, w.getReduceWork(), hiveScratchDir, true);
}
// Excerpt from a larger method (header not visible); the statements below may
// not be contiguous in the original, e.g. `clonePlan` is declared elsewhere.

// The current task's work is treated as a map-reduce plan.
MapredWork currPlan = (MapredWork) currTask.getWork();
// Work on a clone of the reduce-side key descriptor rather than the original.
TableDesc keyTblDesc = (TableDesc) currPlan.getReduceWork().getKeyDesc().clone();
// Column names are read from the cloned key descriptor's properties.
List<String> joinKeys = Utilities .getColumnNames(keyTblDesc.getProperties());
// Take only the map work of a freshly created plan and register the partition for the alias.
MapWork newPlan = PlanUtils.getMapRedWork().getMapWork();
newPlan.getAliasToPartnInfo().put(alias, part);
// The reducer of the cloned plan is expected to be a join operator.
Operator<? extends OperatorDesc> reducer = clonePlan.getReduceWork().getReducer();
assert reducer instanceof JoinOperator;
JoinOperator cloneJoinOp = (JoinOperator) reducer;
newPlan.setInputformat(HiveInputFormat.class.getName());
// Wrap the new map work in its own MapredWork.
MapredWork w = new MapredWork();
w.setMapWork(newPlan);
/**
 * Runs a spied {@link MapRedTask} with submit-via-child enabled while the
 * current UGI uses PROXY authentication, and checks that the environment
 * passed to {@code spawn} contains {@code HADOOP_PROXY_USER=<current user>}.
 */
@Test
public void mrTaskSumbitViaChildWithImpersonation() throws IOException, LoginException {
  Utils.getUGI().setAuthenticationMethod(PROXY);

  // Driver context backed by a mocked Context that only knows its local tmp path.
  Context mockedContext = Mockito.mock(Context.class);
  when(mockedContext.getLocalTmpPath())
      .thenReturn(new Path(System.getProperty("java.io.tmpdir")));
  DriverContext driverContext = new DriverContext(mockedContext);

  // Enable child-process submission on the query's configuration.
  QueryState queryState = new QueryState.Builder().build();
  HiveConf hiveConf = queryState.getConf();
  hiveConf.setBoolVar(HiveConf.ConfVars.SUBMITVIACHILD, true);

  // Minimal plan: just a mocked map work.
  MapredWork plan = new MapredWork();
  plan.setMapWork(Mockito.mock(MapWork.class));

  // Spy on the task so the spawn(...) invocation can be verified afterwards.
  MapRedTask task = Mockito.spy(new MapRedTask());
  task.setWork(plan);
  task.initialize(queryState, null, driverContext, null);

  task.jobExecHelper = Mockito.mock(HadoopJobExecHelper.class);
  when(task.jobExecHelper.progressLocal(Mockito.any(Process.class), Mockito.anyString()))
      .thenReturn(0);

  task.execute(driverContext);

  // Capture the String[] handed to spawn and look for the proxy-user entry.
  ArgumentCaptor<String[]> envCaptor = ArgumentCaptor.forClass(String[].class);
  verify(task).spawn(Mockito.anyString(), Mockito.anyString(), envCaptor.capture());

  String expected = "HADOOP_PROXY_USER=" + Utils.getUGI().getUserName();
  Assert.assertTrue(Arrays.asList(envCaptor.getValue()).contains(expected));
}
private MapRedTask convertSMBTaskToMapJoinTask(MapredWork origWork, int bigTablePosition, SMBMapJoinOperator smbJoinOp) throws SemanticException { // deep copy a new mapred work MapredWork newWork = SerializationUtilities.clonePlan(origWork); // create a mapred task for this work MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork); // generate the map join operator; already checked the map join MapJoinOperator newMapJoinOp = getMapJoinOperator(newTask, newWork, smbJoinOp, bigTablePosition); // The reducer needs to be restored - Consider a query like: // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key; // The reducer contains a groupby, which needs to be restored. ReduceWork rWork = newWork.getReduceWork(); // create the local work for this plan MapJoinProcessor.genLocalWorkForMapJoin(newWork, newMapJoinOp, bigTablePosition); // restore the reducer newWork.setReduceWork(rWork); return newTask; }
// Excerpt from a larger method (header not visible); `work`, `job`,
// `emptyScratchDir` and `ctx` are declared outside this excerpt.

// Split the plan into its map and reduce halves.
MapWork mWork = work.getMapWork();
ReduceWork rWork = work.getReduceWork();
// Let the plan configure the job configuration.
work.configureJobConf(job);
// Compute the input paths for the map work and register them on the job.
List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
Utilities.setInputPaths(job, inputPaths);
/**
 * Registers the temporary directories of the current union operator as inputs
 * of the current task's plan. A directory the plan already knows is left alone.
 *
 * @param opProcCtx processing context supplying the current union operator and
 *          its union context
 * @param currTask task whose work is the {@link MapredWork} to update
 * @param local asserted to be {@code false}
 */
public static void initUnionPlan(GenMRProcContext opProcCtx,
    Task<? extends Serializable> currTask, boolean local) {
  MapredWork plan = (MapredWork) currTask.getWork();

  UnionOperator unionOp = opProcCtx.getCurrUnionOp();
  assert unionOp != null;
  GenMRUnionCtx unionCtx = opProcCtx.getUnionTask(unionOp);
  assert unionCtx != null;

  // The two lists are parallel: entry i of one describes entry i of the other.
  List<String> tmpDirs = unionCtx.getTaskTmpDir();
  List<TableDesc> tmpTableDescs = unionCtx.getTTDesc();
  assert !tmpDirs.isEmpty() && !tmpTableDescs.isEmpty();
  assert tmpDirs.size() == tmpTableDescs.size();
  int count = tmpDirs.size();
  assert !local;

  int idx = 0;
  while (idx < count) {
    String tmpDir = tmpDirs.get(idx);
    TableDesc tmpTableDesc = tmpTableDescs.get(idx);
    idx++;

    if (plan.getPathToAliases().get(tmpDir) != null) {
      continue; // already registered
    }

    // The temporary directory doubles as its own alias.
    ArrayList<String> aliasList = new ArrayList<String>();
    aliasList.add(tmpDir);
    plan.getPathToAliases().put(tmpDir, aliasList);
    plan.getPathToPartitionInfo().put(tmpDir, new PartitionDesc(tmpTableDesc, null));
    plan.getAliasToWork().put(tmpDir, unionOp);
  }
}
// Excerpt from a larger method (header not visible). The text is truncated:
// the `new MapredLocalWork(` call is cut off mid-expression and the closing
// braces are missing, so this fragment is not valid Java on its own.

// Path not registered yet: add it with its alias, partition info and top operator.
if (plan.getPathToAliases().get(path) == null) {
  plan.getPathToAliases().put(path, new ArrayList<String>());
  plan.getPathToAliases().get(path).add(alias);
  plan.getPathToPartitionInfo().put(path, new PartitionDesc(tt_desc, null));
  plan.getAliasToWork().put(alias, topOp);
} else {
  // Path already registered: the alias goes into the map-side local work instead.
  MapredLocalWork localPlan = plan.getMapLocalWork();
  if (localPlan == null) {
    localPlan = new MapredLocalWork(
  localPlan.getAliasToWork().put(alias, topOp);
  localPlan.getAliasToFetchWork().put(alias, new FetchWork(alias, tt_desc));
  plan.setMapLocalWork(localPlan);
/** * Create a MapredWork based on input path, the top operator and the input * table descriptor. * @param conf * @param topOp the table scan operator that is the root of the MapReduce task. * @param fsDesc the file sink descriptor that serves as the input to this merge task. * @param parentMR the parent MapReduce work * @param parentFS the last FileSinkOperator in the parent MapReduce work * @return the MapredWork */ private MapredWork createMergeTask(HiveConf conf, Operator<? extends Serializable> topOp, FileSinkDesc fsDesc) { ArrayList<String> aliases = new ArrayList<String>(); String inputDir = fsDesc.getDirName(); TableDesc tblDesc = fsDesc.getTableInfo(); aliases.add(inputDir); // dummy alias: just use the input path // constructing the default MapredWork MapredWork cplan = GenMapRedUtils.getMapRedWork(conf); cplan.getPathToAliases().put(inputDir, aliases); cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null)); cplan.setNumReduceTasks(0); cplan.getAliasToWork().put(inputDir, topOp); cplan.setMapperCannotSpanPartns(true); return cplan; } /**
public void cleanUpInputFileChangedOp() throws HiveException { Path fpath = new Path((new Path(this.getExecContext().getCurrentInputFile())) .toUri().getPath()); for (String onefile : conf.getPathToAliases().keySet()) { Path onepath = new Path(new Path(onefile).toUri().getPath()); // check for the operators who will process rows coming to this Map // Operator if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) { String onealias = conf.getPathToAliases().get(onefile).get(0); Operator<? extends Serializable> op = conf.getAliasToWork().get(onealias); LOG.info("Processing alias " + onealias + " for file " + onefile); MapInputPath inp = new MapInputPath(onefile, onealias, op); setInspectorInput(inp); break; } } }
// Excerpt from a larger method (header not visible). Braces are unbalanced and
// the statements may not be contiguous in the original; this fragment is not
// valid Java on its own.

Properties props;
// Empty path: properties, output format and the non-native flag come from the
// partition descriptor registered for the path.
if (isEmptyPath) {
  PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
  props = partDesc.getProperties();
  outFileFormat = partDesc.getOutputFileFormatClass();
  nonNative = partDesc.getTableDesc().isNonNative();
} else {
  // Otherwise they come from the table descriptor of the alias's partition info.
  TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc();
  props = tableDesc.getProperties();
  outFileFormat = tableDesc.getOutputFileFormatClass();
// The map read from the plan is set back on it.
LinkedHashMap<String, ArrayList<String>> pathToAliases = work.getPathToAliases();
work.setPathToAliases(pathToAliases);
LinkedHashMap<String, PartitionDesc> pathToPartitionInfo = work.getPathToPartitionInfo();
if (isEmptyPath) {
  // Re-key the existing partition info from the old path to the new path.
  pathToPartitionInfo.put(newPath.toUri().toString(), pathToPartitionInfo.get(path));
  pathToPartitionInfo.remove(path);
} else {
  // Register a clone of the alias's partition info under the new path.
  PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone();
  pathToPartitionInfo.put(newPath.toUri().toString(), pDesc);
work.setPathToPartitionInfo(pathToPartitionInfo);
/**
 * Returns the map work of this task's plan as a single-element collection.
 *
 * @return an immutable collection holding exactly the plan's map work
 */
@Override
public Collection<MapWork> getMapWork() {
  final MapWork onlyMapWork = getWork().getMapWork();
  return Collections.singleton(onlyMapWork);
}
/**
 * Loads the map-reduce plan for the job and caches both the plan and its
 * path-to-partition mapping in this object's fields.
 *
 * @param job job configuration the plan is read from
 */
protected void init(JobConf job) {
  final MapredWork loadedPlan = Utilities.getMapRedWork(job);
  mrwork = loadedPlan;
  pathToPartitionInfo = loadedPlan.getPathToPartitionInfo();
}
// Only the opening of this method is visible; its body continues past this excerpt.
public void rework(HiveConf job, MapredWork work) throws IOException {
// Path -> partition descriptor mapping read from the plan.
Map<String, PartitionDesc> pathToParts = work.getPathToPartitionInfo();
// Scratch collections, initially empty; judging by their names they collect
// paths to drop and path/partition pairs to add - TODO confirm against the full method.
List<String> toRemovePaths = new ArrayList<String>();
Map<String, PartitionDesc> toAddPathToPart = new HashMap<String, PartitionDesc>();
// Path -> aliases mapping read from the plan.
Map<String, ArrayList<String>> pathToAliases = work.getPathToAliases();
@SuppressWarnings("unchecked") private void populateMapRedPlan1(Table src) throws SemanticException { ArrayList<String> outputColumns = new ArrayList<String>(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); } // map-side work Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities.makeList(getStringColumn("value")), outputColumns, true, -1, 1, -1, AcidUtils.Operation.NOT_ACID)); addMapWork(mr, src, "a", op1); ReduceWork rWork = new ReduceWork(); rWork.setNumReduceTasks(Integer.valueOf(1)); rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); mr.setReduceWork(rWork); // reduce side work Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan1.out"), Utilities.defaultTd, false)); List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>(); cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1))); List<String> colNames = new ArrayList<String>(); colNames.add(HiveConf.getColumnInternalName(2)); Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3); rWork.setReducer(op2); }
/** * Clones using the powers of XML. Do not use unless necessary. * @param plan The plan. * @return The clone. */ public static MapredWork clonePlan(MapredWork plan) { // TODO: need proper clone. Meanwhile, let's at least keep this horror in one place PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN); Operator<?> op = plan.getAnyOperator(); CompilationOpContext ctx = (op == null) ? null : op.getCompilationOpContext(); ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); serializePlan(plan, baos, true); MapredWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), MapredWork.class, true); // Restore the context. for (Operator<?> newOp : newPlan.getAllOperators()) { newOp.setCompilationOpContext(ctx); } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN); return newPlan; }
/**
 * Creates a new, default-constructed {@link MapredWork}.
 *
 * @return the new plan
 */
@SuppressWarnings("nls")
public static MapredWork getMapRedWork() {
  final MapredWork emptyPlan = new MapredWork();
  return emptyPlan;
}
/** * Update counters relevant to this task. */ private void updateCounters(Counters ctrs, RunningJob rj) throws IOException { mapProgress = Math.round(rj.mapProgress() * 100); reduceProgress = Math.round(rj.reduceProgress() * 100); taskCounters.put("CNTR_NAME_" + getId() + "_MAP_PROGRESS", Long.valueOf(mapProgress)); taskCounters.put("CNTR_NAME_" + getId() + "_REDUCE_PROGRESS", Long.valueOf(reduceProgress)); if (ctrs == null) { // hadoop might return null if it cannot locate the job. // we may still be able to retrieve the job status - so ignore return; } for (Operator<? extends Serializable> op : work.getAliasToWork().values()) { op.updateCounters(ctrs); } if (work.getReducer() != null) { work.getReducer().updateCounters(ctrs); } }