private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum". // When there're multiple attempts for a task, Hive will rely on the partitionId // to figure out if the data are duplicate or not when collecting the final outputs // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles) taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_").append(TaskContext.get().attemptNumber()); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum". // When there're multiple attempts for a task, Hive will rely on the partitionId // to figure out if the data are duplicate or not when collecting the final outputs // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles) taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_").append(TaskContext.get().attemptNumber()); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Spark task attempt id is increased by Spark context instead of task, which may introduce // unstable qtest output, since non Hive features depends on this, we always set it to 0 here. taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_0"); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }