public ParquetRecordWriterWrapper(
    final OutputFormat<Void, ParquetHiveRecord> realOutputFormat,
    final JobConf jobConf,
    final String name,
    final Progressable progress,
    Properties tableProperties) throws IOException {
  try {
    // create a TaskInputOutputContext
    TaskAttemptID taskAttemptID = TaskAttemptID.forName(jobConf.get("mapred.task.id"));
    if (taskAttemptID == null) {
      taskAttemptID = new TaskAttemptID();
    }
    taskContext = ContextUtil.newTaskAttemptContext(jobConf, taskAttemptID);

    LOG.info("initialize serde with table properties.");
    initializeSerProperties(taskContext, tableProperties);

    LOG.info("creating real writer to write at " + name);
    realWriter = ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name));
    LOG.info("real writer: " + realWriter);
  } catch (final InterruptedException e) {
    throw new IOException(e);
  }
}
static synchronized String getOutputName(TaskAttemptContext context) {
  return context.getConfiguration().get("mapreduce.output.basename", "part") + "-"
      + NUMBER_FORMAT.format(context.getTaskAttemptID().getTaskID().getId());
}
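The NUMBER_FORMAT field is not shown above; a minimal sketch of how such a zero-padded, non-grouping formatter is commonly declared (an assumption for illustration, mirroring the usual five-digit part-file naming such as part-00003):

// Assumed declaration for NUMBER_FORMAT: pads the task id to five digits
// without grouping separators, yielding output names like "part-00003".
private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
static {
  NUMBER_FORMAT.setMinimumIntegerDigits(5);
  NUMBER_FORMAT.setGroupingUsed(false);
}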
try (Closer closer = Closer.create()) {
  this.isSpeculativeEnabled = isSpeculativeExecutionEnabled(
      HadoopUtils.getStateFromConf(context.getConfiguration()).getProperties());
  this.fs = FileSystem.get(context.getConfiguration());
  this.taskStateStore = new FsStateStore<>(this.fs,
      FileOutputFormat.getOutputPath(context).toUri().getPath(), TaskState.class);

  String jobStateFileName = context.getConfiguration().get(ConfigurationKeys.JOB_STATE_DISTRIBUTED_CACHE_NAME);
  boolean foundStateFile = false;
  for (Path dcPath : DistributedCache.getLocalCacheFiles(context.getConfiguration())) {
    if (dcPath.getName().equals(jobStateFileName)) {
      SerializationUtils.deserializeStateFromInputStream(
          closer.register(new FileInputStream(dcPath.toUri().getPath())), this.jobState);
      foundStateFile = true;
      break;
    }
  }

  // ...
  configuration.set(entry.getKey(), entry.getValue().unwrapped().toString());
  // ...
  if (Boolean.parseBoolean(configuration.get(ConfigurationKeys.METRICS_ENABLED_KEY,
      ConfigurationKeys.DEFAULT_METRICS_ENABLED))) {
    this.jobMetrics = Optional.of(JobMetrics.get(this.jobState));
    this.jobMetrics.get().startMetricReportingWithFileSuffix(
        HadoopUtils.getStateFromConf(configuration), context.getTaskAttemptID().toString());
  }
}
@Override
public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext(Configuration conf,
    final Progressable progressable) {
  TaskAttemptID taskAttemptId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
  if (taskAttemptId == null) {
    // If the caller is not within a mapper/reducer (e.g. when reading from the table via CliDriver),
    // then TaskAttemptID.forName() may return null. Fall back to using the default constructor.
    taskAttemptId = new TaskAttemptID();
  }
  return new TaskAttemptContextImpl(conf, taskAttemptId) {
    @Override
    public void progress() {
      progressable.progress();
    }
  };
}
/**
 * @param ctx Context for IO operations.
 */
public HadoopV2Context(HadoopV2TaskContext ctx) {
  super(ctx.jobConf(), ctx.jobContext().getJobID());

  taskAttemptID = ctx.attemptId();

  conf.set(MRJobConfig.ID, taskAttemptID.getJobID().toString());
  conf.set(MRJobConfig.TASK_ID, taskAttemptID.getTaskID().toString());
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptID.toString());

  output = ctx.output();
  input = ctx.input();

  this.ctx = ctx;
}
FileOutputFormat.setOutputPath(job, outDir);

Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);

JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
assertFalse("task temp dir still exists", expectedFile.exists());
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();

  long offset = 0;
  while (offset < testFileSize) {
    FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);

    while (reader.nextKeyValue()) {
      records.add(reader.getCurrentValue().toString());
    }
    offset += splitSize;
  }
  return records;
}
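A minimal usage sketch for the helper above, assuming a test resource named records.txt on the classpath; the resource name, split size, and expected record count are illustrative placeholders, not values from the original test:

// Hypothetical invocation of readRecords(); "records.txt" and the expected
// count of 4 are placeholder values for illustration only.
@Test
public void readsAllRecordsAcrossSmallSplits() throws IOException {
  URL testFileUrl = getClass().getClassLoader().getResource("records.txt");
  ArrayList<String> records = readRecords(testFileUrl, 16); // 16-byte splits
  assertEquals(4, records.size());
}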
private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber,
    long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
  Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
  Path testFile = new Path(testDir, "test_rcfile");
  fs.delete(testFile, true);

  Configuration cloneConf = new Configuration(conf);
  RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
  cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);

  Configuration jonconf = new Configuration(cloneConf);
  jonconf.set("mapred.input.dir", testDir.toString());
  JobContext context = new Job(jonconf);
  HiveConf.setLongVar(context.getConfiguration(), ...); // remaining arguments elided
  // ...
  for (int i = 0; i < splits.size(); i++) {
    TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim()
        .createTaskAttemptContext(jonconf, new TaskAttemptID());
    RecordReader<LongWritable, BytesRefArrayWritable> rr =
        inputFormat.createRecordReader(splits.get(i), tac);
    rr.initialize(splits.get(i), tac);
    // ...
  }
}
@Override
public void setup(Context context) throws IOException {
  Configuration conf = context.getConfiguration();

  verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);

  filesGroup = conf.get(CONF_FILES_GROUP);
  filesUser = conf.get(CONF_FILES_USER);
  filesMode = (short) conf.getInt(CONF_FILES_MODE, 0);
  outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
  inputRoot = new Path(conf.get(CONF_INPUT_ROOT));

  inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
  outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);

  // ...
  context.getCounter(c).increment(0);

  if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
    testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
    testing.injectedFailureCount = context.getTaskAttemptID().getId();
  }
}
FileOutputFormat.setOutputPath(job, outDir);

Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
conf.setBoolean(FileOutputCommitter.FILEOUTPUTCOMMITTER_TASK_CLEANUP_ENABLED, taskCleanup);

JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

// ...
    new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
File taskOutputDir = new File(
    Path.getPathWithoutSchemeAndAuthority(committer.getWorkPath()).toString());
    throws FileNotFoundException, IllegalArgumentException, IOException {
  Configuration conf = new Configuration(false);
  conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
  TaskAttemptID tid0 = new TaskAttemptID("1363718006656", 1, TaskType.REDUCE, 14, 3);

  Path p = spy(new Path("/user/hadoop/out"));
  Path a = new Path("hdfs://user/hadoop/out");
  Path p0 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_0");
  Path p1 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_1");
  Path p2 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000013_0");

  when(context.getTaskAttemptID()).thenReturn(tid0);
  when(context.getConfiguration()).thenReturn(conf);
this.configuration.set("mapreduce.output.basename", "tmp");

TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
    + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
    + Integer.toString(taskNumber + 1)
    + "_0");

this.configuration.set("mapred.task.id", taskAttemptID.toString());
this.configuration.setInt("mapred.task.partition", taskNumber + 1);
// also set the new-API (mapreduce.*) equivalents
this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString());
this.configuration.setInt("mapreduce.task.partition", taskNumber + 1);

this.context = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID()));

this.context.getCredentials().addAll(this.credentials);
Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
if (currentUserCreds != null) {
  this.context.getCredentials().addAll(currentUserCreds);
}

// ...
this.configuration.set("mapreduce.task.output.dir",
    ((FileOutputCommitter) this.outputCommitter).getWorkPath().toString());
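As a side note, the manual space-to-zero padding above can be expressed with a single zero-padded format specifier; this is an equivalent rewrite for readability, not the code used in the snippet:

// Equivalent construction of the attempt id string using "%06d"
// instead of building the padding by hand.
TaskAttemptID taskAttemptID =
    TaskAttemptID.forName(String.format("attempt__0000_r_%06d_0", taskNumber + 1));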
@Test
public void testGetOutputCommitter() {
  try {
    TaskAttemptContext context = new TaskAttemptContextImpl(new Configuration(),
        new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
    context.getConfiguration().set("mapred.output.dir", "/out");
    Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
  } catch (IOException e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Unable to get output committer");
  }
}
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());

  Configuration conf = context.getConfiguration();
  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

  String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
  CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
  cubeConfig = cube.getConfig();
  baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
  baseCuboidRowCountInMappers = Lists.newLinkedList();

  output = conf.get(BatchConstants.CFG_OUTPUT_PATH);
  samplingPercentage = Integer
      .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
  taskId = context.getTaskAttemptID().getTaskID().getId();
  cuboidHLLMap = Maps.newHashMap();
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.deduplicate = conf.getBoolean("dedup", true);
  if (deduplicate)
    this.threshold = conf.getFloat("threshold", 1E-5f);
  this.columnBoundaries = SpatialSite.getReduceSpace(context.getConfiguration());
  Path outputPath = DelaunayTriangulationOutputFormat.getOutputPath(context);
  Path finalPath = new Path(outputPath,
      String.format("m-%05d.final", context.getTaskAttemptID().getTaskID().getId()));
  FileSystem fs = finalPath.getFileSystem(context.getConfiguration());
  writer = new TriangulationRecordWriter(fs, null, finalPath, context);
}
private String generateKey(TaskAttemptContext context) throws IOException {
  String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
  if (StringUtils.isBlank(jobInfoString)) { // Avoid the NPE.
    throw new IOException("Could not retrieve OutputJobInfo for TaskAttempt " + context.getTaskAttemptID());
  }
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
  return context.getTaskAttemptID().toString() + "@" + jobInfo.getLocation();
}
public static Path makeTmpPath(
    final Path basePath,
    final FileSystem fs,
    final DataSegment segmentTemplate,
    final TaskAttemptID taskAttemptID,
    DataSegmentPusher dataSegmentPusher
)
{
  return new Path(
      prependFSIfNullScheme(fs, basePath),
      StringUtils.format(
          "./%s.%d",
          dataSegmentPusher.makeIndexPathName(segmentTemplate, JobHelper.INDEX_ZIP),
          taskAttemptID.getId()
      )
  );
}
private void testSplitRecordsForFile(Configuration conf, long firstSplitLength,
    long testFileSize, Path testFilePath) throws IOException {
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  assertTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength);

  String delimiter = conf.get("textinputformat.record.delimiter");
  byte[] recordDelimiterBytes = null;
  if (null != delimiter) {
    recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
  }

  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
@Test
public void testRecordReader() throws Exception {
  List<String> paths = Lists.newArrayList("/path1", "/path2");
  GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);
  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();

  RecordReader<LongWritable, Text> recordReader = inputFormat.createRecordReader(split,
      new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");

  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");

  Assert.assertFalse(recordReader.nextKeyValue());
}