@Override
public TaskAttemptID createTaskAttemptID() {
  return new TaskAttemptID("", 0, TaskType.MAP, 0, 0);
}
@Override
public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext(
    Configuration conf, final Progressable progressable) {
  TaskAttemptID taskAttemptId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
  if (taskAttemptId == null) {
    // If the caller is not within a mapper/reducer (e.g. reading from the table via CliDriver),
    // then TaskAttemptID.forName() may return null. Fall back to the default constructor.
    taskAttemptId = new TaskAttemptID();
  }
  return new TaskAttemptContextImpl(conf, taskAttemptId) {
    @Override
    public void progress() {
      progressable.progress();
    }
  };
}
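// Hedged sketch (not part of the shim above): shows the canonical attempt-ID string that
// TaskAttemptID.forName() parses; the concrete numbers are made up for illustration.
// forName(null) returns null, which is why the method above falls back to new TaskAttemptID()
// when MRJobConfig.TASK_ATTEMPT_ID is not present in the configuration.
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class TaskAttemptIdParseSketch {
  public static void main(String[] args) {
    TaskAttemptID parsed = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");
    System.out.println(parsed.getJobID());   // job_200707121733_0003
    System.out.println(parsed.getTaskID());  // task_200707121733_0003_m_000005
    System.out.println(TaskAttemptID.forName(null) == null);  // true
  }
}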
@Override
public void open(HadoopInputSplit split) throws IOException {
  TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());
  try {
    this.recordReader = this.hCatInputFormat
        .createRecordReader(split.getHadoopInputSplit(), context);
    this.recordReader.initialize(split.getHadoopInputSplit(), context);
  } catch (InterruptedException e) {
    throw new IOException("Could not create RecordReader.", e);
  } finally {
    this.fetched = false;
  }
}
@Override
public void open(HadoopInputSplit split) throws IOException {
  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {
    TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());
    try {
      this.recordReader = this.mapreduceInputFormat
          .createRecordReader(split.getHadoopInputSplit(), context);
      this.recordReader.initialize(split.getHadoopInputSplit(), context);
    } catch (InterruptedException e) {
      throw new IOException("Could not create RecordReader.", e);
    } finally {
      this.fetched = false;
    }
  }
}
@Override
public TaskAttemptID newTaskAttemptID(JobID jobId, boolean isMap, int taskId, int id) {
  return new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(),
      isMap ? TaskType.MAP : TaskType.REDUCE, taskId, id);
}
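// Hedged sketch (not part of the shim above): demonstrates what newTaskAttemptID() builds,
// using made-up JobID values; the boolean isMap flag is simply re-expressed as a TaskType.
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class NewTaskAttemptIdSketch {
  public static void main(String[] args) {
    JobID jobId = new JobID("200707121733", 3);  // illustrative jtIdentifier and job number
    // Equivalent to newTaskAttemptID(jobId, true, 5, 0) with the shim above.
    TaskAttemptID attempt =
        new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(), TaskType.MAP, 5, 0);
    System.out.println(attempt);  // attempt_200707121733_0003_m_000005_0
  }
}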
public ParquetRecordWriterWrapper(
    final OutputFormat<Void, ParquetHiveRecord> realOutputFormat,
    final JobConf jobConf,
    final String name,
    final Progressable progress,
    Properties tableProperties) throws IOException {
  try {
    // create a TaskInputOutputContext
    TaskAttemptID taskAttemptID = TaskAttemptID.forName(jobConf.get("mapred.task.id"));
    if (taskAttemptID == null) {
      taskAttemptID = new TaskAttemptID();
    }
    taskContext = ContextUtil.newTaskAttemptContext(jobConf, taskAttemptID);

    LOG.info("initialize serde with table properties.");
    initializeSerProperties(taskContext, tableProperties);

    LOG.info("creating real writer to write at " + name);
    realWriter =
        ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name));
    LOG.info("real writer: " + realWriter);
  } catch (final InterruptedException e) {
    throw new IOException(e);
  }
}
private void mockTaskAttemptContext(String indexType) {
  TaskAttemptID fakeTaskId =
      new TaskAttemptID(new TaskID("foo_task_" + indexType, 123, TaskType.REDUCE, 2), 2);
  when(fakeTaskAttemptContext.getTaskAttemptID()).thenReturn(fakeTaskId);
  when(fakeTaskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
}
@Test
public void testRecordReader() throws Exception {
  List<String> paths = Lists.newArrayList("/path1", "/path2");
  GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);

  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
  RecordReader<LongWritable, Text> recordReader = inputFormat.createRecordReader(split,
      new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");
  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");
  Assert.assertFalse(recordReader.nextKeyValue());
}
taskAttemptID = new TaskAttemptID();
@Override
public Iterator<HCatRecord> read() throws HCatException {
  HCatInputFormat inpFmt = new HCatInputFormat();
  RecordReader<WritableComparable, HCatRecord> rr;
  try {
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim()
        .createTaskAttemptContext(conf, new TaskAttemptID());
    rr = inpFmt.createRecordReader(split, cntxt);
    rr.initialize(split, cntxt);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  return new HCatRecordItr(rr);
}
@Test
public void testRecordReader() {
  List<KafkaWritable> serRecords = expectedRecords.stream()
      .map((consumerRecord) -> new KafkaWritable(consumerRecord.partition(),
          consumerRecord.offset(),
          consumerRecord.timestamp(),
          consumerRecord.value(),
          consumerRecord.key()))
      .collect(Collectors.toList());
  KafkaRecordReader recordReader = new KafkaRecordReader();
  TaskAttemptContext context = new TaskAttemptContextImpl(this.conf, new TaskAttemptID());
  recordReader.initialize(new KafkaInputSplit(currentTopic, 0, 50L, 100L, null), context);
  for (int i = 50; i < 100; ++i) {
    KafkaWritable record = new KafkaWritable();
    Assert.assertTrue(recordReader.next(null, record));
    Assert.assertEquals(serRecords.get(i), record);
  }
  recordReader.close();
}
for (int i = 0; i < splits.size(); i++) {
  TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim()
      .createTaskAttemptContext(jonconf, new TaskAttemptID());
  RecordReader<LongWritable, BytesRefArrayWritable> rr =
      inputFormat.createRecordReader(splits.get(i), tac);
  rr.initialize(splits.get(i), tac);
private static SortedSet<byte[]> readFileToSearch(final Configuration conf, final FileSystem fs,
    final LocatedFileStatus keyFileStatus) throws IOException, InterruptedException {
  SortedSet<byte[]> result = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is
  // what is missing.
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is =
        new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String[] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0,
            rr.getCurrentKey().getLength());
        result.add(key);
      }
    }
  }
  return result;
}
HCatOutputFormat outFormat = new HCatOutputFormat();
TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
    conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
OutputCommitter committer = null;
RecordWriter<WritableComparable<?>, HCatRecord> writer;
public HadoopElementIterator(final HadoopGraph graph) {
  try {
    this.graph = graph;
    final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
    final InputFormat<NullWritable, VertexWritable> inputFormat =
        ConfUtil.getReaderAsInputFormat(configuration);
    if (inputFormat instanceof FileInputFormat) {
      final Storage storage = FileSystemStorage.open(configuration);
      if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
        return; // there is no input location and thus, no data (empty graph)
      if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
        return; // there is no data at the input location (empty graph)
      configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
          Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
    }
    final List<InputSplit> splits = inputFormat.getSplits(
        new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
    for (final InputSplit split : splits) {
      this.readers.add(inputFormat.createRecordReader(split,
          new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
    }
  } catch (final Exception e) {
    throw new IllegalStateException(e.getMessage(), e);
  }
}
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();

  long offset = 0;
  while (offset < testFileSize) {
    FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    while (reader.nextKeyValue()) {
      records.add(reader.getCurrentValue().toString());
    }
    offset += splitSize;
  }
  return records;
}
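// Hedged usage sketch for the helper above (the resource name and split sizes are
// illustrative, not from the original test): reading the same file with different
// split sizes should yield identical records, since LineRecordReader handles lines
// that straddle split boundaries.
@Test
public void readRecordsIsSplitSizeIndependent() throws IOException {
  URL testFileUrl = getClass().getClassLoader().getResource("test-lines.txt");
  ArrayList<String> coarseSplits = readRecords(testFileUrl, 8 * 1024);
  ArrayList<String> tinySplits = readRecords(testFileUrl, 10);
  Assert.assertEquals(coarseSplits, tinySplits);
}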
@Before
public void before() throws Exception {
  Configuration conf = new Configuration();
  conf.set(CamusJob.KAFKA_CLIENT_NAME, "DummyClientName");
  TaskAttemptContext context = null;
  try {
    // Hadoop 2.x: TaskAttemptContext is an interface; instantiate the Impl class reflectively.
    Class<?> taskAttemptContextImplClass =
        Class.forName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl");
    context = (TaskAttemptContext) taskAttemptContextImplClass
        .getDeclaredConstructor(Configuration.class, TaskAttemptID.class)
        .newInstance(conf, new TaskAttemptID());
  } catch (ClassNotFoundException e) {
    // Hadoop 1.x: TaskAttemptContext is a concrete class, so construct it directly.
    context = (TaskAttemptContext) Class.forName("org.apache.hadoop.mapreduce.TaskAttemptContext")
        .getDeclaredConstructor(Configuration.class, TaskAttemptID.class)
        .newInstance(conf, new TaskAttemptID());
  }

  EtlRequest request = new EtlRequest();
  request.setOffset(0);
  request.setLatestOffset(1);
  request.setURI(new URI("http://localhost:8888"));

  this.kafkaReader = new KafkaReader(new EtlInputFormat(), context, request, 100, 100);
}
@Test
public void testCloneMapContext() throws Exception {
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
  MapContext<IntWritable, IntWritable, IntWritable, IntWritable> mapContext =
      new MapContextImpl<IntWritable, IntWritable, IntWritable, IntWritable>(
          conf, taskAttemptid, null, null, null, null, null);
  Mapper<IntWritable, IntWritable, IntWritable, IntWritable>.Context mapperContext =
      new WrappedMapper<IntWritable, IntWritable, IntWritable, IntWritable>().getMapContext(mapContext);
  ContextFactory.cloneMapContext(mapperContext, conf, null, null);
}