@Test
public void testRecordReader() throws Exception {
  List<String> paths = Lists.newArrayList("/path1", "/path2");
  GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);
  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();

  RecordReader<LongWritable, Text> recordReader =
      inputFormat.createRecordReader(split, new TaskAttemptContextImpl(new Configuration(),
          new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");
  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");
  Assert.assertFalse(recordReader.nextKeyValue());
}
@Override
public void init() throws IOException {
  super.init();

  Configuration taskConf = new Configuration();
  Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
  taskConf.set(FileOutputFormat.OUTDIR, stagingResultDir.toString());

  ExecutionBlockId ebId = taskAttemptId.getTaskId().getExecutionBlockId();
  writerContext = new TaskAttemptContextImpl(taskConf,
      new TaskAttemptID(ebId.getQueryId().toString(), ebId.getId(), TaskType.MAP,
          taskAttemptId.getTaskId().getId(), taskAttemptId.getId()));

  HFileOutputFormat2 hFileOutputFormat2 = new HFileOutputFormat2();
  try {
    writer = hFileOutputFormat2.getRecordWriter(writerContext);

    committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(writerContext), writerContext);
    workingFilePath = committer.getWorkPath();
  } catch (InterruptedException e) {
    throw new IOException(e.getMessage(), e);
  }

  LOG.info("Created hbase file writer: " + workingFilePath);
}
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile();
  reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString();
  conf.set("mapred.input.dir", "file://" + input);
  conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, reference);
  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();
  long offset = 0;
  while (offset < testFileSize) {
    FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    while (reader.nextKeyValue()) {
      records.add(reader.getCurrentValue().toString());
    }
    offset += splitSize;
  }
  return records;
}
private org.apache.hadoop.mapreduce.OutputCommitter createOutputCommitter(boolean newApiCommitter,
    JobID jobId, Configuration conf) throws Exception {
  org.apache.hadoop.mapreduce.OutputCommitter committer = null;

  LOG.info("OutputCommitter set in config " + conf.get("mapred.output.committer.class"));

  if (newApiCommitter) {
    org.apache.hadoop.mapreduce.TaskID taskId =
        new org.apache.hadoop.mapreduce.TaskID(jobId, TaskType.MAP, 0);
    org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID =
        new org.apache.hadoop.mapreduce.TaskAttemptID(taskId, 0);
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
        new TaskAttemptContextImpl(conf, taskAttemptID);
    OutputFormat outputFormat = ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
    committer = outputFormat.getOutputCommitter(taskContext);
  } else {
    committer = ReflectionUtils.newInstance(conf.getClass(
        "mapred.output.committer.class", FileOutputCommitter.class,
        org.apache.hadoop.mapred.OutputCommitter.class), conf);
  }
  LOG.info("OutputCommitter is " + committer.getClass().getName());
  return committer;
}
@Before
public void setup() throws Exception {
  conf = new Configuration();
  testBAMFileName = ClassLoader.getSystemClassLoader().getResource("test.bam").getFile();
  conf.set("mapred.input.dir", "file://" + testBAMFileName);

  // fetch the SAMFile header from the original input to get the expected count
  expectedRecordCount = getBAMRecordCount(new File(testBAMFileName));
  samFileHeader = SAMHeaderReader.readSAMHeaderFrom(new Path(testBAMFileName), conf);

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
}
@Before
public void setup() throws IOException, NoSuchMethodException, IllegalAccessException,
    InvocationTargetException, InstantiationException, InterruptedException, NoSuchFieldException {
  Configuration conf = new Configuration();
  String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile();
  conf.set("hadoopbam.vcf.trust-exts", "true");
  conf.set("mapred.input.dir", "file://" + input_file);
  conf.setStrings("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName(),
      BGZFCodec.class.getCanonicalName());
  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID());
public ComposableRecordReader<?, ?> createRecordReader(InputSplit split,
    TaskAttemptContext taskContext) throws IOException, InterruptedException {
  try {
    if (!rrCstrMap.containsKey(ident)) {
      throw new IOException("No RecordReader for " + ident);
    }
    Configuration conf = getConf(taskContext.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf,
        TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID)),
        new WrappedStatusReporter(taskContext));
    return rrCstrMap.get(ident).newInstance(id, inf.createRecordReader(split, context), cmpcl);
  } catch (IllegalAccessException e) {
    throw new IOException(e);
  } catch (InstantiationException e) {
    throw new IOException(e);
  } catch (InvocationTargetException e) {
    throw new IOException(e);
  }
}
this.configuration.set("mapreduce.output.basename", "tmp"); TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0") + Integer.toString(taskNumber + 1) + "_0"); this.configuration.set("mapred.task.id", taskAttemptID.toString()); this.configuration.setInt("mapred.task.partition", taskNumber + 1); this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString()); this.configuration.setInt("mapreduce.task.partition", taskNumber + 1); this.context = new TaskAttemptContextImpl(this.configuration, taskAttemptID); this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context); this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID())); this.context.getCredentials().addAll(this.credentials); Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser()); if (currentUserCreds != null) { this.context.getCredentials().addAll(currentUserCreds);
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context,
                                                 String newOutputName) throws IOException {
  Job job = new Job(context.getConfiguration());
  DynamicPartitioningOutputFormat.setOutputName(job, newOutputName);
  // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
  // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat.
  if (isAvroOutputFormat(fileOutputFormatName)) {
    job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
  }

  Path jobOutputPath = DynamicPartitioningOutputFormat.createJobSpecificPath(
      FileOutputFormat.getOutputPath(job), context);
  DynamicPartitioningOutputFormat.setOutputPath(job, jobOutputPath);

  return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
public static TaskAttemptContext createDummyMapTaskAttemptContext(Configuration conf) {
  TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, tid.toString());
  return new TaskAttemptContextImpl(conf, tid);
}
public HadoopElementIterator(final HadoopGraph graph) {
  try {
    this.graph = graph;
    final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
    final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration);
    if (inputFormat instanceof FileInputFormat) {
      final Storage storage = FileSystemStorage.open(configuration);
      if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
        return; // there is no input location and thus, no data (empty graph)
      if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
        return; // there is no data at the input location (empty graph)
      configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
          Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
    }
    final List<InputSplit> splits = inputFormat.getSplits(
        new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
    for (final InputSplit split : splits) {
      this.readers.add(inputFormat.createRecordReader(split,
          new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
    }
  } catch (final Exception e) {
    throw new IllegalStateException(e.getMessage(), e);
  }
}
@Test
public void testInvalidVersionNumber() throws IOException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  try {
    new FileOutputCommitter(outDir, tContext);
    fail("should've thrown an exception!");
  } catch (IOException e) {
    // test passed
  }
}
private void testSplitRecordsForFile(Configuration conf, long firstSplitLength,
    long testFileSize, Path testFilePath) throws IOException {
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  assertTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength);

  String delimiter = conf.get("textinputformat.record.delimiter");
  byte[] recordDelimiterBytes = null;
  if (null != delimiter) {
    recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
  }
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  FileUtil.fullyDelete(new File(outDir.toString()));
}
private CSVRecordReader getCSVRecordReader(String file) throws IOException, URISyntaxException {
  InputSplit split = ColumnBasedHandlerTestUtil.getSplit(file);
  TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  TypeRegistry.reset();
  TypeRegistry.getInstance(ctx.getConfiguration());
  log.debug(TypeRegistry.getContents());
  CSVRecordReader reader = new CSVRecordReader();
  reader.initialize(split, ctx);
  return reader;
}
private TaskAttemptContext getContext(String nameOutput) throws IOException {
  TaskAttemptContext taskContext = taskContexts.get(nameOutput);
  if (taskContext != null) {
    return taskContext;
  }

  // The following trick leverages the instantiation of a record writer via
  // the job thus supporting arbitrary output formats.
  NameOutput out = namedOutputs.get(nameOutput);
  Job job = new Job(context.getConfiguration());
  job.setOutputFormatClass(out.outputFormatClass);
  job.setOutputKeyClass(out.keyClass);
  job.setOutputValueClass(out.valueClass);
  taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
      context.getTaskAttemptID(), new WrappedStatusReporter(context));
  taskContexts.put(nameOutput, taskContext);
  return taskContext;
}
@Override
public void open(HadoopInputSplit split) throws IOException {
  TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());

  try {
    this.recordReader = this.hCatInputFormat.createRecordReader(split.getHadoopInputSplit(), context);
    this.recordReader.initialize(split.getHadoopInputSplit(), context);
  } catch (InterruptedException e) {
    throw new IOException("Could not create RecordReader.", e);
  } finally {
    this.fetched = false;
  }
}
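All of the snippets above share one pattern: build a Configuration and a TaskAttemptID, wrap them in a TaskAttemptContextImpl, and hand that context to an InputFormat, OutputFormat, or committer. The following is a minimal, self-contained sketch of that pattern using Hadoop's new (org.apache.hadoop.mapreduce) API; the class name and the input path /tmp/sample.txt are hypothetical and not taken from any of the projects above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextSketch {
  public static void main(String[] args) throws IOException, InterruptedException {
    Configuration conf = new Configuration();

    // Build a synthetic attempt id and also register it in the config,
    // since some formats and committers read it from MRJobConfig.TASK_ATTEMPT_ID.
    TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, tid.toString());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, tid);

    // Hypothetical local input file; replace with a real path.
    Path input = new Path("/tmp/sample.txt");
    long length = input.getFileSystem(conf).getFileStatus(input).getLen();
    FileSplit split = new FileSplit(input, 0, length, null);

    // Drive a RecordReader with the context, as the test snippets above do.
    RecordReader<LongWritable, Text> reader =
        new TextInputFormat().createRecordReader(split, context);
    reader.initialize(split, context);
    while (reader.nextKeyValue()) {
      System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
    }
    reader.close();
  }
}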