/**
 * Hadoop {@link JobConf} configuration hook.
 *
 * <p>Captures the job configuration for later use by this wrapper and
 * forwards it to the delegate {@code format} so both see the same settings.
 * NOTE(review): assumes {@code format} is assigned before configure() runs —
 * TODO confirm initialization order against the enclosing class.
 */
@Override public void configure(JobConf job) { this.job = job; format.configure(job); }
/**
 * Builds the underlying {@link FileInputFormat} used to read the input as text.
 *
 * @param state   job/source state (not consulted by this implementation)
 * @param jobConf Hadoop job configuration used to instantiate and configure
 *                the {@link TextInputFormat}
 * @return a fully configured {@link TextInputFormat}
 */
@Override
protected FileInputFormat<LongWritable, Text> getFileInputFormat(State state, JobConf jobConf) {
  // ReflectionUtils also injects the configuration into Configurable instances.
  TextInputFormat format = ReflectionUtils.newInstance(TextInputFormat.class, jobConf);
  format.configure(jobConf);
  return format;
} }
/** {@inheritDoc} */
// Test instrumentation: after delegating to the superclass, records in the
// shared `flags` map that configure() was invoked, so tests can assert on it.
@Override public void configure(JobConf job) { super.configure(job); flags.put("inputFormatWasConfigured", true); } }
/** Creates and configures the shared {@link TextInputFormat} fixture once before the test class runs. */
@BeforeClass
public void setUp() {
  JobConf conf = new JobConf();
  inputFormat = new TextInputFormat();
  inputFormat.configure(conf);
}
// Clone the incoming job configuration so local changes do not leak back to
// the caller, then select TextInputFormat and configure the reader with the
// cloned conf. NOTE(review): `newjob` is likely referenced by statements
// following this fragment — left unrenamed; confirm against full context.
JobConf newjob = new JobConf(job); newjob.setInputFormat(TextInputFormat.class); inputFormat.configure(newjob);
// Copy the job configuration (isolates local modifications from the caller),
// register TextInputFormat as the input format, and configure the reader
// against the copy. NOTE(review): `newjob` may be used by out-of-view code
// after this fragment — name preserved deliberately.
JobConf newjob = new JobConf(job); newjob.setInputFormat(TextInputFormat.class); inputFormat.configure(newjob);
@Override public RecordReader<LongWritable, Text> getRecordReader( InputSplit split, JobConf job, Reporter reporter) throws IOException { InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit(); // The target data is in TextInputFormat. TextInputFormat inputFormat = new TextInputFormat(); inputFormat.configure(job); RecordReader innerReader = null; try { innerReader = inputFormat.getRecordReader(targetSplit, job, reporter); } catch (Exception e) { innerReader = HiveIOExceptionHandlerUtil .handleRecordReaderCreationException(e, job); } HiveRecordReader rr = new HiveRecordReader(innerReader, job); rr.initIOContext((FileSplit)targetSplit, job, TextInputFormat.class, innerReader); return rr; }
// Compute input splits for the target path: derive a JobConf from the target
// filesystem's configuration, read the data as plain text, point the input at
// targetPath, and request splits with a numSplits hint of 0 (lets the format
// choose the split count). NOTE(review): `targetJob`/`targetSplits` appear to
// be consumed by code following this fragment — names preserved.
JobConf targetJob = toJobConf(targetFilesystem.getConf()); targetJob.setInputFormat(TextInputFormat.class); targetInputFormat.configure(targetJob); FileInputFormat.setInputPaths(targetJob, targetPath); InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);
/**
 * Opens a record reader over the real data a symlink split points at.
 *
 * <p>The target of the symlink split is stored as plain text, so it is read
 * via a {@link TextInputFormat} configured with the current job. Reader
 * creation failures are delegated to the Hive I/O exception handler, which
 * may return a substitute reader rather than failing outright.
 *
 * @throws IOException if the underlying reader cannot be created
 */
@Override public void configure // (doc note: see method below)
/**
 * Verifies that enabling INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS makes split
 * computation skip subdirectories: only the file directly under
 * {@code test:///a1} should yield a split.
 */
@Test
public void testIgnoreDirs() throws Exception {
  Configuration conf = getConfiguration();
  conf.setBoolean(FileInputFormat.INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS, true);
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "test:///a1");
  // Instantiate the mock filesystem so it is registered in the FS cache.
  MockFileSystem fs = (MockFileSystem) new Path("test:///").getFileSystem(conf);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat format = new TextInputFormat();
  format.configure(jobConf);
  InputSplit[] splits = format.getSplits(jobConf, 1);
  Assert.assertEquals("Input splits are not correct", 1, splits.length);
  FileSystem.closeAll();
}
/**
 * Checks that {@code listStatus} on the old-API TextInputFormat returns
 * exactly the paths set up by the shared "simple" fixture.
 */
@Test
public void testListStatusSimple() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestSimple(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat format = new TextInputFormat();
  format.configure(jobConf);
  FileStatus[] listed = format.listStatus(jobConf);
  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(listed), localFs);
}
/**
 * Checks {@code listStatus} against the nested, non-recursive fixture:
 * only the top-level entries configured by the shared helper should appear.
 */
@Test
public void testListStatusNestedNonRecursive() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestNestedNonRecursive(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat format = new TextInputFormat();
  format.configure(jobConf);
  FileStatus[] listed = format.listStatus(jobConf);
  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(listed), localFs);
}
/**
 * Checks {@code listStatus} against the nested, recursive fixture:
 * every file configured by the shared helper should be listed.
 */
@Test
public void testListStatusNestedRecursive() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestNestedRecursive(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat format = new TextInputFormat();
  format.configure(jobConf);
  FileStatus[] listed = format.listStatus(jobConf);
  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(listed), localFs);
}
/**
 * Verifies that {@code listStatus} fails with an {@link InvalidInputException}
 * carrying the missing path's message when an input directory does not exist.
 */
@Test
public void testListStatusErrorOnNonExistantDir() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestErrorOnNonExistantDir(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat format = new TextInputFormat();
  format.configure(jobConf);
  try {
    format.listStatus(jobConf);
    Assert.fail("Expecting an IOException for a missing Input path");
  } catch (IOException e) {
    // The exception message must name the fully qualified missing path.
    Path missing = localFs.makeQualified(new Path(TEST_ROOT_DIR, "input2"));
    Assert.assertTrue(e instanceof InvalidInputException);
    Assert.assertEquals(
        "Input path does not exist: " + missing.toString(),
        e.getMessage());
  }
}
/**
 * Writes old-API (mapred) splits for erasure-coded input through
 * {@link JobSplitWriter} and validates the resulting split meta info.
 */
@Test
public void testMaxBlockLocationsOldSplitsWithErasureCoding() throws Exception {
  JobConf jobConf = new JobConf(conf);
  org.apache.hadoop.mapred.TextInputFormat format =
      new org.apache.hadoop.mapred.TextInputFormat();
  format.configure(jobConf);
  org.apache.hadoop.mapred.InputSplit[] splits = format.getSplits(jobConf, 1);
  JobSplitWriter.createSplitFiles(submitDir, conf, fs, splits);
  validateSplitMetaInfo();
}
/**
 * Verifies that split computation over {@code test:///a1/a2} performs exactly
 * one {@code listLocatedStatus} call on the (cached) mock filesystem and
 * produces the two expected splits.
 *
 * <p>Fix: the second assertion message read {@code "listLocatedStatuss calls"}
 * (typo) — corrected to {@code "listLocatedStatus calls"}.
 */
@Test
public void testListLocatedStatus() throws Exception {
  Configuration conf = getConfiguration();
  // FS caching must stay enabled so the instance probed below is the one the
  // input format uses.
  conf.setBoolean("fs.test.impl.disable.cache", false);
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  MockFileSystem mockFs =
      (MockFileSystem) new Path("test:///").getFileSystem(conf);
  Assert.assertEquals("listLocatedStatus already called",
      0, mockFs.numListLocatedStatusCalls);
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  InputSplit[] splits = fileInputFormat.getSplits(job, 1);
  Assert.assertEquals("Input splits are not correct", 2, splits.length);
  Assert.assertEquals("listLocatedStatus calls",
      1, mockFs.numListLocatedStatusCalls);
  FileSystem.closeAll();
}
/**
 * Hadoop {@link JobConf} configuration hook.
 *
 * <p>Saves the job configuration on this wrapper and propagates it to the
 * delegate {@code format}. NOTE(review): presumes {@code format} is non-null
 * by the time Hadoop calls configure() — verify construction order.
 */
@Override public void configure(JobConf job) { this.job = job; format.configure(job); }
/**
 * Checks split location metadata for {@code test:///a1/a2}: two hosts are
 * reported, "localhost" is both on-disk and in-memory, while "otherhost" is
 * on-disk only.
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  JobConf jobConf = new JobConf(conf);
  TextInputFormat format = new TextInputFormat();
  format.configure(jobConf);
  FileSplit[] fileSplits = (FileSplit[]) format.getSplits(jobConf, 1);
  String[] hosts = fileSplits[0].getLocations();
  Assert.assertEquals(2, hosts.length);
  SplitLocationInfo[] info = fileSplits[0].getLocationInfo();
  Assert.assertEquals(2, info.length);
  // The order of getLocations() is not guaranteed, so map by host name.
  SplitLocationInfo localhostInfo =
      hosts[0].equals("localhost") ? info[0] : info[1];
  SplitLocationInfo otherhostInfo =
      hosts[0].equals("otherhost") ? info[0] : info[1];
  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
/** One-time fixture: builds the {@link TextInputFormat} under test and configures it with a default JobConf. */
@BeforeClass
public void setUp() {
  TextInputFormat format = new TextInputFormat();
  format.configure(new JobConf());
  inputFormat = format;
}
/** Initializes the shared input-format fixture before any test in this class executes. */
@BeforeClass
public void setUp() {
  inputFormat = new TextInputFormat();
  JobConf defaults = new JobConf();
  inputFormat.configure(defaults);
}