org.apache.hadoop.hbase.mapred.TableMapReduceUtil java code examples

if (addDependencyJars) {
 try {
  addDependencyJars(job);
 } catch (IOException e) {
  LOG.error("IOException encountered while adding dependency jars", e);
 initCredentials(job);
} catch (IOException ioe) {

/**
 * Configures a map/reduce job over {@code TABLE_NAME} with a single reduce task,
 * runs it, and asserts that the job reports success. The directory configured
 * under {@code hadoop.tmp.dir} is deleted afterwards, whether or not the job ran.
 */
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
 JobConf jobConf = new JobConf(UTIL.getConfiguration());
 try {
  jobConf.setJobName("process row task");
  jobConf.setNumReduceTasks(1);

  // Map side: scan the column family and emit (ImmutableBytesWritable, Put) pairs.
  String scannedColumns = new String(COLUMN_FAMILY);
  TableMapReduceUtil.initTableMapJob(
    TABLE_NAME, scannedColumns, ClassificatorMapper.class,
    ImmutableBytesWritable.class, Put.class, jobConf);

  // Reduce side: write back to the same table.
  TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf);

  RunningJob finishedJob = JobClient.runJob(jobConf);
  assertTrue(finishedJob.isSuccessful());
 } finally {
  if (jobConf != null) {
   File hadoopTmpDir = new File(jobConf.get("hadoop.tmp.dir"));
   FileUtil.fullyDelete(hadoopTmpDir);
  }
 }
}

/**
 * Builds a fresh {@link JobConf} from the test utility's configuration and
 * initialises it as a table-snapshot map job for the given snapshot.
 *
 * @param tableName not used by this implementation
 * @param snapshotName name of the snapshot handed to the snapshot map job setup
 * @param tmpTableDir directory handed to the snapshot map job setup as its last argument
 * @throws Exception if the job setup fails
 */
@Override
public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
  String snapshotName, Path tmpTableDir) throws Exception {
 JobConf snapshotJob = new JobConf(UTIL.getConfiguration());
 TableMapReduceUtil.initTableSnapshotMapJob(
   snapshotName,
   COLUMNS,
   TestTableSnapshotMapper.class,
   ImmutableBytesWritable.class,
   NullWritable.class,
   snapshotJob,
   false,
   tmpTableDir);
}

/**
 * Checks that {@code limitNumMapTasks} leaves the job with exactly one map task,
 * both on a fresh configuration and after the map task count was raised to 10.
 * NOTE(review): the expected value 1 presumably matches the region count of
 * {@code TABLE_NAME} in this fixture - confirm against the test setup.
 */
@Test
public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable()
  throws IOException {
 JobConf conf = new JobConf(UTIL.getConfiguration());

 // First pass: start from the configuration defaults.
 TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, conf);
 TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, conf);
 assertEquals(1, conf.getNumMapTasks());

 // Second pass: request 10 map tasks explicitly, then re-apply the table-based settings.
 conf.setNumMapTasks(10);
 TableMapReduceUtil.setNumMapTasks(TABLE_NAME, conf);
 TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, conf);
 assertEquals(1, conf.getNumMapTasks());
}

/**
 * Use this before submitting a TableMap job. Delegates to the overload that
 * additionally takes an input format class, supplying {@link TableInputFormat}.
 *
 * @param table  The table name to read from.
 * @param columns  The columns to scan.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job configuration to adjust.
 * @param addDependencyJars  Forwarded unchanged to the delegate overload.
 */
public static void initTableMapJob(String table, String columns,
  Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
  JobConf job, boolean addDependencyJars) {
 initTableMapJob(
   table, columns, mapper,
   outputKeyClass, outputValueClass,
   job, addDependencyJars,
   TableInputFormat.class);
}

/**
 * Validates the output specification for a job that writes to an HBase table.
 * Copies the table name stored under {@code HBaseSerDe.HBASE_TABLE_NAME} into
 * {@code TableOutputFormat.OUTPUT_TABLE}, then delegates to the
 * {@code JobContext}-based {@code checkOutputSpecs}.
 *
 * @param fs not used by this implementation
 * @param jc job configuration to read the table name from and to update
 * @throws IOException if the delegate check fails, or if the calling thread is
 *         interrupted while it runs (the interrupt status is preserved)
 */
@Override
public void checkOutputSpecs(FileSystem fs, JobConf jc) throws IOException {
 // Obtain delegation tokens for the job; only done when the current user has Kerberos credentials.
 if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
  TableMapReduceUtil.initCredentials(jc);
 }
 String hbaseTableName = jc.get(HBaseSerDe.HBASE_TABLE_NAME);
 jc.set(TableOutputFormat.OUTPUT_TABLE, hbaseTableName);
 Job job = new Job(jc);
 JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
 try {
  checkOutputSpecs(jobContext);
 } catch (InterruptedException e) {
  // Catching InterruptedException clears the interrupt flag; restore it so
  // callers further up the stack can still observe the interruption.
  Thread.currentThread().interrupt();
  throw new IOException(e);
 }
}

/**
 * Prepares the given JobConf for a TableReduce job; call this before
 * submitting the job. No partitioner is specified: {@code null} is passed
 * to the overload that accepts one.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
  Class<? extends TableReduce> reducer, JobConf job) throws IOException {
 initTableReduceJob(
   table,
   reducer,
   job,
   null);
}

/**
 * Configures and runs a map/reduce job that reads from multiple table snapshots,
 * then asserts that the job completed successfully.
 *
 * @param jobName name given to the job; also used as the output path
 * @param c not used by this implementation
 * @param scans scans that are converted to a snapshot-to-scan mapping for the job input
 * @throws IOException if job setup or execution fails
 * @throws InterruptedException declared by the overridden method
 * @throws ClassNotFoundException declared by the overridden method
 */
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
  throws IOException, InterruptedException, ClassNotFoundException {
 JobConf job = new JobConf(TEST_UTIL.getConfiguration());
 job.setJobName(jobName);

 // Map side.
 job.setMapperClass(Mapper.class);
 TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
   ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
 TableMapReduceUtil.addDependencyJars(job);

 // Reduce side. The reducer class is set once here, next to the other reduce
 // settings; an identical earlier call before the mapper setup was redundant.
 job.setReducerClass(Reducer.class);
 job.setNumReduceTasks(1); // one to get final "first" and "last" key
 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
 LOG.info("Started " + job.getJobName());

 RunningJob runningJob = JobClient.runJob(job);
 runningJob.waitForCompletion();
 assertTrue(runningJob.isSuccessful());
 LOG.info("After map/reduce completion - job " + jobName);
}

addDependencyJars(job);

/**
 * Prepares the given JobConf for a TableMap job; call this before submitting
 * the job. Delegates to {@code TableMapReduceUtil.initTableMapJob} with
 * {@code ImmutableBytesWritable} as the output key class and {@code Result}
 * as the output value class.
 *
 * @param table table name
 * @param columns columns to scan
 * @param mapper mapper class
 * @param job job configuration
 */
@SuppressWarnings("unchecked")
public static void initJob(String table, String columns,
  Class<? extends TableMap> mapper, JobConf job) {
 TableMapReduceUtil.initTableMapJob(
   table, columns, mapper,
   ImmutableBytesWritable.class, Result.class,
   job);
}

 org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf);
 ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf);
} catch (Exception e) {

/**
 * Prepares the given JobConf for a TableReduce job; call this before
 * submitting the job. Delegates to the five-argument overload, passing
 * {@code true} as its final argument.
 * NOTE(review): that final flag is presumably "add dependency jars" - confirm
 * against the five-argument overload.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
  Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
 initTableReduceJob(
   table,
   reducer,
   job,
   partitioner,
   true);
}

/**
 * Configures and runs a map/reduce job that reads from multiple table snapshots,
 * then asserts that the job completed successfully.
 *
 * @param jobName name given to the job; also used as the output path
 * @param c not used by this implementation
 * @param scans scans that are converted to a snapshot-to-scan mapping for the job input
 * @throws IOException if job setup or execution fails
 * @throws InterruptedException declared by the overridden method
 * @throws ClassNotFoundException declared by the overridden method
 */
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
  throws IOException, InterruptedException, ClassNotFoundException {
 JobConf job = new JobConf(TEST_UTIL.getConfiguration());
 job.setJobName(jobName);

 // Map side.
 job.setMapperClass(Mapper.class);
 TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
   ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
 TableMapReduceUtil.addDependencyJars(job);

 // Reduce side. The reducer class is set once here, next to the other reduce
 // settings; an identical earlier call before the mapper setup was redundant.
 job.setReducerClass(Reducer.class);
 job.setNumReduceTasks(1); // one to get final "first" and "last" key
 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
 LOG.info("Started " + job.getJobName());

 RunningJob runningJob = JobClient.runJob(job);
 runningJob.waitForCompletion();
 assertTrue(runningJob.isSuccessful());
 LOG.info("After map/reduce completion - job " + jobName);
}

addDependencyJars(job);

/**
 * Checks that {@code limitNumMapTasks} leaves the job with exactly one map task,
 * both on a fresh configuration and after the map task count was raised to 10.
 * NOTE(review): the expected value 1 presumably matches the region count of
 * {@code TABLE_NAME} in this fixture - confirm against the test setup.
 */
@Test
public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable()
  throws IOException {
 JobConf conf = new JobConf(UTIL.getConfiguration());

 // First pass: start from the configuration defaults.
 TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, conf);
 TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, conf);
 assertEquals(1, conf.getNumMapTasks());

 // Second pass: request 10 map tasks explicitly, then re-apply the table-based settings.
 conf.setNumMapTasks(10);
 TableMapReduceUtil.setNumMapTasks(TABLE_NAME, conf);
 TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, conf);
 assertEquals(1, conf.getNumMapTasks());
}

 addDependencyJars(job);
initCredentials(job);

/**
 * Configures a map/reduce job over {@code TABLE_NAME} with two reduce tasks and
 * {@code HRegionPartitioner} as the partitioner, runs it, and asserts that the
 * job reports success. The directory configured under {@code hadoop.tmp.dir}
 * is deleted afterwards, whether or not the job ran.
 */
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation()
  throws IOException {
 JobConf jobConf = new JobConf(UTIL.getConfiguration());
 try {
  jobConf.setJobName("process row task");
  jobConf.setNumReduceTasks(2);

  // Map side: scan the column family and emit (ImmutableBytesWritable, Put) pairs.
  String scannedColumns = new String(COLUMN_FAMILY);
  TableMapReduceUtil.initTableMapJob(
    TABLE_NAME, scannedColumns, ClassificatorMapper.class,
    ImmutableBytesWritable.class, Put.class, jobConf);

  // Reduce side: write back to the same table using the explicit partitioner.
  TableMapReduceUtil.initTableReduceJob(
    TABLE_NAME, ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);

  RunningJob finishedJob = JobClient.runJob(jobConf);
  assertTrue(finishedJob.isSuccessful());
 } finally {
  if (jobConf != null) {
   File hadoopTmpDir = new File(jobConf.get("hadoop.tmp.dir"));
   FileUtil.fullyDelete(hadoopTmpDir);
  }
 }
}

/**
 * Prepares the given JobConf for a TableMap job; call this before submitting
 * the job. Delegates to the overload that also takes an add-dependency-jars
 * flag and an input format class, passing {@code true} and
 * {@link TableInputFormat} respectively.
 *
 * @param table  The table name to read from.
 * @param columns  The columns to scan.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job configuration to adjust.
 */
public static void initTableMapJob(String table, String columns,
  Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
  JobConf job) {
 initTableMapJob(
   table, columns, mapper,
   outputKeyClass, outputValueClass,
   job,
   true,
   TableInputFormat.class);
}

TableMapReduceUtil.initCredentials(jobConf);

/**
 * Prepares the given JobConf for a TableReduce job; call this before
 * submitting the job. No partitioner is specified: {@code null} is passed
 * to the overload that accepts one.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
  Class<? extends TableReduce> reducer, JobConf job) throws IOException {
 initTableReduceJob(
   table,
   reducer,
   job,
   null);
}

Javadoc

Utility for TableMap and TableReduce

Most used methods

initCredentials
addDependencyJars
initTableMapJob
Use this before submitting a TableMap job. It will appropriately set up the JobConf.
initTableReduceJob
Use this before submitting a TableReduce job. It will appropriately set up the JobConf.
initMultiTableSnapshotMapperJob
Sets up the job for reading from one or more table snapshots, with one or more scans per snapshot.
initTableSnapshotMapJob
Sets up the job for reading from a table snapshot. It bypasses HBase servers and reads directly from snapshot files.
limitNumMapTasks
Ensures that the given number of map tasks for the given job configuration does not exceed the numbe
limitNumReduceTasks
Ensures that the given number of reduce tasks for the given job configuration does not exceed the nu
setNumMapTasks
Sets the number of map tasks for the given job configuration to the number of regions the given tabl
setNumReduceTasks
Sets the number of reduce tasks for the given job configuration to the number of regions the given t
setScannerCaching
Sets the number of rows to return and cache with each scanner iteration. Higher caching values will

setScannerCaching

Popular in Java

Reactive rest calls using spring rest template
scheduleAtFixedRate (Timer)
putExtra (Intent)
startActivity (Activity)
File (java.io)
An "abstract" representation of a file system entity identified by a pathname. The pathname may be a
SQLException (java.sql)
An exception that indicates a failed JDBC operation. It provides the following information about pro
NoSuchElementException (java.util)
Thrown when trying to retrieve an element past the end of an Enumeration or Iterator.
BlockingQueue (java.util.concurrent)
A java.util.Queue that additionally supports operations that wait for the queue to become non-empty
FileUtils (org.apache.commons.io)
General file manipulation utilities. Facilities are provided in the following areas: * writing to a
BasicDataSource (org.apache.commons.dbcp)
Basic implementation of javax.sql.DataSource that is configured via JavaBeans properties. This is no
Github Copilot alternatives

How to use TableMapReduceUtil in org.apache.hadoop.hbase.mapred

Best Java code snippets using org.apache.hadoop.hbase.mapred.TableMapReduceUtil (Showing top 20 results out of 315)

How to use
TableMapReduceUtil
in
org.apache.hadoop.hbase.mapred