public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job, boolean addDependencyJars) {
  initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableInputFormat.class);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up the JobConf.
 *
 * @param table   table name
 * @param columns columns to scan
 * @param mapper  mapper class
 * @param job     job configuration
 */
@SuppressWarnings("unchecked")
public static void initJob(String table, String columns, Class<? extends TableMap> mapper,
    JobConf job) {
  TableMapReduceUtil.initTableMapJob(table, columns, mapper, ImmutableBytesWritable.class,
      Result.class, job);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up the JobConf.
 *
 * @param table            The table name to read from.
 * @param columns          The columns to scan.
 * @param mapper           The mapper class to use.
 * @param outputKeyClass   The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job              The current job configuration to adjust.
 */
public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job) {
  initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true,
      TableInputFormat.class);
}
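For context, a minimal driver sketch showing how this method is typically called. The table name, column list, driver class, and MyMapper are placeholders, not part of the API above:

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.NullOutputFormat;

public class ScanDriver {

  // Placeholder identity mapper; any TableMap implementation works here.
  static class MyMapper extends MapReduceBase
      implements TableMap<ImmutableBytesWritable, Result> {
    @Override
    public void map(ImmutableBytesWritable row, Result value,
        OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
        throws IOException {
      output.collect(row, value);
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create(), ScanDriver.class);
    job.setJobName("scan-example");
    // "mytable" and the space-separated column list are placeholders.
    TableMapReduceUtil.initTableMapJob("mytable", "info:name info:age",
        MyMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);                     // map-only scan
    job.setOutputFormat(NullOutputFormat.class);  // discard map output
    JobClient.runJob(job);
  }
}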
/**
 * Use this before submitting a TableMap job. It will appropriately set up the JobConf.
 *
 * @param table        table to be processed
 * @param columns      space-separated list of columns to fetch
 * @param groupColumns space-separated list of columns used to form the key used in collect
 * @param mapper       map class
 * @param job          job configuration object
 */
@SuppressWarnings("unchecked")
public static void initJob(String table, String columns, String groupColumns,
    Class<? extends TableMap> mapper, JobConf job) {
  TableMapReduceUtil.initTableMapJob(table, columns, mapper, ImmutableBytesWritable.class,
      Result.class, job);
  job.set(GROUP_COLUMNS, groupColumns);
}
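A sketch of wiring up a grouped scan with this variant. It assumes the method above belongs to GroupingTableMap (the GROUP_COLUMNS key suggests it); the table and column names are placeholders:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.GroupingTableMap;
import org.apache.hadoop.mapred.JobConf;

public class GroupedScanSetup {
  // GroupingTableMap concatenates the values of the group columns into the
  // map output key, so rows sharing those values meet in the same reduce call.
  public static JobConf configure() {
    JobConf job = new JobConf(HBaseConfiguration.create());
    GroupingTableMap.initJob(
        "mytable",                // table to read (placeholder)
        "info:name info:age",     // columns to fetch, space-separated
        "info:name",              // columns forming the output key
        GroupingTableMap.class,   // the grouping mapper itself
        job);
    return job;
  }
}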
/**
 * @param args command-line arguments: args[0] is the output directory, args[1] is the
 *             table name, and the remaining arguments are the columns to scan
 * @return the JobConf
 * @throws IOException When setting up the job fails.
 */
public JobConf createSubmittableJob(String[] args) throws IOException {
  JobConf c = new JobConf(getConf(), getClass());
  c.setJobName(NAME);
  // Columns are space delimited.
  StringBuilder sb = new StringBuilder();
  final int columnoffset = 2;
  for (int i = columnoffset; i < args.length; i++) {
    if (i > columnoffset) {
      sb.append(" ");
    }
    sb.append(args[i]);
  }
  // Second argument is the table name.
  TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), RowCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, c);
  c.setNumReduceTasks(0);
  // First arg is the output directory.
  FileOutputFormat.setOutputPath(c, new Path(args[0]));
  return c;
}
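A hedged launch sketch for the driver above. It assumes the enclosing class implements org.apache.hadoop.util.Tool (the original uses getConf()) and is named RowCounter here; the paths and table name are placeholders. Note the argument order: output directory first, then table, then columns:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

public class RowCountLauncher {
  public static void main(String[] args) throws Exception {
    // Argument order matches createSubmittableJob:
    // args[0] = output directory, args[1] = table name, args[2..] = columns.
    // RowCounter stands in for the Tool that defines createSubmittableJob above.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(),
        new String[] { "/tmp/rowcount-out", "mytable", "info:name" });
    System.exit(exitCode);
  }
}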
/**
 * Sets up the job for reading from a table snapshot. It bypasses HBase servers and reads
 * directly from snapshot files.
 *
 * @param snapshotName      The name of the snapshot (of a table) to read from.
 * @param columns           The columns to scan.
 * @param mapper            The mapper class to use.
 * @param outputKeyClass    The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job               The current job to adjust. Make sure the passed job is carrying
 *                          all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured job classes
 *                          via the distributed cache (tmpjars).
 * @param tmpRestoreDir     a temporary directory to copy the snapshot files into. The current
 *                          user should have write permissions to this directory, and it
 *                          should not be a subdirectory of rootdir. After the job is
 *                          finished, the restore directory can be deleted.
 * @throws IOException When setting up the details fails.
 * @see TableSnapshotInputFormat
 */
public static void initTableSnapshotMapJob(String snapshotName, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
  TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableSnapshotInputFormat.class);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
}
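A minimal setup sketch for the snapshot path. The snapshot name and restore directory are placeholders, and the mapper reuses the MyMapper placeholder from the ScanDriver sketch above:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobConf;

public class SnapshotScanSetup {
  public static JobConf configure() throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create(), SnapshotScanSetup.class);
    // Snapshot name and restore directory are placeholders. The restore
    // directory must be writable by the current user and must not live
    // under the HBase root directory.
    TableMapReduceUtil.initTableSnapshotMapJob(
        "mytable-snapshot",                 // snapshot to read
        "info:name",                        // columns to scan
        ScanDriver.MyMapper.class,          // any TableMap implementation
        ImmutableBytesWritable.class,
        Result.class,
        job,
        true,                               // ship dependency jars via tmpjars
        new Path("/tmp/snapshot-restore"));
    return job;
  }
}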
// Overload of the snapshot setup above that additionally takes a split algorithm and a
// per-region split count (enclosing signature reconstructed from the parameters used).
public static void initTableSnapshotMapJob(String snapshotName, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf jobConf, boolean addDependencyJars, Path tmpRestoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormat.setInput(jobConf, snapshotName, tmpRestoreDir, splitAlgo,
      numSplitsPerRegion);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, jobConf,
      addDependencyJars, TableSnapshotInputFormat.class);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(jobConf);
}
/**
 * Open and close a TableOutputFormat. Closing the RecordWriter should release HBase
 * connection (ZK) resources, and will throw an exception if they are exhausted.
 */
static void openCloseTableOutputFormat(int iter) throws IOException {
  LOG.info("Instantiating TableOutputFormat connection " + iter);
  JobConf conf = new JobConf();
  conf.addResource(UTIL.getConfiguration());
  conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
  TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
  TableOutputFormat tof = new TableOutputFormat();
  RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
  rw.close(null);
}
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null) {
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
  }
}
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation() throws IOException {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(2);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf,
        HRegionPartitioner.class);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null) {
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
  }
}
@Override
protected void runTestOnTable(Table table) throws IOException {
  JobConf jobConf = null;
  try {
    LOG.info("Before map/reduce startup");
    jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
    jobConf.setJobName("process column contents");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(table.getName().getNameAsString(),
        Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
        ImmutableBytesWritable.class, Put.class, jobConf);
    TableMapReduceUtil.initTableReduceJob(table.getName().getNameAsString(),
        IdentityTableReduce.class, jobConf);
    LOG.info("Started " + table.getName());
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
    LOG.info("After map/reduce completion");
    // Verify map-reduce results.
    verify(table.getName());
  } finally {
    if (jobConf != null) {
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
  }
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up the JobConf.
 *
 * @param table            The table name to read from.
 * @param columns          The columns to scan.
 * @param mapper           The mapper class to use.
 * @param outputKeyClass   The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job              The current job configuration to adjust.
 */
public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper, Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {
  initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true);
}