setColumns(job);
final RecordReader<ImmutableBytesWritable, Result> rr =
    delegate.getRecordReader(((HBaseSplit) split).getSnapshotSplit(), job, reporter);
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setColumns(job);

  // hive depends on FileSplits, so wrap in HBaseSplit
  Path[] tablePaths = FileInputFormat.getInputPaths(job);

  InputSplit[] results = delegate.getSplits(job, numSplits);
  for (int i = 0; i < results.length; i++) {
    results[i] = new HBaseSplit(results[i], tablePaths[0]);
  }
  return results;
}
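The HBaseSplit used above is not part of this excerpt. The following is only a sketch of the wrapping idea, a FileSplit subclass that carries the delegate's snapshot split, and is not the actual Hive implementation; serialization and length/location delegation are omitted.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;

// Sketch only: the real class would also serialize the wrapped split in write()/readFields()
// and delegate getLength()/getLocations(); those details are left out here.
public class HBaseSplit extends FileSplit {

  private InputSplit snapshotSplit;  // the delegate's split, recovered via getSnapshotSplit()

  public HBaseSplit() {
    super((Path) null, 0, 0, (String[]) null);  // no-arg constructor for Writable deserialization
  }

  public HBaseSplit(InputSplit snapshotSplit, Path tablePath) {
    super(tablePath, 0, 0, (String[]) null);    // dummy FileSplit anchored at the table path
    this.snapshotSplit = snapshotSplit;
  }

  public InputSplit getSnapshotSplit() {
    return snapshotSplit;
  }
}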
/**
 * Sets up the job for reading from a table snapshot. It bypasses HBase servers
 * and reads directly from snapshot files.
 *
 * @param snapshotName The name of the snapshot (of a table) to read from.
 * @param columns The columns to scan.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *          carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *          job classes via the distributed cache (tmpjars).
 * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current user
 *          should have write permissions to this directory, and it should not be a subdirectory
 *          of rootdir. The restore directory can be deleted after the job is finished.
 * @throws IOException When setting up the details fails.
 * @see TableSnapshotInputFormat
 */
public static void initTableSnapshotMapJob(String snapshotName, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
  TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableSnapshotInputFormat.class);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
}
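A driver sketch showing how initTableSnapshotMapJob might be wired up; the snapshot name, column list, restore path, and the MyMapper class are placeholders for illustration, not values taken from this excerpt.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SnapshotScanDriver {

  // Identity mapper over snapshot rows (placeholder; any TableMap implementation works).
  public static class MyMapper extends MapReduceBase
      implements TableMap<ImmutableBytesWritable, Result> {
    @Override
    public void map(ImmutableBytesWritable key, Result value,
        OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
        throws IOException {
      output.collect(key, value);
    }
  }

  public static JobConf configure() throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create());
    job.setJarByClass(SnapshotScanDriver.class);
    TableMapReduceUtil.initTableSnapshotMapJob(
        "my_snapshot",                      // snapshot to read (placeholder name)
        "cf:col1 cf:col2",                  // columns to scan (placeholder list)
        MyMapper.class,
        ImmutableBytesWritable.class,       // output key class
        Result.class,                       // output value class
        job,
        true,                               // addDependencyJars: ship HBase jars via tmpjars
        new Path("/tmp/snapshot_restore")); // restore dir; must not be under hbase.rootdir
    return job;
  }
}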
private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
    byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
  TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
  InputSplit[] splits = tsif.getSplits(job, 0);
  RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);
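A sketch of how the returned record reader could be drained in such a test; the actual verification logic is not part of the excerpt and is only hinted at in a comment.

ImmutableBytesWritable key = rr.createKey();
Result value = rr.createValue();
while (rr.next(key, value)) {
  // row-level assertions against startRow/stopRow would go here
}
rr.close();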
    int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormat.setInput(jobConf, snapshotName, tmpRestoreDir, splitAlgo,
      numSplitsPerRegion);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, jobConf,
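The full signature of this overload is truncated above. Assuming it mirrors the base method with a RegionSplitter.SplitAlgorithm and a per-region split count appended, a call might look like the following, reusing the placeholder names from the driver sketch earlier.

TableMapReduceUtil.initTableSnapshotMapJob(
    "my_snapshot", "cf:col1 cf:col2",
    SnapshotScanDriver.MyMapper.class,
    ImmutableBytesWritable.class, Result.class,
    job, true, new Path("/tmp/snapshot_restore"),
    new RegionSplitter.UniformSplit(),  // algorithm used to subdivide each region's key range
    4);                                 // request up to 4 input splits per region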