/**
 * Calculates the splits that will serve as input for the map tasks. The
 * number of splits matches the number of regions in a table. Splits are
 * shuffled if required.
 *
 * @param context The current job context.
 * @return The list of input splits.
 * @throws IOException When creating the list of splits fails.
 * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
 *   org.apache.hadoop.mapreduce.JobContext)
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  List<InputSplit> splits = super.getSplits(context);
  // Read the flag once instead of querying the Configuration twice.
  String shuffle = conf.get(SHUFFLE_MAPS);
  if (shuffle != null && "true".equals(shuffle.toLowerCase(Locale.ROOT))) {
    Collections.shuffle(splits);
  }
  return splits;
}
createRegionSizeCalculator(getRegionLocator(), getAdmin()); TableName tableName = getTable().getName(); Pair<byte[][], byte[][]> keys = getStartEndKeys(); if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) { HRegionLocation regLoc = getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false); if (null == regLoc) { throw new IOException("Expecting at least one region."); if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { continue; keys.getSecond()[i] : stopRow; HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false); regionLocation = reverseDNS(regionAddress);
/**
 * Close the Table and related objects that were initialized via
 * {@link #initializeTable(Connection, TableName)}.
 *
 * @throws IOException if closing any of the underlying resources fails
 */
protected void closeTable() throws IOException {
  close(admin, table, regionLocator, connection);
  // Null the references so closed objects cannot be reused accidentally and
  // a later re-initialization starts from a clean state.
  admin = null;
  table = null;
  regionLocator = null;
  connection = null;
}
initialize(context); closeOnFinish = true; if (getTable() == null) { List<InputSplit> splits = oneInputSplitPerRegion(); List<InputSplit> res = new ArrayList<>(); for (int i = 0; i < splits.size(); i++) { List<InputSplit> tmp = createNInputSplitsUniform(splits.get(i), nSplitsPerRegion); res.addAll(tmp); long maxAveRegionSize = context.getConfiguration() .getLong(MAX_AVERAGE_REGION_SIZE, 8L*1073741824); //8GB return calculateAutoBalancedSplits(splits, maxAveRegionSize); } finally { if (closeOnFinish) { closeTable();
initialize(context); if (getTable() == null) { sc.setStopRow(tSplit.getEndRow()); trr.setScan(sc); trr.setTable(getTable()); return new RecordReader<ImmutableBytesWritable, Result>() {
initialize(context); closeOnFinish = true; if (getTable() == null) { RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, admin); Pair<byte[][], byte[][]> keys = getStartEndKeys(); if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) { if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { continue; String regionLocation; try { regionLocation = reverseDNS(regionAddress); } catch (NamingException e) { LOG.warn("Cannot resolve the host name for " + regionAddress + " because of " + e); averageRegionSize = 1; return calculateRebalancedSplits(splits, context, averageRegionSize); } else { return splits; closeTable();
if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { continue; String regionLocation; try { regionLocation = reverseDNS(regionAddress); } catch (NamingException e) { LOG.error("Cannot resolve the host name for " + regionAddress +
/**
 * Returns the start and end keys used to create splits. When the
 * {@code SPLIT_TABLE} property is set in the configuration, the region
 * boundaries of that table are used instead of those of the table being read.
 *
 * @return a pair of start keys and end keys, one entry per region
 * @throws IOException if the region keys cannot be fetched
 */
@Override
protected Pair<byte[][], byte[][]> getStartEndKeys() throws IOException {
  // Read the property once instead of querying the Configuration twice.
  String splitTable = conf.get(SPLIT_TABLE);
  if (splitTable == null) {
    return super.getStartEndKeys();
  }
  TableName splitTableName = TableName.valueOf(splitTable);
  // One resource list: the locator is closed before the connection,
  // same order as the original nested try-with-resources.
  try (Connection conn = ConnectionFactory.createConnection(getConf());
      RegionLocator rl = conn.getRegionLocator(splitTableName)) {
    return rl.getStartEndKeys();
  }
}
/**
 * Closes the wrapped record reader, then releases the table-related
 * resources via {@link #closeTable()}.
 *
 * @throws IOException if closing the reader or the table resources fails
 */
@Override
public void close() throws IOException {
  trr.close();
  closeTable();
}
/**
 * Returns the start and end keys of every region of the table, as reported
 * by the region locator.
 *
 * @return a pair of start keys and end keys
 * @throws IOException if the region boundaries cannot be fetched
 */
protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
  return getRegionLocator().getStartEndKeys();
}
/**
 * Allows subclasses to get the {@link HTable}.
 *
 * @return the underlying table, cast to {@link HTable}
 * @deprecated use {@link #getTable()}
 */
@Deprecated
protected HTable getHTable() {
  return (HTable) this.getTable();
}
initialize(context); closeOnFinish = true; if (getTable() == null) { List<InputSplit> splits = oneInputSplitPerRegion(); List<InputSplit> res = new ArrayList<>(); for (int i = 0; i < splits.size(); i++) { List<InputSplit> tmp = createNInputSplitsUniform(splits.get(i), nSplitsPerRegion); res.addAll(tmp); long maxAveRegionSize = context.getConfiguration() .getLong(MAX_AVERAGE_REGION_SIZE, 8L*1073741824); //8GB return calculateAutoBalancedSplits(splits, maxAveRegionSize); } finally { if (closeOnFinish) { closeTable();
initialize(context); if (getTable() == null) { sc.setStopRow(tSplit.getEndRow()); trr.setScan(sc); trr.setTable(getTable()); return new RecordReader<ImmutableBytesWritable, Result>() {
/**
 * Returns the start and end keys used to create splits. When the
 * {@code SPLIT_TABLE} property is set in the configuration, the region
 * boundaries of that table are used instead of those of the table being read.
 *
 * @return a pair of start keys and end keys, one entry per region
 * @throws IOException if the region keys cannot be fetched
 */
@Override
protected Pair<byte[][], byte[][]> getStartEndKeys() throws IOException {
  // Read the property once instead of querying the Configuration twice.
  String splitTable = conf.get(SPLIT_TABLE);
  if (splitTable == null) {
    return super.getStartEndKeys();
  }
  TableName splitTableName = TableName.valueOf(splitTable);
  // One resource list: the locator is closed before the connection,
  // same order as the original nested try-with-resources.
  try (Connection conn = ConnectionFactory.createConnection(getConf());
      RegionLocator rl = conn.getRegionLocator(splitTableName)) {
    return rl.getStartEndKeys();
  }
}
/**
 * Closes the wrapped record reader, then releases the table-related
 * resources via {@link #closeTable()}.
 *
 * @throws IOException if closing the reader or the table resources fails
 */
@Override
public void close() throws IOException {
  trr.close();
  closeTable();
}
/**
 * Returns the start and end keys of every region of the table, as reported
 * by the region locator.
 *
 * @return a pair of start keys and end keys
 * @throws IOException if the region boundaries cannot be fetched
 */
protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
  return getRegionLocator().getStartEndKeys();
}
createRegionSizeCalculator(getRegionLocator(), getAdmin()); TableName tableName = getTable().getName(); Pair<byte[][], byte[][]> keys = getStartEndKeys(); if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) { HRegionLocation regLoc = getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false); if (null == regLoc) { throw new IOException("Expecting at least one region."); if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { continue; keys.getSecond()[i] : stopRow; HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false); regionLocation = reverseDNS(regionAddress);
initialize(context); closeOnFinish = true; if (getTable() == null) { List<InputSplit> splits = oneInputSplitPerRegion(); List<InputSplit> res = new ArrayList<>(); for (int i = 0; i < splits.size(); i++) { List<InputSplit> tmp = createNInputSplitsUniform(splits.get(i), nSplitsPerRegion); res.addAll(tmp); long maxAveRegionSize = context.getConfiguration() .getLong(MAX_AVERAGE_REGION_SIZE, 8L*1073741824); //8GB return calculateAutoBalancedSplits(splits, maxAveRegionSize); } finally { if (closeOnFinish) { closeTable();
Path[] tablePaths = FileInputFormat.getInputPaths(jobContext); List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext); InputSplit[] results = new InputSplit[splits.size()];
initialize(context); if (getTable() == null) { sc.setStopRow(tSplit.getEndRow()); trr.setScan(sc); trr.setTable(getTable()); return new RecordReader<ImmutableBytesWritable, Result>() {