/**
 * Calculates the splits that will serve as input for the map tasks. The
 * number of splits matches the number of regions in a table. Splits are
 * shuffled if required.
 *
 * @param context The current job context.
 * @return The list of input splits.
 * @throws IOException When creating the list of splits fails.
 * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
 *   org.apache.hadoop.mapreduce.JobContext)
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  List<InputSplit> splits = super.getSplits(context);
  // Read the flag once instead of querying the Configuration twice.
  String shuffle = conf.get(SHUFFLE_MAPS);
  if (shuffle != null && "true".equals(shuffle.toLowerCase(Locale.ROOT))) {
    Collections.shuffle(splits);
  }
  return splits;
}
createRegionSizeCalculator(getRegionLocator(), getAdmin()); TableName tableName = getTable().getName(); Pair<byte[][], byte[][]> keys = getStartEndKeys(); if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) { HRegionLocation regLoc = getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false); if (null == regLoc) { throw new IOException("Expecting at least one region."); if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { continue; keys.getSecond()[i] : stopRow; HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false); regionLocation = reverseDNS(regionAddress);
/**
 * Close the Table and related objects that were initialized via
 * {@link #initializeTable(Connection, TableName)}.
 *
 * @throws IOException if closing any of the underlying resources fails
 */
protected void closeTable() throws IOException {
  close(admin, table, regionLocator, connection);
  // Null the references so closed objects cannot be reused accidentally and
  // a later re-initialization starts from a clean state.
  admin = null;
  table = null;
  regionLocator = null;
  connection = null;
}
initialize(context); closeOnFinish = true; if (getTable() == null) { List<InputSplit> splits = oneInputSplitPerRegion(); List<InputSplit> res = new ArrayList<>(); for (int i = 0; i < splits.size(); i++) { List<InputSplit> tmp = createNInputSplitsUniform(splits.get(i), nSplitsPerRegion); res.addAll(tmp); long maxAveRegionSize = context.getConfiguration() .getLong(MAX_AVERAGE_REGION_SIZE, 8L*1073741824); //8GB return calculateAutoBalancedSplits(splits, maxAveRegionSize); } finally { if (closeOnFinish) { closeTable();
initialize(context); if (getTable() == null) { sc.setStopRow(tSplit.getEndRow()); trr.setScan(sc); trr.setTable(getTable()); return new RecordReader<ImmutableBytesWritable, Result>() {
initialize(context); closeOnFinish = true; if (getTable() == null) { RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, admin); Pair<byte[][], byte[][]> keys = getStartEndKeys(); if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) { if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { continue; String regionLocation; try { regionLocation = reverseDNS(regionAddress); } catch (NamingException e) { LOG.warn("Cannot resolve the host name for " + regionAddress + " because of " + e); averageRegionSize = 1; return calculateRebalancedSplits(splits, context, averageRegionSize); } else { return splits; closeTable();
if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { continue; String regionLocation; try { regionLocation = reverseDNS(regionAddress); } catch (NamingException e) { LOG.error("Cannot resolve the host name for " + regionAddress +
/**
 * Returns the start and end keys used to create splits. When the
 * {@code SPLIT_TABLE} property is set in the configuration, the region
 * boundaries of that table are used instead of those of the table being read.
 *
 * @return a pair of start keys and end keys, one entry per region
 * @throws IOException if the region keys cannot be fetched
 */
@Override
protected Pair<byte[][], byte[][]> getStartEndKeys() throws IOException {
  // Read the property once instead of querying the Configuration twice.
  String splitTable = conf.get(SPLIT_TABLE);
  if (splitTable == null) {
    return super.getStartEndKeys();
  }
  TableName splitTableName = TableName.valueOf(splitTable);
  // One resource list: the locator is closed before the connection,
  // same order as the original nested try-with-resources.
  try (Connection conn = ConnectionFactory.createConnection(getConf());
      RegionLocator rl = conn.getRegionLocator(splitTableName)) {
    return rl.getStartEndKeys();
  }
}
/**
 * Closes the wrapped record reader, then releases the table-related
 * resources via {@link #closeTable()}.
 *
 * @throws IOException if closing the reader or the table resources fails
 */
@Override
public void close() throws IOException {
  trr.close();
  closeTable();
}
/**
 * Returns the start and end keys of every region of the table, as reported
 * by the region locator.
 *
 * @return a pair of start keys and end keys
 * @throws IOException if the region boundaries cannot be fetched
 */
protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
  return getRegionLocator().getStartEndKeys();
}
/**
 * Allows subclasses to get the {@link HTable}.
 *
 * @return the underlying table, cast to {@link HTable}
 * @deprecated use {@link #getTable()}
 */
@Deprecated
protected HTable getHTable() {
  return (HTable) this.getTable();
}
initialize(context); closeOnFinish = true; if (getTable() == null) { List<InputSplit> splits = oneInputSplitPerRegion(); List<InputSplit> res = new ArrayList<>(); for (int i = 0; i < splits.size(); i++) { List<InputSplit> tmp = createNInputSplitsUniform(splits.get(i), nSplitsPerRegion); res.addAll(tmp); long maxAveRegionSize = context.getConfiguration() .getLong(MAX_AVERAGE_REGION_SIZE, 8L*1073741824); //8GB return calculateAutoBalancedSplits(splits, maxAveRegionSize); } finally { if (closeOnFinish) { closeTable();
initialize(context); if (getTable() == null) { sc.setStopRow(tSplit.getEndRow()); trr.setScan(sc); trr.setTable(getTable()); return new RecordReader<ImmutableBytesWritable, Result>() {
/**
 * Returns the start and end keys used to create splits. When the
 * {@code SPLIT_TABLE} property is set in the configuration, the region
 * boundaries of that table are used instead of those of the table being read.
 *
 * @return a pair of start keys and end keys, one entry per region
 * @throws IOException if the region keys cannot be fetched
 */
@Override
protected Pair<byte[][], byte[][]> getStartEndKeys() throws IOException {
  // Read the property once instead of querying the Configuration twice.
  String splitTable = conf.get(SPLIT_TABLE);
  if (splitTable == null) {
    return super.getStartEndKeys();
  }
  TableName splitTableName = TableName.valueOf(splitTable);
  // One resource list: the locator is closed before the connection,
  // same order as the original nested try-with-resources.
  try (Connection conn = ConnectionFactory.createConnection(getConf());
      RegionLocator rl = conn.getRegionLocator(splitTableName)) {
    return rl.getStartEndKeys();
  }
}
/**
 * Closes the wrapped record reader, then releases the table-related
 * resources via {@link #closeTable()}.
 *
 * @throws IOException if closing the reader or the table resources fails
 */
@Override
public void close() throws IOException {
  trr.close();
  closeTable();
}
/**
 * Returns the start and end keys of every region of the table, as reported
 * by the region locator.
 *
 * @return a pair of start keys and end keys
 * @throws IOException if the region boundaries cannot be fetched
 */
protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
  return getRegionLocator().getStartEndKeys();
}
createRegionSizeCalculator(getRegionLocator(), getAdmin()); TableName tableName = getTable().getName(); Pair<byte[][], byte[][]> keys = getStartEndKeys(); if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) { HRegionLocation regLoc = getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false); if (null == regLoc) { throw new IOException("Expecting at least one region."); if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { continue; keys.getSecond()[i] : stopRow; HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false); regionLocation = reverseDNS(regionAddress);
initialize(context); closeOnFinish = true; if (getTable() == null) { List<InputSplit> splits = oneInputSplitPerRegion(); List<InputSplit> res = new ArrayList<>(); for (int i = 0; i < splits.size(); i++) { List<InputSplit> tmp = createNInputSplitsUniform(splits.get(i), nSplitsPerRegion); res.addAll(tmp); long maxAveRegionSize = context.getConfiguration() .getLong(MAX_AVERAGE_REGION_SIZE, 8L*1073741824); //8GB return calculateAutoBalancedSplits(splits, maxAveRegionSize); } finally { if (closeOnFinish) { closeTable();
Path[] tablePaths = FileInputFormat.getInputPaths(jobContext); List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext); InputSplit[] results = new InputSplit[splits.size()];
initialize(context); if (getTable() == null) { sc.setStopRow(tSplit.getEndRow()); trr.setScan(sc); trr.setTable(getTable()); return new RecordReader<ImmutableBytesWritable, Result>() {