public static List<String> getBestLocations(Configuration conf, HDFSBlocksDistribution blockDistribution) { // 3 nodes will contain highly local blocks. So default to 3. return getBestLocations(conf, blockDistribution, 3); }
/**
 * Prepares {@code conf} for a job that reads from a snapshot. Call only when
 * {@link #assertSupportsTableSnapshots()} returns true.
 * @param conf the configuration to populate
 * @param snapshotName the snapshot name handed to {@link TableSnapshotInputFormatImpl}
 * @param restoreDir the restore directory handed to {@link TableSnapshotInputFormatImpl}
 * @throws IOException if the underlying {@code setInput} call fails
 */
public static void configureJob(Configuration conf, String snapshotName, Path restoreDir)
    throws IOException {
  // Pure delegation: the shared implementation class does all of the configuration work.
  TableSnapshotInputFormatImpl.setInput(conf, snapshotName, restoreDir);
}
public static List<InputSplit> getSplits(Configuration conf) throws IOException { String snapshotName = getSnapshotName(conf); Path rootDir = FSUtils.getRootDir(conf); FileSystem fs = rootDir.getFileSystem(conf); SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs); List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest); // TODO: mapred does not support scan as input API. Work around for now. Scan scan = extractScanFromConf(conf); // the temp dir where the snapshot is restored Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY)); RegionSplitter.SplitAlgorithm splitAlgo = getSplitAlgo(conf); int numSplits = conf.getInt(NUM_SPLITS_PER_REGION, 1); return getSplits(scan, manifest, regionInfos, restoreDir, conf, splitAlgo, numSplits); }
TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs); List<HRegionInfo> regionInfos = TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest); TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf); rtn.addAll(splits);
@Test public void testGetBestLocations() throws IOException { TableSnapshotInputFormatImpl tsif = new TableSnapshotInputFormatImpl(); Configuration conf = UTIL.getConfiguration(); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1); Assert.assertEquals(Lists.newArrayList("h1"), TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
sp[i + 1])) { List<String> hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir, localityEnabled); hri.getStartKey(), hri.getEndKey())) { List<String> hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir, localityEnabled); splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
/**
 * Computes the snapshot splits through {@link TableSnapshotInputFormatImpl} and wraps each one
 * in a {@link TableSnapshotRegionSplit}.
 * @param job the job configuration handed to the implementation
 * @param numSplits not referenced by this implementation
 * @return one wrapped split per implementation split, in the same order
 * @throws IOException if the implementation fails to compute the splits
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  List<TableSnapshotInputFormatImpl.InputSplit> implSplits =
      TableSnapshotInputFormatImpl.getSplits(job);

  InputSplit[] wrapped = new InputSplit[implSplits.size()];
  int next = 0;
  for (TableSnapshotInputFormatImpl.InputSplit implSplit : implSplits) {
    wrapped[next] = new TableSnapshotRegionSplit(implSplit);
    next++;
  }
  return wrapped;
}
TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs); List<HRegionInfo> regionInfos = TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest); TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf); rtn.addAll(splits);
@Test public void testGetBestLocations() throws IOException { TableSnapshotInputFormatImpl tsif = new TableSnapshotInputFormatImpl(); Configuration conf = UTIL.getConfiguration(); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1); Assert.assertEquals(Lists.newArrayList("h1"), TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
sp[i + 1])) { List<String> hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir, localityEnabled); hri.getStartKey(), hri.getEndKey())) { List<String> hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir, localityEnabled); splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
/**
 * Computes the snapshot splits from the job's configuration through
 * {@link TableSnapshotInputFormatImpl} and wraps each one in a
 * {@link TableSnapshotRegionSplit}.
 * @param job the job context whose configuration is handed to the implementation
 * @return a new list holding one wrapped split per implementation split, in the same order
 * @throws IOException if the implementation fails to compute the splits
 * @throws InterruptedException declared by the overridden method; not thrown directly here
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
  List<InputSplit> wrapped = new ArrayList<>();
  List<TableSnapshotInputFormatImpl.InputSplit> implSplits =
      TableSnapshotInputFormatImpl.getSplits(job.getConfiguration());
  for (TableSnapshotInputFormatImpl.InputSplit implSplit : implSplits) {
    InputSplit regionSplit = new TableSnapshotRegionSplit(implSplit);
    wrapped.add(regionSplit);
  }
  return wrapped;
}
public static List<InputSplit> getSplits(Configuration conf) throws IOException { String snapshotName = getSnapshotName(conf); Path rootDir = FSUtils.getRootDir(conf); FileSystem fs = rootDir.getFileSystem(conf); SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs); List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest); // TODO: mapred does not support scan as input API. Work around for now. Scan scan = extractScanFromConf(conf); // the temp dir where the snapshot is restored Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY)); return getSplits(scan, manifest, regionInfos, restoreDir, conf); }
/**
 * Sets up the job so that it reads from a snapshot through TableSnapshotInputFormat.
 * @param job the job to configure
 * @param snapshotName the name of the snapshot to read from
 * @param restoreDir a temporary directory into which the snapshot is restored. The current
 *          user needs write permission on it, and it must not be a subdirectory of rootdir.
 *          It can be deleted once the job has finished.
 * @throws IOException if an error occurs
 */
public static void setInput(JobConf job, String snapshotName, Path restoreDir)
    throws IOException {
  // Pure delegation: the shared implementation class does all of the configuration work.
  TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
}
TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs); List<HRegionInfo> regionInfos = TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest); TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf); rtn.addAll(splits);
/** * Compute block locations for snapshot files (which will get the locations for referred hfiles) * only when localityEnabled is true. */ private static List<String> calculateLocationsForInputSplit(Configuration conf, TableDescriptor htd, HRegionInfo hri, Path tableDir, boolean localityEnabled) throws IOException { if (localityEnabled) { // care block locality return getBestLocations(conf, HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir)); } else { // do not care block locality return null; } }
@Test public void testGetBestLocations() throws IOException { TableSnapshotInputFormatImpl tsif = new TableSnapshotInputFormatImpl(); Configuration conf = UTIL.getConfiguration(); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1); Assert.assertEquals(Lists.newArrayList("h1"), TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution)); TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
sp[i + 1])) { List<String> hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir, localityEnabled); hri.getStartKey(), hri.getEndKey())) { List<String> hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir, localityEnabled); splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
/**
 * Computes the snapshot splits from the job's configuration through
 * {@link TableSnapshotInputFormatImpl} and wraps each one in a
 * {@link TableSnapshotRegionSplit}.
 * @param job the job context whose configuration is handed to the implementation
 * @return a new list holding one wrapped split per implementation split, in the same order
 * @throws IOException if the implementation fails to compute the splits
 * @throws InterruptedException declared by the overridden method; not thrown directly here
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
  List<InputSplit> wrapped = new ArrayList<>();
  List<TableSnapshotInputFormatImpl.InputSplit> implSplits =
      TableSnapshotInputFormatImpl.getSplits(job.getConfiguration());
  for (TableSnapshotInputFormatImpl.InputSplit implSplit : implSplits) {
    InputSplit regionSplit = new TableSnapshotRegionSplit(implSplit);
    wrapped.add(regionSplit);
  }
  return wrapped;
}
public static List<InputSplit> getSplits(Configuration conf) throws IOException { String snapshotName = getSnapshotName(conf); Path rootDir = FSUtils.getRootDir(conf); FileSystem fs = rootDir.getFileSystem(conf); SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs); List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest); // TODO: mapred does not support scan as input API. Work around for now. Scan scan = extractScanFromConf(conf); // the temp dir where the snapshot is restored Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY)); RegionSplitter.SplitAlgorithm splitAlgo = getSplitAlgo(conf); int numSplits = conf.getInt(NUM_SPLITS_PER_REGION, 1); return getSplits(scan, manifest, regionInfos, restoreDir, conf, splitAlgo, numSplits); }
/**
 * Sets up the job so that it reads from a snapshot through TableSnapshotInputFormat. Delegates
 * to the five-argument {@code setInput} overload, passing {@code null} and {@code 1} for the
 * two extra arguments.
 * @param conf the job configuration
 * @param snapshotName the name of the snapshot to read from
 * @param restoreDir a temporary directory into which the snapshot is restored. The current
 *          user needs write permission on it, and it must not be a subdirectory of rootdir.
 *          It can be deleted once the job has finished.
 * @throws IOException if an error occurs
 */
public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
    throws IOException {
  // Fixed defaults for the two trailing arguments of the wider overload.
  setInput(conf, snapshotName, restoreDir, null, 1);
}