setColumns(job);
final RecordReader<ImmutableBytesWritable, Result> rr =
    delegate.getRecordReader(((HBaseSplit) split).getSnapshotSplit(), job, reporter);
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setColumns(job);

  // hive depends on FileSplits, so wrap in HBaseSplit
  Path[] tablePaths = FileInputFormat.getInputPaths(job);

  InputSplit[] results = delegate.getSplits(job, numSplits);
  for (int i = 0; i < results.length; i++) {
    results[i] = new HBaseSplit(results[i], tablePaths[0]);
  }
  return results;
}
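The HBaseSplit used above is not part of this excerpt. The following is only a sketch of the wrapping idea, a FileSplit subclass that carries the delegate's snapshot split, and is not the actual Hive implementation; serialization and length/location delegation are omitted.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;

// Sketch only: the real class would also serialize the wrapped split in write()/readFields()
// and delegate getLength()/getLocations(); those details are left out here.
public class HBaseSplit extends FileSplit {

  private InputSplit snapshotSplit;  // the delegate's split, recovered via getSnapshotSplit()

  public HBaseSplit() {
    super((Path) null, 0, 0, (String[]) null);  // no-arg constructor for Writable deserialization
  }

  public HBaseSplit(InputSplit snapshotSplit, Path tablePath) {
    super(tablePath, 0, 0, (String[]) null);    // dummy FileSplit anchored at the table path
    this.snapshotSplit = snapshotSplit;
  }

  public InputSplit getSnapshotSplit() {
    return snapshotSplit;
  }
}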
/**
 * Sets up the job for reading from a table snapshot. It bypasses HBase servers
 * and reads directly from snapshot files.
 *
 * @param snapshotName The name of the snapshot (of a table) to read from.
 * @param columns The columns to scan.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *          carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *          job classes via the distributed cache (tmpjars).
 * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current user
 *          should have write permissions to this directory, and it should not be a subdirectory
 *          of rootdir. The restore directory can be deleted after the job is finished.
 * @throws IOException When setting up the details fails.
 * @see TableSnapshotInputFormat
 */
public static void initTableSnapshotMapJob(String snapshotName, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
  TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableSnapshotInputFormat.class);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
}
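A driver sketch showing how initTableSnapshotMapJob might be wired up; the snapshot name, column list, restore path, and the MyMapper class are placeholders for illustration, not values taken from this excerpt.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SnapshotScanDriver {

  // Identity mapper over snapshot rows (placeholder; any TableMap implementation works).
  public static class MyMapper extends MapReduceBase
      implements TableMap<ImmutableBytesWritable, Result> {
    @Override
    public void map(ImmutableBytesWritable key, Result value,
        OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
        throws IOException {
      output.collect(key, value);
    }
  }

  public static JobConf configure() throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create());
    job.setJarByClass(SnapshotScanDriver.class);
    TableMapReduceUtil.initTableSnapshotMapJob(
        "my_snapshot",                      // snapshot to read (placeholder name)
        "cf:col1 cf:col2",                  // columns to scan (placeholder list)
        MyMapper.class,
        ImmutableBytesWritable.class,       // output key class
        Result.class,                       // output value class
        job,
        true,                               // addDependencyJars: ship HBase jars via tmpjars
        new Path("/tmp/snapshot_restore")); // restore dir; must not be under hbase.rootdir
    return job;
  }
}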
private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
    byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
  TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
  InputSplit[] splits = tsif.getSplits(job, 0);
  RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);
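A sketch of how the returned record reader could be drained in such a test; the actual verification logic is not part of the excerpt and is only hinted at in a comment.

ImmutableBytesWritable key = rr.createKey();
Result value = rr.createValue();
while (rr.next(key, value)) {
  // row-level assertions against startRow/stopRow would go here
}
rr.close();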
    int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormat.setInput(jobConf, snapshotName, tmpRestoreDir, splitAlgo,
      numSplitsPerRegion);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, jobConf,
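The full signature of this overload is truncated above. Assuming it mirrors the base method with a RegionSplitter.SplitAlgorithm and a per-region split count appended, a call might look like the following, reusing the placeholder names from the driver sketch earlier.

TableMapReduceUtil.initTableSnapshotMapJob(
    "my_snapshot", "cf:col1 cf:col2",
    SnapshotScanDriver.MyMapper.class,
    ImmutableBytesWritable.class, Result.class,
    job, true, new Path("/tmp/snapshot_restore"),
    new RegionSplitter.UniformSplit(),  // algorithm used to subdivide each region's key range
    4);                                 // request up to 4 input splits per region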