/**
 * Configure a MapReduce Job to perform an incremental load into the given table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *   PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
    throws IOException {
  configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
}
/**
 * Configure a MapReduce Job to perform an incremental load into the given table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *   PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, TableDescriptor tableDescriptor,
    RegionLocator regionLocator) throws IOException {
  ArrayList<TableInfo> singleTableInfo = new ArrayList<>();
  singleTableInfo.add(new TableInfo(tableDescriptor, regionLocator));
  configureIncrementalLoad(job, singleTableInfo, HFileOutputFormat2.class);
}
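For orientation, here is a minimal driver sketch showing how these overloads are typically wired together. It is not taken from the surrounding source: the table name, mapper class, and output path are hypothetical, and the mapper is assumed to emit ImmutableBytesWritable/Put pairs as the contract in the Javadoc above requires.

// Hedged usage sketch: "usertable", MyBulkLoadMapper, and the output path are
// placeholders, not part of the HBase API.
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "bulk-load-usertable");
job.setJarByClass(MyBulkLoadMapper.class);
job.setMapperClass(MyBulkLoadMapper.class);
// Per the Javadoc above, the map output value class must be KeyValue or Put.
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles-out"));
try (Connection conn = ConnectionFactory.createConnection(conf);
    Table table = conn.getTable(TableName.valueOf("usertable"));
    RegionLocator locator = conn.getRegionLocator(TableName.valueOf("usertable"))) {
  // Sets the partitioner, reducer, and reduce-task count as described above.
  HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), locator);
}
System.exit(job.waitForCompletion(true) ? 0 : 1);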
    Table table = conn.getTable(tableName);
    RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
  job.setMapperClass(CellSortImporter.class);
  job.setReducerClass(CellReducer.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(MapReduceExtendedCell.class);
  HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
      org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
HFileOutputFormat2.configureIncrementalLoad(job, htable, htable.getRegionLocator());
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testJobConfiguration() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration") .toString()); Job job = new Job(conf); job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration")); Table table = Mockito.mock(Table.class); RegionLocator regionLocator = Mockito.mock(RegionLocator.class); setupMockStartKeys(regionLocator); setupMockTableName(regionLocator); HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); assertEquals(job.getNumReduceTasks(), 4); }
private void runIncrementalPELoad(Configuration conf,
    List<HFileOutputFormat2.TableInfo> tableInfo, Path outDir, boolean putSortReducer)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());
  setupRandomGeneratorMapper(job, putSortReducer);
  if (tableInfo.size() > 1) {
    MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
    int sum = 0;
    for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
      sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size();
    }
    assertEquals(sum, job.getNumReduceTasks());
  } else {
    RegionLocator regionLocator = tableInfo.get(0).getRegionLocator();
    HFileOutputFormat2.configureIncrementalLoad(job,
        tableInfo.get(0).getHTableDescriptor(), regionLocator);
    assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  }
  FileOutputFormat.setOutputPath(job, outDir);
  assertFalse(util.getTestFileSystem().exists(outDir));
  assertTrue(job.waitForCompletion(true));
}
HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
setupRandomGeneratorMapper(job, false);
HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
FileOutputFormat.setOutputPath(job, dir);
context = createTestTaskAttemptContext(job);
/**
 * Configures the given job for a bulk load into the physical index table via
 * HFileOutputFormat2. Note this method only configures the job; the caller
 * submits it and waits for completion.
 * @param job the job to configure
 * @param outputPath the directory where the HFiles will be written
 * @return the configured job
 * @throws Exception if the HBase connection or job configuration fails
 */
private Job configureRunnableJobUsingBulkLoad(Job job, Path outputPath) throws Exception {
  job.setMapperClass(PhoenixIndexImportMapper.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(KeyValue.class);
  final Configuration configuration = job.getConfiguration();
  final String physicalIndexTable =
      PhoenixConfigurationUtil.getPhysicalTableName(configuration);
  TableName tableName = TableName.valueOf(physicalIndexTable);
  // Use try-with-resources so the connection is closed once the job is configured.
  try (org.apache.hadoop.hbase.client.Connection conn =
      ConnectionFactory.createConnection(configuration)) {
    HFileOutputFormat2.configureIncrementalLoad(job, conn.getTable(tableName),
        conn.getRegionLocator(tableName));
  }
  return job;
}
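Configuring the job only arranges for HFiles to be written under the output path; a separate step moves them into the live table after the job completes. A hedged sketch of that follow-up step, using org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles (this step is not shown in the original snippet; variable names follow the method above):

// Sketch of the post-job bulk-load step, assuming the configured job has
// completed successfully and written its HFiles under outputPath.
try (org.apache.hadoop.hbase.client.Connection conn =
        ConnectionFactory.createConnection(configuration);
    Admin admin = conn.getAdmin();
    Table table = conn.getTable(tableName);
    RegionLocator locator = conn.getRegionLocator(tableName)) {
  LoadIncrementalHFiles loader = new LoadIncrementalHFiles(configuration);
  loader.doBulkLoad(outputPath, admin, table, locator);
}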