/**
 * Caps the reducer count of the given job at the number of regions of the
 * given table. A job that already uses that many reducers or fewer is left
 * untouched.
 *
 * @param table The table to get the region count for.
 * @param job The current job to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
  final int regionCount =
      MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
  if (regionCount < job.getNumReduceTasks()) {
    job.setNumReduceTasks(regionCount);
  }
}
job.setPartitionerClass(HRegionPartitioner.class); int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table)); if (job.getNumReduceTasks() > regions) { job.setNumReduceTasks(regions);
/**
 * Registers the given Avro schemas on the job.
 *
 * <p>When the job has zero reduce tasks the schemas are set as the map output
 * schemas; otherwise they are set as the job output schemas. A {@code null}
 * schema is skipped.
 *
 * @param job the job to configure
 * @param keySchema the key schema, or {@code null} to leave the key schema unset
 * @param valSchema the value schema, or {@code null} to leave the value schema unset
 */
private void setSchema(Job job, Schema keySchema, Schema valSchema) {
  final boolean hasReducers = job.getNumReduceTasks() != 0;

  if (keySchema != null) {
    if (hasReducers) {
      AvroJob.setOutputKeySchema(job, keySchema);
    } else {
      AvroJob.setMapOutputKeySchema(job, keySchema);
    }
  }

  if (valSchema != null) {
    if (hasReducers) {
      AvroJob.setOutputValueSchema(job, valSchema);
    } else {
      AvroJob.setMapOutputValueSchema(job, valSchema);
    }
  }
}
/**
 * Configures and runs the "testLocalMRIncrementalLoad" job against the given
 * tables, asserting along the way that the configured number of reduce tasks
 * equals the total number of region locations and that the output directory
 * does not exist before the job runs.
 *
 * @param conf base configuration for the job
 * @param tableInfo the target tables; more than one entry selects the multi-table output format
 * @param outDir output directory of the job; must not exist yet
 * @param putSortReducer forwarded to {@code setupRandomGeneratorMapper}
 * @throws IOException if job setup or execution fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
private void runIncrementalPELoad(Configuration conf,
    List<HFileOutputFormat2.TableInfo> tableInfo, Path outDir, boolean putSortReducer)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations",
      conf.get("io.serializations"),
      MutationSerialization.class.getName(),
      ResultSerialization.class.getName(),
      CellSerialization.class.getName());
  setupRandomGeneratorMapper(job, putSortReducer);

  if (tableInfo.size() <= 1) {
    // Single table: one reducer per region of that table.
    RegionLocator locator = tableInfo.get(0).getRegionLocator();
    HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(),
        locator);
    assertEquals(locator.getAllRegionLocations().size(), job.getNumReduceTasks());
  } else {
    // Multiple tables: one reducer per region, summed over all tables.
    MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
    int expectedReducers = 0;
    for (HFileOutputFormat2.TableInfo info : tableInfo) {
      expectedReducers += info.getRegionLocator().getAllRegionLocations().size();
    }
    assertEquals(expectedReducers, job.getNumReduceTasks());
  }

  FileOutputFormat.setOutputPath(job, outDir);
  assertFalse(util.getTestFileSystem().exists(outDir));
  assertTrue(job.waitForCompletion(true));
}
/**
 * Checks that {@code HFileOutputFormat2.configureIncrementalLoad} sets the
 * number of reduce tasks on the job when driven by a mocked region locator.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testJobConfiguration() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
      util.getDataTestDir("testJobConfiguration").toString());
  Job job = new Job(conf);
  job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  setupMockStartKeys(regionLocator);
  setupMockTableName(regionLocator);
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
  // Expected value goes first so a failure reports "expected:<4> but was:<n>".
  // NOTE(review): 4 presumably matches the number of start keys mocked by
  // setupMockStartKeys - confirm against that helper.
  assertEquals(4, job.getNumReduceTasks());
}
/**
 * Derives a task count from the input size and either sets the number of
 * reducers (when the job has reducers) or the split size (map-only job).
 *
 * <p>The task count is {@code inputSize / targetFileSize + 1}, capped at the
 * {@code MAX_FILES} setting (default {@code DEFAULT_MAX_FILES}).
 *
 * @throws IOException if the input size cannot be determined or the job
 *     cannot be configured
 */
protected void setNumOfReducersAndSplitSizes() throws IOException {
  long inputSize = getInputSize();
  int maxFiles = job.getConfiguration().getInt(MAX_FILES, DEFAULT_MAX_FILES);
  // Clamp in long arithmetic before narrowing to int: casting the quotient
  // first would wrap around for very large inputs and could produce a bogus
  // (even negative) task count instead of maxFiles.
  int numTasks = (int) Math.min(inputSize / targetFileSize + 1, (long) maxFiles);
  if (job.getNumReduceTasks() != 0) {
    determineAndSetNumOfReducers(numTasks);
  } else {
    // Map-only job: output file count is driven by the split size instead.
    setSplitSizes(inputSize / numTasks);
  }
}
final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[])sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples");
/** * Is this a map-only job? * * @param job the job * @param jobField the field to reflect for annotations * @return <code>true</code> if map only, <code>false</code> otherwise. */ protected boolean isMapOnlyJob(Job job, Field jobField) { if (job.getNumReduceTasks() > 0) { return false; } // See if we have a ReducerInfo annotation - otherwise // we'll consider this a "map only" job return !jobField.isAnnotationPresent(ReducerInfo.class); }
@Override protected void setupReducer(final Job job) { job.setReducerClass(IngestReducer.class); if (job.getNumReduceTasks() <= 1) { // the default is one reducer, if its only one, set it to 8 as the // default job.setNumReduceTasks(8); } } }
/**
 * Caps the reducer count of the given job at the number of regions of the
 * given table. A job that already uses that many reducers or fewer is left
 * untouched.
 *
 * @param table The table to get the region count for.
 * @param job The current job to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
  final int regionCount =
      MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
  if (regionCount < job.getNumReduceTasks()) {
    job.setNumReduceTasks(regionCount);
  }
}
/**
 * Caps the reducer count of the given job at the number of regions of the
 * given table. A job that already uses that many reducers or fewer is left
 * untouched.
 *
 * @param table The table to get the region count for.
 * @param job The current job to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
  final int regionCount =
      MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
  if (regionCount < job.getNumReduceTasks()) {
    job.setNumReduceTasks(regionCount);
  }
}
/**
 * Caps the reducer count of the given job at the number of regions of the
 * given table. A job that already uses that many reducers or fewer is left
 * untouched.
 *
 * @param table The table to get the region count for.
 * @param job The current job to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
  final int regionCount =
      MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
  if (regionCount < job.getNumReduceTasks()) {
    job.setNumReduceTasks(regionCount);
  }
}
/**
 * Ensures that the given number of reduce tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * <p>The table handle opened for the region lookup is closed before this
 * method returns, whether or not the lookup succeeds.
 *
 * @param table The table to get the region count for.
 * @param job The current job to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
  HTable outputTable = new HTable(job.getConfiguration(), table);
  final int regions;
  try {
    regions = outputTable.getRegionsInfo().size();
  } finally {
    // The table is only needed for the region lookup; close it right away so
    // its resources are not leaked.
    outputTable.close();
  }
  if (job.getNumReduceTasks() > regions) {
    job.setNumReduceTasks(regions);
  }
}
/**
 * Ensures that the given number of reduce tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * <p>The table handle opened for the region lookup is closed before this
 * method returns, whether or not the lookup succeeds.
 *
 * @param table
 *          The table to get the region count for.
 * @param job
 *          The current job to adjust.
 * @throws IOException
 *           When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
  HTable outputTable = new HTable(job.getConfiguration(), table);
  final int regions;
  try {
    regions = outputTable.getRegionLocations().size();
  } finally {
    // The table is only needed for the region lookup; close it right away so
    // its resources are not leaked.
    outputTable.close();
  }
  if (job.getNumReduceTasks() > regions) {
    job.setNumReduceTasks(regions);
  }
}
/**
 * Writes a debug summary of the job's pipeline to the given logger: input
 * format, mapper, map output key/value classes, reducer and reduce task
 * count, output key/value classes and output format, followed by the first
 * input path and the output path.
 *
 * @param job the job to describe
 * @param log the logger to write to
 * @throws ClassNotFoundException if one of the job's configured classes
 *     cannot be resolved
 */
public static void log(Job job, Logger log) throws ClassNotFoundException {
  log.debug("{} -> {} ({}, {}) -> {}#{} ({}, {}) -> {}",
      new Object[] {
        job.getInputFormatClass().getSimpleName(),
        job.getMapperClass().getSimpleName(),
        job.getMapOutputKeyClass().getSimpleName(),
        job.getMapOutputValueClass().getSimpleName(),
        job.getReducerClass().getSimpleName(),
        job.getNumReduceTasks(),
        job.getOutputKeyClass().getSimpleName(),
        job.getOutputValueClass().getSimpleName(),
        job.getOutputFormatClass().getSimpleName()
      });

  Path[] inputs = FileInputFormat.getInputPaths(job);
  Path output = FileOutputFormat.getOutputPath(job);
  // A job without file-based input has no input paths; logging must not fail
  // with an ArrayIndexOutOfBoundsException in that case.
  Path firstInput = inputs.length > 0 ? inputs[0] : null;
  log.debug("input: {}", firstInput);
  log.debug("output: {}", output);
}
/**
 * Registers the given Avro schemas on the job.
 *
 * <p>When the job has zero reduce tasks the schemas are set as the map output
 * schemas; otherwise they are set as the job output schemas. A {@code null}
 * schema is skipped.
 *
 * @param job the job to configure
 * @param keySchema the key schema, or {@code null} to leave the key schema unset
 * @param valSchema the value schema, or {@code null} to leave the value schema unset
 */
private void setSchema(Job job, Schema keySchema, Schema valSchema) {
  final boolean hasReducers = job.getNumReduceTasks() != 0;

  if (keySchema != null) {
    if (hasReducers) {
      AvroJob.setOutputKeySchema(job, keySchema);
    } else {
      AvroJob.setMapOutputKeySchema(job, keySchema);
    }
  }

  if (valSchema != null) {
    if (hasReducers) {
      AvroJob.setOutputValueSchema(job, valSchema);
    } else {
      AvroJob.setMapOutputValueSchema(job, valSchema);
    }
  }
}
/**
 * Checks that {@code HFileOutputFormat2.configureIncrementalLoad} sets the
 * number of reduce tasks on the job when driven by a mocked region locator.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testJobConfiguration() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
      util.getDataTestDir("testJobConfiguration").toString());
  Job job = new Job(conf);
  job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  setupMockStartKeys(regionLocator);
  setupMockTableName(regionLocator);
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
  // Expected value goes first so a failure reports "expected:<4> but was:<n>".
  // NOTE(review): 4 presumably matches the number of start keys mocked by
  // setupMockStartKeys - confirm against that helper.
  assertEquals(4, job.getNumReduceTasks());
}
/**
 * Checks that {@code HFileOutputFormat2.configureIncrementalLoad} sets the
 * number of reduce tasks on the job when driven by a mocked region locator.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testJobConfiguration() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
      util.getDataTestDir("testJobConfiguration").toString());
  Job job = new Job(conf);
  job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  setupMockStartKeys(regionLocator);
  setupMockTableName(regionLocator);
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
  // Expected value goes first so a failure reports "expected:<4> but was:<n>".
  // NOTE(review): 4 presumably matches the number of start keys mocked by
  // setupMockStartKeys - confirm against that helper.
  assertEquals(4, job.getNumReduceTasks());
}
/**
 * Checks that {@code HFileOutputFormat2.configureIncrementalLoad} sets the
 * number of reduce tasks on the job when driven by a mocked region locator.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testJobConfiguration() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
      util.getDataTestDir("testJobConfiguration").toString());
  Job job = new Job(conf);
  job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  setupMockStartKeys(regionLocator);
  setupMockTableName(regionLocator);
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
  // Expected value goes first so a failure reports "expected:<4> but was:<n>".
  // NOTE(review): 4 presumably matches the number of start keys mocked by
  // setupMockStartKeys - confirm against that helper.
  assertEquals(4, job.getNumReduceTasks());
}
/**
 * Checks that {@code HFileOutputFormat2.configureIncrementalLoad} sets the
 * number of reduce tasks on the job when driven by a mocked region locator.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testJobConfiguration() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
      util.getDataTestDir("testJobConfiguration").toString());
  Job job = new Job(conf);
  job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  setupMockStartKeys(regionLocator);
  setupMockTableName(regionLocator);
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
  // Expected value goes first so a failure reports "expected:<4> but was:<n>".
  // NOTE(review): 4 presumably matches the number of start keys mocked by
  // setupMockStartKeys - confirm against that helper.
  assertEquals(4, job.getNumReduceTasks());
}