/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table  The table name.
 * @param scan   The scan with the columns to scan.
 * @param mapper The mapper class.
 * @param job    The job configuration.
 * @throws IOException When setting up the job fails.
 */
@SuppressWarnings("rawtypes")
public static void initJob(String table, Scan scan,
    Class<? extends TableMapper> mapper, Job job) throws IOException {
  TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
      ImmutableBytesWritable.class, Result.class, job);
}
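// For context, a hedged driver sketch showing how a job might call the
// initJob() helper above. ScanDriver, MyMapper, the "mytable" table and the
// "cf" family are all illustrative names, not part of the original API.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class ScanDriver {
  // Identity-style mapper; its output types match what initJob() wires in
  // (ImmutableBytesWritable keys, Result values).
  static class MyMapper extends TableMapper<ImmutableBytesWritable, Result> {
    @Override
    protected void map(ImmutableBytesWritable row, Result value, Context context)
        throws IOException, InterruptedException {
      context.write(row, value);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "scan-mytable");
    job.setJarByClass(ScanDriver.class);

    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("cf"));
    scan.setCaching(500);        // larger caching is typical for MR scans
    scan.setCacheBlocks(false);  // avoid polluting the region server block cache

    // Assumes the initJob() helper above is in scope (e.g. statically imported).
    initJob("mytable", scan, MyMapper.class, job);
    job.setNumReduceTasks(0);                         // map-only
    job.setOutputFormatClass(NullOutputFormat.class); // discard map output
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}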
/**
 * Use this before submitting a Multi TableMap job. It will appropriately set
 * up the job.
 *
 * @param scans            The list of {@link Scan} objects to read from.
 * @param mapper           The mapper class to use.
 * @param outputKeyClass   The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job              The current job to adjust. Make sure the passed job is
 *                         carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job) throws IOException {
  initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job, true);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table            The table name to read from.
 * @param scan             The scan instance with the columns, time range etc.
 * @param mapper           The mapper class to use.
 * @param outputKeyClass   The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job              The current job to adjust. Make sure the passed job is
 *                         carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job) throws IOException {
  initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job, true);
}
/**
 * Use this before submitting a Multi TableMap job. It will appropriately set
 * up the job.
 *
 * @param scans             The list of {@link Scan} objects to read from.
 * @param mapper            The mapper class to use.
 * @param outputKeyClass    The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job               The current job to adjust. Make sure the passed job is
 *                          carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the
 *                          configured job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars) throws IOException {
  initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, true);
}
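/*
 * A hedged sketch of wiring several tables into one multi-table job: each Scan
 * carries its own target table via the Scan.SCAN_ATTRIBUTES_TABLE_NAME
 * attribute, which MultiTableInputFormat reads back out. The table and family
 * names, the configureMultiTableScan() helper, and MyMapper (the illustrative
 * mapper from the sketch above) are assumptions, not part of the original API.
 */
static void configureMultiTableScan(Job job) throws IOException {
  List<Scan> scans = new ArrayList<>();
  for (String name : new String[] { "table1", "table2" }) { // assumed names
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("cf"));                    // assumed family
    // MultiTableInputFormat reads the target table name from this attribute.
    scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(name));
    scans.add(scan);
  }
  initTableMapperJob(scans, MyMapper.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true);
}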
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table             The table name to read from.
 * @param scan              The scan instance with the columns, time range etc.
 * @param mapper            The mapper class to use.
 * @param outputKeyClass    The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job               The current job to adjust. Make sure the passed job is
 *                          carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *                          job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars) throws IOException {
  initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableInputFormat.class);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table            Binary representation of the table name to read from.
 * @param scan             The scan instance with the columns, time range etc.
 * @param mapper           The mapper class to use.
 * @param outputKeyClass   The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job              The current job to adjust. Make sure the passed job is
 *                         carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(byte[] table, Scan scan,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job) throws IOException {
  initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
      outputValueClass, job, true);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table             Binary representation of the table name to read from.
 * @param scan              The scan instance with the columns, time range etc.
 * @param mapper            The mapper class to use.
 * @param outputKeyClass    The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job               The current job to adjust. Make sure the passed job is
 *                          carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *                          job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(byte[] table, Scan scan,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars) throws IOException {
  initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
      outputValueClass, job, addDependencyJars, TableInputFormat.class);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table        The table to be processed.
 * @param scan         The scan with the columns etc.
 * @param groupColumns A space separated list of columns used to form the
 *                     key used in collect.
 * @param mapper       The mapper class.
 * @param job          The current job.
 * @throws IOException When setting up the job fails.
 */
@SuppressWarnings("unchecked")
public static void initJob(String table, Scan scan, String groupColumns,
    Class<? extends TableMapper> mapper, Job job) throws IOException {
  TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
      ImmutableBytesWritable.class, Result.class, job);
  job.getConfiguration().set(GROUP_COLUMNS, groupColumns);
}
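/*
 * Usage illustration for the grouping variant above: groupColumns is a
 * space-separated list of family:qualifier pairs whose values are concatenated
 * to form the map output key; rows missing any of the listed columns are
 * skipped by the grouping mapper. The table and column names here are assumed,
 * and GroupingTableMapper is the stock HBase mapper this helper is built for.
 */
initJob("mytable", new Scan(), "info:country info:city",
    GroupingTableMapper.class, job);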
@Override
protected void initJob(List<Scan> scans, Job job) throws IOException {
  TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
}
private void initCopyTableMapperReducerJob(Job job, Scan scan) throws IOException {
  Class<? extends TableMapper> mapper = bulkload ? CellImporter.class : Importer.class;
  if (readingSnapshot) {
    TableMapReduceUtil.initTableSnapshotMapperJob(snapshot, scan, mapper, null, null,
        job, true, generateUniqTempDir(true));
  } else {
    TableMapReduceUtil.initTableMapperJob(tableName, scan, mapper, null, null, job);
  }
}
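/*
 * For reference, a hedged sketch of the snapshot branch in isolation. Reading
 * from a snapshot restores it under a temporary directory and scans the HFiles
 * directly, bypassing the region servers. The snapshot name and restore path
 * below are illustrative; passing null for the output key/value classes leaves
 * the map output types unset, exactly as in the method above.
 */
Path restoreDir = new Path("/hbase-staging/copytable-restore"); // assumed path
TableMapReduceUtil.initTableSnapshotMapperJob("sourceSnapshot", scan,
    Importer.class, null, null, job, true, restoreDir);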
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);

  Job job = Job.getInstance(getConf(),
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);
  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class); // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

  return job;
}
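/*
 * The MapFiles written under HASH_DATA_DIR are what the companion SyncTable
 * job later compares against another table. A hypothetical invocation
 * following the standard Tool pattern; the --batchsize flag and the
 * tablename/outputpath argument order are assumed from the HashTable CLI.
 */
Configuration conf = HBaseConfiguration.create();
int exit = ToolRunner.run(conf, new HashTable(conf),
    new String[] { "--batchsize=1000", "TestTable", "/hashes/TestTable" });
System.exit(exit);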
private void runTestOnTable()
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = null;
  try {
    job = new Job(UTIL.getConfiguration(), "test123");
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    scan.setTimeRange(MINSTAMP, MAXSTAMP);
    scan.setMaxVersions();
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan,
        ProcessTimeRangeMapper.class, Text.class, Text.class, job);
    job.waitForCompletion(true);
  } finally {
    // The method already declares the checked exceptions, so let failures
    // propagate instead of swallowing them; just clean up the temp dir.
    if (job != null) {
      FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
    }
  }
}
private int doVerify(Path outputDir, int numReducers)
    throws IOException, InterruptedException, ClassNotFoundException {
  job = new Job(getConf());
  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());
  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);
  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));

  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);

  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
      AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);

  assertTrue(job.waitForCompletion(true));
  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");

  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
      AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);

  assertTrue(job.waitForCompletion(true));
  return job;
}
@Test
public void testInitTableMapperJob2() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
      Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
/**
 * Tests the number of input splits for an MR job when a number of mappers per
 * region is specified for TableInputFormatXXX. This test does not run the MR job.
 */
protected void testNumOfSplits(int splitsPerRegion, int expectedNumOfSplits)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "TestJobForNumOfSplits";
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());

  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILYS[0]);
  scan.addFamily(INPUT_FAMILYS[1]);
  c.setInt("hbase.mapreduce.tableinput.mappers.per.region", splitsPerRegion);
  c.set(KEY_STARTROW, "");
  c.set(KEY_LASTROW, "");

  Job job = Job.getInstance(c, jobName);
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan,
      ScanMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

  TableInputFormat tif = new TableInputFormat();
  tif.setConf(job.getConfiguration());
  Assert.assertEquals(TABLE_NAME, table.getName());
  List<InputSplit> splits = tif.getSplits(job);
  Assert.assertEquals(expectedNumOfSplits, splits.size());
}
@Test
public void testInitTableMapperJob3() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
      Import.Importer.class, Text.class, Text.class, job);
  assertEquals(TableInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
@Test
public void testInitTableMapperJob4() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
      Import.Importer.class, Text.class, Text.class, job, false);
  assertEquals(TableInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
@Test
public void testInitTableMapperJob1() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  TableMapReduceUtil.initTableMapperJob("Table", new Scan(),
      Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}