// WALPlayer.createSubmittableJob() (fragment; elided lines marked with "// ...")
conf.setStrings(TABLES_KEY, tables);
conf.setStrings(TABLE_MAP_KEY, tableMap);
conf.set(FileInputFormat.INPUT_DIR, inputDirs);
Job job = Job.getInstance(conf,
    conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
job.setJarByClass(WALPlayer.class);
job.setInputFormatClass(WALInputFormat.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
// ... bulk-output (HFile) branch ...
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
      org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
} else {
  // Write directly to the live cluster.
  job.setMapperClass(WALMapper.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.setNumReduceTasks(0);
}
// ... ship the WAL cell codec class with the job ...
try {
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
      Class.forName(codecCls));
} catch (Exception e) {
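For context, WALPlayer is normally driven as a Hadoop Tool. A minimal, hedged sketch of invoking it programmatically follows; the input directory and table name are hypothetical placeholders, and the argument order mirrors WALPlayer's documented CLI (<WAL inputdir> <tables> [<tableMappings>]):

// Hedged sketch: replaying WALs into a table with WALPlayer via ToolRunner.
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.util.ToolRunner;

public class WALPlayerDriver {
  public static void main(String[] args) throws Exception {
    int rc = ToolRunner.run(HBaseConfiguration.create(), new WALPlayer(),
        new String[] { "/hbase/oldWALs", "my_table" }); // <WAL inputdir> <tables>
    System.exit(rc);
  }
}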
// SyncTable.createSubmittableJob() (fragment)
public Job createSubmittableJob(String[] args) throws IOException {
  FileSystem fs = sourceHashDir.getFileSystem(getConf());
  if (!fs.exists(sourceHashDir)) {
    throw new IOException("Source hash dir not found: " + sourceHashDir);
  }
  Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
  int dataSubdirCount = 0;
  for (FileStatus file : fs.listStatus(dataDir)) {
    if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
      dataSubdirCount++;
    }
  }
  // ...
  jobConf.setBoolean(DO_PUTS_CONF_KEY, doPuts);
  TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
      SyncMapper.class, null, null, job);
  TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
      targetZkCluster, null, null);
  Configuration peerConf =
      HBaseConfiguration.createClusterConf(job.getConfiguration(), sourceZkCluster);
  TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
// TableMapReduceUtil.initTableMapperJob() internals (fragment)
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
  if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
  job.setMapperClass(mapper);
  if (Put.class.equals(outputValueClass)) {
    // ...
  }
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  conf.set(TableInputFormat.INPUT_TABLE, table);
  conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  // ...
  initCredentials(job);
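The fragment above is the library side of initTableMapperJob(). A minimal sketch of a caller, i.e. a complete read-only scan driver, is below; the table name "mytable" and the mapper class are hypothetical, while the TableMapReduceUtil call matches the six-argument overload shown throughout these snippets:

// Minimal sketch of a map-only scan job built on initTableMapperJob().
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class ScanJobDriver {
  // Mapper that only reads rows; emits nothing.
  static class MyScanMapper extends TableMapper<ImmutableBytesWritable, Result> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context ctx)
        throws IOException, InterruptedException {
      // process one row here
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "scan-mytable");
    job.setJarByClass(ScanJobDriver.class);
    Scan scan = new Scan();
    scan.setCaching(500);        // larger scanner cache for MR throughput
    scan.setCacheBlocks(false);  // don't pollute the block cache from MR
    // Sets input format, mapper, output classes, dependency jars and
    // credentials, as the library fragment above shows.
    TableMapReduceUtil.initTableMapperJob("mytable", scan, MyScanMapper.class,
        ImmutableBytesWritable.class, Result.class, job);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}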
// CopyTable: choose mapper and input source (snapshot vs. live table)
private void initCopyTableMapperReducerJob(Job job, Scan scan) throws IOException {
  Class<? extends TableMapper> mapper = bulkload ? CellImporter.class : Importer.class;
  if (readingSnapshot) {
    TableMapReduceUtil.initTableSnapshotMapperJob(snapshot, scan, mapper, null, null,
        job, true, generateUniqTempDir(true));
  } else {
    TableMapReduceUtil.initTableMapperJob(tableName, scan, mapper, null, null, job);
  }
}
// TableMapReduceUtil.initMultiTableMapperJob() internals (fragment)
    boolean addDependencyJars, boolean initCredentials) throws IOException {
  job.setInputFormatClass(MultiTableInputFormat.class);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  // ...
  job.setMapOutputKeyClass(outputKeyClass);
  job.setMapperClass(mapper);
  // ...
  scanStrings.add(convertScanToString(scan));
  job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
      scanStrings.toArray(new String[scanStrings.size()]));
  // ...
  addDependencyJars(job);
  // ...
  initCredentials(job);
// ImportTsv.createSubmittableJob() (fragment)
    throws IOException, ClassNotFoundException {
  Job job = null;
  boolean isDryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
  try (Connection connection = ConnectionFactory.createConnection(conf)) {
    try (Admin admin = connection.getAdmin()) {
      String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
      if (actualSeparator != null) {
        // Base64-encode the separator so any character survives the config.
        conf.set(SEPARATOR_CONF_KEY,
            Bytes.toString(Base64.getEncoder().encode(actualSeparator.getBytes())));
      }
      Class mapperClass =
          mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;
      // ... bulk-load path: write HFiles ...
      Path outputDir = new Path(hfileOutPath);
      FileOutputFormat.setOutputPath(job, outputDir);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(),
          regionLocator);
      // ... otherwise write puts directly to the table ...
      TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
      job.setNumReduceTasks(0);
      // ...
          CellSerialization.class.getName());
      TableMapReduceUtil.addDependencyJars(job);
      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
          org.apache.hbase.thirdparty.com.google.common.base.Function.class
          /* Guava used by TsvParser */);
// Import.createSubmittableJob() (fragment; spans the bulk-output and
// write-to-table modes)
TableName tableName = TableName.valueOf(args[0]);
conf.set(TABLE_NAME, tableName.getNameAsString());
Path inputDir = new Path(args[1]);
Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
job.setJarByClass(Importer.class);
FileInputFormat.setInputPaths(job, inputDir);
job.setInputFormatClass(SequenceFileInputFormat.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
// ...
TableMapReduceUtil.addDependencyJarsForClasses(conf, filter);
// ...
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
// ...
    RawComparator.class);
Path partitionsPath =
    new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
// ...
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
    org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
// ...
FileOutputFormat.setOutputPath(job, outputDir);
HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
    org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
// ...
TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
// VerifyReplication.createSubmittableJob() (fragment)
    return null;
  }
  // ...
  Path restoreDir = new Path(peerSnapshotTmpDir, UUID.randomUUID().toString());
  peerSnapshotTmpDir = restoreDir.toString();
  conf.set(NAME + ".peerSnapshotTmpDir", peerSnapshotTmpDir);
  // ...
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(VerifyReplication.class);
  // ...
  Path snapshotTempPath = new Path(sourceSnapshotTmpDir);
  LOG.info("Using source snapshot-" + sourceSnapshotName + " with temp dir:"
      + sourceSnapshotTmpDir);
  TableMapReduceUtil.initTableSnapshotMapperJob(sourceSnapshotName, scan,
      Verifier.class, null, null, job, true, snapshotTempPath);
  restoreSnapshotForPeerCluster(conf, peerQuorumAddress);
} else {
  TableMapReduceUtil.initTableMapperJob(tableName, scan, Verifier.class, null, null,
      job);
}
// ...
TableMapReduceUtil.initCredentialsForCluster(job, peerClusterConf);
job.setOutputFormatClass(NullOutputFormat.class);
job.setNumReduceTasks(0);
return job;
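VerifyReplication is usually launched as a Tool rather than by calling createSubmittableJob() directly. A hedged sketch, with a hypothetical peer id and table name, following its documented <peerid> <tablename> argument order:

// Hedged sketch: running VerifyReplication via ToolRunner.
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
import org.apache.hadoop.util.ToolRunner;

public class VerifyRepDriver {
  public static void main(String[] args) throws Exception {
    int rc = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(),
        new String[] { "1", "my_table" }); // <peerid> <tablename>
    System.exit(rc);
  }
}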
// PerformanceEvaluation.doMapReduce()
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
// CellCounter.createSubmittableJob()
/**
 * Sets up the actual job.
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
// MapReduceHFileSplitterJob.createSubmittableJob() (fragment)
String inputDirs = args[0];
String tabName = args[1];
conf.setStrings(TABLES_KEY, tabName);
conf.set(FileInputFormat.INPUT_DIR, inputDirs);
Job job = Job.getInstance(conf,
    conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
job.setJarByClass(MapReduceHFileSplitterJob.class);
job.setInputFormatClass(HFileInputFormat.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
if (hfileOutPath != null) {
  LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
  TableName tableName = TableName.valueOf(tabName);
  job.setMapperClass(HFileCellMapper.class);
  job.setReducerClass(CellSortReducer.class);
  Path outputDir = new Path(hfileOutPath);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setMapOutputValueClass(MapReduceExtendedCell.class);
  try (Connection conn = ConnectionFactory.createConnection(conf);
      // ...
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
} else {
/**
 * Tests an MR Scan initialized from properties set in the Configuration.
 */
protected void testScanFromConfiguration(String start, String stop, String last)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "ScanFromConfig"
      + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To"
      + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString());
  c.set(TableInputFormat.SCAN_COLUMN_FAMILY,
      Bytes.toString(INPUT_FAMILYS[0]) + ", " + Bytes.toString(INPUT_FAMILYS[1]));
  c.set(KEY_STARTROW, start != null ? start : "");
  c.set(KEY_LASTROW, last != null ? last : "");
  if (start != null) {
    c.set(TableInputFormat.SCAN_ROW_START, start);
  }
  if (stop != null) {
    c.set(TableInputFormat.SCAN_ROW_STOP, stop);
  }
  Job job = Job.getInstance(c, jobName);
  job.setMapperClass(ScanMapper.class);
  job.setReducerClass(ScanReducer.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setInputFormatClass(TableInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  TableMapReduceUtil.addDependencyJars(job);
  assertTrue(job.waitForCompletion(true));
}
// ExportSnapshot: copy-job setup (fragment)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
  if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
  if (mappers > 0) {
    conf.setInt(CONF_NUM_SPLITS, mappers);
    conf.setInt(MR_NUM_MAPS, mappers);
  }
  conf.setInt(CONF_FILES_MODE, filesMode);
  conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
  conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
  conf.set(CONF_INPUT_ROOT, inputRoot.toString());
  conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
  conf.set(CONF_SNAPSHOT_NAME, snapshotName);
  conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());

  String jobname = conf.get(CONF_MR_JOB_NAME, "ExportSnapshot-" + snapshotName);
  Job job = new Job(conf);
  job.setJobName(jobname);
  job.setJarByClass(ExportSnapshot.class);
  TableMapReduceUtil.addDependencyJars(job);
  job.setMapperClass(ExportMapper.class);
  job.setInputFormatClass(ExportSnapshotInputFormat.class);
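ExportSnapshot is also a Tool, normally run from the shell. A hedged sketch of invoking it programmatically; the snapshot name and destination URI are hypothetical, while --snapshot and --copy-to are its documented options:

// Hedged sketch: running ExportSnapshot via ToolRunner.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
import org.apache.hadoop.util.ToolRunner;

public class ExportSnapshotDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    int rc = ToolRunner.run(conf, new ExportSnapshot(), new String[] {
        "--snapshot", "my_snapshot",                     // snapshot to copy
        "--copy-to", "hdfs://backup-cluster:8020/hbase"  // target root dir
    });
    System.exit(rc);
  }
}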
@Test
public void testInitTableMapperJob2() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
      Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
// AnalyzeSnapshotData (example code; fragment)
Scan scan = new Scan();
if (column != null) {
  byte[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));
  if (colkey.length > 1) {
    scan.addColumn(colkey[0], colkey[1]);
  } else {
    scan.addFamily(colkey[0]);
  }
}
// ...
Admin admin = connection.getAdmin();
LOG.info("Performing snapshot of table " + table + " as " + snapshot);
admin.snapshot(snapshot, TableName.valueOf(table)); // Create a snapshot of the table.
Job job = Job.getInstance(conf, "Analyze data in snapshot " + table);
job.setJarByClass(AnalyzeSnapshotData.class);
TableMapReduceUtil.initTableSnapshotMapperJob(snapshot, scan, AnalyzeMapper.class,
    Text.class, IntWritable.class, job, true, restoreDir);
// ...
TableMapReduceUtil.addDependencyJars(job.getConfiguration(), /* ... */);
FileOutputFormat.setOutputPath(job, new Path(output));
// ...
LOG.info("Cleaning up snapshot and restore directory");
restoreDir.getFileSystem(conf).delete(restoreDir, true);
// TestTableInputFormatScan.testScan() (fragment)
    + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
LOG.info("Before map/reduce startup - job " + jobName);
Configuration c = new Configuration(TEST_UTIL.getConfiguration());
Scan scan = new Scan();
scan.addFamily(INPUT_FAMILYS[0]);
scan.addFamily(INPUT_FAMILYS[1]);
if (start != null) {
  scan.withStartRow(Bytes.toBytes(start));
}
c.set(KEY_STARTROW, start != null ? start : "");
if (stop != null) {
  scan.withStopRow(Bytes.toBytes(stop));
}
c.set(KEY_LASTROW, last != null ? last : "");
LOG.info("scan before: " + scan);
Job job = Job.getInstance(c, jobName);
TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
job.setReducerClass(ScanReducer.class);
job.setNumReduceTasks(1); // one to get final "first" and "last" key
FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
LOG.info("Started " + job.getJobName());
assertTrue(job.waitForCompletion(true));
// HashTable.createSubmittableJob()
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);

  Job job = Job.getInstance(getConf(),
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);
  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class,
      job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class); // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

  return job;
}
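HashTable's output directory is what the SyncTable fragment shown earlier consumes. A hedged sketch of chaining the two as Tools; the paths and table names are hypothetical, and the argument orders and the --dryrun option follow the HBase reference guide:

// Hedged sketch: hash a source table, then compare the target against it.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.HashTable;
import org.apache.hadoop.hbase.mapreduce.SyncTable;
import org.apache.hadoop.util.ToolRunner;

public class HashThenSync {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Step 1: hash the source table into HDFS: <tablename> <outputpath>
    int rc = ToolRunner.run(conf, new HashTable(conf),
        new String[] { "sourceTable", "/hashes/sourceTable" });
    if (rc != 0) System.exit(rc);
    // Step 2: compare the target against those hashes, dry run first:
    // <sourcehashdir> <sourcetable> <targettable>
    rc = ToolRunner.run(conf, new SyncTable(conf),
        new String[] { "--dryrun=true", "/hashes/sourceTable", "sourceTable",
            "targetTable" });
    System.exit(rc);
  }
}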
// TestTableMapReduce (fragment)
try {
  LOG.info("Before map/reduce startup");
  job = new Job(table.getConfiguration(), "process column contents");
  job.setNumReduceTasks(1);
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  TableMapReduceUtil.initTableMapperJob(table.getName().getNameAsString(), scan,
      ProcessContentsMapper.class, ImmutableBytesWritable.class, Put.class, job);
  TableMapReduceUtil.initTableReducerJob(table.getName().getNameAsString(),
      IdentityTableReducer.class, job);
  FileOutputFormat.setOutputPath(job, new Path("test"));
  LOG.info("Started " + table.getName().getNameAsString());
  assertTrue(job.waitForCompletion(true));
  LOG.info("After map/reduce completion");
  // ...
} finally {
  if (job != null) {
    FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
  }
}
// HFileOutputFormat2.configureIncrementalLoad() internals (fragment)
static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo,
    Class<? extends OutputFormat<?, ?>> cls) throws IOException {
  Configuration conf = job.getConfiguration();
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(MapReduceExtendedCell.class);
  job.setOutputFormatClass(cls);
  // ...
  if (MultiTableHFileOutputFormat.class.equals(cls)) {
    writeMultipleTables = true;
    conf.setBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, true);
  }
  // ...
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());
  // ...
  String tn = writeMultipleTables
      ? tableInfo.getRegionLocator().getName().getNameWithNamespaceInclAsString()
      : tableInfo.getRegionLocator().getName().getNameAsString();
  allTableNames.add(tn);
  tableDescriptors.add(tableInfo.getTableDescriptor());
  // ...
      serializeColumnFamilyAttribute(dataBlockEncodingDetails, tableDescriptors));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  LOG.info("Incremental output configured for tables: "
      + StringUtils.join(allTableNames, ","));
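A client driver pairs configureIncrementalLoad() with a completing bulk load once the job finishes. A hedged sketch follows, assuming HBase 2.2+ for the BulkLoadHFiles API; the table name, output path, and the omitted mapper are hypothetical:

// Sketch of a bulk-load driver around configureIncrementalLoad().
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.tool.BulkLoadHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tn = TableName.valueOf("mytable");
    Path out = new Path("/tmp/bulkload-out");
    Job job = Job.getInstance(conf, "bulkload-mytable");
    job.setJarByClass(BulkLoadDriver.class);
    // ... set an input format and a mapper emitting (ImmutableBytesWritable, Put) ...
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tn);
        RegionLocator locator = conn.getRegionLocator(tn)) {
      // Wires the output format, total-order partitioning and per-family
      // compression/encoding details, as the library fragment above shows.
      HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), locator);
      FileOutputFormat.setOutputPath(job, out);
      if (job.waitForCompletion(true)) {
        // Hand the generated HFiles to the cluster, region by region.
        BulkLoadHFiles.create(conf).bulkLoad(tn, out);
      }
    }
  }
}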
// CopyTable.createSubmittableJob() (fragment)
Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, jobName));
job.setJarByClass(CopyTable.class);
Scan scan = new Scan();
scan.setBatch(batch);
scan.setCacheBlocks(false);
if (cacheRow > 0) {
  scan.setCaching(cacheRow);
} else {
  scan.setCaching(getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100));
}
// ...
job.getConfiguration().set(TableInputFormat.SHUFFLE_MAPS, "true");
// ...
TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));
// ...
try (Connection conn = ConnectionFactory.createConnection(getConf());
    Admin admin = conn.getAdmin()) {
  HFileOutputFormat2.configureIncrementalLoadMap(job,
      admin.getDescriptor((TableName.valueOf(dstTableName))));
}
// ...
TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress,
    null, null);