public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job, boolean addDependencyJars) {
  initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableInputFormat.class);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up the JobConf.
 *
 * @param table   table name
 * @param columns columns to scan
 * @param mapper  mapper class
 * @param job     job configuration
 */
@SuppressWarnings("unchecked")
public static void initJob(String table, String columns, Class<? extends TableMap> mapper,
    JobConf job) {
  TableMapReduceUtil.initTableMapJob(table, columns, mapper, ImmutableBytesWritable.class,
      Result.class, job);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up the JobConf.
 *
 * @param table            The table name to read from.
 * @param columns          The columns to scan.
 * @param mapper           The mapper class to use.
 * @param outputKeyClass   The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job              The current job configuration to adjust.
 */
public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job) {
  initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true,
      TableInputFormat.class);
}
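For context, a minimal driver sketch showing how this method is typically called. The table name, column list, driver class, and MyMapper are placeholders, not part of the API above:

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.NullOutputFormat;

public class ScanDriver {

  // Placeholder identity mapper; any TableMap implementation works here.
  static class MyMapper extends MapReduceBase
      implements TableMap<ImmutableBytesWritable, Result> {
    @Override
    public void map(ImmutableBytesWritable row, Result value,
        OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
        throws IOException {
      output.collect(row, value);
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create(), ScanDriver.class);
    job.setJobName("scan-example");
    // "mytable" and the space-separated column list are placeholders.
    TableMapReduceUtil.initTableMapJob("mytable", "info:name info:age",
        MyMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);                     // map-only scan
    job.setOutputFormat(NullOutputFormat.class);  // discard map output
    JobClient.runJob(job);
  }
}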
/**
 * Use this before submitting a TableMap job. It will appropriately set up the JobConf.
 *
 * @param table        table to be processed
 * @param columns      space-separated list of columns to fetch
 * @param groupColumns space-separated list of columns used to form the key used in collect
 * @param mapper       map class
 * @param job          job configuration object
 */
@SuppressWarnings("unchecked")
public static void initJob(String table, String columns, String groupColumns,
    Class<? extends TableMap> mapper, JobConf job) {
  TableMapReduceUtil.initTableMapJob(table, columns, mapper, ImmutableBytesWritable.class,
      Result.class, job);
  job.set(GROUP_COLUMNS, groupColumns);
}
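A sketch of wiring up a grouped scan with this variant. It assumes the method above belongs to GroupingTableMap (the GROUP_COLUMNS key suggests it); the table and column names are placeholders:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.GroupingTableMap;
import org.apache.hadoop.mapred.JobConf;

public class GroupedScanSetup {
  // GroupingTableMap concatenates the values of the group columns into the
  // map output key, so rows sharing those values meet in the same reduce call.
  public static JobConf configure() {
    JobConf job = new JobConf(HBaseConfiguration.create());
    GroupingTableMap.initJob(
        "mytable",                // table to read (placeholder)
        "info:name info:age",     // columns to fetch, space-separated
        "info:name",              // columns forming the output key
        GroupingTableMap.class,   // the grouping mapper itself
        job);
    return job;
  }
}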
/**
 * @param args command-line arguments: args[0] is the output directory, args[1] is the
 *             table name, and the remaining arguments are the columns to scan
 * @return the JobConf
 * @throws IOException When setting up the job fails.
 */
public JobConf createSubmittableJob(String[] args) throws IOException {
  JobConf c = new JobConf(getConf(), getClass());
  c.setJobName(NAME);
  // Columns are space delimited.
  StringBuilder sb = new StringBuilder();
  final int columnoffset = 2;
  for (int i = columnoffset; i < args.length; i++) {
    if (i > columnoffset) {
      sb.append(" ");
    }
    sb.append(args[i]);
  }
  // Second argument is the table name.
  TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), RowCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, c);
  c.setNumReduceTasks(0);
  // First arg is the output directory.
  FileOutputFormat.setOutputPath(c, new Path(args[0]));
  return c;
}
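A hedged launch sketch for the driver above. It assumes the enclosing class implements org.apache.hadoop.util.Tool (the original uses getConf()) and is named RowCounter here; the paths and table name are placeholders. Note the argument order: output directory first, then table, then columns:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

public class RowCountLauncher {
  public static void main(String[] args) throws Exception {
    // Argument order matches createSubmittableJob:
    // args[0] = output directory, args[1] = table name, args[2..] = columns.
    // RowCounter stands in for the Tool that defines createSubmittableJob above.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(),
        new String[] { "/tmp/rowcount-out", "mytable", "info:name" });
    System.exit(exitCode);
  }
}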
/**
 * Sets up the job for reading from a table snapshot. It bypasses HBase servers and reads
 * directly from snapshot files.
 *
 * @param snapshotName      The name of the snapshot (of a table) to read from.
 * @param columns           The columns to scan.
 * @param mapper            The mapper class to use.
 * @param outputKeyClass    The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job               The current job to adjust. Make sure the passed job is carrying
 *                          all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured job classes
 *                          via the distributed cache (tmpjars).
 * @param tmpRestoreDir     a temporary directory to copy the snapshot files into. The current
 *                          user should have write permissions to this directory, and it
 *                          should not be a subdirectory of rootdir. After the job is
 *                          finished, the restore directory can be deleted.
 * @throws IOException When setting up the details fails.
 * @see TableSnapshotInputFormat
 */
public static void initTableSnapshotMapJob(String snapshotName, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
  TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableSnapshotInputFormat.class);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
}
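A minimal setup sketch for the snapshot path. The snapshot name and restore directory are placeholders, and the mapper reuses the MyMapper placeholder from the ScanDriver sketch above:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobConf;

public class SnapshotScanSetup {
  public static JobConf configure() throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create(), SnapshotScanSetup.class);
    // Snapshot name and restore directory are placeholders. The restore
    // directory must be writable by the current user and must not live
    // under the HBase root directory.
    TableMapReduceUtil.initTableSnapshotMapJob(
        "mytable-snapshot",                 // snapshot to read
        "info:name",                        // columns to scan
        ScanDriver.MyMapper.class,          // any TableMap implementation
        ImmutableBytesWritable.class,
        Result.class,
        job,
        true,                               // ship dependency jars via tmpjars
        new Path("/tmp/snapshot-restore"));
    return job;
  }
}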
// Overload of the snapshot setup above that additionally takes a split algorithm and a
// per-region split count (enclosing signature reconstructed from the parameters used).
public static void initTableSnapshotMapJob(String snapshotName, String columns,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf jobConf, boolean addDependencyJars, Path tmpRestoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormat.setInput(jobConf, snapshotName, tmpRestoreDir, splitAlgo,
      numSplitsPerRegion);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, jobConf,
      addDependencyJars, TableSnapshotInputFormat.class);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(jobConf);
}
/**
 * Open and close a TableOutputFormat. Closing the RecordWriter should release HBase
 * connection (ZK) resources, and will throw an exception if they are exhausted.
 */
static void openCloseTableOutputFormat(int iter) throws IOException {
  LOG.info("Instantiating TableOutputFormat connection " + iter);
  JobConf conf = new JobConf();
  conf.addResource(UTIL.getConfiguration());
  conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
  TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
  TableOutputFormat tof = new TableOutputFormat();
  RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
  rw.close(null);
}
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null) {
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
  }
}
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation() throws IOException {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(2);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf,
        HRegionPartitioner.class);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null) {
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
  }
}
@Override
protected void runTestOnTable(Table table) throws IOException {
  JobConf jobConf = null;
  try {
    LOG.info("Before map/reduce startup");
    jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
    jobConf.setJobName("process column contents");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(table.getName().getNameAsString(),
        Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
        ImmutableBytesWritable.class, Put.class, jobConf);
    TableMapReduceUtil.initTableReduceJob(table.getName().getNameAsString(),
        IdentityTableReduce.class, jobConf);
    LOG.info("Started " + table.getName());
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
    LOG.info("After map/reduce completion");
    // Verify map-reduce results.
    verify(table.getName());
  } finally {
    if (jobConf != null) {
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
  }
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up the JobConf.
 *
 * @param table            The table name to read from.
 * @param columns          The columns to scan.
 * @param mapper           The mapper class to use.
 * @param outputKeyClass   The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job              The current job configuration to adjust.
 */
public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper, Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {
  initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true);
}