/**
 * Configures the job with an HBase scan.
 *
 * @param scan The scan to set in the job configuration.
 * @param job The job to configure.
 * @throws IOException If there is an error.
 */
public static void initTableScan(Scan scan, Job job) throws IOException {
  job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
  addAllDependencyJars(job);
}
/**
 * Configures the job with an HBase scan over a table as input.
 *
 * @param table The name of the HBase table to use as input.
 * @param scan The scan to set in the job configuration.
 * @param job The job to configure.
 * @throws IOException If there is an error.
 */
public static void initTableInput(String table, Scan scan, Job job) throws IOException {
  job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
  initTableScan(scan, job);
}
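A minimal usage sketch, assuming a hypothetical job driver (the "users" table name, the column family, and the tuning values are illustrative; only GenericTableMapReduceUtil.initTableInput above comes from the code): build a Scan describing the rows and columns to read, then let initTableInput put the table name and the serialized scan into the job configuration for TableInputFormat.

// Inside a hypothetical driver method; imports from org.apache.hadoop.hbase.client,
// org.apache.hadoop.hbase.mapreduce, and org.apache.hadoop.hbase.util are assumed.
final Job job = Job.getInstance(new Configuration(), "scan-users");
final Scan scan = new Scan();
scan.addFamily(Bytes.toBytes("info"));   // only read the "info" column family
scan.setCaching(500);                    // fetch rows in batches of 500 per RPC
scan.setCacheBlocks(false);              // do not fill the region server block cache from a full scan
GenericTableMapReduceUtil.initTableInput("users", scan, job);
job.setInputFormatClass(TableInputFormat.class);  // reads TableInputFormat.INPUT_TABLE and SCAN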
/** {@inheritDoc} */
@Override
public void configure(Job job) throws IOException {
  // Sets the Hadoop output format according to getOutputFormatClass().
  super.configure(job);

  final Configuration conf = job.getConfiguration();
  conf.set(FijiConfKeys.FIJI_OUTPUT_TABLE_URI, mTableURI.toString());
  job.setNumReduceTasks(getNumReduceTasks());

  // Adds HBase dependency jars to the distributed cache so they appear on the task classpath:
  GenericTableMapReduceUtil.addAllDependencyJars(job);
}
GenericTableMapReduceUtil.addAllDependencyJars(job);
DistributedCacheJars.addJarsToDistributedCache(
    job, new File(System.getenv("FIJI_HOME"), "lib"));
/**
 * This is just like TableMapReduceUtil.initTableMapperJob, except that the map output key
 * and value classes may be any classes instead of just Writables. This way we can work
 * with AvroSerialization instead of just WritableSerialization.
 *
 * @param table The HBase table to read from.
 * @param scan The scan describing which rows and columns to read.
 * @param mapper The TableMapper implementation to run.
 * @param outputKeyClass The map output key class, or null to leave the job's default.
 * @param outputValueClass The map output value class, or null to leave the job's default.
 * @param job The job to configure.
 * @throws IOException If there is an error.
 */
public static void initGenericTableMapperJob(
    String table,
    Scan scan,
    Class<? extends TableMapper<?, ?>> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass,
    Job job)
    throws IOException {
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
  job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
  addAllDependencyJars(job);
}
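A hedged usage sketch of the generic variant (MyAvroTableMapper, the "events" table, and the Avro wrapper types are assumptions, not part of the utility above): because the map output classes are not Writables, this only works if AvroSerialization is also registered on the job, for example through the Avro MapReduce helpers or the io.serializations configuration key.

// Hypothetical caller; MyAvroTableMapper is assumed to extend
// TableMapper<AvroKey<CharSequence>, AvroValue<GenericRecord>>.
final Job job = Job.getInstance(new Configuration(), "avro-table-scan");
final Scan scan = new Scan();
scan.addFamily(Bytes.toBytes("data"));
GenericTableMapReduceUtil.initGenericTableMapperJob(
    "events",                  // HBase table to read
    scan,                      // rows and columns to scan
    MyAvroTableMapper.class,   // mapper emitting Avro keys/values
    AvroKey.class,             // map output key class (not a Writable)
    AvroValue.class,           // map output value class (not a Writable)
    job);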
/**
 * Configures HTable input for the job if the mapper is an HTableReader.
 *
 * <p>If the mapper the job is configured to run reads from an HBase table (HTable),
 * this method checks that the mapper implements the HTableReader interface and uses its
 * methods to configure the table scan specification required by the HTableInputFormat.</p>
 *
 * <p>A mapper that reads its input from an HTable needs to specify the Scan descriptor
 * that describes which subset of rows and column cells should be processed. The mapper
 * communicates this by implementing the methods of the HTableReader interface. This
 * method calls those methods on the job's mapper and sets the Scan descriptor into the
 * job configuration so the HTableInputFormat can read it.</p>
 *
 * @param job The job to configure.
 * @param mapper The Fiji mapper the job is configured to run.
 * @throws IOException If the HTable input cannot be configured.
 */
protected void configureHTableInput(Job job, FijiMapper<?, ?, ?, ?> mapper) throws IOException {
  if (mapper instanceof HTableReader) {
    final HTableReader htableReader = (HTableReader) mapper;
    final Scan hbaseInputTableScan = htableReader.getInputHTableScan(job.getConfiguration());
    if (null == hbaseInputTableScan) {
      return;
    }
    LOG.debug("Configuring HTable scan: " + hbaseInputTableScan.toString());
    GenericTableMapReduceUtil.initTableScan(hbaseInputTableScan, job);
  }
}
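A sketch of the mapper-side half of this contract, under the assumption that HTableReader declares getInputHTableScan(Configuration) roughly as invoked above and that FijiMapper takes Hadoop-style input/output type parameters (both are assumptions; neither interface is shown in full here):

// Hypothetical mapper; only the HTableReader method exercised by configureHTableInput is shown.
public class UserInfoMapper
    extends FijiMapper<ImmutableBytesWritable, Result, Text, LongWritable>
    implements HTableReader {

  /** Returns the Scan describing which HTable rows and cells this mapper wants as input. */
  @Override
  public Scan getInputHTableScan(Configuration conf) {
    final Scan scan = new Scan();
    // Restrict the input to the "info:name" cells; returning null instead would tell
    // configureHTableInput that there is no HTable scan to configure.
    scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
    return scan;
  }
}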