/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *   PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
    throws IOException {
  configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
}
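For context, a minimal driver sketch wiring up this overload, per the Javadoc's note that the map output value class must be KeyValue or Put. This is not from the source: the BulkLoadDriver class, table name, and output-path argument are hypothetical, and the mapper is left to the caller.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadDriver {  // hypothetical driver class
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "incremental-load");
    job.setJarByClass(BulkLoadDriver.class);
    // Set your mapper here; per the Javadoc it must emit ImmutableBytesWritable
    // keys with KeyValue (or Put) values.
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    FileOutputFormat.setOutputPath(job, new Path(args[0]));  // HFile output directory
    TableName name = TableName.valueOf("example_table");     // hypothetical table name
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(name);
        RegionLocator locator = conn.getRegionLocator(name)) {
      // Configures the partitioner, reducer, reduce-task count, and output classes.
      HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}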
final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
final Map<byte[], String> bloomParamMap = createFamilyBloomParamMap(conf);
final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);
final Map<byte[], DataBlockEncoding> datablockEncodingMap =
    createFamilyDataBlockEncodingMap(conf);
final DataBlockEncoding overriddenEncoding;
if (dataBlockEncodingStr != null) {
  overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
} else {
  overriddenEncoding = null;
}
conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY_CF_PREFIX +
    Bytes.toString(HFileOutputFormat2.combineTableNameSuffix(
        TABLE_NAMES[0].getName(), FAMILIES[0])), "ONE_SSD");
Path cf1Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[0]));
Path cf2Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[1]));
HFileOutputFormat2.configureStoragePolicy(conf, fs,
    HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[0]), cf1Dir);
HFileOutputFormat2.configureStoragePolicy(conf, fs,
    HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[1]), cf2Dir);
spA = getStoragePolicyName(fs, cf1Dir);
spB = getStoragePolicyName(fs, cf2Dir);
@Override
public RecordWriter<ImmutableBytesWritable, Cell> getRecordWriter(
    final TaskAttemptContext context) throws IOException, InterruptedException {
  return createRecordWriter(context, this.getOutputCommitter(context));
}
HFileOutputFormat2.setOutputPath(job, bulkloadDir);
try (Connection conn = ConnectionFactory.createConnection(getConf());
    Admin admin = conn.getAdmin()) {
  HFileOutputFormat2.configureIncrementalLoadMap(job,
      admin.getDescriptor(TableName.valueOf(dstTableName)));
}
job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
setupRandomGeneratorMapper(job, false);
HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
FileOutputFormat.setOutputPath(job, dir);
context = createTestTaskAttemptContext(job);
HFileOutputFormat2 hof = new HFileOutputFormat2();
writer = hof.getRecordWriter(context);

hof.getOutputCommitter(context).commitTask(context);
hof.getOutputCommitter(context).commitJob(context);

FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
assertEquals(htd.getFamilies().size(), families.length);
HFileOutputFormat2.setOutputPath(job, bulkloadDir);
HTable hTable = new HTable(hConf, tableName);
HFileOutputFormat2.configureIncrementalLoad(job, hTable);
setupMockColumnFamiliesForCompression(table, familyToCompression);
conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY,
    HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.compressionDetails,
        Arrays.asList(table.getTableDescriptor())));

// read back the family-specific compression setting from the configuration
Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
    .createFamilyCompressionMap(conf);
setupMockColumnFamiliesForBloomType(table, familyToBloomType);
conf.set(HFileOutputFormat2.BLOOM_TYPE_FAMILIES_CONF_KEY,
    HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.bloomTypeDetails,
        Arrays.asList(table.getTableDescriptor())));

// read back the family-specific bloom filter type from the configuration
Map<byte[], BloomType> retrievedFamilyToBloomTypeMap = HFileOutputFormat2
    .createFamilyBloomTypeMap(conf);
HTableDescriptor tableDescriptor = table.getTableDescriptor();
conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
    HFileOutputFormat2.serializeColumnFamilyAttribute(
        HFileOutputFormat2.dataBlockEncodingDetails, Arrays.asList(tableDescriptor)));

// read back the family-specific data block encoding from the configuration
Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap = HFileOutputFormat2
    .createFamilyDataBlockEncodingMap(conf);
setupMockColumnFamiliesForBlockSize(table, familyToBlockSize);
conf.set(HFileOutputFormat2.BLOCK_SIZE_FAMILIES_CONF_KEY,
    HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.blockSizeDetails,
        Arrays.asList(table.getTableDescriptor())));

// read back the family-specific block size from the configuration
Map<byte[], Integer> retrievedFamilyToBlockSizeMap = HFileOutputFormat2
    .createFamilyBlockSizeMap(conf);
protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) {
  return combineTableNameSuffix(tableName, family);
}
/**
 * Runs inside the task to deserialize column family to compression algorithm
 * map from the configuration.
 *
 * @param conf to read the serialized values from
 * @return a map from column family to the configured compression algorithm
 */
@VisibleForTesting
static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration conf) {
  return HFileOutputFormat2.createFamilyCompressionMap(conf);
}
/**
 * Runs inside the task to deserialize column family to block size
 * map from the configuration.
 *
 * @param conf to read the serialized values from
 * @return a map from column family to the configured block size
 */
@VisibleForTesting
static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
  return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
}
/**
 * Runs inside the task to deserialize column family to bloom filter type
 * map from the configuration.
 *
 * @param conf to read the serialized values from
 * @return a map from column family to the configured bloom filter type
 */
@VisibleForTesting
static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
  return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
}
/**
 * Runs inside the task to deserialize column family to data block encoding
 * type map from the configuration.
 *
 * @param conf to read the serialized values from
 * @return a map from column family to HFileDataBlockEncoder for the
 *         configured data block type for the family
 */
@VisibleForTesting
static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(Configuration conf) {
  return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
}
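All four helpers decode the same per-family wire format that serializeColumnFamilyAttribute writes into the job configuration. Below is a minimal round-trip sketch, not from the source: the test class and family names are hypothetical, it assumes same-package access to the package-private conf keys and helpers, and the value spellings (lowercase compression name, enum names for bloom type and encoding) are inferred from the tests above.

package org.apache.hadoop.hbase.mapreduce;  // same package, for package-private access

import static org.junit.Assert.assertEquals;

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

public class FamilyAttributeMapRoundTripSketch {  // hypothetical test class
  @Test
  public void roundTrip() {
    Configuration conf = new Configuration();
    // Assumed wire format, per the serialization tests above: URL-encoded
    // "family=value" pairs joined by '&'.
    conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY, "cf1=gz&cf2=none");
    conf.set(HFileOutputFormat2.BLOCK_SIZE_FAMILIES_CONF_KEY, "cf1=65536&cf2=131072");
    conf.set(HFileOutputFormat2.BLOOM_TYPE_FAMILIES_CONF_KEY, "cf1=ROW&cf2=ROWCOL");
    conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_FAMILIES_CONF_KEY, "cf1=PREFIX&cf2=NONE");

    // Each helper decodes its conf key into a map keyed by family-name bytes.
    Map<byte[], Algorithm> compression = HFileOutputFormat2.createFamilyCompressionMap(conf);
    Map<byte[], Integer> blockSizes = HFileOutputFormat2.createFamilyBlockSizeMap(conf);
    Map<byte[], BloomType> bloomTypes = HFileOutputFormat2.createFamilyBloomTypeMap(conf);
    Map<byte[], DataBlockEncoding> encodings =
        HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);

    assertEquals(Algorithm.GZ, compression.get(Bytes.toBytes("cf1")));
    assertEquals(Integer.valueOf(65536), blockSizes.get(Bytes.toBytes("cf1")));
    assertEquals(BloomType.ROWCOL, bloomTypes.get(Bytes.toBytes("cf2")));
    assertEquals(DataBlockEncoding.PREFIX, encodings.get(Bytes.toBytes("cf1")));
  }
}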
private void exportHBaseConfiguration(String hbaseTableName) throws IOException {
  Configuration hbaseConf = HBaseConnection.getCurrentHBaseConfiguration();
  HadoopUtil.healSickConfig(hbaseConf);
  Job job = Job.getInstance(hbaseConf, hbaseTableName);
  HTable table = new HTable(hbaseConf, hbaseTableName);
  HFileOutputFormat2.configureIncrementalLoadMap(job, table);

  logger.info("Saving HBase configuration to {0}", hbaseConfPath);
  FileSystem fs = HadoopUtil.getWorkingFileSystem();
  FSDataOutputStream out = null;
  try {
    out = fs.create(new Path(hbaseConfPath));
    job.getConfiguration().writeXml(out);
  } finally {
    IOUtils.closeQuietly(out);
  }
}