private boolean isSecureBulkLoadEndpointAvailable() {
  String classes = getConf().get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
  return classes.contains("org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint");
}
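// A minimal sketch (not part of the tool): how the endpoint checked above gets
// registered so the method returns true. The conf key and class name come from
// the method itself; the HBaseConfiguration setup around them is illustrative
// (in a real deployment this is set in the region servers' hbase-site.xml).
Configuration conf = HBaseConfiguration.create();
conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
    "org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint");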
/**
 * Walk the given directory for all HFiles, and return a Queue
 * containing all such files.
 */
private void discoverLoadQueue(final Deque<LoadQueueItem> ret, final Path hfofDir,
    final boolean validateHFile) throws IOException {
  fs = hfofDir.getFileSystem(getConf());
  visitBulkHFiles(fs, hfofDir, new BulkHFileVisitor<byte[]>() {
    @Override
    public byte[] bulkFamily(final byte[] familyName) {
      return familyName;
    }
    @Override
    public void bulkHFile(final byte[] family, final FileStatus hfile) throws IOException {
      long length = hfile.getLen();
      if (length > getConf().getLong(HConstants.HREGION_MAX_FILESIZE,
          HConstants.DEFAULT_MAX_FILE_SIZE)) {
        LOG.warn("Trying to bulk load hfile " + hfile.getPath() + " with size: " +
            length + " bytes can be problematic as it may lead to oversplitting.");
      }
      ret.add(new LoadQueueItem(family, hfile.getPath()));
    }
  }, validateHFile);
}
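// For reference, a sketch of the visitor contract consumed by visitBulkHFiles
// above, mirroring the private BulkHFileVisitor interface in
// LoadIncrementalHFiles. The generic parameter lets bulkFamily hand a
// per-family value to each bulkHFile call.
private static interface BulkHFileVisitor<TFamily> {
  // Called once per family directory; the return value is passed to bulkHFile.
  TFamily bulkFamily(final byte[] familyName) throws IOException;
  // Called for each HFile found under that family directory.
  void bulkHFile(final TFamily family, final FileStatus hfileStatus) throws IOException;
}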
private void initialize() throws Exception {
  if (hbAdmin == null) {
    // make a copy, just to be sure we're not overriding someone else's config
    setConf(HBaseConfiguration.create(getConf()));
    Configuration conf = getConf();
    // disable blockcache for tool invocation, see HBASE-10500
    conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
    this.hbAdmin = new HBaseAdmin(conf);
    this.userProvider = UserProvider.instantiate(conf);
    this.fsDelegationToken = new FsDelegationToken(userProvider, "renewer");
    assignSeqIds = conf.getBoolean(ASSIGN_SEQ_IDS, true);
    maxFilesPerRegionPerFamily = conf.getInt(MAX_FILES_PER_REGION_PER_FAMILY, 32);
  }
}
protected List<LoadQueueItem> splitStoreFile(final LoadQueueItem item, final HTable table,
    byte[] startKey, byte[] splitKey) throws IOException {
  final Path hfilePath = item.hfilePath;

  // We use a '_' prefix which is ignored when walking directory trees
  // above.
  final Path tmpDir = new Path(item.hfilePath.getParent(), "_tmp");

  LOG.info("HFile at " + hfilePath + " no longer fits inside a single " +
      "region. Splitting...");

  String uniqueName = getUniqueName(table.getTableName());
  HColumnDescriptor familyDesc = table.getTableDescriptor().getFamily(item.family);
  Path botOut = new Path(tmpDir, uniqueName + ".bottom");
  Path topOut = new Path(tmpDir, uniqueName + ".top");
  splitStoreFile(getConf(), hfilePath, familyDesc, splitKey, botOut, topOut);

  // Add these back at the *front* of the queue, so there's a lower
  // chance that the region will just split again before we get there.
  List<LoadQueueItem> lqis = new ArrayList<LoadQueueItem>(2);
  lqis.add(new LoadQueueItem(item.family, botOut));
  lqis.add(new LoadQueueItem(item.family, topOut));

  LOG.info("Successfully split into new HFiles " + botOut + " and " + topOut);
  return lqis;
}
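// A sketch of the static splitStoreFile helper invoked above, assuming the
// Reference-based half-file approach HBase uses for region splits. copyHFileHalf
// is the tool's private helper (not shown here) that streams one half of inFile
// into the given output path.
static void splitStoreFile(Configuration conf, Path inFile,
    HColumnDescriptor familyDesc, byte[] splitKey,
    Path bottomOut, Path topOut) throws IOException {
  // The bottom half carries keys before splitKey, the top half the rest.
  Reference topReference = Reference.createTopReference(splitKey);
  Reference bottomReference = Reference.createBottomReference(splitKey);
  copyHFileHalf(conf, inFile, topOut, topReference, familyDesc);
  copyHFileHalf(conf, inFile, bottomOut, bottomReference, familyDesc);
}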
private void createTable(TableName tableName, String dirPath) throws Exception {
  final Path hfofDir = new Path(dirPath);
  final FileSystem fs = hfofDir.getFileSystem(getConf());
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    usage();
    return -1;
  }

  initialize();

  String dirPath = args[0];
  TableName tableName = TableName.valueOf(args[1]);

  boolean tableExists = this.doesTableExist(tableName);
  if (!tableExists) {
    if ("yes".equalsIgnoreCase(getConf().get(CREATE_TABLE_CONF_KEY, "yes"))) {
      this.createTable(tableName, dirPath);
    } else {
      String errorMsg = format("Table '%s' does not exist.", tableName);
      LOG.error(errorMsg);
      throw new TableNotFoundException(errorMsg);
    }
  }

  Path hfofDir = new Path(dirPath);

  try (Connection connection = ConnectionFactory.createConnection(getConf());
      HTable table = (HTable) connection.getTable(tableName)) {
    doBulkLoad(hfofDir, table);
  }
  return 0;
}
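// A hedged usage sketch: driving the tool programmatically through ToolRunner,
// which is how run(String[]) above is normally reached. The output directory
// and table name below are placeholders.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  int exitCode = ToolRunner.run(conf, new LoadIncrementalHFiles(conf),
      new String[] { "/user/hbase/bulk-output", "mytable" });
  System.exit(exitCode);
}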
private void createTable(String tableName, String dirPath) throws Exception {
  Path hfofDir = new Path(dirPath);
  FileSystem fs = hfofDir.getFileSystem(getConf());

  if (hfile.getName().startsWith("_")) continue;

  HFile.Reader reader = HFile.createReader(fs, hfile, new CacheConfig(getConf()));
  final byte[] first, last;
  try {
Path botOut = new Path(tmpDir, uniqueName + ".bottom");
Path topOut = new Path(tmpDir, uniqueName + ".top");
splitStoreFile(getConf(), hfilePath, familyDesc, splitKey, botOut, topOut);

// Open the tmp dir and split outputs wide so the region server user, which
// may differ from the submitting user under secure bulk load, can move them.
FileSystem fs = tmpDir.getFileSystem(getConf());
fs.setPermission(tmpDir, FsPermission.valueOf("-rwxrwxrwx"));
fs.setPermission(botOut, FsPermission.valueOf("-rwxrwxrwx"));
int nrThreads = getConf().getInt("hbase.loadincremental.threads.max",
    Runtime.getRuntime().availableProcessors());
ThreadFactoryBuilder builder = new ThreadFactoryBuilder();

boolean validateHFile = getConf().getBoolean("hbase.loadincremental.validate.hfile", true);
if (!validateHFile) {
  LOG.warn("You are skipping HFiles validation, it might cause some data loss if files " +
      "are not correct. If you fail to read data from your table after using this " +
      "option, consider removing the files and bulkload again without this option. " +
      "See HBASE-13985");
}

int maxRetries = getConf().getInt("hbase.bulkload.retries.number", 10);
maxRetries = Math.max(maxRetries, startEndKeys.getFirst().length + 1);
if (maxRetries != 0 && count >= maxRetries) {
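// The tunables referenced above, collected in one place. The values are
// examples, not recommendations; the key names come straight from the code.
Configuration conf = HBaseConfiguration.create();
// Size of the thread pool used to load HFiles in parallel.
conf.setInt("hbase.loadincremental.threads.max", 16);
// Leave HFile validation on unless you are certain the files are well-formed.
conf.setBoolean("hbase.loadincremental.validate.hfile", true);
// Cap on split-and-retry rounds; per the check above, 0 means retry without limit.
conf.setInt("hbase.bulkload.retries.number", 10);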
@Override
public void bulkHFile(final HColumnDescriptor hcd, final FileStatus hfileStatus)
    throws IOException {
  Path hfile = hfileStatus.getPath();
  HFile.Reader reader = HFile.createReader(fs, hfile,
      new CacheConfig(getConf()), getConf());
  try {
    if (hcd.getCompressionType() != reader.getFileContext().getCompression()) {
      hcd.setCompressionType(reader.getFileContext().getCompression());
      LOG.info("Setting compression " + hcd.getCompressionType().name() +
          " for family " + hcd.toString());
    }
    reader.loadFileInfo();
    byte[] first = reader.getFirstRowKey();
    byte[] last = reader.getLastRowKey();

    LOG.info("Trying to figure out region boundaries hfile=" + hfile +
        " first=" + Bytes.toStringBinary(first) +
        " last=" + Bytes.toStringBinary(last));

    // To eventually infer start key-end key boundaries
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  } finally {
    reader.close();
  }
}
});
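// How the +1/-1 bookkeeping above becomes split points: a sketch mirroring
// LoadIncrementalHFiles.inferBoundaries. The map must be ordered by row key
// (e.g. a TreeMap built with Bytes.BYTES_COMPARATOR). The running sum drops
// back to zero exactly where one contiguous range of overlapping HFiles ends,
// and that range's start key becomes a region boundary.
public static byte[][] inferBoundaries(TreeMap<byte[], Integer> bdryMap) {
  ArrayList<byte[]> keysArray = new ArrayList<byte[]>();
  int runningValue = 0;
  byte[] currStartKey = null;
  boolean firstBoundary = true;
  for (Map.Entry<byte[], Integer> item : bdryMap.entrySet()) {
    if (runningValue == 0) {
      currStartKey = item.getKey();
    }
    runningValue += item.getValue();
    if (runningValue == 0) {
      if (!firstBoundary) {
        keysArray.add(currStartKey);
      }
      firstBoundary = false;
    }
  }
  return keysArray.toArray(new byte[0][0]);
}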
Configuration conf = getConf();
boolean success = RpcRetryingCallerFactory.instantiate(conf, null).<Boolean> newCaller()
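// A hedged completion of the truncated chain above: the factory-built caller
// drives a RetryingCallable until it succeeds or runs out of retries.
// svrCallable, the RegionServerCallable<Boolean> wrapping the bulk-load RPC,
// is defined earlier in the method and is not part of this excerpt.
boolean success = RpcRetryingCallerFactory.instantiate(conf, null).<Boolean> newCaller()
    .callWithRetries(svrCallable, Integer.MAX_VALUE);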