@Override protected void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY)); sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null); targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY, TableOutputFormat.OUTPUT_CONF_PREFIX); sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY); targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY); dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false); doDeletes = conf.getBoolean(DO_DELETES_CONF_KEY, true); doPuts = conf.getBoolean(DO_PUTS_CONF_KEY, true); sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir); LOG.info("Read source hash manifest: " + sourceTableHash); LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys"); TableSplit split = (TableSplit) context.getInputSplit(); ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow()); sourceHashReader = sourceTableHash.newReader(conf, splitStartKey); findNextKeyHashPair(); // create a hasher, but don't start it right away // instead, find the first hash batch at or after the start row // and skip any rows that come before. they will be caught by the previous task targetHasher = new HashTable.ResultHasher(); }
/** * Finish the currently open hash batch. * Compare the target hash to the given source hash. * If they do not match, then sync the covered key range. */ private void finishBatchAndCompareHashes(Context context) throws IOException, InterruptedException { targetHasher.finishBatch(); context.getCounter(Counter.BATCHES).increment(1); if (targetHasher.getBatchSize() == 0) { context.getCounter(Counter.EMPTY_BATCHES).increment(1); } ImmutableBytesWritable targetHash = targetHasher.getBatchHash(); if (targetHash.equals(currentSourceHash)) { context.getCounter(Counter.HASHES_MATCHED).increment(1); } else { context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1); ImmutableBytesWritable stopRow = nextSourceKey == null ? new ImmutableBytesWritable(sourceTableHash.stopRow) : nextSourceKey; if (LOG.isDebugEnabled()) { LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey()) + " to " + toHex(stopRow) + " sourceHash: " + toHex(currentSourceHash) + " targetHash: " + toHex(targetHash)); } syncRange(context, targetHasher.getBatchStartKey(), stopRow); } } private static String toHex(ImmutableBytesWritable bytes) {
@Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { if (currentRow == null || !currentRow.equals(key)) { currentRow = new ImmutableBytesWritable(key); // not immutable if (hasher.getBatchSize() >= targetBatchSize) { hasher.finishBatch(); context.write(hasher.getBatchStartKey(), hasher.getBatchHash()); hasher.startBatch(currentRow); } } hasher.hashResult(value); }
if (targetHasher.isBatchStarted()) { targetScanner = targetTable.getScanner(scan); for (Result row : targetScanner) { targetHasher.hashResult(row);
if (targetHasher.isBatchStarted()) { targetHasher.hashResult(row);
if (targetHasher.isBatchStarted()) { targetScanner = targetTable.getScanner(scan); for (Result row : targetScanner) { targetHasher.hashResult(row);
/** * Finish the currently open hash batch. * Compare the target hash to the given source hash. * If they do not match, then sync the covered key range. */ private void finishBatchAndCompareHashes(Context context) throws IOException, InterruptedException { targetHasher.finishBatch(); context.getCounter(Counter.BATCHES).increment(1); if (targetHasher.getBatchSize() == 0) { context.getCounter(Counter.EMPTY_BATCHES).increment(1); } ImmutableBytesWritable targetHash = targetHasher.getBatchHash(); if (targetHash.equals(currentSourceHash)) { context.getCounter(Counter.HASHES_MATCHED).increment(1); } else { context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1); ImmutableBytesWritable stopRow = nextSourceKey == null ? new ImmutableBytesWritable(sourceTableHash.stopRow) : nextSourceKey; if (LOG.isDebugEnabled()) { LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey()) + " to " + toHex(stopRow) + " sourceHash: " + toHex(currentSourceHash) + " targetHash: " + toHex(targetHash)); } syncRange(context, targetHasher.getBatchStartKey(), stopRow); } } private static String toHex(ImmutableBytesWritable bytes) {
targetHasher.finishBatch(); context.getCounter(Counter.BATCHES).increment(1); if (targetHasher.getBatchSize() == 0) { context.getCounter(Counter.EMPTY_BATCHES).increment(1); ImmutableBytesWritable targetHash = targetHasher.getBatchHash(); if (targetHash.equals(currentSourceHash)) { context.getCounter(Counter.HASHES_MATCHED).increment(1); LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey()) + " to " + toHex(stopRow) + " sourceHash: " + toHex(currentSourceHash) syncRange(context, targetHasher.getBatchStartKey(), stopRow);
/** * Finish the currently open hash batch. * Compare the target hash to the given source hash. * If they do not match, then sync the covered key range. */ private void finishBatchAndCompareHashes(Context context) throws IOException, InterruptedException { targetHasher.finishBatch(); context.getCounter(Counter.BATCHES).increment(1); if (targetHasher.getBatchSize() == 0) { context.getCounter(Counter.EMPTY_BATCHES).increment(1); } ImmutableBytesWritable targetHash = targetHasher.getBatchHash(); if (targetHash.equals(currentSourceHash)) { context.getCounter(Counter.HASHES_MATCHED).increment(1); } else { context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1); ImmutableBytesWritable stopRow = nextSourceKey == null ? new ImmutableBytesWritable(sourceTableHash.stopRow) : nextSourceKey; if (LOG.isDebugEnabled()) { LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey()) + " to " + toHex(stopRow) + " sourceHash: " + toHex(currentSourceHash) + " targetHash: " + toHex(targetHash)); } syncRange(context, targetHasher.getBatchStartKey(), stopRow); } } private static String toHex(ImmutableBytesWritable bytes) {
@Override protected void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY)); sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null); targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY, TableOutputFormat.OUTPUT_CONF_PREFIX); sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY); targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY); dryRun = conf.getBoolean(SOURCE_TABLE_CONF_KEY, false); sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir); LOG.info("Read source hash manifest: " + sourceTableHash); LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys"); TableSplit split = (TableSplit) context.getInputSplit(); ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow()); sourceHashReader = sourceTableHash.newReader(conf, splitStartKey); findNextKeyHashPair(); // create a hasher, but don't start it right away // instead, find the first hash batch at or after the start row // and skip any rows that come before. they will be caught by the previous task targetHasher = new HashTable.ResultHasher(); }
@Override protected void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY)); sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null); targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY, TableOutputFormat.OUTPUT_CONF_PREFIX); sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY); targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY); dryRun = conf.getBoolean(SOURCE_TABLE_CONF_KEY, false); sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir); LOG.info("Read source hash manifest: " + sourceTableHash); LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys"); TableSplit split = (TableSplit) context.getInputSplit(); ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow()); sourceHashReader = sourceTableHash.newReader(conf, splitStartKey); findNextKeyHashPair(); // create a hasher, but don't start it right away // instead, find the first hash batch at or after the start row // and skip any rows that come before. they will be caught by the previous task targetHasher = new HashTable.ResultHasher(); }
@Override protected void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY)); sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null); targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY, TableOutputFormat.OUTPUT_CONF_PREFIX); sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY); targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY); dryRun = conf.getBoolean(SOURCE_TABLE_CONF_KEY, false); sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir); LOG.info("Read source hash manifest: " + sourceTableHash); LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys"); TableSplit split = (TableSplit) context.getInputSplit(); ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow()); sourceHashReader = sourceTableHash.newReader(conf, splitStartKey); findNextKeyHashPair(); // create a hasher, but don't start it right away // instead, find the first hash batch at or after the start row // and skip any rows that come before. they will be caught by the previous task targetHasher = new HashTable.ResultHasher(); }
@Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { try { // first, finish any hash batches that end before the scanned row while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) { moveToNextBatch(context); } // next, add the scanned row (as long as we've reached the first batch) if (targetHasher.isBatchStarted()) { targetHasher.hashResult(value); } } catch (Throwable t) { mapperException = t; Throwables.propagateIfInstanceOf(t, IOException.class); Throwables.propagateIfInstanceOf(t, InterruptedException.class); Throwables.propagate(t); } }
@Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { try { // first, finish any hash batches that end before the scanned row while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) { moveToNextBatch(context); } // next, add the scanned row (as long as we've reached the first batch) if (targetHasher.isBatchStarted()) { targetHasher.hashResult(value); } } catch (Throwable t) { mapperException = t; Throwables.propagateIfInstanceOf(t, IOException.class); Throwables.propagateIfInstanceOf(t, InterruptedException.class); Throwables.propagate(t); } }
@Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { try { // first, finish any hash batches that end before the scanned row while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) { moveToNextBatch(context); } // next, add the scanned row (as long as we've reached the first batch) if (targetHasher.isBatchStarted()) { targetHasher.hashResult(value); } } catch (Throwable t) { mapperException = t; Throwables.propagateIfInstanceOf(t, IOException.class); Throwables.propagateIfInstanceOf(t, InterruptedException.class); Throwables.propagate(t); } }
@Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { if (currentRow == null || !currentRow.equals(key)) { currentRow = new ImmutableBytesWritable(key); // not immutable if (hasher.getBatchSize() >= targetBatchSize) { hasher.finishBatch(); context.write(hasher.getBatchStartKey(), hasher.getBatchHash()); hasher.startBatch(currentRow); } } hasher.hashResult(value); }
/** * If there is an open hash batch, complete it and sync if there are diffs. * Start a new batch, and seek to read the */ private void moveToNextBatch(Context context) throws IOException, InterruptedException { if (targetHasher.isBatchStarted()) { finishBatchAndCompareHashes(context); } targetHasher.startBatch(nextSourceKey); currentSourceHash = sourceHashReader.getCurrentHash(); findNextKeyHashPair(); }
@Override protected void setup(Context context) throws IOException, InterruptedException { targetBatchSize = context.getConfiguration() .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE); hasher = new ResultHasher(); TableSplit split = (TableSplit) context.getInputSplit(); hasher.startBatch(new ImmutableBytesWritable(split.getStartRow())); }
@Override protected void setup(Context context) throws IOException, InterruptedException { targetBatchSize = context.getConfiguration() .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE); hasher = new ResultHasher(); TableSplit split = (TableSplit) context.getInputSplit(); hasher.startBatch(new ImmutableBytesWritable(split.getStartRow())); }
@Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { if (currentRow == null || !currentRow.equals(key)) { currentRow = new ImmutableBytesWritable(key); // not immutable if (hasher.getBatchSize() >= targetBatchSize) { hasher.finishBatch(); context.write(hasher.getBatchStartKey(), hasher.getBatchHash()); hasher.startBatch(currentRow); } } hasher.hashResult(value); }