@Override
protected List<IteratorSetting> jobIterators(JobConf job, String tableName) {
  return getIterators(job);
}
/**
 * Apply the configured iterators from the configuration to the scanner.
 *
 * @param job
 *          the job configuration
 * @param scanner
 *          the scanner to configure
 */
@Deprecated
protected void setupIterators(JobConf job, Scanner scanner) {
  setupIterators(getIterators(job), scanner);
}
}
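// Usage sketch (not from the surrounding source): the iterators that getIterators(job)
// returns above are registered on the JobConf ahead of time by the driver.
// WholeRowIterator is a real shipped iterator; the priority (50) and the name
// ("wholeRows") are arbitrary illustrative choices.
JobConf iterJob = new JobConf();
IteratorSetting wholeRows = new IteratorSetting(50, "wholeRows",
    org.apache.accumulo.core.iterators.user.WholeRowIterator.class);
AccumuloInputFormat.addIterator(iterJob, wholeRows);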
@Override
public RecordWriter<Text,Mutation> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  try {
    return new AccumuloRecordWriter(job);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
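// Driver sketch (principal, token, instance name, hosts, and table name below are
// placeholders): the framework only calls getRecordWriter() after connector info and an
// instance have been configured on the JobConf, roughly like this.
static void configureOutput(JobConf job) throws AccumuloSecurityException {
  job.setOutputFormat(AccumuloOutputFormat.class);
  AccumuloOutputFormat.setConnectorInfo(job, "writer", new PasswordToken("secret"));
  AccumuloOutputFormat.setZooKeeperInstance(job,
      org.apache.accumulo.core.client.ClientConfiguration.create()
          .withInstance("myInstance").withZkHosts("zk1:2181,zk2:2181"));
  AccumuloOutputFormat.setDefaultTableName(job, "output_table");
  AccumuloOutputFormat.setCreateTables(job, true);
}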
authorizations = getScanAuthorizations(job);
String classLoaderContext = getClassLoaderContext(job);
String table = baseSplit.getTableName();

// in case the table name changed, the previous name can still be used for configuration
// lookups; the scanner itself uses the table id resolved at job setup time
org.apache.accumulo.core.client.mapreduce.InputTableConfig tableConfig =
    getInputTableConfig(job, baseSplit.getTableName());

try {
  if (baseSplit instanceof BatchInputSplit) {
    // multi-range splits are read with a BatchScanner; local iterators do not apply here
    scanner = client.createBatchScanner(baseSplit.getTableName(), authorizations, scanThreads);
    if (classLoaderContext != null) {
      scanner.setClassLoaderContext(classLoaderContext);
    }
    setupIterators(job, scanner, baseSplit.getTableName(), baseSplit);
  } else {
    Scanner s = client.createScanner(baseSplit.getTableName(), authorizations);
    if (classLoaderContext != null) {
      s.setClassLoaderContext(classLoaderContext);
    }
    if (tableConfig.shouldUseLocalIterators()) {
      // run the configured iterators client-side instead of on the tablet servers
      s = new ClientSideIteratorScanner(s);
    }
    setupIterators(job, s, baseSplit.getTableName(), baseSplit);
    scanner = s;
  }
} catch (Exception e) {
  throw new IOException(e);
}
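// Configuration sketch: the shouldUseLocalIterators() flag consulted above (and its
// scan-isolation counterpart) are plain booleans the driver sets on the job.
InputFormatBase.setLocalIterators(job, true); // wrap reads in a ClientSideIteratorScanner
InputFormatBase.setScanIsolation(job, true);  // wrap reads in an IsolatedScanner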
Level logLevel = getLogLevel(job);
log.setLevel(logLevel);
validateOptions(job);

Map<String,org.apache.accumulo.core.client.mapreduce.InputTableConfig> tableConfigs =
    getInputTableConfigs(job);

// ... per-table id and range lookup elided ...

try {
  if (tableConfig.isOfflineScan()) {
    binnedRanges = binOfflineTable(job, tableId, ranges);
    while (binnedRanges == null) {
      // some tablets were still online; back off briefly before retrying instead of
      // spinning in a tight loop
      sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
      binnedRanges = binOfflineTable(job, tableId, ranges);
    }
  }
} catch (Exception e) {
  throw new IOException(e);
}

if (autoAdjust) {
  // one split per clipped range/tablet pair
  RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), ke.clip(r),
      new String[] {location});
  org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig,
      logLevel);
  split.setOffline(tableConfig.isOfflineScan());
  split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
  split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
  splits.add(split);
} else {
  // ranges were not auto-adjusted: one split per configured range, possibly spanning tablets
  RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), entry.getKey(),
      entry.getValue().toArray(new String[0]));
  org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig,
      logLevel);
  split.setOffline(tableConfig.isOfflineScan());
}
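// Usage sketch (table names and the "client" AccumuloClient are placeholders): the
// offline-scan branch above requires the input table to actually be offline, so jobs
// typically clone the live table and take the clone offline before submitting.
client.tableOperations().clone("events", "events_offline_copy", true,
    java.util.Collections.emptyMap(), java.util.Collections.emptySet());
client.tableOperations().offline("events_offline_copy", true);
InputFormatBase.setInputTableName(job, "events_offline_copy");
InputFormatBase.setOfflineTableScan(job, true);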
protected AccumuloRecordWriter(JobConf job) throws AccumuloException, AccumuloSecurityException {
  Level l = getLogLevel(job);
  if (l != null)
    log.setLevel(l);
  this.simulate = getSimulationMode(job);
  this.createTables = canCreateTables(job);

  if (simulate)
    log.info("Simulating output only. No writes to tables will occur");

  this.bws = new HashMap<>();

  String tname = getDefaultTableName(job);
  this.defaultTableName = (tname == null) ? null : new Text(tname);

  if (!simulate) {
    this.client = OutputConfigurator.client(CLASS, job);
    mtbw = client.createMultiTableBatchWriter(getBatchWriterOptions(job));
  }
}
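// Configuration sketch: getBatchWriterOptions(job) above returns whatever the driver
// registered; the memory, latency, and thread values below are illustrative, not defaults.
BatchWriterConfig bwConfig = new BatchWriterConfig()
    .setMaxMemory(50 * 1024 * 1024)      // buffer up to ~50 MB of mutations
    .setMaxLatency(2, TimeUnit.MINUTES)  // flush buffered mutations at least this often
    .setMaxWriteThreads(4);              // threads used to send mutations to tablet servers
AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);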
@Override
public RecordReader<Key,Value> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
    throws IOException {
  log.setLevel(getLogLevel(job));
  RecordReaderBase<Key,Value> recordReader = new RecordReaderBase<Key,Value>() {
    /* next(), createKey(), and createValue() as in the reader shown further down */
  };
  recordReader.initialize(split, job);
  return recordReader;
}
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
  if (!isConnectorInfoSet(job))
    throw new IOException("Connector info has not been set.");
  try {
    AccumuloClient c = OutputConfigurator.client(CLASS, job);
    String principal = getPrincipal(job);
    AuthenticationToken token = getAuthenticationToken(job);
    if (!c.securityOperations().authenticateUser(principal, token))
      throw new IOException("Unable to authenticate user");
  } catch (AccumuloException | AccumuloSecurityException e) {
    throw new IOException(e);
  }
}
/**
 * Configures the iterators on a scanner for the given table name.
 *
 * @param job
 *          the Hadoop job configuration
 * @param scanner
 *          the scanner for which to configure the iterators
 * @param tableName
 *          the table name for which the scanner is configured
 * @param split
 *          the split this scanner will read, or null to use the job configuration
 * @since 1.7.0
 */
private void setupIterators(JobConf job, ScannerBase scanner, String tableName,
    org.apache.accumulo.core.client.mapreduce.RangeInputSplit split) {
  List<IteratorSetting> iterators = null;

  if (split == null) {
    iterators = jobIterators(job, tableName);
  } else {
    // iterators stored on the split take precedence over the job configuration
    iterators = split.getIterators();
    if (iterators == null) {
      iterators = jobIterators(job, tableName);
    }
  }

  for (IteratorSetting iterator : iterators)
    scanner.addScanIterator(iterator);
}
@Override
public RecordReader<Key,Value> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
    throws IOException {
  log.setLevel(getLogLevel(job));
  InputFormatBase.RecordReaderBase<Key,Value> recordReader =
      new InputFormatBase.RecordReaderBase<Key,Value>() {
        @Override
        public boolean next(Key key, Value value) throws IOException {
          if (scannerIterator.hasNext()) {
            ++numKeysRead;
            Entry<Key,Value> entry = scannerIterator.next();
            key.set(currentKey = entry.getKey());
            value.set(entry.getValue().get());
            return true;
          }
          return false;
        }

        @Override
        public Key createKey() {
          return new Key();
        }

        @Override
        public Value createValue() {
          return new Value();
        }
      };
  recordReader.initialize(split, job);
  return recordReader;
}
@Override
public RecordReader<Text,PeekingIterator<Entry<Key,Value>>> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  log.setLevel(getLogLevel(job));
  RecordReaderBase<Text,PeekingIterator<Entry<Key,Value>>> recordReader =
      new RecordReaderBase<Text,PeekingIterator<Entry<Key,Value>>>() {
        RowIterator rowIterator;
        /* the initialize() override shown below wires rowIterator to the scanner; next(),
         * createKey(), and createValue() group the scanned cells row by row */
      };
  recordReader.initialize(split, job);
  return recordReader;
}
final Path file = new Path(getWorkOutputPath(job),
    getUniqueName(job, "part") + "." + extension);
final int visCacheSize = FileOutputConfigurator.getVisibilityCacheSize(job);
/**
 * Configures a {@link org.apache.accumulo.core.client.ZooKeeperInstance} for this job.
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @param instanceName
 *          the Accumulo instance name
 * @param zooKeepers
 *          a comma-separated list of zookeeper servers
 * @since 1.5.0
 * @deprecated since 1.6.0; Use
 *             {@link #setZooKeeperInstance(JobConf, org.apache.accumulo.core.client.ClientConfiguration)}
 *             instead.
 */
@Deprecated
public static void setZooKeeperInstance(JobConf job, String instanceName, String zooKeepers) {
  setZooKeeperInstance(job, org.apache.accumulo.core.client.ClientConfiguration.create()
      .withInstance(instanceName).withZkHosts(zooKeepers));
}
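// Migration sketch for the deprecation above (instance name and zookeeper hosts are
// placeholders, "job" is the JobConf being configured): callers construct the
// ClientConfiguration themselves and invoke the non-deprecated overload this shim
// delegates to.
setZooKeeperInstance(job, org.apache.accumulo.core.client.ClientConfiguration.create()
    .withInstance("myInstance").withZkHosts("zk1:2181,zk2:2181"));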
/**
 * Push a mutation into a table. If table is null, the defaultTable will be used. If
 * {@link AccumuloOutputFormat#canCreateTables(JobConf)} is set, the table will be created if it
 * does not exist. The table name must only contain alphanumerics and underscore.
 */
@Override
public void write(Text table, Mutation mutation) throws IOException {
  if (table == null || table.toString().isEmpty())
    table = this.defaultTableName;

  if (!simulate && table == null)
    throw new IOException("No table or default table specified. Try simulation mode next time");

  ++mutCount;
  valCount += mutation.size();
  printMutation(table, mutation);

  if (simulate)
    return;

  if (!bws.containsKey(table))
    try {
      addTable(table);
    } catch (final Exception e) {
      log.error("Could not add table '" + table + "'", e);
      throw new IOException(e);
    }

  try {
    bws.get(table).addMutation(mutation);
  } catch (MutationsRejectedException e) {
    throw new IOException(e);
  }
}
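// Usage sketch: what a reduce task feeding this writer looks like. "output" is assumed
// to be the task's OutputCollector<Text,Mutation>; row, column, and table names are
// placeholders.
Mutation m = new Mutation(new Text("row_001"));
m.put(new Text("meta"), new Text("status"),
    new Value("ok".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
output.collect(new Text("my_table"), m); // a null/empty table Text falls back to defaultTableName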
/**
 * Gets the serialized token class from either the configuration or the token file.
 *
 * @since 1.5.0
 * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobConf)} instead.
 */
@Deprecated
protected static String getTokenClass(JobConf job) {
  return getAuthenticationToken(job).getClass().getName();
}
/**
 * Configures the iterators on a scanner for the given table name.
 *
 * @param job
 *          the Hadoop job configuration
 * @param scanner
 *          the scanner for which to configure the iterators
 * @param tableName
 *          the table name for which the scanner is configured
 * @since 1.6.0
 * @deprecated since 1.7.0; Use {@link #jobIterators} instead.
 */
@Deprecated
protected void setupIterators(JobConf job, Scanner scanner, String tableName,
    RangeInputSplit split) {
  setupIterators(job, (ScannerBase) scanner, tableName, split);
}
@Override
public void initialize(InputSplit inSplit, JobConf job) throws IOException {
  super.initialize(inSplit, job);
  rowIterator = new RowIterator(scannerIterator);
}
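// Usage sketch: what a mapper consuming AccumuloRowInputFormat sees. Each record from the
// reader above is one whole row: its row id as the key and a PeekingIterator over the
// row's cells as the value. The OutputCollector types here are illustrative.
public void map(Text rowId, PeekingIterator<Entry<Key,Value>> row,
    OutputCollector<Text,Text> output, Reporter reporter) throws IOException {
  int cells = 0;
  while (row.hasNext()) {
    row.next(); // each entry is one cell (key/value pair) within the current row
    cells++;
  }
  output.collect(rowId, new Text(Integer.toString(cells)));
}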
private int printMutation(Text table, Mutation m) {
  if (log.isTraceEnabled()) {
    log.trace(String.format("Table %s row key: %s", table, hexDump(m.getRow())));
    for (ColumnUpdate cu : m.getUpdates()) {
      log.trace(String.format("Table %s column: %s:%s", table, hexDump(cu.getColumnFamily()),
          hexDump(cu.getColumnQualifier())));
      log.trace(String.format("Table %s security: %s", table,
          new ColumnVisibility(cu.getColumnVisibility()).toString()));
      log.trace(String.format("Table %s value: %s", table, hexDump(cu.getValue())));
    }
  }
  return m.getUpdates().size();
}
/**
 * Gets the serialized token from either the configuration or the token file.
 *
 * @since 1.5.0
 * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobConf)} instead.
 */
@Deprecated
protected static byte[] getToken(JobConf job) {
  return AuthenticationTokenSerializer.serialize(getAuthenticationToken(job));
}