public Builder() {
    super(new AddElementsFromHdfs());
}
private void validateOperation(final AddElementsFromHdfs operation) {
    // Map task settings are determined by the input data, so explicit min/max values are ignored.
    if (null != operation.getMinMapTasks()) {
        LOGGER.warn("minMapTasks field will be ignored");
    }
    if (null != operation.getMaxMapTasks()) {
        LOGGER.warn("maxMapTasks field will be ignored");
    }

    // numReduceTasks is an exact value, so it cannot be combined with a min/max range.
    if (null != operation.getNumReduceTasks()
            && (null != operation.getMinReduceTasks() || null != operation.getMaxReduceTasks())) {
        throw new IllegalArgumentException("minReduceTasks and/or maxReduceTasks should not be set if numReduceTasks is");
    }

    if (null != operation.getMinReduceTasks() && null != operation.getMaxReduceTasks()) {
        LOGGER.warn("Logic for the minimum may result in more reducers than the maximum set");
        if (operation.getMinReduceTasks() > operation.getMaxReduceTasks()) {
            throw new IllegalArgumentException("Minimum number of reducers must be less than the maximum number of reducers");
        }
    }

    if (null == operation.getSplitsFilePath()) {
        throw new IllegalArgumentException("splitsFilePath is required");
    }

    if (null == operation.getWorkingPath()) {
        throw new IllegalArgumentException("workingPath is required");
    }
}
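/*
 * A minimal sketch of configuring an operation that passes the validation above: both
 * required paths are set, and only the min/max reducer range is used, never alongside
 * an exact numReduceTasks. The setter names mirror the getters checked in
 * validateOperation; the HDFS paths are illustrative placeholders.
 */
final AddElementsFromHdfs operation = new AddElementsFromHdfs();
operation.setSplitsFilePath("/tmp/gaffer/splits.txt");
operation.setWorkingPath("/tmp/gaffer/working");
operation.setMinReduceTasks(10);
operation.setMaxReduceTasks(100);    // must not be less than minReduceTasks
// operation.setNumReduceTasks(50);  // would fail validation: exact and min/max are mutually exclusive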
// Sample the input data to generate split points, then split the table with them.
final String workingPath = operation.getWorkingPath();
if (null == workingPath) {
    throw new IllegalArgumentException("Prior to adding the data, the table needs to be split. To do this the workingPath must be set to a temporary directory");
}

// Temporary location under the working path for the sampled split points.
final String tmpSplitsOutputPath = workingPath + "/tmp_splits_data";

store.execute(new OperationChain.Builder()
        .first(new SampleDataForSplitPoints.Builder()
                .addInputMapperPairs(operation.getInputMapperPairs())
                .jobInitialiser(operation.getJobInitialiser())
                .mappers(operation.getNumMapTasks())
                .validate(operation.isValidate())
                .outputPath(tmpSplitsOutputPath)
                .splitsFilePath(operation.getSplitsFilePath())
                .options(operation.getOptions())
                .build())
        .then(new SplitStoreFromFile.Builder()
                .inputPath(operation.getSplitsFilePath())
                .options(operation.getOptions())
                .build())
        .build(), context);
protected void setUpPartitionerGenerateSplitsFile(final Job job, final AddElementsFromHdfs operation, final AccumuloStore store) throws IOException {
    final String splitsFilePath = operation.getSplitsFilePath();
    LOGGER.info("Creating splits file in location {} from table {}", splitsFilePath, store.getTableName());

    // Resolve the reducer bounds: an explicit numReduceTasks pins both the minimum and
    // the maximum; otherwise use the configured min/max values (-1 means "not set",
    // matching the sentinel checked in setUpPartitionerFromUserProvidedSplitsFile).
    final int minReducers;
    final int maxReducers;
    if (validateValue(operation.getNumReduceTasks()) != -1) {
        minReducers = validateValue(operation.getNumReduceTasks());
        maxReducers = validateValue(operation.getNumReduceTasks());
    } else {
        minReducers = validateValue(operation.getMinReduceTasks());
        maxReducers = validateValue(operation.getMaxReduceTasks());
    }
    // ... remainder of the method (splits file generation and partitioner setup) truncated in the source
}
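/*
 * The tail of setUpPartitionerGenerateSplitsFile is truncated above. As an illustration
 * only (a sketch, not code from this class), a reducer count derived from the generated
 * split points could be clamped into the resolved range like this, where numSplitPoints
 * is a hypothetical count read back from the splits file and -1 again means "not set":
 */
int numReducers = numSplitPoints + 1;
if (minReducers != -1 && numReducers < minReducers) {
    numReducers = minReducers;
}
if (maxReducers != -1 && numReducers > maxReducers) {
    numReducers = maxReducers;
}
job.setNumReduceTasks(numReducers);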
private void importElements(final AddElementsFromHdfs operation, final AccumuloStore store) throws OperationException {
    final ImportElementsToAccumuloTool importTool;
    final int response;
    importTool = new ImportElementsToAccumuloTool(operation.getOutputPath(), operation.getFailurePath(), store, operation.getOptions());
    try {
        LOGGER.info("Running import job");
        response = ToolRunner.run(importTool, new String[0]);
        LOGGER.info("Finished running import job");
    } catch (final Exception e) {
        LOGGER.error("Failed to import elements into Accumulo: {}", e.getMessage());
        throw new OperationException("Failed to import elements into Accumulo", e);
    }

    if (ImportElementsToAccumuloTool.SUCCESS_RESPONSE != response) {
        LOGGER.error("Failed to import elements into Accumulo. Response code was {}", response);
        throw new OperationException("Failed to import elements into Accumulo. Response code was: " + response);
    }
}
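/*
 * ToolRunner.run returns whatever the Tool's run method returns, which is why
 * importElements treats any value other than SUCCESS_RESPONSE as a failure. A minimal,
 * self-contained sketch of that contract (ExampleTool and its response constant are
 * hypothetical, not part of this codebase):
 */
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ExampleTool extends Configured implements Tool {
    public static final int SUCCESS_RESPONSE = 1;

    @Override
    public int run(final String[] args) throws Exception {
        // A real tool would configure and submit a MapReduce job here.
        return SUCCESS_RESPONSE;
    }

    public static void main(final String[] args) throws Exception {
        final int response = ToolRunner.run(new ExampleTool(), args);
        System.exit(SUCCESS_RESPONSE == response ? 0 : 1);
    }
}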
@Override
public JobConf createJobConf(final AddElementsFromHdfs operation, final String mapperGeneratorClassName, final Store store) throws IOException {
    final JobConf jobConf = new JobConf(new Configuration());

    LOGGER.info("Setting up job conf");
    jobConf.set(SCHEMA, new String(store.getSchema().toCompactJson(), CommonConstants.UTF_8));
    LOGGER.debug("Added {} {} to job conf", SCHEMA, new String(store.getSchema().toCompactJson(), CommonConstants.UTF_8));
    jobConf.set(MAPPER_GENERATOR, mapperGeneratorClassName);
    LOGGER.info("Added {} of {} to job conf", MAPPER_GENERATOR, mapperGeneratorClassName);
    jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
    LOGGER.info("Added {} option of {} to job conf", VALIDATE, operation.isValidate());

    if (null != operation.getNumMapTasks()) {
        jobConf.setNumMapTasks(operation.getNumMapTasks());
        LOGGER.info("Set number of map tasks to {} on job conf", operation.getNumMapTasks());
    }

    if (null != operation.getNumReduceTasks()) {
        jobConf.setNumReduceTasks(operation.getNumReduceTasks());
        LOGGER.info("Set number of reduce tasks to {} on job conf", operation.getNumReduceTasks());
    }

    jobConf.set(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS,
            ((AccumuloStore) store).getKeyPackage().getKeyConverter().getClass().getName());

    return jobConf;
}
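/*
 * The values set on the JobConf above are read back on the task side. A minimal sketch
 * of a mapper's setup recovering the schema and validate flag, assuming the same SCHEMA
 * and VALIDATE keys (the enclosing Mapper subclass is illustrative):
 */
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
    final Schema schema = Schema.fromJson(
            context.getConfiguration().get(SCHEMA).getBytes(CommonConstants.UTF_8));
    final boolean validate = Boolean.parseBoolean(context.getConfiguration().get(VALIDATE));
    // The element generator named by MAPPER_GENERATOR would be instantiated reflectively here.
}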
protected void setUpPartitionerFromUserProvidedSplitsFile(final Job job, final AddElementsFromHdfs operation) throws IOException {
    final String splitsFilePath = operation.getSplitsFilePath();
    if (validateValue(operation.getMaxReduceTasks()) != -1
            || validateValue(operation.getMinReduceTasks()) != -1) {
        LOGGER.info("Using splits file provided by user {}, ignoring minReduceTasks and maxReduceTasks", splitsFilePath);
    } else {
        LOGGER.info("Using splits file provided by user {}", splitsFilePath);
    }

    // One reducer per key range: n split points partition the key space into n + 1 ranges.
    final int numSplits = IngestUtils.getNumSplits(FileSystem.get(job.getConfiguration()), new Path(splitsFilePath));
    job.setNumReduceTasks(numSplits + 1);
    job.setPartitionerClass(GafferKeyRangePartitioner.class);
    GafferKeyRangePartitioner.setSplitFile(job, splitsFilePath);
}
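/*
 * Why numSplits + 1 reduce tasks: a range partitioner with n split points divides the
 * key space into n + 1 ranges, one reducer per range. A minimal sketch of that mapping,
 * using a binary search over sorted split points (illustrative only, not
 * GafferKeyRangePartitioner itself):
 */
import java.util.Arrays;
import java.util.Comparator;

// Unsigned lexicographic byte[] order, as Accumulo compares row keys.
static final Comparator<byte[]> LEXICOGRAPHIC = (a, b) -> {
    final int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; i++) {
        final int cmp = Integer.compare(a[i] & 0xff, b[i] & 0xff);
        if (cmp != 0) {
            return cmp;
        }
    }
    return Integer.compare(a.length, b.length);
};

static int partitionFor(final byte[] key, final byte[][] sortedSplitPoints) {
    final int index = Arrays.binarySearch(sortedSplitPoints, key, LEXICOGRAPHIC);
    // Negative results encode the insertion point as -(insertionPoint) - 1; either way
    // the partition lies in [0, sortedSplitPoints.length], i.e. numSplits + 1 buckets.
    return index >= 0 ? index : -index - 1;
}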