public Builder() {
    super(new AddElementsFromHdfs());
}
private void validateOperation(final AddElementsFromHdfs operation) {
    // Map task settings are determined by the input data, so explicit min/max values are ignored.
    if (null != operation.getMinMapTasks()) {
        LOGGER.warn("minMapTasks field will be ignored");
    }
    if (null != operation.getMaxMapTasks()) {
        LOGGER.warn("maxMapTasks field will be ignored");
    }

    // numReduceTasks is an exact value, so it cannot be combined with a min/max range.
    if (null != operation.getNumReduceTasks()
            && (null != operation.getMinReduceTasks() || null != operation.getMaxReduceTasks())) {
        throw new IllegalArgumentException("minReduceTasks and/or maxReduceTasks should not be set if numReduceTasks is");
    }

    if (null != operation.getMinReduceTasks() && null != operation.getMaxReduceTasks()) {
        LOGGER.warn("Logic for the minimum may result in more reducers than the maximum set");
        if (operation.getMinReduceTasks() > operation.getMaxReduceTasks()) {
            throw new IllegalArgumentException("Minimum number of reducers must be less than the maximum number of reducers");
        }
    }

    if (null == operation.getSplitsFilePath()) {
        throw new IllegalArgumentException("splitsFilePath is required");
    }

    if (null == operation.getWorkingPath()) {
        throw new IllegalArgumentException("workingPath is required");
    }
}
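/*
 * A minimal sketch of configuring an operation that passes the validation above: both
 * required paths are set, and only the min/max reducer range is used, never alongside
 * an exact numReduceTasks. The setter names mirror the getters checked in
 * validateOperation; the HDFS paths are illustrative placeholders.
 */
final AddElementsFromHdfs operation = new AddElementsFromHdfs();
operation.setSplitsFilePath("/tmp/gaffer/splits.txt");
operation.setWorkingPath("/tmp/gaffer/working");
operation.setMinReduceTasks(10);
operation.setMaxReduceTasks(100);    // must not be less than minReduceTasks
// operation.setNumReduceTasks(50);  // would fail validation: exact and min/max are mutually exclusive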
// Sample the input data to generate split points, then split the table with them.
final String workingPath = operation.getWorkingPath();
if (null == workingPath) {
    throw new IllegalArgumentException("Prior to adding the data, the table needs to be split. To do this the workingPath must be set to a temporary directory");
}

// Temporary location under the working path for the sampled split points.
final String tmpSplitsOutputPath = workingPath + "/tmp_splits_data";

store.execute(new OperationChain.Builder()
        .first(new SampleDataForSplitPoints.Builder()
                .addInputMapperPairs(operation.getInputMapperPairs())
                .jobInitialiser(operation.getJobInitialiser())
                .mappers(operation.getNumMapTasks())
                .validate(operation.isValidate())
                .outputPath(tmpSplitsOutputPath)
                .splitsFilePath(operation.getSplitsFilePath())
                .options(operation.getOptions())
                .build())
        .then(new SplitStoreFromFile.Builder()
                .inputPath(operation.getSplitsFilePath())
                .options(operation.getOptions())
                .build())
        .build(), context);
protected void setUpPartitionerGenerateSplitsFile(final Job job, final AddElementsFromHdfs operation, final AccumuloStore store) throws IOException {
    final String splitsFilePath = operation.getSplitsFilePath();
    LOGGER.info("Creating splits file in location {} from table {}", splitsFilePath, store.getTableName());

    // Resolve the reducer bounds: an explicit numReduceTasks pins both the minimum and
    // the maximum; otherwise use the configured min/max values (-1 means "not set",
    // matching the sentinel checked in setUpPartitionerFromUserProvidedSplitsFile).
    final int minReducers;
    final int maxReducers;
    if (validateValue(operation.getNumReduceTasks()) != -1) {
        minReducers = validateValue(operation.getNumReduceTasks());
        maxReducers = validateValue(operation.getNumReduceTasks());
    } else {
        minReducers = validateValue(operation.getMinReduceTasks());
        maxReducers = validateValue(operation.getMaxReduceTasks());
    }
    // ... remainder of the method (splits file generation and partitioner setup) truncated in the source
}
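/*
 * The tail of setUpPartitionerGenerateSplitsFile is truncated above. As an illustration
 * only (a sketch, not code from this class), a reducer count derived from the generated
 * split points could be clamped into the resolved range like this, where numSplitPoints
 * is a hypothetical count read back from the splits file and -1 again means "not set":
 */
int numReducers = numSplitPoints + 1;
if (minReducers != -1 && numReducers < minReducers) {
    numReducers = minReducers;
}
if (maxReducers != -1 && numReducers > maxReducers) {
    numReducers = maxReducers;
}
job.setNumReduceTasks(numReducers);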
private void importElements(final AddElementsFromHdfs operation, final AccumuloStore store) throws OperationException {
    final ImportElementsToAccumuloTool importTool;
    final int response;
    importTool = new ImportElementsToAccumuloTool(operation.getOutputPath(), operation.getFailurePath(), store, operation.getOptions());
    try {
        LOGGER.info("Running import job");
        response = ToolRunner.run(importTool, new String[0]);
        LOGGER.info("Finished running import job");
    } catch (final Exception e) {
        LOGGER.error("Failed to import elements into Accumulo: {}", e.getMessage());
        throw new OperationException("Failed to import elements into Accumulo", e);
    }

    if (ImportElementsToAccumuloTool.SUCCESS_RESPONSE != response) {
        LOGGER.error("Failed to import elements into Accumulo. Response code was {}", response);
        throw new OperationException("Failed to import elements into Accumulo. Response code was: " + response);
    }
}
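/*
 * ToolRunner.run returns whatever the Tool's run method returns, which is why
 * importElements treats any value other than SUCCESS_RESPONSE as a failure. A minimal,
 * self-contained sketch of that contract (ExampleTool and its response constant are
 * hypothetical, not part of this codebase):
 */
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ExampleTool extends Configured implements Tool {
    public static final int SUCCESS_RESPONSE = 1;

    @Override
    public int run(final String[] args) throws Exception {
        // A real tool would configure and submit a MapReduce job here.
        return SUCCESS_RESPONSE;
    }

    public static void main(final String[] args) throws Exception {
        final int response = ToolRunner.run(new ExampleTool(), args);
        System.exit(SUCCESS_RESPONSE == response ? 0 : 1);
    }
}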
@Override
public JobConf createJobConf(final AddElementsFromHdfs operation, final String mapperGeneratorClassName, final Store store) throws IOException {
    final JobConf jobConf = new JobConf(new Configuration());

    LOGGER.info("Setting up job conf");
    jobConf.set(SCHEMA, new String(store.getSchema().toCompactJson(), CommonConstants.UTF_8));
    LOGGER.debug("Added {} {} to job conf", SCHEMA, new String(store.getSchema().toCompactJson(), CommonConstants.UTF_8));
    jobConf.set(MAPPER_GENERATOR, mapperGeneratorClassName);
    LOGGER.info("Added {} of {} to job conf", MAPPER_GENERATOR, mapperGeneratorClassName);
    jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
    LOGGER.info("Added {} option of {} to job conf", VALIDATE, operation.isValidate());

    if (null != operation.getNumMapTasks()) {
        jobConf.setNumMapTasks(operation.getNumMapTasks());
        LOGGER.info("Set number of map tasks to {} on job conf", operation.getNumMapTasks());
    }

    if (null != operation.getNumReduceTasks()) {
        jobConf.setNumReduceTasks(operation.getNumReduceTasks());
        LOGGER.info("Set number of reduce tasks to {} on job conf", operation.getNumReduceTasks());
    }

    jobConf.set(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS,
            ((AccumuloStore) store).getKeyPackage().getKeyConverter().getClass().getName());

    return jobConf;
}
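/*
 * The values set on the JobConf above are read back on the task side. A minimal sketch
 * of a mapper's setup recovering the schema and validate flag, assuming the same SCHEMA
 * and VALIDATE keys (the enclosing Mapper subclass is illustrative):
 */
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
    final Schema schema = Schema.fromJson(
            context.getConfiguration().get(SCHEMA).getBytes(CommonConstants.UTF_8));
    final boolean validate = Boolean.parseBoolean(context.getConfiguration().get(VALIDATE));
    // The element generator named by MAPPER_GENERATOR would be instantiated reflectively here.
}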
protected void setUpPartitionerFromUserProvidedSplitsFile(final Job job, final AddElementsFromHdfs operation) throws IOException {
    final String splitsFilePath = operation.getSplitsFilePath();
    if (validateValue(operation.getMaxReduceTasks()) != -1
            || validateValue(operation.getMinReduceTasks()) != -1) {
        LOGGER.info("Using splits file provided by user {}, ignoring minReduceTasks and maxReduceTasks", splitsFilePath);
    } else {
        LOGGER.info("Using splits file provided by user {}", splitsFilePath);
    }

    // One reducer per key range: n split points partition the key space into n + 1 ranges.
    final int numSplits = IngestUtils.getNumSplits(FileSystem.get(job.getConfiguration()), new Path(splitsFilePath));
    job.setNumReduceTasks(numSplits + 1);
    job.setPartitionerClass(GafferKeyRangePartitioner.class);
    GafferKeyRangePartitioner.setSplitFile(job, splitsFilePath);
}
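/*
 * Why numSplits + 1 reduce tasks: a range partitioner with n split points divides the
 * key space into n + 1 ranges, one reducer per range. A minimal sketch of that mapping,
 * using a binary search over sorted split points (illustrative only, not
 * GafferKeyRangePartitioner itself):
 */
import java.util.Arrays;
import java.util.Comparator;

// Unsigned lexicographic byte[] order, as Accumulo compares row keys.
static final Comparator<byte[]> LEXICOGRAPHIC = (a, b) -> {
    final int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; i++) {
        final int cmp = Integer.compare(a[i] & 0xff, b[i] & 0xff);
        if (cmp != 0) {
            return cmp;
        }
    }
    return Integer.compare(a.length, b.length);
};

static int partitionFor(final byte[] key, final byte[][] sortedSplitPoints) {
    final int index = Arrays.binarySearch(sortedSplitPoints, key, LEXICOGRAPHIC);
    // Negative results encode the insertion point as -(insertionPoint) - 1; either way
    // the partition lies in [0, sortedSplitPoints.length], i.e. numSplits + 1 buckets.
    return index >= 0 ? index : -index - 1;
}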