org.elasticsearch.hadoop.mr.EsInputFormat java code examples

/**
 * Computes the input splits for a job described through a {@code JobContext}.
 * The context's configuration is turned into a {@code JobConf} via
 * {@code HadoopCfgUtils.asJobConf} and the work is delegated to
 * {@code getSplits(JobConf, int)}, using the configured number of map tasks.
 *
 * @param context the job context supplying the configuration
 * @return a fixed-size list view over the array returned by the delegate
 * @throws IOException propagated from the delegated split computation
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  JobConf jobConf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration());
  int requestedSplits = jobConf.getNumMapTasks();
  // NOTE: the delegate is expected to return ShardInputSplit instances (which implement both
  // the old and the new API); the array cast below relies on that.
  InputSplit[] splits = (InputSplit[]) getSplits(jobConf, requestedSplits);
  return Arrays.asList(splits);
}

/**
 * Creates the record reader for the given split. A JSON-oriented reader is used when
 * {@code isOutputAsJson(job)} is true, otherwise the Writable-based reader.
 *
 * @param split    the split to read
 * @param job      the job configuration, also consulted for the JSON-output flag
 * @param reporter the reporter handed to the created reader
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
  EsInputRecordReader<?, ?> reader;
  if (isOutputAsJson(job)) {
    reader = new JsonWritableEsInputRecordReader(split, job, reporter);
  } else {
    reader = new WritableEsInputRecordReader(split, job, reporter);
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (EsInputRecordReader<K, V>) reader;
}

/**
 * Creates a cell extractor: builds an {@code ESDeepJobConfig} for the given cells class and
 * instantiates a fresh {@code EsInputFormat} and {@code EsOutputFormat}.
 *
 * @param cellsClass the cells class passed to the job configuration
 */
public ESCellExtractor(Class<Cells> cellsClass) {
  // The no-arg superclass constructor is invoked implicitly before these assignments.
  deepJobConfig = new ESDeepJobConfig(cellsClass);
  inputFormat = new EsInputFormat<>();
  outputFormat = new EsOutputFormat();
}

/**
 * Creates an entity extractor: builds an {@code ESDeepJobConfig} for the given entity class
 * and instantiates a fresh {@code EsInputFormat} and {@code EsOutputFormat}.
 *
 * @param t the entity class passed to the job configuration
 */
public ESEntityExtractor(Class<T> t) {
  // The no-arg superclass constructor is invoked implicitly before these assignments.
  deepJobConfig = new ESDeepJobConfig(t);
  inputFormat = new EsInputFormat<>();
  outputFormat = new EsOutputFormat();
}

/**
 * Initializes the Hive-related settings for the job, asks the superclass for its splits and
 * wraps every one of them in an {@code EsHiveSplit} so the result can be returned as
 * {@code FileSplit[]}.
 *
 * @param job       the job configuration; also supplies {@code HiveConstants.TABLE_LOCATION}
 * @param numSplits the split count hint forwarded to the superclass
 * @return one wrapper per split returned by the superclass, in the same order
 * @throws IOException propagated from the superclass split computation
 */
@Override
public FileSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  // Load the settings from the job. Merging the input table properties is currently disabled;
  // the original call is kept below for reference.
  Settings settings = HadoopSettingsManager.loadFrom(job);
  //settings.merge(IOUtils.propsFromString(settings.getProperty(HiveConstants.INPUT_TBL_PROPERTIES)));
  Log log = LogFactory.getLog(getClass());

  // move on to initialization
  InitializationUtils.setValueReaderIfNotSet(settings, HiveValueReader.class, log);
  // Only set the target fields if we aren't asking for raw JSON
  if (!settings.getOutputAsJson()) {
    settings.setProperty(
        InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS,
        StringUtils.concatenate(HiveUtils.columnToAlias(settings), ","));
  }
  HiveUtils.init(settings, log);

  // decorate original splits as FileSplit
  InputSplit[] shardSplits = super.getSplits(job, numSplits);
  FileSplit[] wrappers = new FileSplit[shardSplits.length];
  Path tableLocation = new Path(job.get(HiveConstants.TABLE_LOCATION));
  int next = 0;
  for (InputSplit shardSplit : shardSplits) {
    wrappers[next++] = new EsHiveSplit(shardSplit, tableLocation);
  }
  return wrappers;
}

/**
 * Creates a record reader based on the task's configuration: the JSON-oriented reader when
 * {@code isOutputAsJson} reports true, the Writable-based reader otherwise. Both readers are
 * built with their no-arg constructors; the split is not used here.
 *
 * @param split   the split the reader is requested for (not consumed by this method)
 * @param context the task attempt context supplying the configuration
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
  boolean jsonOutput = isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration());
  EsInputRecordReader<?, ?> reader;
  if (jsonOutput) {
    reader = new JsonWritableEsInputRecordReader();
  } else {
    reader = new WritableEsInputRecordReader();
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (EsInputRecordReader<K, V>) reader;
}

/**
 * Computes the input splits for a job described through a {@code JobContext}.
 * The context's configuration is turned into a {@code JobConf} via
 * {@code HadoopCfgUtils.asJobConf} and the work is delegated to
 * {@code getSplits(JobConf, int)}, using the configured number of map tasks.
 *
 * @param context the job context supplying the configuration
 * @return a fixed-size list view over the array returned by the delegate
 * @throws IOException propagated from the delegated split computation
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  JobConf jobConf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration());
  int requestedSplits = jobConf.getNumMapTasks();
  // NOTE: the delegate is expected to return ShardInputSplit instances (which implement both
  // the old and the new API); the array cast below relies on that.
  InputSplit[] splits = (InputSplit[]) getSplits(jobConf, requestedSplits);
  return Arrays.asList(splits);
}

/**
 * Creates the record reader for the given split. A JSON-oriented reader is used when
 * {@code isOutputAsJson(job)} is true, otherwise the Writable-based reader.
 *
 * @param split    the split to read
 * @param job      the job configuration, also consulted for the JSON-output flag
 * @param reporter the reporter handed to the created reader
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
  EsInputRecordReader<?, ?> reader;
  if (isOutputAsJson(job)) {
    reader = new JsonWritableEsInputRecordReader(split, job, reporter);
  } else {
    reader = new WritableEsInputRecordReader(split, job, reporter);
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (EsInputRecordReader<K, V>) reader;
}

/**
 * Computes the input splits for a job described through a {@code JobContext}.
 * The context's configuration is turned into a {@code JobConf} via
 * {@code HadoopCfgUtils.asJobConf} and the work is delegated to
 * {@code getSplits(JobConf, int)}, using the configured number of map tasks.
 *
 * @param context the job context supplying the configuration
 * @return a fixed-size list view over the array returned by the delegate
 * @throws IOException propagated from the delegated split computation
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  JobConf jobConf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration());
  int requestedSplits = jobConf.getNumMapTasks();
  // NOTE: the delegate is expected to return ShardInputSplit instances (which implement both
  // the old and the new API); the array cast below relies on that.
  InputSplit[] splits = (InputSplit[]) getSplits(jobConf, requestedSplits);
  return Arrays.asList(splits);
}

/**
 * Creates the record reader for the given split. A JSON-oriented reader is used when
 * {@code isOutputAsJson(job)} is true, otherwise the Writable-based reader.
 *
 * @param split    the split to read
 * @param job      the job configuration, also consulted for the JSON-output flag
 * @param reporter the reporter handed to the created reader
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
  EsInputRecordReader<?, ?> reader;
  if (isOutputAsJson(job)) {
    reader = new JsonWritableEsInputRecordReader(split, job, reporter);
  } else {
    reader = new WritableEsInputRecordReader(split, job, reporter);
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (EsInputRecordReader<K, V>) reader;
}

/**
 * Computes the input splits for a job described through a {@code JobContext}.
 * The context's configuration is turned into a {@code JobConf} via
 * {@code HadoopCfgUtils.asJobConf} and the work is delegated to
 * {@code getSplits(JobConf, int)}, using the configured number of map tasks.
 *
 * @param context the job context supplying the configuration
 * @return a fixed-size list view over the array returned by the delegate
 * @throws IOException propagated from the delegated split computation
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  JobConf jobConf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration());
  int requestedSplits = jobConf.getNumMapTasks();
  // NOTE: the delegate is expected to return ShardInputSplit instances (which implement both
  // the old and the new API); the array cast below relies on that.
  InputSplit[] splits = (InputSplit[]) getSplits(jobConf, requestedSplits);
  return Arrays.asList(splits);
}

/**
 * Creates the shard record reader for the given split. A JSON-oriented reader is used when
 * {@code isOutputAsJson(job)} is true, otherwise the Writable-based reader.
 *
 * @param split    the split to read
 * @param job      the job configuration, also consulted for the JSON-output flag
 * @param reporter the reporter handed to the created reader
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public ShardRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
  ShardRecordReader<?, ?> reader;
  if (isOutputAsJson(job)) {
    reader = new JsonWritableShardRecordReader(split, job, reporter);
  } else {
    reader = new WritableShardRecordReader(split, job, reporter);
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (ShardRecordReader<K, V>) reader;
}

/**
 * Computes the input splits for a job described through a {@code JobContext}.
 * The context's configuration is turned into a {@code JobConf} via
 * {@code HadoopCfgUtils.asJobConf} and the work is delegated to
 * {@code getSplits(JobConf, int)}, using the configured number of map tasks.
 *
 * @param context the job context supplying the configuration
 * @return a fixed-size list view over the array returned by the delegate
 * @throws IOException propagated from the delegated split computation
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  JobConf jobConf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration());
  int requestedSplits = jobConf.getNumMapTasks();
  // NOTE: the delegate is expected to return ShardInputSplit instances (which implement both
  // the old and the new API); the array cast below relies on that.
  InputSplit[] splits = (InputSplit[]) getSplits(jobConf, requestedSplits);
  return Arrays.asList(splits);
}

/**
 * Creates the record reader for the given split. A JSON-oriented reader is used when
 * {@code isOutputAsJson(job)} is true, otherwise the Writable-based reader.
 *
 * @param split    the split to read
 * @param job      the job configuration, also consulted for the JSON-output flag
 * @param reporter the reporter handed to the created reader
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
  EsInputRecordReader<?, ?> reader;
  if (isOutputAsJson(job)) {
    reader = new JsonWritableEsInputRecordReader(split, job, reporter);
  } else {
    reader = new WritableEsInputRecordReader(split, job, reporter);
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (EsInputRecordReader<K, V>) reader;
}

/**
 * Initializes the Hive-related settings for the job, asks the superclass for its splits and
 * wraps every one of them in an {@code EsHiveSplit} so the result can be returned as
 * {@code FileSplit[]}.
 *
 * @param job       the job configuration; also supplies {@code HiveConstants.TABLE_LOCATION}
 * @param numSplits the split count hint forwarded to the superclass
 * @return one wrapper per split returned by the superclass, in the same order
 * @throws IOException propagated from the superclass split computation
 */
@Override
public FileSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  // Load the settings from the job. Merging the input table properties is currently disabled;
  // the original call is kept below for reference.
  Settings settings = HadoopSettingsManager.loadFrom(job);
  //settings.merge(IOUtils.propsFromString(settings.getProperty(HiveConstants.INPUT_TBL_PROPERTIES)));
  Log log = LogFactory.getLog(getClass());

  // move on to initialization
  InitializationUtils.setValueReaderIfNotSet(settings, HiveValueReader.class, log);
  // Only set the target fields if we aren't asking for raw JSON
  if (!settings.getOutputAsJson()) {
    settings.setProperty(
        InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS,
        StringUtils.concatenate(HiveUtils.columnToAlias(settings), ","));
  }
  HiveUtils.init(settings, log);

  // decorate original splits as FileSplit
  InputSplit[] shardSplits = super.getSplits(job, numSplits);
  FileSplit[] wrappers = new FileSplit[shardSplits.length];
  Path tableLocation = new Path(job.get(HiveConstants.TABLE_LOCATION));
  int next = 0;
  for (InputSplit shardSplit : shardSplits) {
    wrappers[next++] = new EsHiveSplit(shardSplit, tableLocation);
  }
  return wrappers;
}

/**
 * Creates a record reader based on the task's configuration: the JSON-oriented reader when
 * {@code isOutputAsJson} reports true, the Writable-based reader otherwise. Both readers are
 * built with their no-arg constructors; the split is not used here.
 *
 * @param split   the split the reader is requested for (not consumed by this method)
 * @param context the task attempt context supplying the configuration
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
  boolean jsonOutput = isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration());
  EsInputRecordReader<?, ?> reader;
  if (jsonOutput) {
    reader = new JsonWritableEsInputRecordReader();
  } else {
    reader = new WritableEsInputRecordReader();
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (EsInputRecordReader<K, V>) reader;
}

/**
 * Creates a shard record reader based on the task's configuration: the JSON-oriented reader
 * when {@code isOutputAsJson} reports true, the Writable-based reader otherwise. Both readers
 * are built with their no-arg constructors; the split is not used here.
 *
 * @param split   the split the reader is requested for (not consumed by this method)
 * @param context the task attempt context supplying the configuration
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public ShardRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
  boolean jsonOutput = isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration());
  ShardRecordReader<?, ?> reader;
  if (jsonOutput) {
    reader = new JsonWritableShardRecordReader();
  } else {
    reader = new WritableShardRecordReader();
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (ShardRecordReader<K, V>) reader;
}

/**
 * Creates a record reader based on the task's configuration: the JSON-oriented reader when
 * {@code isOutputAsJson} reports true, the Writable-based reader otherwise. Both readers are
 * built with their no-arg constructors; the split is not used here.
 *
 * @param split   the split the reader is requested for (not consumed by this method)
 * @param context the task attempt context supplying the configuration
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
  boolean jsonOutput = isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration());
  EsInputRecordReader<?, ?> reader;
  if (jsonOutput) {
    reader = new JsonWritableEsInputRecordReader();
  } else {
    reader = new WritableEsInputRecordReader();
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (EsInputRecordReader<K, V>) reader;
}

/**
 * Creates a record reader based on the task's configuration: the JSON-oriented reader when
 * {@code isOutputAsJson} reports true, the Writable-based reader otherwise. Both readers are
 * built with their no-arg constructors; the split is not used here.
 *
 * @param split   the split the reader is requested for (not consumed by this method)
 * @param context the task attempt context supplying the configuration
 * @return the reader selected by the configuration
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
  boolean jsonOutput = isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration());
  EsInputRecordReader<?, ?> reader;
  if (jsonOutput) {
    reader = new JsonWritableEsInputRecordReader();
  } else {
    reader = new WritableEsInputRecordReader();
  }
  // Unchecked cast: the compiler cannot verify the concrete reader's type arguments against K and V.
  return (EsInputRecordReader<K, V>) reader;
}

Javadoc

ElasticSearch InputFormat for streaming data (typically based on a query) from ElasticSearch. Returns the document ID as key and its content as value.

This class implements both the "old" (org.apache.hadoop.mapred) and the "new" (org.apache.hadoop.mapreduce) API.

Most used methods

Popular in Java

Updating database using SQL prepared statement
startActivity (Activity)
getSystemService (Context)
setContentView (Activity)
URL (java.net)
A Uniform Resource Locator that identifies the location of an Internet resource as specified by RFC
Collections (java.util)
This class consists exclusively of static methods that operate on or return collections. It contains
HashSet (java.util)
HashSet is an implementation of a Set. All optional operations (adding and removing) are supported.
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
Modifier (javassist)
The Modifier class provides static methods and constants to decode class and member access modifiers
BoxLayout (javax.swing)
Top PhpStorm plugins

How to use EsInputFormat in org.elasticsearch.hadoop.mr

Best Java code snippets using org.elasticsearch.hadoop.mr.EsInputFormat (Showing top 19 results out of 315)

How to use
EsInputFormat
in
org.elasticsearch.hadoop.mr