@Override public List<InputSplit> getSplits(JobContext context) throws IOException { JobConf conf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration()); // NOTE: this method expects a ShardInputSplit to be returned (which implements both the old and the new API). return Arrays.asList((InputSplit[]) getSplits(conf, conf.getNumMapTasks())); }
/**
 * Creates the record reader (old mapred API) for the given split.
 * A JSON-emitting reader is chosen when {@code isOutputAsJson(job)} is true,
 * otherwise the Writable-based reader is used.
 *
 * @param split    split to read
 * @param job      job configuration
 * @param reporter progress reporter handed to the reader
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
    final EsInputRecordReader<?, ?> reader;
    if (isOutputAsJson(job)) {
        reader = new JsonWritableEsInputRecordReader(split, job, reporter);
    } else {
        reader = new WritableEsInputRecordReader(split, job, reporter);
    }
    return (EsInputRecordReader<K, V>) reader;
}
/**
 * Creates an extractor configured for the given cells class, using a fresh
 * {@code EsInputFormat} for input and a fresh {@code EsOutputFormat} for output.
 *
 * @param cellsClass class passed to the {@code ESDeepJobConfig} constructor
 */
public ESCellExtractor(Class<Cells> cellsClass) {
    // The superclass no-arg constructor is invoked implicitly.
    deepJobConfig = new ESDeepJobConfig(cellsClass);
    inputFormat = new EsInputFormat<>();
    outputFormat = new EsOutputFormat();
}
/**
 * Creates an extractor configured for the given entity class, using a fresh
 * {@code EsInputFormat} for input and a fresh {@code EsOutputFormat} for output.
 *
 * @param t entity class passed to the {@code ESDeepJobConfig} constructor
 */
public ESEntityExtractor(Class<T> t) {
    // The superclass no-arg constructor is invoked implicitly.
    deepJobConfig = new ESDeepJobConfig(t);
    inputFormat = new EsInputFormat<>();
    outputFormat = new EsOutputFormat();
}
@Override public FileSplit[] getSplits(JobConf job, int numSplits) throws IOException { // first, merge input table properties (since there's no access to them ...) Settings settings = HadoopSettingsManager.loadFrom(job); //settings.merge(IOUtils.propsFromString(settings.getProperty(HiveConstants.INPUT_TBL_PROPERTIES))); Log log = LogFactory.getLog(getClass()); // move on to initialization InitializationUtils.setValueReaderIfNotSet(settings, HiveValueReader.class, log); if (settings.getOutputAsJson() == false) { // Only set the fields if we aren't asking for raw JSON settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS, StringUtils.concatenate(HiveUtils.columnToAlias(settings), ",")); } HiveUtils.init(settings, log); // decorate original splits as FileSplit InputSplit[] shardSplits = super.getSplits(job, numSplits); FileSplit[] wrappers = new FileSplit[shardSplits.length]; Path path = new Path(job.get(HiveConstants.TABLE_LOCATION)); for (int i = 0; i < wrappers.length; i++) { wrappers[i] = new EsHiveSplit(shardSplits[i], path); } return wrappers; }
/**
 * Creates the record reader (new mapreduce API) for a task attempt.
 * A JSON-emitting reader is chosen when the task configuration asks for JSON
 * output, otherwise the Writable-based reader is used. The readers are built
 * with their no-arg constructors; the split is not passed to them here.
 *
 * @param split   split assigned to the task (unused in this method)
 * @param context task attempt context supplying the configuration
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
    final EsInputRecordReader<?, ?> reader;
    if (isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration())) {
        reader = new JsonWritableEsInputRecordReader();
    } else {
        reader = new WritableEsInputRecordReader();
    }
    return (EsInputRecordReader<K, V>) reader;
}
@Override public List<InputSplit> getSplits(JobContext context) throws IOException { JobConf conf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration()); // NOTE: this method expects a ShardInputSplit to be returned (which implements both the old and the new API). return Arrays.asList((InputSplit[]) getSplits(conf, conf.getNumMapTasks())); }
/**
 * Creates the record reader (old mapred API) for the given split.
 * A JSON-emitting reader is chosen when {@code isOutputAsJson(job)} is true,
 * otherwise the Writable-based reader is used.
 *
 * @param split    split to read
 * @param job      job configuration
 * @param reporter progress reporter handed to the reader
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
    final EsInputRecordReader<?, ?> reader;
    if (isOutputAsJson(job)) {
        reader = new JsonWritableEsInputRecordReader(split, job, reporter);
    } else {
        reader = new WritableEsInputRecordReader(split, job, reporter);
    }
    return (EsInputRecordReader<K, V>) reader;
}
@Override public List<InputSplit> getSplits(JobContext context) throws IOException { JobConf conf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration()); // NOTE: this method expects a ShardInputSplit to be returned (which implements both the old and the new API). return Arrays.asList((InputSplit[]) getSplits(conf, conf.getNumMapTasks())); }
/**
 * Creates the record reader (old mapred API) for the given split.
 * A JSON-emitting reader is chosen when {@code isOutputAsJson(job)} is true,
 * otherwise the Writable-based reader is used.
 *
 * @param split    split to read
 * @param job      job configuration
 * @param reporter progress reporter handed to the reader
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
    final EsInputRecordReader<?, ?> reader;
    if (isOutputAsJson(job)) {
        reader = new JsonWritableEsInputRecordReader(split, job, reporter);
    } else {
        reader = new WritableEsInputRecordReader(split, job, reporter);
    }
    return (EsInputRecordReader<K, V>) reader;
}
@Override public List<InputSplit> getSplits(JobContext context) throws IOException { JobConf conf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration()); // NOTE: this method expects a ShardInputSplit to be returned (which implements both the old and the new API). return Arrays.asList((InputSplit[]) getSplits(conf, conf.getNumMapTasks())); }
/**
 * Creates the shard record reader (old mapred API) for the given split.
 * A JSON-emitting reader is chosen when {@code isOutputAsJson(job)} is true,
 * otherwise the Writable-based reader is used.
 *
 * @param split    split to read
 * @param job      job configuration
 * @param reporter progress reporter handed to the reader
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public ShardRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
    final ShardRecordReader<?, ?> reader;
    if (isOutputAsJson(job)) {
        reader = new JsonWritableShardRecordReader(split, job, reporter);
    } else {
        reader = new WritableShardRecordReader(split, job, reporter);
    }
    return (ShardRecordReader<K, V>) reader;
}
@Override public List<InputSplit> getSplits(JobContext context) throws IOException { JobConf conf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration()); // NOTE: this method expects a ShardInputSplit to be returned (which implements both the old and the new API). return Arrays.asList((InputSplit[]) getSplits(conf, conf.getNumMapTasks())); }
/**
 * Creates the record reader (old mapred API) for the given split.
 * A JSON-emitting reader is chosen when {@code isOutputAsJson(job)} is true,
 * otherwise the Writable-based reader is used.
 *
 * @param split    split to read
 * @param job      job configuration
 * @param reporter progress reporter handed to the reader
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) {
    final EsInputRecordReader<?, ?> reader;
    if (isOutputAsJson(job)) {
        reader = new JsonWritableEsInputRecordReader(split, job, reporter);
    } else {
        reader = new WritableEsInputRecordReader(split, job, reporter);
    }
    return (EsInputRecordReader<K, V>) reader;
}
@Override public FileSplit[] getSplits(JobConf job, int numSplits) throws IOException { // first, merge input table properties (since there's no access to them ...) Settings settings = HadoopSettingsManager.loadFrom(job); //settings.merge(IOUtils.propsFromString(settings.getProperty(HiveConstants.INPUT_TBL_PROPERTIES))); Log log = LogFactory.getLog(getClass()); // move on to initialization InitializationUtils.setValueReaderIfNotSet(settings, HiveValueReader.class, log); if (settings.getOutputAsJson() == false) { // Only set the fields if we aren't asking for raw JSON settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS, StringUtils.concatenate(HiveUtils.columnToAlias(settings), ",")); } HiveUtils.init(settings, log); // decorate original splits as FileSplit InputSplit[] shardSplits = super.getSplits(job, numSplits); FileSplit[] wrappers = new FileSplit[shardSplits.length]; Path path = new Path(job.get(HiveConstants.TABLE_LOCATION)); for (int i = 0; i < wrappers.length; i++) { wrappers[i] = new EsHiveSplit(shardSplits[i], path); } return wrappers; }
/**
 * Creates the record reader (new mapreduce API) for a task attempt.
 * A JSON-emitting reader is chosen when the task configuration asks for JSON
 * output, otherwise the Writable-based reader is used. The readers are built
 * with their no-arg constructors; the split is not passed to them here.
 *
 * @param split   split assigned to the task (unused in this method)
 * @param context task attempt context supplying the configuration
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
    final EsInputRecordReader<?, ?> reader;
    if (isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration())) {
        reader = new JsonWritableEsInputRecordReader();
    } else {
        reader = new WritableEsInputRecordReader();
    }
    return (EsInputRecordReader<K, V>) reader;
}
/**
 * Creates the shard record reader (new mapreduce API) for a task attempt.
 * A JSON-emitting reader is chosen when the task configuration asks for JSON
 * output, otherwise the Writable-based reader is used. The readers are built
 * with their no-arg constructors; the split is not passed to them here.
 *
 * @param split   split assigned to the task (unused in this method)
 * @param context task attempt context supplying the configuration
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public ShardRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
    final ShardRecordReader<?, ?> reader;
    if (isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration())) {
        reader = new JsonWritableShardRecordReader();
    } else {
        reader = new WritableShardRecordReader();
    }
    return (ShardRecordReader<K, V>) reader;
}
/**
 * Creates the record reader (new mapreduce API) for a task attempt.
 * A JSON-emitting reader is chosen when the task configuration asks for JSON
 * output, otherwise the Writable-based reader is used. The readers are built
 * with their no-arg constructors; the split is not passed to them here.
 *
 * @param split   split assigned to the task (unused in this method)
 * @param context task attempt context supplying the configuration
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
    final EsInputRecordReader<?, ?> reader;
    if (isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration())) {
        reader = new JsonWritableEsInputRecordReader();
    } else {
        reader = new WritableEsInputRecordReader();
    }
    return (EsInputRecordReader<K, V>) reader;
}
/**
 * Creates the record reader (new mapreduce API) for a task attempt.
 * A JSON-emitting reader is chosen when the task configuration asks for JSON
 * output, otherwise the Writable-based reader is used. The readers are built
 * with their no-arg constructors; the split is not passed to them here.
 *
 * @param split   split assigned to the task (unused in this method)
 * @param context task attempt context supplying the configuration
 * @return the reader, cast (unchecked) to the caller's key/value types
 */
@SuppressWarnings("unchecked")
@Override
public EsInputRecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) {
    final EsInputRecordReader<?, ?> reader;
    if (isOutputAsJson(CompatHandler.taskAttemptContext(context).getConfiguration())) {
        reader = new JsonWritableEsInputRecordReader();
    } else {
        reader = new WritableEsInputRecordReader();
    }
    return (EsInputRecordReader<K, V>) reader;
}