@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    JobConf conf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration());
    // NOTE: this method expects a ShardInputSplit to be returned (which implements both the old and the new API).
    return Arrays.asList((InputSplit[]) getSplits(conf, conf.getNumMapTasks()));
}
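// For context, the (InputSplit[]) cast above relies on the returned splits satisfying
// both Hadoop APIs at once: extending the new-API abstract class while implementing
// the old-API interface. A minimal, hypothetical sketch of that pattern follows
// (illustrative only, not the actual ShardInputSplit source):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class DualApiSplit extends org.apache.hadoop.mapreduce.InputSplit
        implements org.apache.hadoop.mapred.InputSplit {

    private String targetNode = ""; // hypothetical field, e.g. the node hosting a shard

    @Override
    public long getLength() throws IOException {
        // narrows the new API's 'throws IOException, InterruptedException' clause,
        // so this single method satisfies both declarations
        return 1L;
    }

    @Override
    public String[] getLocations() throws IOException {
        return new String[] { targetNode };
    }

    // the old-API InputSplit extends Writable, so the split must serialize itself
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(targetNode);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        targetNode = in.readUTF();
    }
}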
@Override
public FileSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    // first, merge input table properties (since there's no access to them ...)
    Settings settings = HadoopSettingsManager.loadFrom(job);
    //settings.merge(IOUtils.propsFromString(settings.getProperty(HiveConstants.INPUT_TBL_PROPERTIES)));

    Log log = LogFactory.getLog(getClass());
    // move on to initialization
    InitializationUtils.setValueReaderIfNotSet(settings, HiveValueReader.class, log);
    if (settings.getOutputAsJson() == false) {
        // Only set the fields if we aren't asking for raw JSON
        settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS,
                StringUtils.concatenate(HiveUtils.columnToAlias(settings), ","));
    }
    HiveUtils.init(settings, log);

    // decorate original splits as FileSplit
    InputSplit[] shardSplits = super.getSplits(job, numSplits);
    FileSplit[] wrappers = new FileSplit[shardSplits.length];
    Path path = new Path(job.get(HiveConstants.TABLE_LOCATION));
    for (int i = 0; i < wrappers.length; i++) {
        wrappers[i] = new EsHiveSplit(shardSplits[i], path);
    }
    return wrappers;
}
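// For context, a hypothetical sketch of the kind of FileSplit decorator used in the
// wrapping loop above: Hive expects path-based FileSplits, so each shard split is
// wrapped in one that reports the table location while delegating the real work.
// DelegatingHiveSplit is an invented name for illustration; the shipped EsHiveSplit
// is not reproduced here.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;

public class DelegatingHiveSplit extends FileSplit {

    private final InputSplit delegate;

    public DelegatingHiveSplit(InputSplit delegate, Path path) {
        // Hive only cares about the path; start/length are meaningless for a shard-backed split
        super(path, 0, 0, (String[]) null);
        this.delegate = delegate;
    }

    @Override
    public long getLength() {
        // FileSplit.getLength() declares no checked exceptions, so wrap the delegate's IOException
        try {
            return delegate.getLength();
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }

    @Override
    public String[] getLocations() throws IOException {
        return delegate.getLocations();
    }

    // NOTE: a real implementation must also override write()/readFields() so the
    // wrapped delegate survives Writable (de)serialization; omitted in this sketch.
}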