HeartBeat(final Progressable progressable, Configuration cfg, TimeValue lead, final Log log) { Assert.notNull(progressable, "a valid progressable is required to report status to Hadoop"); TimeValue tv = HadoopCfgUtils.getTaskTimeout(cfg); Assert.isTrue(tv.getSeconds() <= 0 || tv.getSeconds() > lead.getSeconds(), "Hadoop timeout is shorter than the heartbeat"); this.progressable = progressable; long cfgMillis = (tv.getMillis() > 0 ? tv.getMillis() : 0); // the task is simple hence the delay = timeout - lead, that is when to start the notification right before the timeout this.delay = new TimeValue(Math.abs(cfgMillis - lead.getMillis()), TimeUnit.MILLISECONDS); this.log = log; String taskId; TaskID taskID = HadoopCfgUtils.getTaskID(cfg); if (taskID == null) { log.warn("Cannot determine task id..."); taskId = "<unknown>"; if (log.isTraceEnabled()) { log.trace("Current configuration is " + HadoopCfgUtils.asProperties(cfg)); } } else { taskId = "" + taskID; } id = taskId; }
@Override public List<InputSplit> getSplits(JobContext context) throws IOException { JobConf conf = HadoopCfgUtils.asJobConf(CompatHandler.jobContext(context).getConfiguration()); // NOTE: this method expects a ShardInputSplit to be returned (which implements both the old and the new API). return Arrays.asList((InputSplit[]) getSplits(conf, conf.getNumMapTasks())); }
public static String getFileSystem(Configuration cfg) { return get(cfg, "fs.defaultFS", "fs.default.name"); }
public static TaskID getTaskID(Configuration cfg) { // first try with the attempt since some Hadoop versions mix the two String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg); if (StringUtils.hasText(taskAttemptId)) { try { return TaskAttemptID.forName(taskAttemptId).getTaskID(); } catch (IllegalArgumentException ex) { // the task attempt is invalid (Tez in particular uses the wrong string - see #346) // try to fallback to task id return parseTaskIdFromTaskAttemptId(taskAttemptId); } } String taskIdProp = HadoopCfgUtils.getTaskId(cfg); // double-check task id bug in Hadoop 2.5.x if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) { return TaskID.forName(taskIdProp); } return null; }
@Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { conf.setOutputFormat(EsOutputFormat.class); // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat Settings set = loadSettings(conf, false); Log log = LogFactory.getLog(EsTap.class); InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log); InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log); InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log); InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log); // NB: we need to set this property even though it is not being used - and since and URI causes problem, use only the resource/file //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource()); HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite()); HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName()); if (log.isTraceEnabled()) { log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf)); } }
private void init(Configuration cfg) throws IOException { Settings settings = HadoopSettingsManager.loadFrom(cfg); Assert.hasText(settings.getResourceWrite(), String.format("No resource ['%s'] (index/query/location) specified", ES_RESOURCE)); // Need to discover the ESVersion before checking if index exists. InitializationUtils.discoverEsVersion(settings, log); InitializationUtils.checkIdForOperation(settings); InitializationUtils.checkIndexExistence(settings); if (HadoopCfgUtils.getReduceTasks(cfg) != null) { if (HadoopCfgUtils.getSpeculativeReduce(cfg)) { log.warn("Speculative execution enabled for reducer - consider disabling it to prevent data corruption"); } } else { if (HadoopCfgUtils.getSpeculativeMap(cfg)) { log.warn("Speculative execution enabled for mapper - consider disabling it to prevent data corruption"); } } //log.info(String.format("Starting to write/index to [%s][%s]", settings.getTargetUri(), settings.getTargetResource())); } }
@Override public Properties asProperties() { return HadoopCfgUtils.asProperties(cfg); } }
private int detectCurrentInstance(Configuration conf) { TaskID taskID = HadoopCfgUtils.getTaskID(conf); if (taskID == null) { log.warn(String.format("Cannot determine task id - redirecting writes in a random fashion")); return NO_TASK_ID; } return taskID.getId(); }
@Override void init(EsInputSplit esSplit, Configuration cfg, Progressable progressable) { useLinkedMapWritable = (!MapWritable.class.getName().equals(HadoopCfgUtils.getMapValueClass(cfg))); super.init(esSplit, cfg, progressable); }
public static boolean isLocal(Configuration cfg) { return "local".equals(cfg.get("mapreduce.framework.name")) || "local".equals(getJobTracker(cfg)); }
private void init(Configuration cfg) throws IOException { Settings settings = HadoopSettingsManager.loadFrom(cfg); Assert.hasText(settings.getResourceWrite(), String.format("No resource ['%s'] (index/query/location) specified", ES_RESOURCE)); // lazy-init RestRepository client = null; InitializationUtils.checkIdForOperation(settings); InitializationUtils.checkIndexExistence(settings, client); if (HadoopCfgUtils.getReduceTasks(cfg) != null) { if (HadoopCfgUtils.getSpeculativeReduce(cfg)) { log.warn("Speculative execution enabled for reducer - consider disabling it to prevent data corruption"); } } else { if (HadoopCfgUtils.getSpeculativeMap(cfg)) { log.warn("Speculative execution enabled for mapper - consider disabling it to prevent data corruption"); } } //log.info(String.format("Starting to write/index to [%s][%s]", settings.getTargetUri(), settings.getTargetResource())); } }
public static TaskID getTaskID(Configuration cfg) { // first try with the attempt since some Hadoop versions mix the two String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg); if (StringUtils.hasText(taskAttemptId)) { try { return TaskAttemptID.forName(taskAttemptId).getTaskID(); } catch (IllegalArgumentException ex) { // the task attempt is invalid (Tez in particular uses the wrong string - see #346) // try to fallback to task id return parseTaskIdFromTaskAttemptId(taskAttemptId); } } String taskIdProp = HadoopCfgUtils.getTaskId(cfg); // double-check task id bug in Hadoop 2.5.x if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) { return TaskID.forName(taskIdProp); } return null; }
@Override public void sourceConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { conf.setInputFormat(EsInputFormat.class); Settings set = loadSettings(conf, true); Collection<String> fields = CascadingUtils.fieldToAlias(set, getSourceFields()); // load only the necessary fields conf.set(InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS, StringUtils.concatenate(fields)); if (log.isTraceEnabled()) { log.trace("Initialized (source) configuration " + HadoopCfgUtils.asProperties(conf)); } }
@Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { conf.setOutputFormat(EsOutputFormat.class); // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat Settings set = loadSettings(conf, false); Log log = LogFactory.getLog(EsTap.class); InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log); InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log); InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log); InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log); // NB: we need to set this property even though it is not being used - and since and URI causes problem, use only the resource/file //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource()); HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite()); HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName()); if (log.isTraceEnabled()) { log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf)); } }
private int detectCurrentInstance(Configuration conf) { TaskID taskID = HadoopCfgUtils.getTaskID(conf); if (taskID == null) { log.warn(String.format("Cannot determine task id - redirecting writes in a random fashion")); return NO_TASK_ID; } return taskID.getId(); }
@Override void init(EsInputSplit esSplit, Configuration cfg, Progressable progressable) { useLinkedMapWritable = (!MapWritable.class.getName().equals(HadoopCfgUtils.getMapValueClass(cfg))); super.init(esSplit, cfg, progressable); }
public static boolean isLocal(Configuration cfg) { return "local".equals(cfg.get("mapreduce.framework.name")) || "local".equals(getJobTracker(cfg)); }
HeartBeat(final Progressable progressable, Configuration cfg, TimeValue lead, final Log log) { Assert.notNull(progressable, "a valid progressable is required to report status to Hadoop"); TimeValue tv = HadoopCfgUtils.getTaskTimeout(cfg); Assert.isTrue(tv.getSeconds() <= 0 || tv.getSeconds() > lead.getSeconds(), "Hadoop timeout is shorter than the heartbeat"); this.progressable = progressable; long cfgMillis = (tv.getMillis() > 0 ? tv.getMillis() : 0); // the task is simple hence the delay = timeout - lead, that is when to start the notification right before the timeout this.delay = new TimeValue(Math.abs(cfgMillis - lead.getMillis()), TimeUnit.MILLISECONDS); this.log = log; String taskId; TaskID taskID = HadoopCfgUtils.getTaskID(cfg); if (taskID == null) { log.warn("Cannot determine task id..."); taskId = "<unknown>"; if (log.isTraceEnabled()) { log.trace("Current configuration is " + HadoopCfgUtils.asProperties(cfg)); } } else { taskId = "" + taskID; } id = taskId; }
private void init(Configuration cfg) throws IOException { Settings settings = HadoopSettingsManager.loadFrom(cfg); Assert.hasText(settings.getResourceWrite(), String.format("No resource ['%s'] (index/query/location) specified", ES_RESOURCE)); // Need to discover the ESVersion before checking if index exists. InitializationUtils.discoverEsVersion(settings, log); InitializationUtils.checkIdForOperation(settings); InitializationUtils.checkIndexExistence(settings); if (HadoopCfgUtils.getReduceTasks(cfg) != null) { if (HadoopCfgUtils.getSpeculativeReduce(cfg)) { log.warn("Speculative execution enabled for reducer - consider disabling it to prevent data corruption"); } } else { if (HadoopCfgUtils.getSpeculativeMap(cfg)) { log.warn("Speculative execution enabled for mapper - consider disabling it to prevent data corruption"); } } //log.info(String.format("Starting to write/index to [%s][%s]", settings.getTargetUri(), settings.getTargetResource())); } }
public static TaskID getTaskID(Configuration cfg) { // first try with the attempt since some Hadoop versions mix the two String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg); if (StringUtils.hasText(taskAttemptId)) { try { return TaskAttemptID.forName(taskAttemptId).getTaskID(); } catch (IllegalArgumentException ex) { // the task attempt is invalid (Tez in particular uses the wrong string - see #346) // try to fallback to task id return parseTaskIdFromTaskAttemptId(taskAttemptId); } } String taskIdProp = HadoopCfgUtils.getTaskId(cfg); // double-check task id bug in Hadoop 2.5.x if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) { return TaskID.forName(taskIdProp); } return null; }