@Override
public Properties asProperties() {
    return HadoopCfgUtils.asProperties(cfg);
}
}
@Override
public void sourceConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    conf.setInputFormat(EsInputFormat.class);
    Settings set = loadSettings(conf, true);

    // load only the necessary fields
    Collection<String> fields = CascadingUtils.fieldToAlias(set, getSourceFields());
    conf.set(InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS, StringUtils.concatenate(fields));

    if (log.isTraceEnabled()) {
        log.trace("Initialized (source) configuration " + HadoopCfgUtils.asProperties(conf));
    }
}
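What sourceConfInit stores is simply a comma-separated projection of the tap's source fields; the sketch below illustrates the idea with a hypothetical stand-in for StringUtils.concatenate (its exact signature is an assumption here).

import java.util.Arrays;
import java.util.Collection;

public class FieldProjectionSketch {
    // hypothetical stand-in for StringUtils.concatenate, assumed to comma-join the field names
    static String concatenate(Collection<String> fields) {
        return String.join(",", fields);
    }

    public static void main(String[] args) {
        Collection<String> sourceFields = Arrays.asList("name", "url", "picture");
        // this comma-separated list is what ends up under the internal target-fields key,
        // so the ES-side reader fetches only these fields
        System.out.println(concatenate(sourceFields)); // prints: name,url,picture
    }
}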
void init(EsInputSplit esSplit, Configuration cfg, Progressable progressable) {
    // get a copy to override the host/port
    Settings settings = HadoopSettingsManager.loadFrom(cfg).copy().load(esSplit.getPartition().getSerializedSettings());

    if (log.isTraceEnabled()) {
        log.trace(String.format("Init shard reader from cfg %s", HadoopCfgUtils.asProperties(cfg)));
        log.trace(String.format("Init shard reader w/ settings %s", settings));
    }

    this.esSplit = esSplit;

    // initialize mapping/scroll reader
    InitializationUtils.setValueReaderIfNotSet(settings, WritableValueReader.class, log);

    PartitionDefinition part = esSplit.getPartition();
    PartitionReader partitionReader = RestService.createReader(settings, part, log);

    this.scrollReader = partitionReader.scrollReader;
    this.client = partitionReader.client;
    this.queryBuilder = partitionReader.queryBuilder;
    this.progressable = progressable;

    // in Hadoop-like envs (Spark) the progressable might be null and thus the heart-beat is not needed
    if (progressable != null) {
        beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);
    }

    if (log.isDebugEnabled()) {
        log.debug(String.format("Initializing RecordReader for [%s]", esSplit));
    }
}
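This reader is created through the old (mapred) API; following the es-hadoop documentation, a minimal read-side job configuration looks like the sketch below (the index/type and query values are illustrative).

import org.apache.hadoop.mapred.JobConf;
import org.elasticsearch.hadoop.mr.EsInputFormat;

public class ReadJobSketch {
    public static JobConf configure() {
        JobConf conf = new JobConf();
        conf.setInputFormat(EsInputFormat.class); // its record reader goes through init(...) above
        conf.set("es.resource", "radio/artists"); // illustrative index/type to read from
        conf.set("es.query", "?q=me*");           // optional URI query, illustrative
        return conf;
    }
}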
protected void init() throws IOException {
    //int instances = detectNumberOfInstances(cfg);
    int currentInstance = detectCurrentInstance(cfg);

    if (log.isTraceEnabled()) {
        log.trace(String.format("EsRecordWriter instance [%s] initiating discovery of target shard...", currentInstance));
    }

    Settings settings = HadoopSettingsManager.loadFrom(cfg).copy();

    if (log.isTraceEnabled()) {
        log.trace(String.format("Init shard writer from cfg %s", HadoopCfgUtils.asProperties(cfg)));
    }

    InitializationUtils.setValueWriterIfNotSet(settings, WritableValueWriter.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, WritableBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, MapWritableFieldExtractor.class, log);

    PartitionWriter pw = RestService.createWriter(settings, currentInstance, -1, log);
    this.repository = pw.repository;

    if (progressable != null) {
        this.beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);
        this.beat.start();
    }
}
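The write path mirrors this: the record writer is obtained via EsOutputFormat under the old (mapred) API. A minimal write-side configuration, again in the style of the es-hadoop documentation (node address and resource are illustrative), could look like:

import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.mapred.JobConf;
import org.elasticsearch.hadoop.mr.EsOutputFormat;

public class WriteJobSketch {
    public static JobConf configure() {
        JobConf conf = new JobConf();
        conf.setSpeculativeExecution(false);        // speculative tasks would cause duplicate writes
        conf.set("es.nodes", "es-server:9200");     // illustrative Elasticsearch node
        conf.set("es.resource", "radio/artists");   // illustrative index/type to write to
        conf.setOutputFormat(EsOutputFormat.class); // its record writer goes through init() above
        conf.setMapOutputValueClass(MapWritable.class);
        return conf;
    }
}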
@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    conf.setOutputFormat(EsOutputFormat.class);

    // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat
    Settings set = loadSettings(conf, false);

    Log log = LogFactory.getLog(EsTap.class);
    InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log);

    // NB: this property needs to be set even though it is not used - and since a URI causes problems, use only the resource/file
    //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource());
    HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite());
    HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName());

    if (log.isTraceEnabled()) {
        log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf));
    }
}
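At the Cascading level both conf-init hooks are exercised by wiring an EsTap into a flow; a minimal write flow in the style of the es-hadoop documentation (file path, field names and resource are illustrative):

import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.TextDelimited;
import cascading.tap.Tap;
import cascading.tap.hadoop.Lfs;
import cascading.tuple.Fields;
import org.elasticsearch.hadoop.cascading.EsTap;

public class CascadingWriteSketch {
    public static void main(String[] args) {
        // local source with four delimited fields (illustrative sample file)
        Tap in = new Lfs(new TextDelimited(new Fields("id", "name", "url", "picture")), "src/test/resources/artists.dat");
        // sink into Elasticsearch, writing only three of the fields
        Tap out = new EsTap("radio/artists", new Fields("name", "url", "picture"));
        // connecting and running the flow invokes sinkConfInit above
        new HadoopFlowConnector().connect(in, out, new Pipe("write-to-Es")).complete();
    }
}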
HeartBeat(final Progressable progressable, Configuration cfg, TimeValue lead, final Log log) {
    Assert.notNull(progressable, "a valid progressable is required to report status to Hadoop");
    TimeValue tv = HadoopCfgUtils.getTaskTimeout(cfg);
    Assert.isTrue(tv.getSeconds() <= 0 || tv.getSeconds() > lead.getSeconds(), "Hadoop timeout is shorter than the heartbeat");

    this.progressable = progressable;
    long cfgMillis = (tv.getMillis() > 0 ? tv.getMillis() : 0);
    // the scheme is simple: delay = timeout - lead, i.e. start the notification right before the timeout would fire
    this.delay = new TimeValue(Math.abs(cfgMillis - lead.getMillis()), TimeUnit.MILLISECONDS);
    this.log = log;

    String taskId;
    TaskID taskID = HadoopCfgUtils.getTaskID(cfg);

    if (taskID == null) {
        log.warn("Cannot determine task id...");
        taskId = "<unknown>";
        if (log.isTraceEnabled()) {
            log.trace("Current configuration is " + HadoopCfgUtils.asProperties(cfg));
        }
    }
    else {
        taskId = "" + taskID;
    }

    id = taskId;
}
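A worked example of the delay computation above, using the stock 10-minute mapred.task.timeout and an illustrative 15-second lead:

import java.util.concurrent.TimeUnit;

public class HeartBeatDelaySketch {
    public static void main(String[] args) {
        long taskTimeoutMillis = TimeUnit.MINUTES.toMillis(10); // mapred.task.timeout default
        long leadMillis = TimeUnit.SECONDS.toMillis(15);        // illustrative heartbeat lead
        long delayMillis = Math.abs(taskTimeoutMillis - leadMillis);
        // the first progress report fires ~15 s before Hadoop would consider the task hung
        System.out.println(delayMillis); // prints: 585000
    }
}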