@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    conf.setOutputFormat(EsOutputFormat.class);
    // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat
    Settings set = loadSettings(conf, false);

    Log log = LogFactory.getLog(EsTap.class);
    InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log);

    // NB: this property needs to be set even though it is not used - and since a URI causes problems, use only the resource/file
    //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource());
    HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite());
    HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName());

    if (log.isTraceEnabled()) {
        log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf));
    }
}

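A minimal sketch of how a flow might consume this sink, following the publicly documented es-hadoop Cascading API; the input path, index name ("radio/artists"), field list, and node address are illustrative placeholders.

// Illustrative only: wires a local source Tap through a pipe into an EsTap sink.
Properties props = new Properties();
props.setProperty("es.nodes", "localhost:9200"); // assumed cluster address
Tap in = new Lfs(new TextDelimited(new Fields("id", "name", "url", "picture")), "src/test/resources/artists.dat");
Tap out = new EsTap("radio/artists", new Fields("name", "url", "picture"));
new HadoopFlowConnector(props).connect(in, out, new Pipe("write-to-Es")).complete();
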
protected void init() throws IOException {
    //int instances = detectNumberOfInstances(cfg);
    int currentInstance = detectCurrentInstance(cfg);

    if (log.isTraceEnabled()) {
        log.trace(String.format("EsRecordWriter instance [%s] initiating discovery of target shard...", currentInstance));
    }

    Settings settings = HadoopSettingsManager.loadFrom(cfg).copy();

    if (log.isTraceEnabled()) {
        log.trace(String.format("Init shard writer from cfg %s", HadoopCfgUtils.asProperties(cfg)));
    }

    InitializationUtils.setValueWriterIfNotSet(settings, WritableValueWriter.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, WritableBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, MapWritableFieldExtractor.class, log);

    PartitionWriter pw = RestService.createWriter(settings, currentInstance, -1, log);
    this.repository = pw.repository;

    if (progressable != null) {
        this.beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);
        this.beat.start();
    }
}

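A minimal sketch of the old (mapred) API job setup that drives this record writer, based on the documented es-hadoop configuration; the node address and index are placeholders.

// Illustrative only: configures an old-API MapReduce job to write MapWritable documents to Elasticsearch.
JobConf conf = new JobConf();
conf.setSpeculativeExecution(false);       // recommended when writing to an external system
conf.set("es.nodes", "localhost:9200");    // assumed cluster address
conf.set("es.resource", "radio/artists");  // placeholder target resource
conf.setOutputFormat(EsOutputFormat.class);
conf.setMapOutputValueClass(MapWritable.class);
JobClient.runJob(conf);
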
public EsHiveRecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) {
    // force the table properties to be merged into the configuration
    // NB: the properties are also available in HiveConstants#OUTPUT_TBL_PROPERTIES
    Settings settings = HadoopSettingsManager.loadFrom(jc).merge(tableProperties);

    Log log = LogFactory.getLog(getClass());
    // NB: ESSerDe is already initialized at this stage but should still have a reference to the same cfg object
    // NB: the value writer is not needed by Hive but it's set for consistency and debugging purposes
    InitializationUtils.setValueWriterIfNotSet(settings, HiveValueWriter.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, HiveBytesConverter.class, log);

    // set write resource (the get/set round-trip stores the resolved value back as an explicit property)
    settings.setResourceWrite(settings.getResourceWrite());

    HiveUtils.init(settings, log);

    return new EsHiveRecordWriter(jc, progress);
}

@Override
public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
    this.collector = collector;

    LinkedHashMap copy = new LinkedHashMap(conf);
    copy.putAll(boltConfig);

    StormSettings settings = new StormSettings(copy);
    flushOnTickTuple = settings.getStormTickTupleFlush();
    ackWrites = settings.getStormBoltAck();

    // trigger manual flush
    if (ackWrites) {
        settings.setProperty(ES_BATCH_FLUSH_MANUAL, Boolean.TRUE.toString());

        // align Bolt / es-hadoop batch settings
        numberOfEntries = settings.getStormBulkSize();
        settings.setProperty(ES_BATCH_SIZE_ENTRIES, String.valueOf(numberOfEntries));

        inflightTuples = new ArrayList<Tuple>(numberOfEntries + 1);
    }

    int totalTasks = context.getComponentTasks(context.getThisComponentId()).size();

    InitializationUtils.setValueWriterIfNotSet(settings, StormValueWriter.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, StormTupleBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, StormTupleFieldExtractor.class, log);

    writer = RestService.createWriter(settings, context.getThisTaskIndex(), totalTasks, log);
}

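A minimal sketch of wiring the bolt into a topology, following the documented es-hadoop Storm integration; the spout class, parallelism hints, and index are placeholders.

// Illustrative only: EsBolt receives tuples from an upstream spout and indexes them.
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout", new RandomSentenceSpout(), 10); // placeholder spout
builder.setBolt("es-bolt", new EsBolt("storm/docs"), 5).shuffleGrouping("spout");
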
private void lazyInitializeWrite() {
    if (writeInitialized) {
        return;
    }
    writeInitialized = true;

    // We want to use just the table properties here, but we need to add the internal version to the settings.
    // We don't want to mutate the underlying table properties (the settings implementations differ greatly in
    // their mutability) so we just use a composite settings object.
    Settings tableSettings = HadoopSettingsManager.loadFrom(tableProperties);
    Settings versionSetting = new PropertiesSettings();
    versionSetting.setInternalVersion(version);
    Settings settings = new CompositeSettings(Arrays.asList(versionSetting, tableSettings));

    InitializationUtils.setValueWriterIfNotSet(settings, HiveValueWriter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, HiveFieldExtractor.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, HiveBytesConverter.class, log);

    this.command = BulkCommands.create(settings, null, version);
}

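A hypothetical sketch of the layering idea behind the composite settings (not the actual CompositeSettings implementation): reads consult each layer in order and return the first match, while writes land only in the mutable head layer, leaving the underlying table properties untouched.

import java.util.List;
import java.util.Properties;

// Hypothetical illustration of first-match layered lookup with a mutable overlay.
class LayeredSettings {
    private final List<Properties> layers; // layers.get(0) is the mutable overlay

    LayeredSettings(List<Properties> layers) {
        this.layers = layers;
    }

    String get(String key) {
        for (Properties layer : layers) {
            String value = layer.getProperty(key);
            if (value != null) {
                return value; // first layer that defines the key wins
            }
        }
        return null;
    }

    void set(String key, String value) {
        layers.get(0).setProperty(key, value); // never mutate the lower layers
    }
}
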
private void init(String location, Job job, boolean read) {
    Settings settings = HadoopSettingsManager.loadFrom(job.getConfiguration()).merge(properties);
    settings = (read ? settings.setResourceRead(location) : settings.setResourceWrite(location));

    InitializationUtils.checkIdForOperation(settings);
    InitializationUtils.setValueWriterIfNotSet(settings, PigValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(settings, PigValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, PigBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, PigFieldExtractor.class, log);

    isJSON = settings.getOutputAsJson();
}

static Settings addDefaultsToSettings(Properties flowProperties, Properties tapProperties, Log log) {
    Settings settings = HadoopSettingsManager.loadFrom(CascadingUtils.extractOriginalProperties(flowProperties)).merge(tapProperties);

    InitializationUtils.validateSettings(settings);
    InitializationUtils.setValueWriterIfNotSet(settings, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, CascadingFieldExtractor.class, log);

    return settings;
}

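A hypothetical sketch of the "IfNotSet" guard these helpers share (not the actual InitializationUtils code): a default implementation class is applied only when the user has not configured one, so explicit settings always win over framework defaults.

// Hypothetical illustration: apply a default class name only if the key is absent.
static void setIfNotSet(Properties settings, String key, Class<?> defaultType, Log log) {
    if (settings.getProperty(key) == null) {
        settings.setProperty(key, defaultType.getName());
        if (log != null && log.isDebugEnabled()) {
            log.debug(String.format("Using default [%s] for setting [%s]", defaultType.getName(), key));
        }
    }
}
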
private Settings settings() {
    Settings set = new TestSettings();

    set.setInternalVersion(version);
    set.setProperty(ConfigurationOptions.ES_INPUT_JSON, Boolean.toString(jsonInput));
    InitializationUtils.setValueWriterIfNotSet(set, JdkValueWriter.class, null);
    InitializationUtils.setFieldExtractorIfNotSet(set, MapFieldExtractor.class, null);
    InitializationUtils.setBytesConverterIfNeeded(set, JdkBytesConverter.class, null);

    set.setProperty(ConfigurationOptions.ES_WRITE_OPERATION, operation);
    set.setResourceWrite("foo/bar");
    if (isUpdateOp()) {
        set.setProperty(ConfigurationOptions.ES_MAPPING_ID, "<2>");
    }
    return set;
}

private void lazyInitializeWrite() {
    if (writeInitialized) {
        return;
    }
    writeInitialized = true;

    Settings settings = HadoopSettingsManager.loadFrom(tableProperties);
    InitializationUtils.setValueWriterIfNotSet(settings, HiveValueWriter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, HiveFieldExtractor.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, HiveBytesConverter.class, log);

    this.command = BulkCommands.create(settings, null, version);
}