static Settings addDefaultsToSettings(Properties flowProperties, Properties tapProperties, Log log) {
    Settings settings = HadoopSettingsManager.loadFrom(CascadingUtils.extractOriginalProperties(flowProperties)).merge(tapProperties);

    InitializationUtils.validateSettings(settings);

    InitializationUtils.setValueWriterIfNotSet(settings, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, CascadingFieldExtractor.class, log);

    return settings;
}
private void init(Configuration cfg) throws IOException {
    Settings settings = HadoopSettingsManager.loadFrom(cfg);
    Assert.hasText(settings.getResourceWrite(), String.format("No resource ['%s'] (index/query/location) specified", ES_RESOURCE));

    // Need to discover the ES version before checking if the index exists.
    InitializationUtils.discoverEsVersion(settings, log);
    InitializationUtils.checkIdForOperation(settings);
    InitializationUtils.checkIndexExistence(settings);

    if (HadoopCfgUtils.getReduceTasks(cfg) != null) {
        if (HadoopCfgUtils.getSpeculativeReduce(cfg)) {
            log.warn("Speculative execution enabled for reducer - consider disabling it to prevent data corruption");
        }
    }
    else {
        if (HadoopCfgUtils.getSpeculativeMap(cfg)) {
            log.warn("Speculative execution enabled for mapper - consider disabling it to prevent data corruption");
        }
    }

    //log.info(String.format("Starting to write/index to [%s][%s]", settings.getTargetUri(), settings.getTargetResource()));
}
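Both branches above only warn; the job still runs with speculative execution enabled. A minimal sketch of actually disabling it before job submission, assuming the standard Hadoop 2.x+ property names (mapreduce.map.speculative / mapreduce.reduce.speculative):

import org.apache.hadoop.conf.Configuration;

// Sketch: disable speculative execution so duplicate task attempts cannot
// index the same documents twice. Property names assume Hadoop 2.x+; older
// releases used mapred.map.tasks.speculative.execution and its reduce twin.
Configuration conf = new Configuration();
conf.setBoolean("mapreduce.map.speculative", false);
conf.setBoolean("mapreduce.reduce.speculative", false);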
public static PartitionWriter createWriter(Settings settings, long currentSplit, int totalSplits, Log log) {
    Version.logVersion();

    InitializationUtils.validateSettings(settings);
    InitializationUtils.discoverEsVersion(settings, log);
    InitializationUtils.validateSettingsForWriting(settings);
    InitializationUtils.discoverNodesIfNeeded(settings, log);
    InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
    InitializationUtils.filterNonDataNodesIfNeeded(settings, log);
    InitializationUtils.filterNonIngestNodesIfNeeded(settings, log);
static void initialDiscovery(Settings settings, Log log) {
    InitializationUtils.discoverEsVersion(settings, log);
    InitializationUtils.discoverNodesIfNeeded(settings, log);
    InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
    InitializationUtils.filterNonDataNodesIfNeeded(settings, log);
    InitializationUtils.filterNonIngestNodesIfNeeded(settings, log);
}
static void init(Settings settings, Log log) {
    InitializationUtils.checkIdForOperation(settings);
    InitializationUtils.setFieldExtractorIfNotSet(settings, HiveFieldExtractor.class, log);
    InitializationUtils.discoverEsVersion(settings, log);
}
private void lazyInitializeWrite() {
    if (writeInitialized) {
        return;
    }
    writeInitialized = true;

    // We want to use just the table properties here, but we need to add the internal version to the settings.
    // We don't want to mutate the underlying table properties (the settings implementations differ greatly on
    // their mutability) so we just use a composite settings object.
    Settings tableSettings = HadoopSettingsManager.loadFrom(tableProperties);
    Settings versionSetting = new PropertiesSettings();
    versionSetting.setInternalVersion(version);
    Settings settings = new CompositeSettings(Arrays.asList(versionSetting, tableSettings));

    InitializationUtils.setValueWriterIfNotSet(settings, HiveValueWriter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, HiveFieldExtractor.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, HiveBytesConverter.class, log);

    this.command = BulkCommands.create(settings, null, version);
}
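The composite trick above generalizes to any one-off override. A minimal sketch, assuming CompositeSettings resolves lookups in list order, so the first layer wins and the table-backed layer is never mutated:

// Hypothetical layering sketch (the precedence order is an assumption):
Settings overrides = new PropertiesSettings();                    // private, safe to mutate
overrides.setInternalVersion(version);
Settings base = HadoopSettingsManager.loadFrom(tableProperties);  // left untouched
Settings layered = new CompositeSettings(Arrays.asList(overrides, base));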
Version.logVersion();

InitializationUtils.validateSettings(settings);
EsMajorVersion version = InitializationUtils.discoverEsVersion(settings, log);
InitializationUtils.validateSettingsForReading(settings);

List<NodeInfo> nodes = InitializationUtils.discoverNodesIfNeeded(settings, log);
InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
InitializationUtils.filterNonDataNodesIfNeeded(settings, log);
InitializationUtils.filterNonIngestNodesIfNeeded(settings, log);
public static PartitionWriter createWriter(Settings settings, int currentSplit, int totalSplits, Log log) {
    Version.logVersion();

    InitializationUtils.validateSettings(settings);
    InitializationUtils.discoverEsVersion(settings, log);
    InitializationUtils.discoverNodesIfNeeded(settings, log);
    InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
    InitializationUtils.filterNonDataNodesIfNeeded(settings, log);

    List<String> nodes = SettingsUtils.discoveredOrDeclaredNodes(settings);

    // check for an invalid split (applicable when running in non-MR environments) - in that case fall back to Random
    int selectedNode = (currentSplit < 0) ? new Random().nextInt(nodes.size()) : currentSplit % nodes.size();
    // pin the appropriate node first, to spread the load before-hand
    SettingsUtils.pinNode(settings, nodes.get(selectedNode));

    Resource resource = new Resource(settings, false);
    log.info(String.format("Writing to [%s]", resource));

    // single index vs multi indices
    IndexExtractor iformat = ObjectUtils.instantiate(settings.getMappingIndexExtractorClassName(), settings);
    iformat.compile(resource.toString());

    RestRepository repository = (iformat.hasPattern()
            ? initMultiIndices(settings, currentSplit, resource, log)
            : initSingleIndex(settings, currentSplit, resource, log));

    return new PartitionWriter(settings, currentSplit, totalSplits, repository);
}
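The node pinning above is plain modulo arithmetic with a random fallback for negative splits. A hypothetical standalone helper (pickNode is not part of the library) isolates the rule:

import java.util.Random;

// Hypothetical helper mirroring the selection logic in createWriter: a negative
// split id (non-MR environments) maps to a random node; anything else wraps
// around the discovered node list.
static int pickNode(int currentSplit, int nodeCount) {
    return (currentSplit < 0) ? new Random().nextInt(nodeCount) : currentSplit % nodeCount;
}

// e.g. with 3 nodes: split 0 -> node 0, split 4 -> node 1, split -1 -> a random node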
public EsHiveRecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) {
    // force the table properties to be merged into the configuration
    // NB: the properties are also available in HiveConstants#OUTPUT_TBL_PROPERTIES
    Settings settings = HadoopSettingsManager.loadFrom(jc).merge(tableProperties);

    Log log = LogFactory.getLog(getClass());

    // NB: ESSerDe is already initialized at this stage but should still have a reference to the same cfg object
    // NB: the value writer is not needed by Hive but it's set for consistency and debugging purposes
    InitializationUtils.setValueWriterIfNotSet(settings, HiveValueWriter.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, HiveBytesConverter.class, log);

    // set write resource
    settings.setResourceWrite(settings.getResourceWrite());

    HiveUtils.init(settings, log);

    return new EsHiveRecordWriter(jc, progress);
}
void init(EsInputSplit esSplit, Configuration cfg, Progressable progressable) {
    // get a copy to override the host/port
    Settings settings = HadoopSettingsManager.loadFrom(cfg).copy().load(esSplit.getPartition().getSerializedSettings());

    if (log.isTraceEnabled()) {
        log.trace(String.format("Init shard reader from cfg %s", HadoopCfgUtils.asProperties(cfg)));
        log.trace(String.format("Init shard reader w/ settings %s", settings));
    }

    this.esSplit = esSplit;

    // initialize mapping/ scroll reader
    InitializationUtils.setValueReaderIfNotSet(settings, WritableValueReader.class, log);

    PartitionDefinition part = esSplit.getPartition();
    PartitionReader partitionReader = RestService.createReader(settings, part, log);

    this.scrollReader = partitionReader.scrollReader;
    this.client = partitionReader.client;
    this.queryBuilder = partitionReader.queryBuilder;
    this.progressable = progressable;

    // in Hadoop-like envs (Spark) the progressable might be null and thus the heart-beat is not needed
    if (progressable != null) {
        beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);
    }

    if (log.isDebugEnabled()) {
        log.debug(String.format("Initializing RecordReader for [%s]", esSplit));
    }
}
@Test
public void testValidateClientOnlyNodesWithDefaultData() throws Exception {
    Settings set = new TestSettings();
    set.setProperty(ES_NODES_CLIENT_ONLY, "true");
    validateSettings(set);
}
@Override
public void initialize(Configuration conf, Properties tbl, Properties partitionProperties) throws SerDeException {
    inspector = HiveUtils.structObjectInspector(tbl);
    structTypeInfo = HiveUtils.typeInfo(inspector);
    cfg = conf;
    settings = (cfg != null ? HadoopSettingsManager.loadFrom(cfg).merge(tbl) : HadoopSettingsManager.loadFrom(tbl));
    alias = HiveUtils.alias(settings);
    version = InitializationUtils.discoverEsVersion(settings, log);

    HiveUtils.fixHive13InvalidComments(settings, tbl);
    this.tableProperties = tbl;

    trace = log.isTraceEnabled();
    outputJSON = settings.getOutputAsJson();
    if (outputJSON) {
        jsonFieldName = new Text(HiveUtils.discoverJsonFieldName(settings, alias));
    }
}
private void init(Configuration cfg) throws IOException {
    Settings settings = HadoopSettingsManager.loadFrom(cfg);
    Assert.hasText(settings.getResourceWrite(), String.format("No resource ['%s'] (index/query/location) specified", ES_RESOURCE));

    // lazy-init RestRepository
    RestRepository client = null;
    InitializationUtils.checkIdForOperation(settings);
    InitializationUtils.checkIndexExistence(settings, client);

    if (HadoopCfgUtils.getReduceTasks(cfg) != null) {
        if (HadoopCfgUtils.getSpeculativeReduce(cfg)) {
            log.warn("Speculative execution enabled for reducer - consider disabling it to prevent data corruption");
        }
    }
    else {
        if (HadoopCfgUtils.getSpeculativeMap(cfg)) {
            log.warn("Speculative execution enabled for mapper - consider disabling it to prevent data corruption");
        }
    }

    //log.info(String.format("Starting to write/index to [%s][%s]", settings.getTargetUri(), settings.getTargetResource()));
}
@Test
public void createFieldExtractor() {
    Settings settings = new TestSettings();
    settings.setResourceWrite("test/{field}");
    settings.setInternalVersion(EsMajorVersion.V_7_X);
    InitializationUtils.setFieldExtractorIfNotSet(settings, MapFieldExtractor.class, LOG);

    IndexExtractor iformat = ObjectUtils.instantiate(settings.getMappingIndexExtractorClassName(), settings);
    iformat.compile(new Resource(settings, false).toString());
    assertThat(iformat.hasPattern(), is(true));

    Map<String, String> data = new HashMap<String, String>();
    data.put("field", "data");

    Object field = iformat.field(data);
    assertThat(field.toString(), equalTo("\"_index\":\"test\",\"_type\":\"data\""));
}
@Override
public void sinkConfInit(FlowProcess<Properties> flowProcess, Tap<Properties, ScrollQuery, Object> tap, Properties conf) {
    initClient(conf, false);
    InitializationUtils.checkIndexExistence(client);
}
protected void init() throws IOException {
    //int instances = detectNumberOfInstances(cfg);
    int currentInstance = detectCurrentInstance(cfg);

    if (log.isTraceEnabled()) {
        log.trace(String.format("EsRecordWriter instance [%s] initiating discovery of target shard...", currentInstance));
    }

    Settings settings = HadoopSettingsManager.loadFrom(cfg).copy();

    if (log.isTraceEnabled()) {
        log.trace(String.format("Init shard writer from cfg %s", HadoopCfgUtils.asProperties(cfg)));
    }

    InitializationUtils.setValueWriterIfNotSet(settings, WritableValueWriter.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, WritableBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, MapWritableFieldExtractor.class, log);

    PartitionWriter pw = RestService.createWriter(settings, currentInstance, -1, log);
    this.repository = pw.repository;

    if (progressable != null) {
        this.beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);
        this.beat.start();
    }
}
Version.logVersion();

InitializationUtils.validateSettings(settings);
InitializationUtils.validateSettingsForReading(settings);
EsMajorVersion version = InitializationUtils.discoverEsVersion(settings, log);

List<NodeInfo> nodes = InitializationUtils.discoverNodesIfNeeded(settings, log);
InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
InitializationUtils.filterNonDataNodesIfNeeded(settings, log);
InitializationUtils.filterNonIngestNodesIfNeeded(settings, log);
Map<Shard, Node> targetShards = null;

InitializationUtils.validateSettings(settings);
InitializationUtils.discoverEsVersion(settings, log);
InitializationUtils.discoverNodesIfNeeded(settings, log);
InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
InitializationUtils.filterNonDataNodesIfNeeded(settings, log);