void init(EsInputSplit esSplit, Configuration cfg, Progressable progressable) {
    // get a copy to override the host/port
    Settings settings = HadoopSettingsManager.loadFrom(cfg).copy().load(esSplit.getPartition().getSerializedSettings());

    if (log.isTraceEnabled()) {
        log.trace(String.format("Init shard reader from cfg %s", HadoopCfgUtils.asProperties(cfg)));
        log.trace(String.format("Init shard reader w/ settings %s", settings));
    }

    this.esSplit = esSplit;

    // initialize mapping/scroll reader
    InitializationUtils.setValueReaderIfNotSet(settings, WritableValueReader.class, log);

    PartitionDefinition part = esSplit.getPartition();
    PartitionReader partitionReader = RestService.createReader(settings, part, log);

    this.scrollReader = partitionReader.scrollReader;
    this.client = partitionReader.client;
    this.queryBuilder = partitionReader.queryBuilder;
    this.progressable = progressable;

    // in Hadoop-like envs (Spark) the progressable might be null and thus the heart-beat is not needed
    if (progressable != null) {
        beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);
    }

    if (log.isDebugEnabled()) {
        log.debug(String.format("Initializing RecordReader for [%s]", esSplit));
    }
}
@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    conf.setOutputFormat(EsOutputFormat.class);
    // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat
    Settings set = loadSettings(conf, false);

    Log log = LogFactory.getLog(EsTap.class);
    InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log);

    // NB: this property needs to be set even though it is not used - and since a URI causes problems, use only the resource/file
    //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource());
    HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite());
    HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName());

    if (log.isTraceEnabled()) {
        log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf));
    }
}
@Override
public FileSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    // first, merge input table properties (since there's no access to them ...)
    Settings settings = HadoopSettingsManager.loadFrom(job);
    //settings.merge(IOUtils.propsFromString(settings.getProperty(HiveConstants.INPUT_TBL_PROPERTIES)));

    Log log = LogFactory.getLog(getClass());
    // move on to initialization
    InitializationUtils.setValueReaderIfNotSet(settings, HiveValueReader.class, log);
    if (settings.getOutputAsJson() == false) {
        // only set the fields if we aren't asking for raw JSON
        settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS, StringUtils.concatenate(HiveUtils.columnToAlias(settings), ","));
    }
    HiveUtils.init(settings, log);

    // decorate original splits as FileSplit
    InputSplit[] shardSplits = super.getSplits(job, numSplits);
    FileSplit[] wrappers = new FileSplit[shardSplits.length];
    Path path = new Path(job.get(HiveConstants.TABLE_LOCATION));
    for (int i = 0; i < wrappers.length; i++) {
        wrappers[i] = new EsHiveSplit(shardSplits[i], path);
    }
    return wrappers;
}
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    this.collector = collector;

    LinkedHashMap copy = new LinkedHashMap(conf);
    copy.putAll(spoutConfig);

    StormSettings settings = new StormSettings(copy);

    InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);

    ackReads = settings.getStormSpoutReliable();

    if (ackReads) {
        inTransitQueue = new LinkedHashMap<Object, Object>();
        replayQueue = new LinkedList<Object[]>();
        retries = new HashMap<Object, Integer>();
        queueSize = settings.getStormSpoutReliableQueueSize();
        tupleRetries = settings.getStormSpoutReliableRetriesPerTuple();
        tupleFailure = settings.getStormSpoutReliableTupleFailureHandling();
    }

    int totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    int currentTask = context.getThisTaskIndex();

    // match the partitions based on the current topology
    List<PartitionDefinition> partitions = RestService.findPartitions(settings, log);
    List<PartitionDefinition> assigned = RestService.assignPartitions(partitions, currentTask, totalTasks);
    iterator = RestService.multiReader(settings, assigned, log);
}
private void init(String location, Job job, boolean read) {
    Settings settings = HadoopSettingsManager.loadFrom(job.getConfiguration()).merge(properties);
    settings = (read ? settings.setResourceRead(location) : settings.setResourceWrite(location));

    InitializationUtils.checkIdForOperation(settings);
    InitializationUtils.setValueWriterIfNotSet(settings, PigValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(settings, PigValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, PigBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, PigFieldExtractor.class, log);

    isJSON = settings.getOutputAsJson();
}
static Settings addDefaultsToSettings(Properties flowProperties, Properties tapProperties, Log log) {
    Settings settings = HadoopSettingsManager.loadFrom(CascadingUtils.extractOriginalProperties(flowProperties)).merge(tapProperties);

    InitializationUtils.validateSettings(settings);
    InitializationUtils.setValueWriterIfNotSet(settings, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, CascadingFieldExtractor.class, log);

    return settings;
}
void init(ShardInputSplit esSplit, Configuration cfg, Progressable progressable) {
    // get a copy to override the host/port
    Settings settings = HadoopSettingsManager.loadFrom(cfg).copy().load(esSplit.settings);

    if (log.isTraceEnabled()) {
        log.trace(String.format("Init shard reader from cfg %s", HadoopCfgUtils.asProperties(cfg)));
        log.trace(String.format("Init shard reader w/ settings %s", esSplit.settings));
    }

    this.esSplit = esSplit;

    // initialize mapping/scroll reader
    InitializationUtils.setValueReaderIfNotSet(settings, WritableValueReader.class, log);

    PartitionDefinition part = new PartitionDefinition(esSplit.nodeIp, esSplit.httpPort, esSplit.nodeName, esSplit.nodeId, esSplit.shardId, esSplit.onlyNode, settings.save(), esSplit.mapping);
    PartitionReader partitionReader = RestService.createReader(settings, part, log);

    this.scrollReader = partitionReader.scrollReader;
    this.client = partitionReader.client;
    this.queryBuilder = partitionReader.queryBuilder;
    this.progressable = progressable;

    // in Hadoop-like envs (Spark) the progressable might be null and thus the heart-beat is not needed
    if (progressable != null) {
        beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);
    }

    if (log.isDebugEnabled()) {
        log.debug(String.format("Initializing RecordReader for [%s]", esSplit));
    }
}