private void init(TableDesc tableDesc, boolean read) { Configuration cfg = getConf(); // NB: we can't just merge the table properties in, we need to save them per input/output otherwise clashes occur which confuse Hive Settings settings = HadoopSettingsManager.loadFrom(cfg); //settings.setProperty((read ? HiveConstants.INPUT_TBL_PROPERTIES : HiveConstants.OUTPUT_TBL_PROPERTIES), IOUtils.propsToString(tableDesc.getProperties())); if (read) { // no generic setting } else { // replace the default committer when using the old API HadoopCfgUtils.setOutputCommitterClass(cfg, EsOutputFormat.EsOutputCommitter.class.getName()); } Assert.hasText(tableDesc.getProperties().getProperty(TABLE_LOCATION), String.format( "no table location [%s] declared by Hive resulting in abnormal execution;", TABLE_LOCATION)); }
@Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { conf.setOutputFormat(EsOutputFormat.class); // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat Settings set = loadSettings(conf, false); Log log = LogFactory.getLog(EsTap.class); InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log); InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log); InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log); InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log); // NB: we need to set this property even though it is not being used - and since and URI causes problem, use only the resource/file //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource()); HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite()); HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName()); if (log.isTraceEnabled()) { log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf)); } }
private void init(TableDesc tableDesc, boolean read) { Configuration cfg = getConf(); // NB: we can't just merge the table properties in, we need to save them per input/output otherwise clashes occur which confuse Hive Settings settings = HadoopSettingsManager.loadFrom(cfg); //settings.setProperty((read ? HiveConstants.INPUT_TBL_PROPERTIES : HiveConstants.OUTPUT_TBL_PROPERTIES), IOUtils.propsToString(tableDesc.getProperties())); if (read) { // no generic setting } else { // replace the default committer when using the old API HadoopCfgUtils.setOutputCommitterClass(cfg, EsOutputFormat.EsOutputCommitter.class.getName()); } Assert.hasText(tableDesc.getProperties().getProperty(TABLE_LOCATION), String.format( "no table location [%s] declared by Hive resulting in abnormal execution;", TABLE_LOCATION)); }
@Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { conf.setOutputFormat(EsOutputFormat.class); // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat Settings set = loadSettings(conf, false); Log log = LogFactory.getLog(EsTap.class); InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log); InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log); InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log); InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log); // NB: we need to set this property even though it is not being used - and since and URI causes problem, use only the resource/file //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource()); HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite()); HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName()); if (log.isTraceEnabled()) { log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf)); } }