private void pushProjection(final JobConf newjob, final StringBuilder readColumnsBuffer,
    final StringBuilder readColumnNamesBuffer) {
  String readColIds = readColumnsBuffer.toString();
  String readColNames = readColumnNamesBuffer.toString();
  newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds);
  newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames);

  if (LOG.isInfoEnabled()) {
    LOG.info("{} = {}", ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds);
    LOG.info("{} = {}", ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames);
  }
}
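For context, a minimal sketch of what the projection keys hold after this helper runs; the column ids, column names, and local variable names are hypothetical, while the property names come from Hive's ColumnProjectionUtils:

JobConf newjob = new JobConf();
StringBuilder ids = new StringBuilder("0,2");        // hypothetical projected column ids
StringBuilder names = new StringBuilder("id,name");  // hypothetical projected column names
pushProjection(newjob, ids, names);
// Resulting configuration entries:
//   hive.io.file.read.all.columns  = false
//   hive.io.file.readcolumn.ids    = 0,2
//   hive.io.file.readcolumn.names  = id,name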
/**
 * Creates and initializes a JobConf object that can be used to execute
 * the DAG. The configuration object will contain configurations from mapred-site
 * overlaid with key/value pairs from the conf object. Finally it will also
 * contain some hive specific configurations that do not change from DAG to DAG.
 *
 * @param hiveConf Current conf for the execution
 * @return JobConf base configuration for job execution
 * @throws IOException
 */
public JobConf createConfiguration(HiveConf hiveConf) throws IOException {
  hiveConf.setBoolean("mapred.mapper.new-api", false);

  JobConf conf = new JobConf(new TezConfiguration(hiveConf));

  conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName());

  conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false);
  conf.setBoolean("mapred.committer.job.task.cleanup.needed", false);

  conf.setClass("mapred.output.format.class", HiveOutputFormatImpl.class, OutputFormat.class);

  conf.set(MRJobConfig.OUTPUT_KEY_CLASS, HiveKey.class.getName());
  conf.set(MRJobConfig.OUTPUT_VALUE_CLASS, BytesWritable.class.getName());

  conf.set("mapred.partitioner.class", HiveConf.getVar(conf, HiveConf.ConfVars.HIVEPARTITIONER));
  conf.set("tez.runtime.partitioner.class", MRPartitioner.class.getName());

  // Removing job credential entry/ cannot be set on the tasks
  conf.unset("mapreduce.job.credentials.binary");

  hiveConf.stripHiddenConfigurations(conf);
  return conf;
}
private void setupExternalCacheConfig(boolean isPpd, String paths) {
  FileInputFormat.setInputPaths(conf, paths);
  conf.set(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "ETL");
  conf.setLong(HiveConf.ConfVars.MAPREDMINSPLITSIZE.varname, 1000);
  conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, 5000);
  conf.setBoolean(ConfVars.HIVE_ORC_MS_FOOTER_CACHE_PPD.varname, isPpd);
  conf.setBoolean(ConfVars.HIVEOPTINDEXFILTER.varname, isPpd);
}
job.setOutputCommitter(CompactorOutputCommitter.class);

job.set(FINAL_LOCATION, sd.getLocation());
job.set(TMP_LOCATION, generateTmpPath(sd));
job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
job.setBoolean(IS_COMPRESSED, sd.isCompressed());
job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
job.setInt(NUM_BUCKETS, sd.getNumBuckets());
job.setBoolean("mapreduce.map.speculative", false);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(Text.class);
jobConf.setBoolean("mapred.output.compress", true);
jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
jobConf.setOutputFormat(TextMultiOutputFormat.class);
jobConf.set(XLearningConstants.STREAM_OUTPUT_DIR, remotePath.makeQualified(dfs).toString());
OutputFormat outputFormat = ReflectionUtils.newInstance(
    conf.getClass(XLearningConfiguration.XLEARNING_OUTPUTFORMAT_CLASS,
        XLearningConfiguration.DEFAULT_XLEARNING_OUTPUTF0RMAT_CLASS, OutputFormat.class),
    jobConf);
JobID jobID = new JobID(new SimpleDateFormat("yyyyMMddHHmm").format(new Date()), 0);
TaskAttemptID taId = new TaskAttemptID(new TaskID(jobID, true, 0), 0);
jobConf.set("mapred.tip.id", taId.getTaskID().toString());
jobConf.set("mapred.task.id", taId.toString());
jobConf.set("mapred.job.id", jobID.toString());
cloned.setBoolean("mapred.task.is.map", true); List<Path> inputPaths = Utilities.getInputPaths(cloned, (MapWork) work, scratchDir, context, false); if (work instanceof MergeFileWork) { MergeFileWork mergeFileWork = (MergeFileWork) work; cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName()); cloned.set("mapred.input.format.class", mergeFileWork.getInputformat()); cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class); } else { cloned.set(Utilities.MAPRED_MAPPER_CLASS, ExecMapper.class.getName()); cloned.setBoolean("mapred.task.is.map", false); Utilities.setReduceWork(cloned, (ReduceWork) work, scratchDir, false); Utilities.createTmpDirs(cloned, (ReduceWork) work);
@Before
public void setup() throws Exception {
  conf = new JobConf();
  conf.set(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true");
  conf.setBoolean(HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, true);
  conf.set(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, "default");
  conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname,
      AcidUtils.AcidOperationalProperties.getDefault().toInt());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, DummyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, DummyRow.getColumnTypesProperty());
  conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, true);
  conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
  OrcConf.ROWS_BETWEEN_CHECKS.setLong(conf, 1);
/**
 * Runs the actual TeraSort test job through the Ignite API.
 *
 * @param gzip Whether to use GZIP.
 */
protected final void teraSort(boolean gzip) throws Exception {
  System.out.println("TeraSort ===============================================================");

  getFileSystem().delete(new Path(sortOutDir), true);

  final JobConf jobConf = new JobConf();

  jobConf.setUser(getUser());

  jobConf.set("fs.defaultFS", getFsBase());

  log().info("Desired number of reduces: " + numReduces());

  jobConf.set("mapreduce.job.reduces", String.valueOf(numReduces()));

  log().info("Desired number of maps: " + numMaps());

  final long splitSize = dataSizeBytes() / numMaps();

  log().info("Desired split size: " + splitSize);

  // Force the split to be of the desired size:
  jobConf.set("mapred.min.split.size", String.valueOf(splitSize));
  jobConf.set("mapred.max.split.size", String.valueOf(splitSize));

  jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MAPPER_STRIPED_OUTPUT.propertyName(), true);
  jobConf.setInt(HadoopJobProperty.SHUFFLE_MSG_SIZE.propertyName(), 4096);

  if (gzip)
    jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MSG_GZIP.propertyName(), true);

  jobConf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
      TextPartiallyRawComparator.class.getName());

  Job job = setupConfig(jobConf);

  HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

  IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration(), null));

  fut.get();
}
@Test
public void testDoAs() throws Exception {
  conf.setInt(ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS.varname, 1);
  conf.set(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "ETL");
  conf.setBoolean(ConfVars.HIVE_IN_TEST.varname, true);
  conf.setClass("fs.mock.impl", MockFileSystem.class, FileSystem.class);
  String badUser = UserGroupInformation.getCurrentUser().getShortUserName() + "-foo";
job.set(FINAL_LOCATION, sd.getLocation());
job.set(TMP_LOCATION, sd.getLocation() + "/" + TMPDIR + "_" + UUID.randomUUID().toString());
job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
job.setBoolean(IS_COMPRESSED, sd.isCompressed());
job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
job.setInt(NUM_BUCKETS, sd.getNumBuckets());
job.setBoolean("mapreduce.map.speculative", false);
/**
 * Creates and initializes a JobConf object that can be used to execute
 * the DAG. The configuration object will contain configurations from mapred-site
 * overlaid with key/value pairs from the hiveConf object. Finally it will also
 * contain some hive specific configurations that do not change from DAG to DAG.
 *
 * @param hiveConf Current hiveConf for the execution
 * @return JobConf base configuration for job execution
 * @throws IOException
 */
public JobConf createConfiguration(HiveConf hiveConf) throws IOException {
  hiveConf.setBoolean("mapred.mapper.new-api", false);

  JobConf conf = new JobConf(new TezConfiguration(hiveConf));

  conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName());

  conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false);
  conf.setBoolean("mapred.committer.job.task.cleanup.needed", false);

  conf.setClass("mapred.output.format.class", HiveOutputFormatImpl.class, OutputFormat.class);

  conf.set(MRJobConfig.OUTPUT_KEY_CLASS, HiveKey.class.getName());
  conf.set(MRJobConfig.OUTPUT_VALUE_CLASS, BytesWritable.class.getName());

  conf.set("mapred.partitioner.class", HiveConf.getVar(conf, HiveConf.ConfVars.HIVEPARTITIONER));
  conf.set("tez.runtime.partitioner.class", MRPartitioner.class.getName());

  // Removing job credential entry/ cannot be set on the tasks
  conf.unset("mapreduce.job.credentials.binary");

  hiveConf.stripHiddenConfigurations(conf);
  return conf;
}
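A hedged usage sketch for the method above, assuming an instance of the enclosing utility class is available as dagUtils (that name and the surrounding variables are hypothetical):

HiveConf hiveConf = new HiveConf();
JobConf baseConf = dagUtils.createConfiguration(hiveConf);
// baseConf now carries the DAG-wide settings (NullOutputCommitter, HiveKey/BytesWritable
// output classes, MRPartitioner) and can be cloned for per-vertex configuration:
JobConf vertexConf = new JobConf(baseConf);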
    int curDirNumber, int obsoleteDirNumber, HiveConf hiveConf, TxnStore txnHandler,
    long id, String jobName) throws IOException {
  job.setBoolean(IS_MAJOR, compactionType == CompactionType.MAJOR);
  if (dirsToSearch == null) {
    dirsToSearch = new StringableList();
  }
  if (baseDir != null) job.set(BASE_DIR, baseDir.toString());
  job.set(DELTA_DIRS, deltaDirs.toString());
  job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
  job.setLong(MIN_TXN, minTxn);
  job.setLong(MAX_TXN, maxTxn);
cloned.setBoolean("mapred.task.is.map", true); List<Path> inputPaths = Utilities.getInputPaths(cloned, mapWork, scratchDir, context, false); if (work instanceof MergeFileWork) { MergeFileWork mergeFileWork = (MergeFileWork) work; cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName()); cloned.set("mapred.input.format.class", mergeFileWork.getInputformat()); cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class); } else { cloned.set(Utilities.MAPRED_MAPPER_CLASS, ExecMapper.class.getName()); cloned.setBoolean("mapred.task.is.map", false); Utilities.setReduceWork(cloned, (ReduceWork) work, scratchDir, false); Utilities.createTmpDirs(cloned, (ReduceWork) work);
private JobConf initializeVertexConf(JobConf baseConf, Context context, ReduceWork reduceWork) {
  JobConf conf = new JobConf(baseConf);

  conf.set(Operator.CONTEXT_NAME_KEY, reduceWork.getName());

  // Is this required ?
  conf.set("mapred.reducer.class", ExecReducer.class.getName());

  boolean useSpeculativeExecReducers = HiveConf.getBoolVar(conf,
      HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  conf.setBoolean(org.apache.hadoop.mapreduce.MRJobConfig.REDUCE_SPECULATIVE,
      useSpeculativeExecReducers);

  return conf;
}
/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param conf              job conf to add the named output
 * @param namedOutput       named output name, it has to be a word, letters
 *                          and numbers only, cannot be the word 'part' as
 *                          that is reserved for the default output.
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class.
 * @param schema            Schema to be used for this namedOutput
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
    Class<? extends OutputFormat> outputFormatClass, Schema schema) {
  checkNamedOutputName(namedOutput);
  checkNamedOutput(conf, namedOutput, true);
  boolean isMapOnly = conf.getNumReduceTasks() == 0;
  if (schema != null)
    conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
  conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
  conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
  conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
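A minimal invocation sketch for this helper; the output name and schema are hypothetical choices, and AvroOutputFormat (Avro's old-API OutputFormat) stands in for whatever format the caller would actually register:

JobConf conf = new JobConf();
Schema schema = Schema.create(Schema.Type.STRING);   // hypothetical schema for the named output
// Register a single (non-multi) named output called "errors":
addNamedOutput(conf, "errors", false, AvroOutputFormat.class, schema);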
    int curDirNumber, int obsoleteDirNumber, HiveConf hiveConf, IMetaStoreClient msc,
    long id, String jobName) throws IOException {
  job.setBoolean(IS_MAJOR, compactionType == CompactionType.MAJOR);
  if (dirsToSearch == null) {
    dirsToSearch = new StringableList();
  }
  if (baseDir != null) job.set(BASE_DIR, baseDir.toString());
  job.set(DELTA_DIRS, deltaDirs.toString());
  job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
  job.setLong(MIN_TXN, minTxn);
  job.setLong(MAX_TXN, maxTxn);
conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false); for (int i = 0; i < mapWorkList.size(); i++) { conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName()); conf.set(Utilities.INPUT_NAME, mapWork.getName()); LOG.info("Going through each work and adding MultiMRInput"); mergeVx.addDataSource(mapWork.getName(),
private static void assertFileContentsDwrfHive(
    Type type,
    TempFile tempFile,
    Iterable<?> expectedValues)
    throws Exception {
  JobConf configuration = new JobConf(new Configuration(false));
  configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
  configuration.setBoolean(READ_ALL_COLUMNS, false);

  Path path = new Path(tempFile.getFile().getAbsolutePath());
  com.facebook.hive.orc.Reader reader = com.facebook.hive.orc.OrcFile.createReader(
      path.getFileSystem(configuration), path, configuration);

  boolean[] include = new boolean[reader.getTypes().size() + 100000];
  Arrays.fill(include, true);

  com.facebook.hive.orc.RecordReader recordReader = reader.rows(include);

  StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
  StructField field = rowInspector.getStructFieldRef("test");

  Iterator<?> iterator = expectedValues.iterator();
  Object rowData = null;
  while (recordReader.hasNext()) {
    rowData = recordReader.next(rowData);
    Object expectedValue = iterator.next();

    Object actualValue = rowInspector.getStructFieldData(rowData, field);
    actualValue = decodeRecordReaderValue(type, actualValue);
    assertColumnValueEquals(type, actualValue, expectedValue);
  }
  assertFalse(iterator.hasNext());
}