public MiniMrShim(Configuration conf, int numberOfTaskTrackers, String nameNode, int numDir)
    throws IOException {
  this.conf = conf;

  JobConf jConf = new JobConf(conf);
  jConf.set("yarn.scheduler.capacity.root.queues", "default");
  jConf.set("yarn.scheduler.capacity.root.default.capacity", "100");
  jConf.setInt(MRJobConfig.MAP_MEMORY_MB, 512);
  jConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 512);
  jConf.setInt(MRJobConfig.MR_AM_VMEM_MB, 128);
  jConf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 512);
  jConf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
  jConf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 512);

  mr = new MiniMRCluster(numberOfTaskTrackers, nameNode, numDir, null, null, jConf);
}
private JobConf initializeVertexConf(JobConf baseConf, Context context, ReduceWork reduceWork) {
  JobConf conf = new JobConf(baseConf);
  conf.set(Operator.CONTEXT_NAME_KEY, reduceWork.getName());

  // Is this required?
  conf.set("mapred.reducer.class", ExecReducer.class.getName());

  boolean useSpeculativeExecReducers = HiveConf.getBoolVar(conf,
      HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  conf.setBoolean(org.apache.hadoop.mapreduce.MRJobConfig.REDUCE_SPECULATIVE,
      useSpeculativeExecReducers);

  return conf;
}
public String runBuildStore(Props props, String url) throws Exception {
  Path tempDir = new Path(props.getString(BUILD_TEMP_DIR,
      "/tmp/vold-build-and-push-" + new Random().nextLong()));
  Path outputDir = new Path(props.getString(BUILD_OUTPUT_DIR), new URI(url).getHost());
  CheckSumType checkSumType = CheckSum.fromString(
      props.getString(CHECKSUM_TYPE, CheckSum.toString(CheckSumType.MD5)));

  JobConf configuration = new JobConf();
  Class mapperClass;
  Class<? extends InputFormat> inputFormatClass;

  configuration.set(HadoopStoreBuilder.AVRO_REC_SCHEMA, getRecordSchema());
  configuration.set(AvroStoreBuilderMapper.AVRO_KEY_SCHEMA, getKeySchema());
  configuration.set(AvroStoreBuilderMapper.AVRO_VALUE_SCHEMA, getValueSchema());
  configuration.set(VoldemortBuildAndPushJob.AVRO_KEY_FIELD, this.keyFieldName);
  configuration.set(VoldemortBuildAndPushJob.AVRO_VALUE_FIELD, this.valueFieldName);
  mapperClass = AvroStoreBuilderMapper.class;
  inputFormatClass = AvroInputFormat.class;

  // ... (the store build/push steps using tempDir, checkSumType, mapperClass and
  // inputFormatClass are not part of this excerpt)
  return outputDir.toString();
}
@Before
public void openFileSystem() throws Exception {
  conf = new JobConf();
  // all columns
  conf.set("columns", "userid,string1,subtype,decimal1,ts");
  conf.set("columns.types", "bigint,string,double,decimal,timestamp");
  // needed columns
  conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "userid,subtype");

  fs = FileSystem.getLocal(conf);
  testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
  testFilePath2 = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".2.orc");
  fs.delete(testFilePath, false);
  fs.delete(testFilePath2, false);
}
protected JobConf configStage1() throws Exception {
  final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
  conf.set("block_width", "" + block_width);
  conf.set("recursive_diagmult", "" + recursive_diagmult);
  conf.setJobName("ConCmptBlock_pass1");

  conf.setMapperClass(MapStage1.class);
  conf.setReducerClass(RedStage1.class);

  FileInputFormat.setInputPaths(conf, edge_path, curbm_path);
  FileOutputFormat.setOutputPath(conf, tempbm_path);

  conf.setNumReduceTasks(nreducers);

  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);
  return conf;
}
// excerpt from a loop over the input path strings; the enclosing for-loop head and the
// initialization of str/separator are not part of this excerpt
    continue;
  }
  Path path = new Path(pathString);
  FileSystem fs = path.getFileSystem(jobConf);
  if (ignoreInvalidPath && !fs.exists(path)) {
    continue;
  }
  final String qualifiedPath = fs.makeQualified(path).toString();
  str.append(separator)
      .append(StringUtils.escapeString(qualifiedPath));
// after the loop, publish the accumulated list as the job's input directories
jobConf.set("mapred.input.dir", str.toString());
conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster)); conf.set("stores.xml", new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef))); conf.setBoolean(VoldemortBuildAndPushJob.SAVE_KEYS, saveKeys); conf.setOutputKeyClass(BytesWritable.class); conf.setOutputValueClass(BytesWritable.class); conf.setJarByClass(getClass()); conf.setReduceSpeculativeExecution(false); FileInputFormat.setInputPaths(conf, inputPath); conf.set("final.output.dir", outputDir.toString()); conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType)); conf.set("dfs.umaskmode", "002"); FileOutputFormat.setOutputPath(conf, tempDir); conf.setNumReduceTasks(numReducers); conf.setOutputKeyClass(ByteBuffer.class); conf.setOutputValueClass(ByteBuffer.class); conf.setReducerClass(AvroStoreBuilderReducer.class); Path directoryPath = new Path(outputDir.toString(), directoryName);
reader = new BufferedReader(new InputStreamReader(xlearningProcess.getInputStream()));
List<OutputInfo> outputs = Arrays.asList(amClient.getOutputLocation());

JobConf jobConf = new JobConf(conf);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(Text.class);
jobConf.setBoolean("mapred.output.compress", true);
jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
jobConf.setOutputFormat(TextMultiOutputFormat.class);

Path remotePath = new Path(outputs.get(0).getDfsLocation()
    + "/_temporary/" + containerId.toString());
FileSystem dfs = remotePath.getFileSystem(jobConf);
jobConf.set(XLearningConstants.STREAM_OUTPUT_DIR, remotePath.makeQualified(dfs).toString());

OutputFormat outputFormat = ReflectionUtils.newInstance(
    conf.getClass(XLearningConfiguration.XLEARNING_OUTPUTFORMAT_CLASS,
        XLearningConfiguration.DEFAULT_XLEARNING_OUTPUTFORMAT_CLASS, OutputFormat.class),
    jobConf);

JobID jobID = new JobID(new SimpleDateFormat("yyyyMMddHHmm").format(new Date()), 0);
TaskAttemptID taId = new TaskAttemptID(new TaskID(jobID, true, 0), 0);
jobConf.set("mapred.tip.id", taId.getTaskID().toString());
jobConf.set("mapred.task.id", taId.toString());
jobConf.set("mapred.job.id", jobID.toString());
amClient.reportMapedTaskID(containerId, taId.toString());

RecordWriter writer = outputFormat.getRecordWriter(dfs, jobConf, "part-r", Reporter.NULL);
public long produceSamples(Path samplePath, boolean textOutput) throws Exception {
  Path input = new Path(samplePath.toString() + "-seeds");
  this.numSamples = writeSeeds(input);
  LOG.info("Generating " + this.numSamples + " of samples");

  JobConf jobConf = getJobConf();
  jobConf.set("genkmeansdataset.dimensions", Integer.toString(dimension));

  FileInputFormat.setInputPaths(jobConf, input);
  FileOutputFormat.setOutputPath(jobConf, samplePath);
  jobConf.setMapperClass(MapClass.class);

  if (textOutput) {
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);
  } else {
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);
  }
  jobConf.setNumReduceTasks(0);

  JobClient.runJob(jobConf);
  return this.numSamples;
}
conf.set("hadoop.job.ugi", hadoop_ugi); conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); conf.set("mapred.local.dir", "/tmp/map-red"); conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts")); info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts")); FileStatus[] statuses = fs.listStatus(new Path(latestPath), filter); HadoopUtils.addAllSubPaths(conf, new Path(path)); FileOutputFormat.setOutputPath(conf, new Path(location)); if(lowerCase.startsWith(HADOOP_PREFIX)) { String newKey = key.substring(HADOOP_PREFIX.length()); conf.set(newKey, getProps().get(key)); props.getBoolean(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS, false)); if(props.containsKey(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC)) { conf.set(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC, props.get(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC)); conf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
private void testFailAbortInternal(int version) throws IOException, InterruptedException {
  JobConf conf = new JobConf();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
  conf.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
  conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
  conf.setInt(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
      .FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  conf.setInt(MRConstants.APPLICATION_ATTEMPT_ID, 1);
  FileOutputFormat.setOutputPath(conf, outDir);

  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);

  // ... (committer setup, task output, and the abort whose exception is captured as `th`
  // are not part of this excerpt)

  File jobTmpDir = new File(new Path(outDir,
      FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR
          + conf.getInt(MRConstants.APPLICATION_ATTEMPT_ID, 0) + Path.SEPARATOR
          + FileOutputCommitter.TEMP_DIR_NAME).toString());
  File taskTmpDir = new File(jobTmpDir, "_" + taskID);
  File expectedFile = new File(taskTmpDir, partFile);

  assertTrue(th.getMessage().contains("fake delete failed"));
  assertTrue("job temp dir does not exist", jobTmpDir.exists());
  FileUtil.fullyDelete(new File(outDir.toString()));
}
@Override
public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split,
                                                                  JobConf conf,
                                                                  Reporter reporter)
    throws IOException {
  String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
  log.info("Input file path: " + inputPathString);
  Path inputPath = new Path(inputPathString);

  SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf),
                                                       inputPath,
                                                       conf);
  SequenceFile.Metadata meta = reader.getMetadata();

  try {
    Text keySchema = meta.get(new Text("key.schema"));
    Text valueSchema = meta.get(new Text("value.schema"));

    if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
      throw new Exception();
    }

    // update the JobConf with the schemas read from the file metadata
    conf.set("mapper.input.key.schema", keySchema.toString());
    conf.set("mapper.input.value.schema", valueSchema.toString());
  } catch (Exception e) {
    throw new IOException("Failed to load schema from file: " + inputPathString + "\n");
  }

  return super.getRecordReader(split, conf, reporter);
}
    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);  // tail of the inspector construction; head not in this excerpt
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
    "combinationAcid", inspector, false, PARTITIONS);
String[] paths = conf.getStrings("mapred.input.dir");
for (int p = 0; p < PARTITIONS; ++p) {
  partDir[p] = new Path(paths[p]);
}
Path base0 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00000");
setBlocks(base0, conf, new MockBlock("host1", "host2"));
conf.set(ValidTxnList.VALID_TXNS_KEY,
    new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());

// ... (the HiveInputFormat construction, the getSplits call, and the per-split
// inputFormatClassName checks are garbled in this excerpt; the surviving assertions
// below inspect the first two bucket splits)
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000", split.getPath().toString());
assertEquals(0, split.getStart());
assertEquals(700, split.getLength());
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001", split.getPath().toString());
assertEquals(0, split.getStart());
assertEquals(724, split.getLength());
job.addCacheFile(remoteFile.toUri());
jobConf.set("tmpfiles", secondFile.toString());

Path firstJar = makeJar(new Path(testRootDir, "distributed.first.jar"), 1);
Path secondJar = makeJar(new Path(testRootDir, "distributed.second.jar"), 2);
Path thirdJar = new Path(testRootDir, "distributed.third.jar");
localFs.copyFromLocalFile(secondJar, thirdJar);
jobConf.set("tmpjars", secondJar.toString() + "," + thirdJar.toString());
jobConf.set("tmparchives", secondArchive.toString());
/**
 * Set context for this fetch operator into the jobconf.
 * This helps InputFormats make decisions based on the scope of the complete
 * operation.
 * @param conf the configuration to modify
 * @param paths the list of input directories
 */
static void setFetchOperatorContext(JobConf conf, List<Path> paths) {
  if (paths != null) {
    StringBuilder buff = new StringBuilder();
    for (Path path : paths) {
      if (buff.length() > 0) {
        buff.append('\t');
      }
      buff.append(StringEscapeUtils.escapeJava(path.toString()));
    }
    conf.set(FETCH_OPERATOR_DIRECTORY_LIST, buff.toString());
  }
}
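For reference, a minimal usage sketch of setFetchOperatorContext; the JobConf and the two paths below are hypothetical and only illustrate the tab-separated list written under FETCH_OPERATOR_DIRECTORY_LIST:

// hypothetical caller: record two input directories for a fetch operation
JobConf fetchConf = new JobConf();
List<Path> fetchPaths = Arrays.asList(
    new Path("/warehouse/t1/part=1"),   // illustrative paths, not from the original source
    new Path("/warehouse/t1/part=2"));
setFetchOperatorContext(fetchConf, fetchPaths);
// fetchConf now carries both directories as a single tab-separated, Java-escaped string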
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());

job.setNumReduceTasks(0);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);

job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
job.set("tmpjars", addedJars);
private JobConf createBaseJobConf(HiveConf conf, String jobName, Table t, StorageDescriptor sd,
                                  ValidWriteIdList writeIds, CompactionInfo ci) {
  JobConf job = new JobConf(conf);
  job.setJobName(jobName);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setJarByClass(CompactorMR.class);
  LOG.debug("User jar set to " + job.getJar());
  job.setMapperClass(CompactorMap.class);
  job.setNumReduceTasks(0);
  job.setInputFormat(CompactorInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setOutputCommitter(CompactorOutputCommitter.class);

  job.set(FINAL_LOCATION, sd.getLocation());
  job.set(TMP_LOCATION, generateTmpPath(sd));
  job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
  job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
  job.setBoolean(IS_COMPRESSED, sd.isCompressed());
  job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
  job.setInt(NUM_BUCKETS, sd.getNumBuckets());
  job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
  // ... (remaining compactor settings elided in this excerpt)
  return job;
}
job.setMapperClass(ExecMapper.class);
job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
job.setReducerClass(ExecReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.set(MRJobConfig.JOB_NAME, "JOB" + Utilities.randGen.nextInt());

// sampling fallback: roll back to a single reducer when sampling fails
// (the enclosing try and the preceding catch head are not part of this excerpt)
  console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
  rWork.setNumReduceTasks(1);
  job.setNumReduceTasks(1);
} catch (Exception e) {
  LOG.error("Sampling error", e);
  console.printError(e.toString(),   // head of this call reconstructed; the excerpt truncated it
      "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
  rWork.setNumReduceTasks(1);
  job.setNumReduceTasks(1);
}
conf.setInt("mapred.max.split.size", 50); RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL); writer.write(NullWritable.get(), .end() .build(); conf.set("sarg.pushdown", toKryo(sarg)); conf.set("hive.io.file.readcolumn.names", "z"); properties.setProperty("columns", "z"); properties.setProperty("columns.types", "string"); inspector = (StructObjectInspector) serde.getObjectInspector(); InputFormat<?,?> in = new OrcInputFormat(); FileInputFormat.setInputPaths(conf, testFilePath.toString()); InputSplit[] splits = in.getSplits(conf, 1); assertEquals(0, splits.length);