/**
 * Registers the {@link GenericData} implementation that a job should use as
 * its data model.
 *
 * @param job        the job configuration to update
 * @param modelClass the data model class, stored under {@code CONF_DATA_MODEL}
 */
public static void setDataModelClass(JobConf job, Class<? extends GenericData> modelClass) {
  final Class<GenericData> requiredBase = GenericData.class;
  job.setClass(CONF_DATA_MODEL, modelClass, requiredBase);
}
/** * Creates and initializes a JobConf object that can be used to execute * the DAG. The configuration object will contain configurations from mapred-site * overlaid with key/value pairs from the conf object. Finally it will also * contain some hive specific configurations that do not change from DAG to DAG. * * @param hiveConf Current conf for the execution * @return JobConf base configuration for job execution * @throws IOException */ public JobConf createConfiguration(HiveConf hiveConf) throws IOException { hiveConf.setBoolean("mapred.mapper.new-api", false); JobConf conf = new JobConf(new TezConfiguration(hiveConf)); conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName()); conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false); conf.setBoolean("mapred.committer.job.task.cleanup.needed", false); conf.setClass("mapred.output.format.class", HiveOutputFormatImpl.class, OutputFormat.class); conf.set(MRJobConfig.OUTPUT_KEY_CLASS, HiveKey.class.getName()); conf.set(MRJobConfig.OUTPUT_VALUE_CLASS, BytesWritable.class.getName()); conf.set("mapred.partitioner.class", HiveConf.getVar(conf, HiveConf.ConfVars.HIVEPARTITIONER)); conf.set("tez.runtime.partitioner.class", MRPartitioner.class.getName()); // Removing job credential entry/ cannot be set on the tasks conf.unset("mapreduce.job.credentials.binary"); hiveConf.stripHiddenConfigurations(conf); return conf; }
/**
 * Adds a named output for the job.
 *
 * <p>The name is validated first via {@code checkNamedOutputName} and
 * {@code checkNamedOutput}; what those helpers reject is defined outside this
 * method. The output is then appended to the space-separated
 * {@code NAMED_OUTPUTS} list, and its format class, multi flag and (when
 * supplied) schema are stored under keys prefixed with {@code MO_PREFIX}.
 *
 * @param conf              job conf to add the named output to
 * @param namedOutput       named output name; it has to be a word, letters
 *                          and numbers only, and cannot be the word 'part'
 *                          as that is reserved for the default output
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class
 * @param schema            schema to use for this named output; may be
 *                          {@code null}, in which case no schema entry is written
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
    Class<? extends OutputFormat> outputFormatClass, Schema schema) {
  checkNamedOutputName(namedOutput);
  checkNamedOutput(conf, namedOutput, true);

  // The schema is optional; only record it when the caller provided one.
  if (schema != null) {
    conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
  }

  // Append to the existing list; entries are separated by a single space.
  conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
  conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
  conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
/** * Creates and initializes a JobConf object that can be used to execute * the DAG. The configuration object will contain configurations from mapred-site * overlaid with key/value pairs from the hiveConf object. Finally it will also * contain some hive specific configurations that do not change from DAG to DAG. * * @param hiveConf Current hiveConf for the execution * @return JobConf base configuration for job execution * @throws IOException */ public JobConf createConfiguration(HiveConf hiveConf) throws IOException { hiveConf.setBoolean("mapred.mapper.new-api", false); JobConf conf = new JobConf(new TezConfiguration(hiveConf)); conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName()); conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false); conf.setBoolean("mapred.committer.job.task.cleanup.needed", false); conf.setClass("mapred.output.format.class", HiveOutputFormatImpl.class, OutputFormat.class); conf.set(MRJobConfig.OUTPUT_KEY_CLASS, HiveKey.class.getName()); conf.set(MRJobConfig.OUTPUT_VALUE_CLASS, BytesWritable.class.getName()); conf.set("mapred.partitioner.class", HiveConf.getVar(conf, HiveConf.ConfVars.HIVEPARTITIONER)); conf.set("tez.runtime.partitioner.class", MRPartitioner.class.getName()); // Removing job credential entry/ cannot be set on the tasks conf.unset("mapreduce.job.credentials.binary"); hiveConf.stripHiddenConfigurations(conf); return conf; }
// Store MergeFileOutputFormat under "mapred.output.format.class"; FileOutputFormat is
// passed as the base type the class is expected to satisfy.
// NOTE(review): assumes conf is a Hadoop Configuration/JobConf - confirm against the enclosing method.
conf.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class);
cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName()); cloned.set("mapred.input.format.class", mergeFileWork.getInputformat()); cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class); } else {
// Store MergeFileOutputFormat under "mapred.output.format.class"; FileOutputFormat is
// passed as the base type the class is expected to satisfy.
// NOTE(review): assumes conf is a Hadoop Configuration/JobConf - confirm against the enclosing method.
conf.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class);
cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName()); cloned.set("mapred.input.format.class", mergeFileWork.getInputformat()); cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class); } else {
conf, mapWork, fs, mrScratchDir, ctx, vertexType, localResources); conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
createVertex(conf, mapWork, appJarLr, additionalLr, fs, mrScratchDir, ctx, vertexType); conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
// Store HiveInputFormat under "mapred.input.format.class"; InputFormat is passed as
// the base type the class is expected to satisfy.
// NOTE(review): assumes conf is a Hadoop Configuration/JobConf - confirm against the enclosing method.
conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
// Store HiveInputFormat under "mapred.input.format.class"; InputFormat is passed as
// the base type the class is expected to satisfy.
// NOTE(review): assumes conf is a Hadoop Configuration/JobConf - confirm against the enclosing method.
conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
// Test setup: set the ORC split strategy to "ETL", set the HIVE_IN_TEST flag, and
// register MockFileSystem under "fs.mock.impl". Then build a user name by appending
// "-foo" to the current user's short name and hand it to MockFileSystem.setBlockedUgi.
// NOTE(review): presumably the mock file system denies access for that blocked user
// so the test can exercise a permission failure - confirm in MockFileSystem.
conf.set(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "ETL"); conf.setBoolean(ConfVars.HIVE_IN_TEST.varname, true); conf.setClass("fs.mock.impl", MockFileSystem.class, FileSystem.class); String badUser = UserGroupInformation.getCurrentUser().getShortUserName() + "-foo"; MockFileSystem.setBlockedUgi(badUser);
/**
 * Records the class used for the keys of the job output data.
 *
 * <p>The class is stored under {@code JobContext.OUTPUT_KEY_CLASS}, with
 * {@code Object} as the required base type.
 *
 * @param theClass the key class for the job output data.
 */
public void setOutputKeyClass(Class<?> theClass) {
  final String property = JobContext.OUTPUT_KEY_CLASS;
  setClass(property, theClass, Object.class);
}
/**
 * Records the class used for the values of the job outputs.
 *
 * <p>The class is stored under {@code JobContext.OUTPUT_VALUE_CLASS}, with
 * {@code Object} as the required base type.
 *
 * @param theClass the value class for job outputs.
 */
public void setOutputValueClass(Class<?> theClass) {
  final String property = JobContext.OUTPUT_VALUE_CLASS;
  setClass(property, theClass, Object.class);
}
@Test /** * A testing method instructing core hadoop to load an external ShuffleConsumerPlugin * as if it came from a 3rd party. */ public void testPluginAbility() { try{ // create JobConf with mapreduce.job.shuffle.consumer.plugin=TestShuffleConsumerPlugin JobConf jobConf = new JobConf(); jobConf.setClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, TestShufflePlugin.TestShuffleConsumerPlugin.class, ShuffleConsumerPlugin.class); ShuffleConsumerPlugin shuffleConsumerPlugin = null; Class<? extends ShuffleConsumerPlugin> clazz = jobConf.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class); assertNotNull("Unable to get " + MRConfig.SHUFFLE_CONSUMER_PLUGIN, clazz); // load 3rd party plugin through core's factory method shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, jobConf); assertNotNull("Unable to load " + MRConfig.SHUFFLE_CONSUMER_PLUGIN, shuffleConsumerPlugin); } catch (Exception e) { assertTrue("Threw exception:" + e, false); } }
JobConf conf = new JobConf(); conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///"); conf.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class); conf.set(JobContext.TASK_ATTEMPT_ID, attempt); conf.setInt(org.apache.hadoop.mapreduce.lib.output.
/**
 * Chooses the {@link InputFormat} implementation for the job.
 *
 * <p>{@code ensureState(JobState.DEFINE)} runs first, so the setting is only
 * written when that check passes.
 *
 * @param cls the <code>InputFormat</code> to use
 * @throws IllegalStateException if the job is submitted
 */
public void setInputFormatClass(Class<? extends InputFormat> cls)
    throws IllegalStateException {
  ensureState(JobState.DEFINE);
  final Class<InputFormat> requiredBase = InputFormat.class;
  conf.setClass(INPUT_FORMAT_CLASS_ATTR, cls, requiredBase);
}
/**
 * Chooses the {@link OutputFormat} implementation for the job.
 *
 * <p>{@code ensureState(JobState.DEFINE)} runs first, so the setting is only
 * written when that check passes.
 *
 * @param cls the <code>OutputFormat</code> to use
 * @throws IllegalStateException if the job is submitted
 */
public void setOutputFormatClass(Class<? extends OutputFormat> cls)
    throws IllegalStateException {
  ensureState(JobState.DEFINE);
  final Class<OutputFormat> requiredBase = OutputFormat.class;
  conf.setClass(OUTPUT_FORMAT_CLASS_ATTR, cls, requiredBase);
}
/**
 * Chooses the {@link Reducer} implementation for the job.
 *
 * <p>{@code ensureState(JobState.DEFINE)} runs first, so the setting is only
 * written when that check passes.
 *
 * @param cls the <code>Reducer</code> to use
 * @throws IllegalStateException if the job is submitted
 */
public void setReducerClass(Class<? extends Reducer> cls)
    throws IllegalStateException {
  ensureState(JobState.DEFINE);
  final Class<Reducer> requiredBase = Reducer.class;
  conf.setClass(REDUCE_CLASS_ATTR, cls, requiredBase);
}