private void pushProjection(final JobConf newjob, final StringBuilder readColumnsBuffer,
    final StringBuilder readColumnNamesBuffer) {
  String readColIds = readColumnsBuffer.toString();
  String readColNames = readColumnNamesBuffer.toString();
  newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds);
  newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames);

  if (LOG.isInfoEnabled()) {
    LOG.info("{} = {}", ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds);
    LOG.info("{} = {}", ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames);
  }
}
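For context, a minimal sketch of what the projection keys hold after this helper runs; the column ids, column names, and local variable names are hypothetical, while the property names come from Hive's ColumnProjectionUtils:

JobConf newjob = new JobConf();
StringBuilder ids = new StringBuilder("0,2");        // hypothetical projected column ids
StringBuilder names = new StringBuilder("id,name");  // hypothetical projected column names
pushProjection(newjob, ids, names);
// Resulting configuration entries:
//   hive.io.file.read.all.columns  = false
//   hive.io.file.readcolumn.ids    = 0,2
//   hive.io.file.readcolumn.names  = id,name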
/**
 * Creates and initializes a JobConf object that can be used to execute
 * the DAG. The configuration object will contain configurations from mapred-site
 * overlaid with key/value pairs from the conf object. Finally it will also
 * contain some hive specific configurations that do not change from DAG to DAG.
 *
 * @param hiveConf Current conf for the execution
 * @return JobConf base configuration for job execution
 * @throws IOException
 */
public JobConf createConfiguration(HiveConf hiveConf) throws IOException {
  hiveConf.setBoolean("mapred.mapper.new-api", false);

  JobConf conf = new JobConf(new TezConfiguration(hiveConf));

  conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName());

  conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false);
  conf.setBoolean("mapred.committer.job.task.cleanup.needed", false);

  conf.setClass("mapred.output.format.class", HiveOutputFormatImpl.class, OutputFormat.class);

  conf.set(MRJobConfig.OUTPUT_KEY_CLASS, HiveKey.class.getName());
  conf.set(MRJobConfig.OUTPUT_VALUE_CLASS, BytesWritable.class.getName());

  conf.set("mapred.partitioner.class", HiveConf.getVar(conf, HiveConf.ConfVars.HIVEPARTITIONER));
  conf.set("tez.runtime.partitioner.class", MRPartitioner.class.getName());

  // Removing job credential entry/ cannot be set on the tasks
  conf.unset("mapreduce.job.credentials.binary");

  hiveConf.stripHiddenConfigurations(conf);
  return conf;
}
private void setupExternalCacheConfig(boolean isPpd, String paths) {
  FileInputFormat.setInputPaths(conf, paths);
  conf.set(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "ETL");
  conf.setLong(HiveConf.ConfVars.MAPREDMINSPLITSIZE.varname, 1000);
  conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, 5000);
  conf.setBoolean(ConfVars.HIVE_ORC_MS_FOOTER_CACHE_PPD.varname, isPpd);
  conf.setBoolean(ConfVars.HIVEOPTINDEXFILTER.varname, isPpd);
}
job.setOutputCommitter(CompactorOutputCommitter.class);

job.set(FINAL_LOCATION, sd.getLocation());
job.set(TMP_LOCATION, generateTmpPath(sd));
job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
job.setBoolean(IS_COMPRESSED, sd.isCompressed());
job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
job.setInt(NUM_BUCKETS, sd.getNumBuckets());
job.setBoolean("mapreduce.map.speculative", false);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(Text.class);
jobConf.setBoolean("mapred.output.compress", true);
jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
jobConf.setOutputFormat(TextMultiOutputFormat.class);
jobConf.set(XLearningConstants.STREAM_OUTPUT_DIR, remotePath.makeQualified(dfs).toString());
OutputFormat outputFormat = ReflectionUtils.newInstance(
    conf.getClass(XLearningConfiguration.XLEARNING_OUTPUTFORMAT_CLASS,
        XLearningConfiguration.DEFAULT_XLEARNING_OUTPUTF0RMAT_CLASS, OutputFormat.class),
    jobConf);
JobID jobID = new JobID(new SimpleDateFormat("yyyyMMddHHmm").format(new Date()), 0);
TaskAttemptID taId = new TaskAttemptID(new TaskID(jobID, true, 0), 0);
jobConf.set("mapred.tip.id", taId.getTaskID().toString());
jobConf.set("mapred.task.id", taId.toString());
jobConf.set("mapred.job.id", jobID.toString());
cloned.setBoolean("mapred.task.is.map", true); List<Path> inputPaths = Utilities.getInputPaths(cloned, (MapWork) work, scratchDir, context, false); if (work instanceof MergeFileWork) { MergeFileWork mergeFileWork = (MergeFileWork) work; cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName()); cloned.set("mapred.input.format.class", mergeFileWork.getInputformat()); cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class); } else { cloned.set(Utilities.MAPRED_MAPPER_CLASS, ExecMapper.class.getName()); cloned.setBoolean("mapred.task.is.map", false); Utilities.setReduceWork(cloned, (ReduceWork) work, scratchDir, false); Utilities.createTmpDirs(cloned, (ReduceWork) work);
@Before
public void setup() throws Exception {
  conf = new JobConf();
  conf.set(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true");
  conf.setBoolean(HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, true);
  conf.set(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, "default");
  conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname,
      AcidUtils.AcidOperationalProperties.getDefault().toInt());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, DummyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, DummyRow.getColumnTypesProperty());
  conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, true);
  conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
  OrcConf.ROWS_BETWEEN_CHECKS.setLong(conf, 1);
/**
 * Runs the actual TeraSort test job through the Ignite API.
 *
 * @param gzip Whether to use GZIP.
 */
protected final void teraSort(boolean gzip) throws Exception {
  System.out.println("TeraSort ===============================================================");

  getFileSystem().delete(new Path(sortOutDir), true);

  final JobConf jobConf = new JobConf();

  jobConf.setUser(getUser());

  jobConf.set("fs.defaultFS", getFsBase());

  log().info("Desired number of reduces: " + numReduces());

  jobConf.set("mapreduce.job.reduces", String.valueOf(numReduces()));

  log().info("Desired number of maps: " + numMaps());

  final long splitSize = dataSizeBytes() / numMaps();

  log().info("Desired split size: " + splitSize);

  // Force the split to be of the desired size:
  jobConf.set("mapred.min.split.size", String.valueOf(splitSize));
  jobConf.set("mapred.max.split.size", String.valueOf(splitSize));

  jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MAPPER_STRIPED_OUTPUT.propertyName(), true);
  jobConf.setInt(HadoopJobProperty.SHUFFLE_MSG_SIZE.propertyName(), 4096);

  if (gzip)
    jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MSG_GZIP.propertyName(), true);

  jobConf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
      TextPartiallyRawComparator.class.getName());

  Job job = setupConfig(jobConf);

  HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

  IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration(), null));

  fut.get();
}
@Test
public void testDoAs() throws Exception {
  conf.setInt(ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS.varname, 1);
  conf.set(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "ETL");
  conf.setBoolean(ConfVars.HIVE_IN_TEST.varname, true);
  conf.setClass("fs.mock.impl", MockFileSystem.class, FileSystem.class);
  String badUser = UserGroupInformation.getCurrentUser().getShortUserName() + "-foo";
job.set(FINAL_LOCATION, sd.getLocation());
job.set(TMP_LOCATION, sd.getLocation() + "/" + TMPDIR + "_" + UUID.randomUUID().toString());
job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
job.setBoolean(IS_COMPRESSED, sd.isCompressed());
job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
job.setInt(NUM_BUCKETS, sd.getNumBuckets());
job.setBoolean("mapreduce.map.speculative", false);
/**
 * Creates and initializes a JobConf object that can be used to execute
 * the DAG. The configuration object will contain configurations from mapred-site
 * overlaid with key/value pairs from the hiveConf object. Finally it will also
 * contain some hive specific configurations that do not change from DAG to DAG.
 *
 * @param hiveConf Current hiveConf for the execution
 * @return JobConf base configuration for job execution
 * @throws IOException
 */
public JobConf createConfiguration(HiveConf hiveConf) throws IOException {
  hiveConf.setBoolean("mapred.mapper.new-api", false);

  JobConf conf = new JobConf(new TezConfiguration(hiveConf));

  conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName());

  conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false);
  conf.setBoolean("mapred.committer.job.task.cleanup.needed", false);

  conf.setClass("mapred.output.format.class", HiveOutputFormatImpl.class, OutputFormat.class);

  conf.set(MRJobConfig.OUTPUT_KEY_CLASS, HiveKey.class.getName());
  conf.set(MRJobConfig.OUTPUT_VALUE_CLASS, BytesWritable.class.getName());

  conf.set("mapred.partitioner.class", HiveConf.getVar(conf, HiveConf.ConfVars.HIVEPARTITIONER));
  conf.set("tez.runtime.partitioner.class", MRPartitioner.class.getName());

  // Removing job credential entry/ cannot be set on the tasks
  conf.unset("mapreduce.job.credentials.binary");

  hiveConf.stripHiddenConfigurations(conf);
  return conf;
}
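A hedged usage sketch for the method above, assuming an instance of the enclosing utility class is available as dagUtils (that name and the surrounding variables are hypothetical):

HiveConf hiveConf = new HiveConf();
JobConf baseConf = dagUtils.createConfiguration(hiveConf);
// baseConf now carries the DAG-wide settings (NullOutputCommitter, HiveKey/BytesWritable
// output classes, MRPartitioner) and can be cloned for per-vertex configuration:
JobConf vertexConf = new JobConf(baseConf);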
    int curDirNumber, int obsoleteDirNumber, HiveConf hiveConf, TxnStore txnHandler,
    long id, String jobName) throws IOException {
  job.setBoolean(IS_MAJOR, compactionType == CompactionType.MAJOR);
  if (dirsToSearch == null) {
    dirsToSearch = new StringableList();
  }
  if (baseDir != null) job.set(BASE_DIR, baseDir.toString());
  job.set(DELTA_DIRS, deltaDirs.toString());
  job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
  job.setLong(MIN_TXN, minTxn);
  job.setLong(MAX_TXN, maxTxn);
cloned.setBoolean("mapred.task.is.map", true); List<Path> inputPaths = Utilities.getInputPaths(cloned, mapWork, scratchDir, context, false); if (work instanceof MergeFileWork) { MergeFileWork mergeFileWork = (MergeFileWork) work; cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName()); cloned.set("mapred.input.format.class", mergeFileWork.getInputformat()); cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class); } else { cloned.set(Utilities.MAPRED_MAPPER_CLASS, ExecMapper.class.getName()); cloned.setBoolean("mapred.task.is.map", false); Utilities.setReduceWork(cloned, (ReduceWork) work, scratchDir, false); Utilities.createTmpDirs(cloned, (ReduceWork) work);
private JobConf initializeVertexConf(JobConf baseConf, Context context, ReduceWork reduceWork) {
  JobConf conf = new JobConf(baseConf);

  conf.set(Operator.CONTEXT_NAME_KEY, reduceWork.getName());

  // Is this required ?
  conf.set("mapred.reducer.class", ExecReducer.class.getName());

  boolean useSpeculativeExecReducers = HiveConf.getBoolVar(conf,
      HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  conf.setBoolean(org.apache.hadoop.mapreduce.MRJobConfig.REDUCE_SPECULATIVE,
      useSpeculativeExecReducers);

  return conf;
}
/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param conf              job conf to add the named output
 * @param namedOutput       named output name, it has to be a word, letters
 *                          and numbers only, cannot be the word 'part' as
 *                          that is reserved for the default output.
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class.
 * @param schema            Schema to be used for this namedOutput
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
    Class<? extends OutputFormat> outputFormatClass, Schema schema) {
  checkNamedOutputName(namedOutput);
  checkNamedOutput(conf, namedOutput, true);
  boolean isMapOnly = conf.getNumReduceTasks() == 0;
  if (schema != null)
    conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
  conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
  conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
  conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
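A minimal invocation sketch for this helper; the output name and schema are hypothetical choices, and AvroOutputFormat (Avro's old-API OutputFormat) stands in for whatever format the caller would actually register:

JobConf conf = new JobConf();
Schema schema = Schema.create(Schema.Type.STRING);   // hypothetical schema for the named output
// Register a single (non-multi) named output called "errors":
addNamedOutput(conf, "errors", false, AvroOutputFormat.class, schema);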
    int curDirNumber, int obsoleteDirNumber, HiveConf hiveConf, IMetaStoreClient msc,
    long id, String jobName) throws IOException {
  job.setBoolean(IS_MAJOR, compactionType == CompactionType.MAJOR);
  if (dirsToSearch == null) {
    dirsToSearch = new StringableList();
  }
  if (baseDir != null) job.set(BASE_DIR, baseDir.toString());
  job.set(DELTA_DIRS, deltaDirs.toString());
  job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
  job.setLong(MIN_TXN, minTxn);
  job.setLong(MAX_TXN, maxTxn);
conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false); for (int i = 0; i < mapWorkList.size(); i++) { conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName()); conf.set(Utilities.INPUT_NAME, mapWork.getName()); LOG.info("Going through each work and adding MultiMRInput"); mergeVx.addDataSource(mapWork.getName(),
private static void assertFileContentsDwrfHive(
    Type type,
    TempFile tempFile,
    Iterable<?> expectedValues)
    throws Exception {
  JobConf configuration = new JobConf(new Configuration(false));
  configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
  configuration.setBoolean(READ_ALL_COLUMNS, false);

  Path path = new Path(tempFile.getFile().getAbsolutePath());
  com.facebook.hive.orc.Reader reader = com.facebook.hive.orc.OrcFile.createReader(
      path.getFileSystem(configuration), path, configuration);

  boolean[] include = new boolean[reader.getTypes().size() + 100000];
  Arrays.fill(include, true);

  com.facebook.hive.orc.RecordReader recordReader = reader.rows(include);

  StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
  StructField field = rowInspector.getStructFieldRef("test");

  Iterator<?> iterator = expectedValues.iterator();
  Object rowData = null;
  while (recordReader.hasNext()) {
    rowData = recordReader.next(rowData);
    Object expectedValue = iterator.next();

    Object actualValue = rowInspector.getStructFieldData(rowData, field);
    actualValue = decodeRecordReaderValue(type, actualValue);
    assertColumnValueEquals(type, actualValue, expectedValue);
  }
  assertFalse(iterator.hasNext());
}