private void outputDimRangeInfo() throws IOException, InterruptedException {
    if (col != null && minValue != null) {
        // output written to baseDir/colName/colName.dci-r-00000 (etc)
        String dimRangeFileName = col.getIdentity() + "/" + col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX;

        mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(),
                new Text(minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName);
        mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(),
                new Text(maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName);
        logger.info("write dimension range info for col : " + col.getName() + " minValue:" + minValue
                + " maxValue:" + maxValue);
    }
}
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeSegment optSegment = cube.getSegmentById(segmentID);
    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

    rowKeySplitter = new RowKeySplitter(originalSegment);
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
    recommendCuboids = cube.getCuboidsRecommend();
    Preconditions.checkNotNull(recommendCuboids, "The recommend cuboid map could not be null");
}
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
private void outputDict(TblColRef col, Dictionary<String> dict) throws IOException, InterruptedException {
    // output written to baseDir/colName/colName.rldict-r-00000 (etc)
    String dictFileName = col.getIdentity() + "/" + col.getName() + DICT_FILE_POSTFIX;

    try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
            DataOutputStream outputStream = new DataOutputStream(baos)) {
        outputStream.writeUTF(dict.getClass().getName());
        dict.write(outputStream);

        mos.write(BatchConstants.CFG_OUTPUT_DICT, NullWritable.get(),
                new ArrayPrimitiveWritable(baos.toByteArray()), dictFileName);
    }
    mos.close();
}
/**
 * Adds a named output for the job.
 *
 * @param job               job to add the named output
 * @param namedOutput       named output name, it has to be a word, letters
 *                          and numbers only, cannot be the word 'part' as
 *                          that is reserved for the default output.
 * @param outputFormatClass OutputFormat class.
 * @param keyClass          key class
 * @param valueClass        value class
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput,
        Class<? extends OutputFormat> outputFormatClass,
        Class<?> keyClass, Class<?> valueClass) {
    checkNamedOutputName(job, namedOutput, true);
    Configuration conf = job.getConfiguration();
    conf.set(MULTIPLE_OUTPUTS, conf.get(MULTIPLE_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.setClass(MO_PREFIX + namedOutput + KEY, keyClass, Object.class);
    conf.setClass(MO_PREFIX + namedOutput + VALUE, valueClass, Object.class);
}
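addNamedOutput only records the output name, format, and key/value classes in the job configuration; records are actually emitted from a task through a MultipleOutputs instance created against the task context. A minimal mapper sketch, assuming a named output registered as "text" on the driver (the class and field names below are illustrative, not taken from the snippets in this listing):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Hypothetical mapper that routes every record to the named output "text",
// registered on the driver via MultipleOutputs.addNamedOutput(job, "text", ...).
public class NamedOutputMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    private MultipleOutputs<LongWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // write(namedOutput, key, value) goes to the output registered under "text"
        mos.write("text", key, value);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // must be closed, otherwise the named output files may not be flushed
        mos.close();
    }
}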
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job countingJob = new Job(conf, "JobChaining-Counting");
    countingJob.setJarByClass(BasicJobChaining.class);
    countingJob.setMapperClass(UserIdCountMapper.class);
    countingJob.setCombinerClass(LongSumReducer.class);
    countingJob.setReducerClass(UserIdSumReducer.class);

    Job binningJob = new Job(new Configuration(), "JobChaining-Binning");
    binningJob.setJarByClass(BasicJobChaining.class);

    MultipleOutputs.addNamedOutput(binningJob, MULTIPLE_OUTPUTS_BELOW_NAME, TextOutputFormat.class,
            Text.class, Text.class);
    MultipleOutputs.addNamedOutput(binningJob, MULTIPLE_OUTPUTS_ABOVE_NAME, TextOutputFormat.class,
            Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(binningJob, true);
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: BinningTags <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "Binning Tags");
    job.setJarByClass(BinningTags.class);

    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    job.setNumReduceTasks(0);
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
String hadoopFileJar = graph.getConf().get("mapred.jar", null);
if (null == hadoopFileJar) {
    if (new File("target/" + Tokens.FAUNUS_JOB_JAR).exists()) {
        job.getConfiguration().setBoolean(PATH_ENABLED, this.pathEnabled);
        job.getConfiguration().set("mapred.jar", hadoopFileJar);
        job.setInputFormatClass(this.graph.getGraphInputFormat());
        if (FileInputFormat.class.isAssignableFrom(this.graph.getGraphInputFormat())) {
            FileInputFormat.setInputPaths(job, this.graph.getInputLocation());
            MultipleOutputs.addNamedOutput(job, Tokens.SIDEEFFECT, this.graph.getSideEffectOutputFormat(),
                    job.getOutputKeyClass(), job.getOutputKeyClass());
            MultipleOutputs.addNamedOutput(job, Tokens.GRAPH, this.graph.getGraphOutputFormat(),
                    NullWritable.class, FaunusVertex.class);
        } else {
            LazyOutputFormat.setOutputFormatClass(job, INTERMEDIATE_OUTPUT_FORMAT);
            MultipleOutputs.addNamedOutput(job, Tokens.SIDEEFFECT, this.graph.getSideEffectOutputFormat(),
                    job.getOutputKeyClass(), job.getOutputKeyClass());
            MultipleOutputs.addNamedOutput(job, Tokens.GRAPH, INTERMEDIATE_OUTPUT_FORMAT,
                    NullWritable.class, FaunusVertex.class);
conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization"); job.setJobName("mo"); MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, Long.class, String.class); MultipleOutputs.setCountersEnabled(job, withCounters); job.setSortComparatorClass(JavaSerializationComparator.class); job.setMapOutputKeyClass(Long.class); job.setMapOutputValueClass(String.class);
private Job createSEMapReduceJob(SourceType source, Configuration conf, String varSelectMSEOutputPath)
        throws IOException {
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Shifu: Variable Selection Wrapper Job : " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());

    boolean isSEVarSelMulti = Boolean.TRUE.toString().equalsIgnoreCase(
            Environment.getProperty(Constants.SHIFU_VARSEL_SE_MULTI, Constants.SHIFU_DEFAULT_VARSEL_SE_MULTI));
    if(isSEVarSelMulti) {
        job.setMapperClass(MultithreadedMapper.class);
        MultithreadedMapper.setMapperClass(job, VarSelectMapper.class);
        int threads = Constants.SHIFU_DEFAULT_VARSEL_SE_MULTI_THREAD;
        conf.setInt("mapreduce.map.cpu.vcores", threads);
        MultithreadedMapper.setNumberOfThreads(job, threads);
    } else {

    MultipleOutputs.addNamedOutput(job, Constants.SHIFU_VARSELECT_SE_OUTPUT_NAME, TextOutputFormat.class,
            Text.class, Text.class);
    return job;
@Override public int run(String[] args) throws Exception { if ("DEBUG".equals(conf.get("job.logging"))) { logger.setLevel(Level.DEBUG); logger.debug("** Log Level set to DEBUG **"); Job job = new Job(conf, HBaseToDocumentProtoSequenceFile.class.getSimpleName() + "_" + tableName + "_" + outputDir); job.setJarByClass(HBaseToDocumentProtoSequenceFile.class); job.setNumReduceTasks(0); job.setOutputKeyClass(BytesWritable.class); SequenceFileOutputFormat.setOutputPath(job, new Path(outputDir)); MultipleOutputs.addNamedOutput(job, FAMILY_METADATA_DOCUMENT_QUALIFIER_PROTO, SequenceFileOutputFormat.class, BytesWritable.class, BytesWritable.class); MultipleOutputs.addNamedOutput(job, FAMILY_CONTENT_QUALIFIER_PROTO, SequenceFileOutputFormat.class, BytesWritable.class, BytesWritable.class); MultipleOutputs.addNamedOutput(job, "dproto", SequenceFileOutputFormat.class, BytesWritable.class, BytesWritable.class); boolean success = job.waitForCompletion(true);
@Override
public void doCleanup(Context context) throws IOException, InterruptedException {
    mos.close();

    Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR), PathNameCuboidBase);
    FileSystem fs = FileSystem.get(context.getConfiguration());
    if (!fs.exists(outputDirBase)) {
        fs.mkdirs(outputDirBase);
        SequenceFile
                .createWriter(context.getConfiguration(),
                        SequenceFile.Writer.file(new Path(outputDirBase, "part-m-00000")),
                        SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class))
                .close();
    }
}
/**
 * Do initialization like ModelConfig and ColumnConfig loading.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    loadConfigFiles(context);

    int[] inputOutputIndex = DTrainUtils.getInputOutputCandidateCounts(modelConfig.getNormalizeType(),
            this.columnConfigList);
    this.inputNodeCount = inputOutputIndex[0] == 0 ? inputOutputIndex[2] : inputOutputIndex[0];

    this.filterOutRatio = context.getConfiguration().getFloat(Constants.SHIFU_VARSELECT_FILTEROUT_RATIO,
            Constants.SHIFU_DEFAULT_VARSELECT_FILTEROUT_RATIO);
    this.filterNum = context.getConfiguration().getInt(Constants.SHIFU_VARSELECT_FILTER_NUM,
            Constants.SHIFU_DEFAULT_VARSELECT_FILTER_NUM);

    this.outputKey = new Text();
    this.outputValue = new Text();
    this.filterBy = context.getConfiguration().get(Constants.SHIFU_VARSELECT_FILTEROUT_TYPE,
            Constants.FILTER_BY_SE);
    this.mos = new MultipleOutputs<Text, Text>(context);

    LOG.info("FilterBy is {}, filterOutRatio is {}, filterNum is {}", filterBy, filterOutRatio, filterNum);
}
Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
job.setJobName("mo");

MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class);
MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class, Text.class);
MultipleOutputs.setCountersEnabled(job, withCounters);

job.setMapperClass(MOMap.class);
job.setReducerClass(MOReduce.class);
job.waitForCompletion(true);
@Override
public int run(String[] args) throws Exception {
    final CmdLineParser parser = new CmdLineParser(this);

    try {
        parser.parseArgument(args);
    } catch (final CmdLineException e) {
        System.err.println(e.getMessage());
        System.err.println("Usage: hadoop jar HadoopImageIndexer.jar [options]");
        parser.printUsage(System.err);
        return -1;
    }

    final Path[] paths = SequenceFileUtility.getFilePaths(input, "part");
    final Path outputPath = new Path(output);

    if (outputPath.getFileSystem(this.getConf()).exists(outputPath) && replace)
        outputPath.getFileSystem(this.getConf()).delete(outputPath, true);

    final Job job = TextBytesJobUtil.createJob(paths, outputPath, null, this.getConf());
    job.setJarByClass(this.getClass());
    job.setMapperClass(PqPcaVladMapper.class);
    job.setNumReduceTasks(0);

    MultipleOutputs.addNamedOutput(job, "pcavlad", SequenceFileOutputFormat.class, Text.class, BytesWritable.class);

    DistributedCache.createSymlink(job.getConfiguration());
    DistributedCache.addCacheFile(new URI(indexerData + "#vlad-data.bin"), job.getConfiguration());

    SequenceFileOutputFormat.setCompressOutput(job, !dontcompress);

    job.waitForCompletion(true);
    return 0;
}
/**
 * {@inheritDoc}
 */
protected void reduce(final Text key, final Iterable<BytesWritable> values, final Context context)
        throws IOException, InterruptedException {
    final Configuration configuration = context.getConfiguration();
    final String sourcePath = configuration.get("compactionSourcePath");
    final String targetPath = configuration.get("compactionTargetPath");

    // The reducer writes data under the target directory, retaining the source directory structure
    String filePath = key.toString().replace(sourcePath, targetPath);
    if (key.toString().endsWith("/")) {
        filePath = filePath.concat("file");
    }
    log.info("Compaction output path {}", filePath);

    final URI uri = URI.create(filePath);
    final MultipleOutputs multipleOutputs = new MultipleOutputs<NullWritable, BytesWritable>(context);
    try {
        for (final BytesWritable text : values) {
            multipleOutputs.write(NullWritable.get(), text, uri.toString());
        }
    } finally {
        multipleOutputs.close();
    }
}
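The three-argument write(key, value, baseOutputPath) used here bypasses named outputs entirely: it writes with the job's default output format and output key/value classes, to a path relative to the job output directory. A minimal driver-side sketch under that assumption (class and method names are illustrative, not from the snippet; reducer and jar wiring are omitted):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

// Hypothetical driver-side configuration for a compaction job whose reducer uses
// MultipleOutputs.write(key, value, baseOutputPath) as above. That overload falls
// back to the job's output key/value classes and output format, so no addNamedOutput
// registration is needed; LazyOutputFormat keeps the framework from creating empty
// default part files next to the per-path outputs.
public class CompactionJobConfig {

    public static Job createCompactionJob(Configuration conf, String sourcePath, String targetPath)
            throws IOException {
        // paths read back by the reducer via context.getConfiguration()
        conf.set("compactionSourcePath", sourcePath);
        conf.set("compactionTargetPath", targetPath);

        Job job = Job.getInstance(conf, "compaction");
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(BytesWritable.class);

        // only the MultipleOutputs paths are written; no default part-r-* files
        LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(targetPath));
        return job;
    }
}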
/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj", SequenceFileOutputFormat.class,
            NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class,
            Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
public SafeMapperOutputs(final Mapper.Context context) {
    this.context = context;
    this.outputs = new MultipleOutputs(this.context);
    this.testing = this.context.getConfiguration().getBoolean(FaunusCompiler.TESTING, false);
}
final Job job = Job.getInstance(sConf.get());

MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
        NullWritable.class, Text.class);
MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
        NullWritable.class, ArrayPrimitiveWritable.class);
MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
        LongWritable.class, BytesWritable.class);
MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
        NullWritable.class, LongWritable.class);

multipleOutputsRDD.saveAsNewAPIHadoopDatasetWithMultipleOutputs(job.getConfiguration());
/**
 * Set up a MapReduce job to output human-readable text.
 */
protected void configureTextOutput(String destination) {
    Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class,
            Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}