private boolean runMapReduce() throws IOException, InterruptedException, ClassNotFoundException { Path model = new Path(getOption("model")); HadoopUtil.cacheFiles(model, getConf()); //the output key is the expected value, the output value are the scores for all the labels Job testJob = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, BayesTestMapper.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class); //testJob.getConfiguration().set(LABEL_KEY, getOption("--labels")); boolean complementary = hasOption("testComplementary"); testJob.getConfiguration().set(COMPLEMENTARY, String.valueOf(complementary)); return testJob.waitForCompletion(true); }
private boolean runMapReduce() throws IOException, InterruptedException, ClassNotFoundException { Path model = new Path(getOption("model")); HadoopUtil.cacheFiles(model, getConf()); //the output key is the expected value, the output value are the scores for all the labels Job testJob = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, BayesTestMapper.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class); //testJob.getConfiguration().set(LABEL_KEY, getOption("--labels")); boolean complementary = hasOption("testComplementary"); testJob.getConfiguration().set(COMPLEMENTARY, String.valueOf(complementary)); return testJob.waitForCompletion(true); }
private boolean runMapReduce(Map<String, List<String>> parsedArgs) throws IOException, InterruptedException, ClassNotFoundException { Path model = new Path(getOption("model")); HadoopUtil.cacheFiles(model, getConf()); //the output key is the expected value, the output value are the scores for all the labels Job testJob = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, BayesTestMapper.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class); //testJob.getConfiguration().set(LABEL_KEY, getOption("--labels")); //boolean complementary = parsedArgs.containsKey("testComplementary"); //always result to false as key in hash map is "--testComplementary" boolean complementary = hasOption("testComplementary"); //or complementary = parsedArgs.containsKey("--testComplementary"); testJob.getConfiguration().set(COMPLEMENTARY, String.valueOf(complementary)); return testJob.waitForCompletion(true); }
private void runSequential() throws IOException { boolean complementary = hasOption("testComplementary"); FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"), Text.class, VectorWritable.class)) { SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<>(getInputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
private void runSequential() throws IOException { boolean complementary = hasOption("testComplementary"); FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); classifier = new StandardNaiveBayesClassifier(model); SequenceFile.Writer writer = SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"), Text.class, VectorWritable.class); new SequenceFileDirIterable<Text, VectorWritable>(getInputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
HadoopUtil.delete(getConf(), getOutputPath()); boolean sequential = hasOption("runSequential"); if (sequential) { FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); AbstractNaiveBayesClassifier classifier; if (complementary) { new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class, VectorWritable.class); Reader reader = new Reader(fs, getInputPath(), getConf()); Text key = new Text(); VectorWritable vw = new VectorWritable(); Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex"))); PathType.LIST, PathFilters.partFilter(), getConf()); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer);
HadoopUtil.delete(getConf(), getOutputPath()); Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex"))); new SequenceFileDirIterable<Text, VectorWritable>(getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf()); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer);
HadoopUtil.delete(getConf(), getOutputPath()); Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex"))); new SequenceFileDirIterable<>(getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf()); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer);