/**
 * Returns the smallest perplexity value recorded under {@code topicModelTemp}.
 *
 * <p>Perplexity files are probed starting at iteration 2 and scanned upward
 * until {@link CVB0Driver#readPerplexity} yields {@code NaN}, which marks the
 * first iteration with no recorded value. If no value at all is found, the
 * initial sentinel {@link Double#MAX_VALUE} is returned.
 *
 * @param conf           Hadoop configuration used to access the filesystem
 * @param topicModelTemp directory holding per-iteration perplexity output
 * @return the minimum recorded perplexity, or {@code Double.MAX_VALUE} if none
 * @throws IOException if a perplexity file cannot be read
 */
private static double lowestPerplexity(Configuration conf, Path topicModelTemp) throws IOException {
  double best = Double.MAX_VALUE;
  for (int i = 2; ; i++) {
    double p = CVB0Driver.readPerplexity(conf, topicModelTemp, i);
    if (Double.isNaN(p)) {
      // NaN signals the first missing iteration; stop scanning.
      break;
    }
    best = Math.min(best, p);
  }
  return best;
}
/**
 * Runs a single map/reduce job that measures the perplexity of the corpus at
 * {@code corpusPath} against the topic model stored at {@code modelPath}, then
 * reads the resulting value back from the job's output directory.
 *
 * @param conf      Hadoop configuration (used here only to delete stale output)
 * @param corpusPath input corpus to score
 * @param modelPath  model directory; its parent holds the perplexity output
 * @param iteration  iteration number used to name the output path
 * @return the perplexity value written by the completed job
 * @throws IOException            on filesystem or job-submission failure
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException   if the job does not complete successfully
 */
private double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
    throws IOException, ClassNotFoundException, InterruptedException {
  String description = "Calculating perplexity for " + modelPath;
  log.info("About to run: {}", description);
  Path output = perplexityPath(modelPath.getParent(), iteration);
  Job perplexityJob = prepareJob(corpusPath, output,
      CachingCVB0PerplexityMapper.class, DoubleWritable.class, DoubleWritable.class,
      DualDoubleSumReducer.class, DoubleWritable.class, DoubleWritable.class);
  perplexityJob.setJobName(description);
  // The reducer is associative, so it doubles as a combiner; a single reduce
  // task funnels everything into one output value.
  perplexityJob.setCombinerClass(DualDoubleSumReducer.class);
  perplexityJob.setNumReduceTasks(1);
  setModelPaths(perplexityJob, modelPath);
  // Clear any output left over from a previous run before launching.
  HadoopUtil.delete(conf, output);
  if (!perplexityJob.waitForCompletion(true)) {
    throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
  }
  return readPerplexity(conf, modelPath.getParent(), iteration);
}
/**
 * Runs a single map/reduce job that measures the perplexity of the corpus at
 * {@code corpusPath} against the topic model stored at {@code modelPath}, then
 * reads the resulting value back from the job's output directory.
 *
 * @param conf      Hadoop configuration (used here only to delete stale output)
 * @param corpusPath input corpus to score
 * @param modelPath  model directory; its parent holds the perplexity output
 * @param iteration  iteration number used to name the output path
 * @return the perplexity value written by the completed job
 * @throws IOException            on filesystem or job-submission failure
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException   if the job does not complete successfully
 */
private double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
    throws IOException, ClassNotFoundException, InterruptedException {
  String description = "Calculating perplexity for " + modelPath;
  log.info("About to run: {}", description);
  Path output = perplexityPath(modelPath.getParent(), iteration);
  Job perplexityJob = prepareJob(corpusPath, output,
      CachingCVB0PerplexityMapper.class, DoubleWritable.class, DoubleWritable.class,
      DualDoubleSumReducer.class, DoubleWritable.class, DoubleWritable.class);
  perplexityJob.setJobName(description);
  // The reducer is associative, so it doubles as a combiner; a single reduce
  // task funnels everything into one output value.
  perplexityJob.setCombinerClass(DualDoubleSumReducer.class);
  perplexityJob.setNumReduceTasks(1);
  setModelPaths(perplexityJob, modelPath);
  // Clear any output left over from a previous run before launching.
  HadoopUtil.delete(conf, output);
  if (!perplexityJob.waitForCompletion(true)) {
    throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
  }
  return readPerplexity(conf, modelPath.getParent(), iteration);
}
/**
 * Runs a single map/reduce job that measures the perplexity of the corpus at
 * {@code corpusPath} against the topic model stored at {@code modelPath}, then
 * reads the resulting value back from the job's output directory.
 *
 * @param conf      Hadoop configuration (used here only to delete stale output)
 * @param corpusPath input corpus to score
 * @param modelPath  model directory; its parent holds the perplexity output
 * @param iteration  iteration number used to name the output path
 * @return the perplexity value written by the completed job
 * @throws IOException            on filesystem or job-submission failure
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException   if the job does not complete successfully
 */
private double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
    throws IOException, ClassNotFoundException, InterruptedException {
  String description = "Calculating perplexity for " + modelPath;
  log.info("About to run: {}", description);
  Path output = perplexityPath(modelPath.getParent(), iteration);
  Job perplexityJob = prepareJob(corpusPath, output,
      CachingCVB0PerplexityMapper.class, DoubleWritable.class, DoubleWritable.class,
      DualDoubleSumReducer.class, DoubleWritable.class, DoubleWritable.class);
  perplexityJob.setJobName(description);
  // The reducer is associative, so it doubles as a combiner; a single reduce
  // task funnels everything into one output value.
  perplexityJob.setCombinerClass(DualDoubleSumReducer.class);
  perplexityJob.setNumReduceTasks(1);
  setModelPaths(perplexityJob, modelPath);
  // Clear any output left over from a previous run before launching.
  HadoopUtil.delete(conf, output);
  if (!perplexityJob.waitForCompletion(true)) {
    throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
  }
  return readPerplexity(conf, modelPath.getParent(), iteration);
}
// NOTE(review): three identical, incomplete fragments — the enclosing method and
// the closing braces are not visible in this chunk, so the code is left
// byte-identical. Each fragment reads the perplexity recorded for iteration i
// and, when none was written (NaN), tests whether this iteration qualifies for
// backfilling — presumably part of the driver's main iteration loop; confirm
// against the full source.
double perplexity = readPerplexity(conf, topicModelStateTempPath, i); if (Double.isNaN(perplexity)) { if (!(backfillPerplexity && i % iterationBlockSize == 0)) {
double perplexity = readPerplexity(conf, topicModelStateTempPath, i); if (Double.isNaN(perplexity)) { if (!(backfillPerplexity && i % iterationBlockSize == 0)) {
double perplexity = readPerplexity(conf, topicModelStateTempPath, i); if (Double.isNaN(perplexity)) { if (!(backfillPerplexity && i % iterationBlockSize == 0)) {