private Job writeTopicModel(Configuration conf, Path modelInput, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = String.format("Writing final topic/term distributions from %s to %s",
      modelInput, output);
  log.info("About to run: {}", jobName);
  // Map-only job: normalize each topic's term-count vector into a distribution.
  Job job = prepareJob(modelInput, output, SequenceFileInputFormat.class,
      CVB0TopicTermVectorNormalizerMapper.class, IntWritable.class, VectorWritable.class,
      SequenceFileOutputFormat.class, jobName);
  // Submitted asynchronously; the caller is responsible for waiting on completion.
  job.submit();
  return job;
}
private Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
    throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = String.format("Writing final document/topic inference from %s to %s",
      corpus, output);
  log.info("About to run: {}", jobName);
  Job job = prepareJob(corpus, output, SequenceFileInputFormat.class,
      CVB0DocInferenceMapper.class, IntWritable.class, VectorWritable.class,
      SequenceFileOutputFormat.class, jobName);
  FileSystem fs = FileSystem.get(corpus.toUri(), conf);
  if (modelInput != null && fs.exists(modelInput)) {
    // Ship every model part file to the mappers via the DistributedCache.
    FileStatus[] statuses = fs.listStatus(modelInput, PathFilters.partFilter());
    URI[] modelUris = new URI[statuses.length];
    for (int i = 0; i < statuses.length; i++) {
      modelUris[i] = statuses[i].getPath().toUri();
    }
    DistributedCache.setCacheFiles(modelUris, conf);
    setModelPaths(job, modelInput);
  }
  // Submitted asynchronously, like writeTopicModel, so both output jobs can run in parallel.
  job.submit();
  return job;
}
public void runIteration(Configuration conf, Path corpusInput, Path modelInput, Path modelOutput,
    int iterationNumber, int maxIterations, int numReduceTasks)
    throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = String.format("Iteration %d of %d, input path: %s",
      iterationNumber, maxIterations, modelInput);
  log.info("About to run: {}", jobName);
  Job job = prepareJob(corpusInput, modelOutput, CachingCVB0Mapper.class,
      IntWritable.class, VectorWritable.class, VectorSumReducer.class,
      IntWritable.class, VectorWritable.class);
  // Combine partial topic/term sums map-side before the reduce.
  job.setCombinerClass(VectorSumReducer.class);
  job.setNumReduceTasks(numReduceTasks);
  job.setJobName(jobName);
  setModelPaths(job, modelInput);
  // Clear any stale output from a previous run before writing the new model.
  HadoopUtil.delete(conf, modelOutput);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException(
        String.format("Failed to complete iteration %d stage 1", iterationNumber));
  }
}
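// A minimal sketch of the setModelPaths helper assumed by runIteration and
// calculatePerplexity above: it records the model part-file locations in the
// job configuration so each mapper can load the previous iteration's topic
// model. MODEL_PATHS is an assumed configuration-key constant; the real
// helper may differ in detail.
private static void setModelPaths(Job job, Path modelPath) throws IOException {
  Configuration conf = job.getConfiguration();
  if (modelPath == null || !FileSystem.get(modelPath.toUri(), conf).exists(modelPath)) {
    return; // no prior model: mappers start from a freshly seeded model
  }
  FileStatus[] statuses = FileSystem.get(modelPath.toUri(), conf)
      .listStatus(modelPath, PathFilters.partFilter());
  String[] modelPaths = new String[statuses.length];
  for (int i = 0; i < statuses.length; i++) {
    modelPaths[i] = statuses[i].getPath().toUri().toString();
  }
  conf.setStrings(MODEL_PATHS, modelPaths);
}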
private double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath,
    int iteration) throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = "Calculating perplexity for " + modelPath;
  log.info("About to run: {}", jobName);
  Path outputPath = perplexityPath(modelPath.getParent(), iteration);
  Job job = prepareJob(corpusPath, outputPath, CachingCVB0PerplexityMapper.class,
      DoubleWritable.class, DoubleWritable.class, DualDoubleSumReducer.class,
      DoubleWritable.class, DoubleWritable.class);
  job.setJobName(jobName);
  job.setCombinerClass(DualDoubleSumReducer.class);
  // A single reducer so the job emits one summed (weight, perplexity) pair.
  job.setNumReduceTasks(1);
  setModelPaths(job, modelPath);
  HadoopUtil.delete(conf, outputPath);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
  }
  return readPerplexity(conf, modelPath.getParent(), iteration);
}
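// A minimal sketch (an assumption, not necessarily the original
// implementation) of the readPerplexity helper used above. The perplexity
// job emits (model weight, perplexity contribution) pairs that
// DualDoubleSumReducer sums into a single output; this reads the two totals
// back and returns their ratio. Pair, SequenceFileDirIterable, PathType, and
// PathFilters are Mahout's common sequence-file utilities.
private double readPerplexity(Configuration conf, Path modelParent, int iteration)
    throws IOException {
  Path perplexityPath = perplexityPath(modelParent, iteration);
  double modelWeight = 0;
  double perplexity = 0;
  for (Pair<DoubleWritable, DoubleWritable> pair :
      new SequenceFileDirIterable<DoubleWritable, DoubleWritable>(
          perplexityPath, PathType.LIST, PathFilters.partFilter(), null, true, conf)) {
    modelWeight += pair.getFirst().get();
    perplexity += pair.getSecond().get();
  }
  return perplexity / modelWeight;
}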
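// A minimal usage sketch (not part of the original class) showing how the
// methods above fit together: run the iterations sequentially, optionally
// track perplexity, then launch both output jobs in parallel and wait for
// each. The modelPath(...) helper and the perplexity cadence are
// hypothetical placeholders.
public void run(Configuration conf, Path corpus, Path topicModelOutput, Path docTopicOutput,
    Path modelTempDir, int numIterations, int numReduceTasks, boolean checkPerplexity)
    throws IOException, ClassNotFoundException, InterruptedException {
  Path previousModel = modelPath(modelTempDir, 0); // hypothetical: seed model location
  for (int i = 1; i <= numIterations; i++) {
    Path currentModel = modelPath(modelTempDir, i); // hypothetical per-iteration path
    runIteration(conf, corpus, previousModel, currentModel, i, numIterations, numReduceTasks);
    if (checkPerplexity) {
      log.info("Perplexity after iteration {}: {}",
          i, calculatePerplexity(conf, corpus, currentModel, i));
    }
    previousModel = currentModel;
  }
  // writeTopicModel and writeDocTopicInference call Job.submit() and return
  // immediately, so the two jobs run concurrently; waitForCompletion only
  // monitors a job that has already been submitted.
  Job topicModelJob = writeTopicModel(conf, previousModel, topicModelOutput);
  Job docInferenceJob = writeDocTopicInference(conf, corpus, previousModel, docTopicOutput);
  boolean succeeded = topicModelJob.waitForCompletion(true);
  succeeded &= docInferenceJob.waitForCompletion(true);
  if (!succeeded) {
    throw new InterruptedException("Failed to write topic model or document/topic inference");
  }
}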