/**
 * Command-line entry point: trains a nonparametric topic model on a saved
 * instance list.
 *
 * Usage: args[0] = serialized InstanceList file (required);
 *        args[1] = initial number of topics (optional, default 200).
 *
 * @throws IOException if sampling output cannot be written
 */
public static void main (String[] args) throws IOException {
	// Fail with a usage message instead of an opaque
	// ArrayIndexOutOfBoundsException when no file is given.
	if (args.length < 1) {
		System.err.println("Usage: NPTopicModel <instance-list-file> [initial-num-topics]");
		System.exit(1);
	}

	InstanceList training = InstanceList.load (new File(args[0]));

	// Initial topic count; the nonparametric sampler may grow or shrink it.
	int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;

	// Hyperparameters (5.0, 10.0, 0.1) — presumably alpha, gamma, beta;
	// confirm against the NPTopicModel constructor.
	NPTopicModel lda = new NPTopicModel (5.0, 10.0, 0.1);
	lda.addInstances(training, numTopics);
	lda.sample(1000);
}
/**
 * Writes the gzip-compressed Gibbs sampling state to the given file.
 *
 * Uses try-with-resources so the stream chain is always closed — the
 * original manual close() leaked the stream (and left the gzip trailer
 * unwritten, producing a corrupt file) if printState(out) threw.
 *
 * @param f destination file for the compressed state
 * @throws IOException if the file cannot be created or written
 */
public void printState (File f) throws IOException {
	try (PrintStream out = new PrintStream(
			new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(f))))) {
		printState(out);
	}
}
public void sample (int iterations) throws IOException { for (int iteration = 1; iteration <= iterations; iteration++) { long iterationStart = System.currentTimeMillis(); // Loop over every document in the corpus for (int doc = 0; doc < data.size(); doc++) { FeatureSequence tokenSequence = (FeatureSequence) data.get(doc).instance.getData(); LabelSequence topicSequence = (LabelSequence) data.get(doc).topicSequence; sampleTopicsForOneDoc (tokenSequence, topicSequence); } long elapsedMillis = System.currentTimeMillis() - iterationStart; logger.info(iteration + "\t" + elapsedMillis + "ms\t" + numTopics); // Occasionally print more information if (showTopicsInterval != 0 && iteration % showTopicsInterval == 0) { logger.info("<" + iteration + "> #Topics: " + numTopics + "\n" + topWords (wordsPerTopic)); } } }
public void sample (int iterations) throws IOException { for (int iteration = 1; iteration <= iterations; iteration++) { long iterationStart = System.currentTimeMillis(); // Loop over every document in the corpus for (int doc = 0; doc < data.size(); doc++) { FeatureSequence tokenSequence = (FeatureSequence) data.get(doc).instance.getData(); LabelSequence topicSequence = (LabelSequence) data.get(doc).topicSequence; sampleTopicsForOneDoc (tokenSequence, topicSequence); } long elapsedMillis = System.currentTimeMillis() - iterationStart; logger.info(iteration + "\t" + elapsedMillis + "ms\t" + numTopics); // Occasionally print more information if (showTopicsInterval != 0 && iteration % showTopicsInterval == 0) { logger.info("<" + iteration + "> #Topics: " + numTopics + "\n" + topWords (wordsPerTopic)); } } }
/**
 * Command-line entry point: trains a nonparametric topic model on a saved
 * instance list.
 *
 * Usage: args[0] = serialized InstanceList file (required);
 *        args[1] = initial number of topics (optional, default 200).
 *
 * @throws IOException if sampling output cannot be written
 */
public static void main (String[] args) throws IOException {
	// Fail with a usage message instead of an opaque
	// ArrayIndexOutOfBoundsException when no file is given.
	if (args.length < 1) {
		System.err.println("Usage: NPTopicModel <instance-list-file> [initial-num-topics]");
		System.exit(1);
	}

	InstanceList training = InstanceList.load (new File(args[0]));

	// Initial topic count; the nonparametric sampler may grow or shrink it.
	int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;

	// Hyperparameters (5.0, 10.0, 0.1) — presumably alpha, gamma, beta;
	// confirm against the NPTopicModel constructor.
	NPTopicModel lda = new NPTopicModel (5.0, 10.0, 0.1);
	lda.addInstances(training, numTopics);
	lda.sample(1000);
}
public void sample (int iterations) throws IOException { for (int iteration = 1; iteration <= iterations; iteration++) { long iterationStart = System.currentTimeMillis(); // Loop over every document in the corpus for (int doc = 0; doc < data.size(); doc++) { FeatureSequence tokenSequence = (FeatureSequence) data.get(doc).instance.getData(); LabelSequence topicSequence = (LabelSequence) data.get(doc).topicSequence; sampleTopicsForOneDoc (tokenSequence, topicSequence); } long elapsedMillis = System.currentTimeMillis() - iterationStart; logger.info(iteration + "\t" + elapsedMillis + "ms\t" + numTopics); // Occasionally print more information if (showTopicsInterval != 0 && iteration % showTopicsInterval == 0) { logger.info("<" + iteration + "> #Topics: " + numTopics + "\n" + topWords (wordsPerTopic)); } } }
/**
 * Writes the gzip-compressed Gibbs sampling state to the given file.
 *
 * Uses try-with-resources so the stream chain is always closed — the
 * original manual close() leaked the stream (and left the gzip trailer
 * unwritten, producing a corrupt file) if printState(out) threw.
 *
 * @param f destination file for the compressed state
 * @throws IOException if the file cannot be created or written
 */
public void printState (File f) throws IOException {
	try (PrintStream out = new PrintStream(
			new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(f))))) {
		printState(out);
	}
}
/**
 * Command-line entry point: trains a nonparametric topic model on a saved
 * instance list.
 *
 * Usage: args[0] = serialized InstanceList file (required);
 *        args[1] = initial number of topics (optional, default 200).
 *
 * @throws IOException if sampling output cannot be written
 */
public static void main (String[] args) throws IOException {
	// Fail with a usage message instead of an opaque
	// ArrayIndexOutOfBoundsException when no file is given.
	if (args.length < 1) {
		System.err.println("Usage: NPTopicModel <instance-list-file> [initial-num-topics]");
		System.exit(1);
	}

	InstanceList training = InstanceList.load (new File(args[0]));

	// Initial topic count; the nonparametric sampler may grow or shrink it.
	int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;

	// Hyperparameters (5.0, 10.0, 0.1) — presumably alpha, gamma, beta;
	// confirm against the NPTopicModel constructor.
	NPTopicModel lda = new NPTopicModel (5.0, 10.0, 0.1);
	lda.addInstances(training, numTopics);
	lda.sample(1000);
}
/**
 * Writes the gzip-compressed Gibbs sampling state to the given file.
 *
 * Uses try-with-resources so the stream chain is always closed — the
 * original manual close() leaked the stream (and left the gzip trailer
 * unwritten, producing a corrupt file) if printState(out) threw.
 *
 * @param f destination file for the compressed state
 * @throws IOException if the file cannot be created or written
 */
public void printState (File f) throws IOException {
	try (PrintStream out = new PrintStream(
			new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(f))))) {
		printState(out);
	}
}