// NOTE(review): fragment of an ensemble-classification loop; the enclosing method and the
// loop declaring index 'i' are not visible in this chunk, so the code is kept byte-identical.
// It runs base classifier 0 and then classifier i over the same token list and collects the
// per-classifier outputs into baseOutputs — presumably for later vote/merge; confirm in the full file.
List<IN> output = baseClassifiers.get(0).classifySentence(tokens); output = baseClassifiers.get(i).classifySentence(tokens); baseOutputs.add(output);
// NOTE(review): fragment of GUI tagging code (both if/else branches and their try block lie
// outside this view); kept byte-identical. Each branch runs classifyWithInlineXML over the
// current text, records tagged/untagged contents, and begins building a pattern over the
// classifier's label set (the for-loop bodies are missing here). The leading rethrow wraps a
// checked exception from the preceding (unseen) try block.
throw new RuntimeException(e); String labeledText = classifier.classifyWithInlineXML(text); taggedContents = labeledText; untaggedContents = text; Set<String> tags = classifier.labels(); String background = classifier.backgroundSymbol(); String tagPattern = ""; for (String tag : tags) { } else { untaggedContents = editorPane.getText(); taggedContents = classifier.classifyWithInlineXML(untaggedContents); Set<String> tags = classifier.labels(); String background = classifier.backgroundSymbol(); String tagPattern = ""; for (String tag : tags) {
// NOTE(review): fragment of a demo main() (NERDemo-style); surrounding declarations (args,
// fileContents, example array start) are outside this view, so the code is kept byte-identical.
// It demonstrates the classifier API surface: classify(String), classifyFile, character-offset
// output, k-best answers, probability printing, and the various classifyToString output formats.
List<List<CoreLabel>> out = classifier.classify(fileContents); for (List<CoreLabel> sentence : out) { for (CoreLabel word : sentence) { out = classifier.classifyFile(args[1]); for (List<CoreLabel> sentence : out) { for (CoreLabel word : sentence) { List<Triple<String, Integer, Integer>> list = classifier.classifyToCharacterOffsets(fileContents); for (Triple<String, Integer, Integer> item : list) { System.out.println(item.first() + ": " + fileContents.substring(item.second(), item.third())); DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter(); classifier.classifyAndWriteAnswersKBest(args[1], 10, readerAndWriter); classifier.printProbs(args[1], readerAndWriter); "I go to school at Stanford University, which is located in California." }; for (String str : example) { System.out.println(classifier.classifyToString(str)); System.out.print(classifier.classifyToString(str, "slashTags", false)); System.out.print(classifier.classifyToString(str, "tabbedEntities", false)); System.out.println(classifier.classifyWithInlineXML(str)); System.out.println(classifier.classifyToString(str, "xml", true));
/**
 * Runs the classifier over a collection of test files and prints the answers,
 * optionally logging P/R/F1 scores.
 *
 * @param testFiles    The files to classify.
 * @param readerWriter Reader/writer defining the input format and output rendering.
 * @param outputScores Whether to calculate and log performance scores.
 * @throws IOException If a test file cannot be read.
 */
public void classifyFilesAndWriteAnswers(Collection<File> testFiles,
                                         DocumentReaderAndWriter<IN> readerWriter,
                                         boolean outputScores) throws IOException {
  // Build one ObjectBank over all files, then reuse the document-level entry point.
  ObjectBank<List<IN>> docs = makeObjectBankFromFiles(testFiles, readerWriter);
  classifyAndWriteAnswers(docs, readerWriter, outputScores);
}
/**
 * Classify a List of IN using whatever additional information is passed in globalInfo.
 * Used by SUTime (NumberSequenceClassifier), which requires the doc date to resolve
 * relative dates.
 *
 * @param tokenSequence The List of IN to be classified.
 * @param doc Document-level context passed through to the classifier (e.g., the
 *            document date used by SUTime for relative-date resolution)
 * @param sentence Sentence-level context for the tokens being classified
 * @return The classified List of IN, where the classifier output for
 *         each token is stored in its "answer" field.
 */
public List<IN> classifySentenceWithGlobalInformation(List<? extends HasWord> tokenSequence, final CoreMap doc, final CoreMap sentence) {
  // Convert the raw word sequence into classifier tokens, classify in place, return them.
  List<IN> document = preprocessTokens(tokenSequence);
  classifyWithGlobalInformation(document, doc, sentence);
  return document;
}
/**
 * Load a test file, run the classifier on it, and then print the answers to
 * stdout (with timing to stderr). This uses the value of flags.documentReader
 * to determine testFile format. By default, this means that it is set up to
 * read a tab-separated columns test file.
 *
 * @param testFile The file to test on.
 * @param outputScores Whether to calculate and then log performance scores (P/R/F1)
 * @return A Triple of P/R/F1 if outputScores is true, else null
 * @throws IOException If the test file cannot be read
 */
public Triple<Double,Double,Double> classifyAndWriteAnswers(String testFile, boolean outputScores) throws IOException {
  // Delegate to the general overload with the flag-configured default reader/writer.
  return classifyAndWriteAnswers(testFile, defaultReaderAndWriter(), outputScores);
}
/**
 * Segments a sentence by classifying it and rendering the answers through the
 * given reader/writer, then splitting the rendered text on whitespace.
 *
 * @param sentence        The text to segment.
 * @param readerAndWriter Defines how the sentence is read and how answers are printed.
 * @return The whitespace-delimited segments of the printed output.
 */
public List<String> segmentString(String sentence, DocumentReaderAndWriter<IN> readerAndWriter) {
  ObjectBank<List<IN>> docs = makeObjectBankFromString(sentence, readerAndWriter);
  StringWriter buffer = new StringWriter();
  PrintWriter out = new PrintWriter(buffer);
  for (List<IN> doc : docs) {
    classify(doc);
    // Render each classified document followed by a newline separator.
    readerAndWriter.printAnswers(doc, out);
    out.println();
  }
  out.close();
  return Arrays.asList(buffer.toString().split("\\s"));
}
// NOTE(review): interior fragment of a classify-and-report loop (the enclosing method,
// numWords/outStream/entity counters, and the timing string that ends in "words per second."
// are declared outside this view); kept byte-identical. Classifies each document, writes its
// answers, accumulates entity TP/FP/FN, and finally prints P/R/F1 if all docs were countable.
int numDocs = 0; for (List<IN> doc : documents) { classify(doc); numWords += doc.size(); writeAnswers(doc, outStream, readerWriter); resultsCounted = (resultsCounted && countResults(doc, entityTP, entityFP, entityFN)); numDocs++; " words per second."); if (resultsCounted) { printResults(entityTP, entityFP, entityFN);
// NOTE(review): fragment of the multithreaded classify-and-write path (method signature,
// the wrapper's construction, and the loop feeding it are outside this view); kept
// byte-identical. Drains completed results from the thread-pool wrapper, writes answers, and
// accumulates entity counts. The writeAnswers/countResults pair appears several times here —
// likely the in-loop drain, the per-document path, and the final flush after all input is
// submitted; confirm against the original file before assuming duplication is a defect.
throws IOException { if (flags.exportFeatures != null) { dumpFeatures(documents); while (wrapper.peek()) { List<IN> results = wrapper.poll(); writeAnswers(results, printWriter, readerWriter); resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN); writeAnswers(results, printWriter, readerWriter); resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN); while (wrapper.peek()) { List<IN> results = wrapper.poll(); writeAnswers(results, printWriter, readerWriter); resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN); " words per second."); if (outputScores) { return printResults(entityTP, entityFP, entityFN); } else { return null;
/** Rebuilds the tag-to-color map from the classifier's current label set. */
private void makeTagMaps() {
  String backgroundSymbol = classifier.backgroundSymbol();
  Set<String> tags = classifier.labels();
  // The background symbol gets no highlight color; all other labels are mapped.
  tagToColorMap = makeTagToColorMap(tags, backgroundSymbol);
}
/**
 * Returns a Sampler that draws label sequences for the given input from the
 * classifier's sequence model.
 *
 * @param input The tokens to sample labelings for.
 * @return A sampler whose drawSample() yields a fresh copy of the input tokens
 *         with sampled answers set on each token.
 */
public Sampler<List<IN>> getSampler(final List<IN> input) {
  return new Sampler<List<IN>>() {
    SequenceModel model = getSequenceModel(input);
    SequenceSampler sampler = new SequenceSampler();

    @Override
    public List<IN> drawSample() {
      // Sample one label index per token, then copy tokens with answers attached.
      int[] drawn = sampler.bestSequence(model);
      List<IN> sampled = new ArrayList<>(input.size());
      int pos = 0;
      for (IN token : input) {
        IN copy = tokenFactory.makeToken(token);
        copy.set(CoreAnnotations.AnswerAnnotation.class, classIndex.get(drawn[pos]));
        pos++;
        sampled.add(copy);
      }
      return sampled;
    }
  };
}
/**
 * Load a test file, run the classifier on it, and then print the answers to
 * stdout (with timing to stderr).
 *
 * @param testFile The file to test on.
 * @param readerWriter A reader and writer to use for the output
 * @param outputScores Whether to calculate and then log performance scores (P/R/F1)
 * @return A Triple of P/R/F1 if outputScores is true, else null
 * @throws IOException If the test file cannot be read
 */
public Triple<Double,Double,Double> classifyAndWriteAnswers(String testFile, DocumentReaderAndWriter<IN> readerWriter, boolean outputScores) throws IOException {
  // Read the file into documents, then reuse the ObjectBank-level entry point.
  ObjectBank<List<IN>> documents = makeObjectBankFromFile(testFile, readerWriter);
  return classifyAndWriteAnswers(documents, readerWriter, outputScores);
}
/**
 * Reads stdin line by line, classifying each line as its own document and
 * printing the answers (without scoring).
 *
 * @param readerWriter Defines input parsing and answer rendering for each line.
 * @throws IOException If reading stdin fails.
 */
public void classifyStdin(DocumentReaderAndWriter<IN> readerWriter) throws IOException {
  BufferedReader input = IOUtils.readerFromStdin(flags.inputEncoding);
  String line;
  while ((line = input.readLine()) != null) {
    Collection<List<IN>> docs = makeObjectBankFromString(line, readerWriter);
    if (docs.isEmpty() && flags.keepEmptySentences) {
      // Preserve blank lines in the output by substituting an empty sentence.
      docs = Collections.<List<IN>>singletonList(Collections.<IN>emptyList());
    }
    classifyAndWriteAnswers(docs, readerWriter, false);
  }
}
// NOTE(review): interior fragment of a k-best output loop (doc, k, numSentences, n, l, and
// the enclosing loops are declared outside this view); kept byte-identical. Scores the k best
// labelings of a document and writes each, wrapped in a <sentence> element carrying its
// log-probability and probability, to the PrintWriter.
Counter<List<IN>> kBest = classifyKBest(doc, CoreAnnotations.AnswerAnnotation.class, k); numWords += doc.size(); List<List<IN>> sorted = Counters.toSortedList(kBest); printWriter.println("<sentence id=" + numSentences + " k=" + n + " logProb=" + kBest.getCount(l) + " prob=" + Math.exp(kBest.getCount(l)) + '>'); writeAnswers(l, printWriter, readerAndWriter); printWriter.println("</sentence>"); n++;
// NOTE(review): fragment of classifyToString-style code (the enclosing method, the loop
// declaring 'doc', and the use of outFormat/docOutput are outside this view); kept
// byte-identical. Resolves the requested output style, parses the input string into
// documents with the plain-text reader/writer, and classifies each document.
PlainTextDocumentReaderAndWriter.OutputStyle outFormat = PlainTextDocumentReaderAndWriter.OutputStyle.fromShortName(outputFormat); DocumentReaderAndWriter<IN> textDocumentReaderAndWriter = plainTextReaderAndWriter(); ObjectBank<List<IN>> documents = makeObjectBankFromString(sentences, textDocumentReaderAndWriter); List<IN> docOutput = classify(doc); if (textDocumentReaderAndWriter instanceof PlainTextDocumentReaderAndWriter) {
/**
 * Classify the contents of a {@link String}. Plain text or XML is expected
 * and the {@link PlainTextDocumentReaderAndWriter} is used by default.
 * The classifier will treat each sentence as a separate document. The output can be
 * specified to be in a choice of formats: Output is in inline XML format
 * (e.g., &lt;PERSON&gt;Bill Smith&lt;/PERSON&gt; went to
 * &lt;LOCATION&gt;Paris&lt;/LOCATION&gt; .)
 *
 * @param sentences The string to be classified
 * @return A {@link String} annotated with classification information.
 */
public String classifyWithInlineXML(String sentences) {
  return classifyToString(sentences, "inlineXML", true);
}
// NOTE(review): fragment of a k-best printing method that writes to System.out (the method
// signature, the loops declaring doc/n/l, and the timing report are outside this view); kept
// byte-identical. Reads the test file, scores the k best labelings per document, and prints
// each inside a <sentence> wrapper with its log-probability and probability.
Timing timer = new Timing(); ObjectBank<List<IN>> documents = makeObjectBankFromFile(testFile, readerAndWriter); int numWords = 0; int numSentences = 0; Counter<List<IN>> kBest = classifyKBest(doc, AnswerAnnotation.class, k); numWords += doc.size(); List<List<IN>> sorted = Counters.toSortedList(kBest); System.out.println("<sentence id=" + numSentences + " k=" + n + " logProb=" + kBest.getCount(l) + " prob=" + Math.exp(kBest.getCount(l)) + '>'); writeAnswers(l, System.out, readerAndWriter); System.out.println("</sentence>"); n++;
// NOTE(review): fragment of a multithreaded answer-drain loop (wrapper, printWriter, entity
// counters, nf, and wordspersec are declared outside this view); kept byte-identical. Polls
// finished results from the thread-pool wrapper, writes answers, accumulates entity TP/FP/FN,
// then reports throughput and P/R/F1. As in the similar fragment above, the repeated
// writeAnswers/countResults pair is likely the in-loop drain plus the post-input flush —
// verify against the original file rather than assuming duplication.
while (wrapper.peek()) { List<IN> results = wrapper.poll(); writeAnswers(results, printWriter, readerWriter); resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN); writeAnswers(results, printWriter, readerWriter); resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN); while (wrapper.peek()) { List<IN> results = wrapper.poll(); writeAnswers(results, printWriter, readerWriter); resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN); + nf.format(wordspersec) + " words per second."); if (resultsCounted) { printResults(entityTP, entityFP, entityFN);
/** * Classify the contents of a file. * * @param filename * Contains the sentence(s) to be classified. * @return {@link List} of classified List of IN. */ public List<List<IN>> classifyFile(String filename) { ObjectBank<List<IN>> documents = makeObjectBankFromFile(filename, plainTextReaderAndWriter); List<List<IN>> result = new ArrayList<List<IN>>(); for (List<IN> document : documents) { // System.err.println(document); classify(document); List<IN> sentence = new ArrayList<IN>(); for (IN wi : document) { sentence.add(wi); // System.err.println(wi); } result.add(sentence); } return result; }
// NOTE(review): fragment of a document-preparation loop (the loop declaring wi/i, the
// document list's creation, and the wrapper/doc/sentence variables are outside this view);
// kept byte-identical. Initializes each token's answer to the background symbol, collects the
// tokens, then submits the document either to a thread-pool wrapper or to
// classifyWithGlobalInformation — presumably alternative paths in the missing surrounding code.
wi.set(AnswerAnnotation.class, backgroundSymbol()); document.add(wi); i++; wrapper.processDocument(document); classifyWithGlobalInformation(document, doc, sentence);