public void annotate(Annotation annotation) { // turn the annotation into a sentence if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) { for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); ArrayList<TaggedWord> tagged = null; tagged = pos.apply(tokens); for (int i = 0; i < tokens.size(); ++i) { tokens.get(i).set(PartOfSpeechAnnotation.class, tagged.get(i).tag()); } } } else { throw new RuntimeException("unable to find words/tokens in: " + annotation); } }
public void annotate(Annotation annotation) { if (annotation.has(TokensAnnotation.class)) { List<CoreLabel> tokens = annotation.get(TokensAnnotation.class); List<CoreLabel> dateTokens = new ArrayList<CoreLabel>(); List<CoreLabel> newTokens = process(tokens, dateTokens); // We assume that if someone is using this annotator, they don't // want the old tokens any more and get rid of them annotation.set(TokensAnnotation.class, newTokens); // if the doc date was found, save it. it is used by SUTime (inside the "ner" annotator) if(dateTokens.size() > 0){ StringBuffer os = new StringBuffer(); boolean first = true; for (CoreLabel t : dateTokens) { if (!first) os.append(" "); os.append(t.word()); first = false; } //System.err.println("DOC DATE IS: " + os.toString()); annotation.set(DocDateAnnotation.class, os.toString()); } } }
@Override public void annotate(Annotation annotation) { if (annotation.has(CoreAnnotations.TokensAnnotation.class)) { List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); List<CoreLabel> newTokens = process(annotation, tokens); // We assume that if someone is using this annotator, they don't // want the old tokens any more and get rid of them annotation.set(CoreAnnotations.TokensAnnotation.class, newTokens); } }
// NOTE(review): truncated fragment — the braces opened here are closed
// outside this snippet. Sets up a per-call geocoding cache, then iterates
// the document's sentences (body not visible here).
Map<String, GeocodResult> cache = new HashMap<>();
if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
// NOTE(review): truncated fragment — method opening only; the per-sentence
// body and closing braces are outside this snippet.
@Override
public void annotate(Annotation annotation) {
  if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
/**
 * Projects each sentence's basic dependencies onto per-token CoNLL-style
 * annotations: a dependency label and the sentence-local index of the head
 * token (-1 for the root).
 *
 * @param annotation the document; must contain a SentencesAnnotation
 * @throws RuntimeException if the annotation contains no sentences
 */
@Override
public void annotate(Annotation annotation) {
  if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
    // Running count of tokens in earlier sentences. DepParseInfo apparently
    // indexes tokens document-wide, so sentence-local indices are shifted by
    // this offset — TODO confirm against DepParseInfo.
    int sentOffset = 0;
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      SemanticGraph dependencies = sentence.get(
          SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      // NOTE(review): constructed even when dependencies is null; presumably
      // DepParseInfo tolerates a null graph — verify its constructor.
      DepParseInfo info = new DepParseInfo(dependencies);
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      if (dependencies != null) {
        for (int i = 0; i < tokens.size(); i++) {
          CoreLabel token = tokens.get(i);
          // Document-wide token index for this token.
          int j = i + sentOffset;
          // The +1 suggests DepParseInfo maps are 1-based (0 reserved for
          // root) — TODO confirm.
          String label = info.getDepLabels().get(j + 1);
          // Convert the 1-based, document-wide head index back to a 0-based,
          // sentence-local one; the root comes out as -1.
          int head = info.getDepParents().get(j + 1) - 1 - sentOffset;
          if (head < -1) {
            // Clamp heads pointing before this sentence to the root marker.
            head = -1;
          }
          token.set(CoreAnnotations.CoNLLDepTypeAnnotation.class, label);
          token.set(CoreAnnotations.CoNLLDepParentIndexAnnotation.class, head);
        }
      }
      sentOffset += tokens.size();
    }
  } else {
    throw new RuntimeException("unable to find words/tokens in: " + annotation);
  }
}
// NOTE(review): truncated fragment — opens a reader over the document text;
// the tokenization and closing braces are outside this snippet.
if (annotation.has(CoreAnnotations.TextAnnotation.class)) {
  String text = annotation.get(CoreAnnotations.TextAnnotation.class);
  Reader r = new StringReader(text);
  // don't wrap in BufferedReader. It gives you nothing for in memory String unless you need the readLine() method!
/** * Does the actual work of splitting TextAnnotation into CoreLabels, * which are then attached to the TokensAnnotation. */ @Override public void annotate(Annotation annotation) { Timing timer = null; if (VERBOSE) { timer = new Timing(); timer.start(); System.err.print("Tokenizing ... "); } if (annotation.has(CoreAnnotations.TextAnnotation.class)) { String text = annotation.get(CoreAnnotations.TextAnnotation.class); Reader r = new StringReader(text); // don't wrap in BufferedReader. It gives you nothing for in memory String unless you need the readLine() method! List<CoreLabel> tokens = getTokenizer(r).tokenize(); // cdm 2010-05-15: This is now unnecessary, as it is done in CoreLabelTokenFactory // for (CoreLabel token: tokens) { // token.set(CoreAnnotations.TextAnnotation.class, token.get(CoreAnnotations.TextAnnotation.class)); // } annotation.set(CoreAnnotations.TokensAnnotation.class, tokens); if (VERBOSE) { timer.stop("done."); System.err.println("output: " + annotation.get(CoreAnnotations.TokensAnnotation.class)); System.err.println(); } } else { throw new RuntimeException("unable to find text in annotation: " + annotation); } }
public void annotate(Annotation annotation) { if (VERBOSE) { System.err.print("Finding lemmas ..."); } Morphology morphology = new Morphology(); if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) { for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); //System.err.println("Lemmatizing sentence: " + tokens); for (CoreLabel token : tokens) { String text = token.get(CoreAnnotations.TextAnnotation.class); String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); addLemma(morphology, CoreAnnotations.LemmaAnnotation.class, token, text, posTag); } } } else { throw new RuntimeException("Unable to find words/tokens in: " + annotation); } }
public void annotate(Annotation annotation) { if (VERBOSE) { System.err.print("Finding lemmas ..."); } Morphology morphology = new Morphology(); if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) { for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); //System.err.println("Lemmatizing sentence: " + tokens); for (CoreLabel token : tokens) { String text = token.get(CoreAnnotations.TextAnnotation.class); String posTag = token.get(PartOfSpeechAnnotation.class); this.addLemma(morphology, LemmaAnnotation.class, token, text, posTag); } } } else { throw new RuntimeException("Unable to find words/tokens in: " + annotation); } }
// NOTE(review): truncated fragment — the if-body and closing brace are
// outside this snippet.
System.err.print("PTB tokenizing...");
if (annotation.has(CoreAnnotations.TokensAnnotation.class)) {
// NOTE(review): truncated fragment — method opening only; the body and
// closing braces are outside this snippet.
@Override
public void annotate(Annotation annotation) {
  if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
// NOTE(review): truncated fragment — precondition check at the top of the
// sentence splitter; the rest of the method is outside this snippet.
System.err.print("Sentence splitting ...");
if ( ! annotation.has(CoreAnnotations.TokensAnnotation.class)) {
  throw new IllegalArgumentException("WordsToSentencesAnnotator: unable to find words/tokens in: " + annotation);
/**
 * POS-tags every sentence, either in-place on this thread (nThreads == 1) or
 * by fanning sentences out to a MulticoreWrapper pool of POSTaggerProcessors.
 *
 * @param annotation the document; must contain a SentencesAnnotation
 * @throws RuntimeException if the annotation contains no sentences
 */
@Override
public void annotate(Annotation annotation) {
  // turn the annotation into a sentence
  if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
    if (nThreads == 1) {
      // Single-threaded path: tag sentences sequentially, in order.
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        doOneSentence(sentence);
      }
    } else {
      MulticoreWrapper<CoreMap, CoreMap> wrapper = new MulticoreWrapper<CoreMap, CoreMap>(nThreads, new POSTaggerProcessor());
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        wrapper.put(sentence);
        // Drain finished results as we submit, so the wrapper's internal
        // queue does not grow unboundedly. Results are discarded here —
        // presumably the processor mutates each sentence in place; confirm
        // against POSTaggerProcessor.
        while (wrapper.peek()) {
          wrapper.poll();
        }
      }
      // Wait for all outstanding sentences, then drain whatever remains.
      wrapper.join();
      while (wrapper.peek()) {
        wrapper.poll();
      }
    }
  } else {
    throw new RuntimeException("unable to find words/tokens in: " + annotation);
  }
}