/**
 * Builds a {@code ViewNames.LEMMA} view for the given TextAnnotation, registers it on that
 * TextAnnotation, and hands the same view back to the caller.
 *
 * <p>One constituent is added per token: token {@code i} receives the label returned by
 * {@code getLemma(inputTa, i)} over the span {@code [i, i + 1)}.
 *
 * @param inputTa the TextAnnotation to read tokens from and to attach the new view to
 * @return the view that was added to {@code inputTa} under {@code ViewNames.LEMMA}
 * @throws IOException declared by this method; NOTE(review): presumably propagated from
 *         {@code getLemma} -- confirm against its declaration
 */
public View createLemmaView(TextAnnotation inputTa) throws IOException {
    final int tokenCount = inputTa.getTokens().length;
    TokenLabelView view = new TokenLabelView(ViewNames.LEMMA, NAME, inputTa, 1.0);

    int tokenIndex = 0;
    while (tokenIndex < tokenCount) {
        // Each lemma labels exactly one token: the span [tokenIndex, tokenIndex + 1).
        view.addConstituent(
                new Constituent(
                        getLemma(inputTa, tokenIndex),
                        ViewNames.LEMMA,
                        inputTa,
                        tokenIndex,
                        tokenIndex + 1));
        tokenIndex++;
    }

    inputTa.addView(ViewNames.LEMMA, view);
    return view;
}
/**
 * Builds a {@code ViewNames.LEMMA} view for the given TextAnnotation, registers it on that
 * TextAnnotation, and hands the same view back to the caller.
 *
 * <p>One constituent is added per token: token {@code i} receives the label returned by
 * {@code getLemma(inputTa, i)} over the span {@code [i, i + 1)}.
 *
 * @param inputTa the TextAnnotation to read tokens from and to attach the new view to
 * @return the view that was added to {@code inputTa} under {@code ViewNames.LEMMA}
 * @throws IOException declared by this method; NOTE(review): presumably propagated from
 *         {@code getLemma} -- confirm against its declaration
 */
public View createLemmaView(TextAnnotation inputTa) throws IOException {
    final int tokenCount = inputTa.getTokens().length;
    TokenLabelView view = new TokenLabelView(ViewNames.LEMMA, NAME, inputTa, 1.0);

    int tokenIndex = 0;
    while (tokenIndex < tokenCount) {
        // Each lemma labels exactly one token: the span [tokenIndex, tokenIndex + 1).
        view.addConstituent(
                new Constituent(
                        getLemma(inputTa, tokenIndex),
                        ViewNames.LEMMA,
                        inputTa,
                        tokenIndex,
                        tokenIndex + 1));
        tokenIndex++;
    }

    inputTa.addView(ViewNames.LEMMA, view);
    return view;
}
/**
 * Runs {@code pipeline} over the raw text of {@code ta} and adds a token-label view named
 * {@code viewName} whose labels are the values the pipeline stores under
 * {@code CoreAnnotations.TrueCaseTextAnnotation} for each of its tokens.
 *
 * <p>The pipeline tokenizes independently of {@code ta}, so each pipeline token is aligned by
 * character offsets: the constituents of the {@code ViewNames.TOKENS} view overlapping the
 * token's character span are collected, and the label is attached to the single-token span
 * {@code [endIndex - 1, endIndex)}, where {@code endIndex} is the largest end span among them.
 *
 * @param ta the TextAnnotation to annotate; its {@code ViewNames.TOKENS} view is used for
 *        the alignment
 * @throws AnnotatorException if a pipeline token overlaps no constituent of the TOKENS view
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation document = new Annotation(ta.text);
    pipeline.annotate(document);

    TokenLabelView vu = new TokenLabelView(viewName, ta);
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String trueCase = token.get(CoreAnnotations.TrueCaseTextAnnotation.class);

            int beginCharOffset = token.beginPosition();
            // NOTE(review): presumably endPosition() is exclusive, hence the -1 to address the
            // token's last character -- confirm against the CoreLabel documentation.
            int endCharOffset = token.endPosition() - 1;

            List<Constituent> overlappingCons = ta.getView(ViewNames.TOKENS)
                    .getConstituentsOverlappingCharSpan(beginCharOffset, endCharOffset);

            // Fail with the declared exception type and the offending offsets instead of a
            // bare NoSuchElementException when the two tokenizations cannot be aligned.
            int endIndex = overlappingCons.stream()
                    .max(Comparator.comparing(Constituent::getEndSpan))
                    .orElseThrow(() -> new AnnotatorException(
                            "Cannot add view '" + viewName + "': no constituent of view '"
                                    + ViewNames.TOKENS + "' overlaps character span ["
                                    + beginCharOffset + ", " + endCharOffset + "]"))
                    .getEndSpan();

            vu.addConstituent(new Constituent(trueCase, viewName, ta, endIndex - 1, endIndex));
        }
    }
    ta.addView(viewName, vu);
}
} // closes the enclosing type declaration, whose header lies outside this block
/**
 * Runs {@code pipeline} over the raw text of {@code ta} and adds a token-label view named
 * {@code viewName} whose labels are the values the pipeline stores under
 * {@code CoreAnnotations.TrueCaseTextAnnotation} for each of its tokens.
 *
 * <p>The pipeline tokenizes independently of {@code ta}, so each pipeline token is aligned by
 * character offsets: the constituents of the {@code ViewNames.TOKENS} view overlapping the
 * token's character span are collected, and the label is attached to the single-token span
 * {@code [endIndex - 1, endIndex)}, where {@code endIndex} is the largest end span among them.
 *
 * @param ta the TextAnnotation to annotate; its {@code ViewNames.TOKENS} view is used for
 *        the alignment
 * @throws AnnotatorException if a pipeline token overlaps no constituent of the TOKENS view
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation document = new Annotation(ta.text);
    pipeline.annotate(document);

    TokenLabelView vu = new TokenLabelView(viewName, ta);
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String trueCase = token.get(CoreAnnotations.TrueCaseTextAnnotation.class);

            int beginCharOffset = token.beginPosition();
            // NOTE(review): presumably endPosition() is exclusive, hence the -1 to address the
            // token's last character -- confirm against the CoreLabel documentation.
            int endCharOffset = token.endPosition() - 1;

            List<Constituent> overlappingCons = ta.getView(ViewNames.TOKENS)
                    .getConstituentsOverlappingCharSpan(beginCharOffset, endCharOffset);

            // Fail with the declared exception type and the offending offsets instead of a
            // bare NoSuchElementException when the two tokenizations cannot be aligned.
            int endIndex = overlappingCons.stream()
                    .max(Comparator.comparing(Constituent::getEndSpan))
                    .orElseThrow(() -> new AnnotatorException(
                            "Cannot add view '" + viewName + "': no constituent of view '"
                                    + ViewNames.TOKENS + "' overlaps character span ["
                                    + beginCharOffset + ", " + endCharOffset + "]"))
                    .getEndSpan();

            vu.addConstituent(new Constituent(trueCase, viewName, ta, endIndex - 1, endIndex));
        }
    }
    ta.addView(viewName, vu);
}
} // closes the enclosing type declaration, whose header lies outside this block
/**
 * Runs {@code pipeline} over the raw text of {@code ta} and adds a token-label view named
 * {@code viewName} whose labels are the values the pipeline stores under
 * {@code CoreAnnotations.TrueCaseTextAnnotation} for each of its tokens.
 *
 * <p>The pipeline tokenizes independently of {@code ta}, so each pipeline token is aligned by
 * character offsets: the constituents of the {@code ViewNames.TOKENS} view overlapping the
 * token's character span are collected, and the label is attached to the single-token span
 * {@code [endIndex - 1, endIndex)}, where {@code endIndex} is the largest end span among them.
 *
 * @param ta the TextAnnotation to annotate; its {@code ViewNames.TOKENS} view is used for
 *        the alignment
 * @throws AnnotatorException if a pipeline token overlaps no constituent of the TOKENS view
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation document = new Annotation(ta.text);
    pipeline.annotate(document);

    TokenLabelView vu = new TokenLabelView(viewName, ta);
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String trueCase = token.get(CoreAnnotations.TrueCaseTextAnnotation.class);

            int beginCharOffset = token.beginPosition();
            // NOTE(review): presumably endPosition() is exclusive, hence the -1 to address the
            // token's last character -- confirm against the CoreLabel documentation.
            int endCharOffset = token.endPosition() - 1;

            List<Constituent> overlappingCons = ta.getView(ViewNames.TOKENS)
                    .getConstituentsOverlappingCharSpan(beginCharOffset, endCharOffset);

            // Fail with the declared exception type and the offending offsets instead of a
            // bare NoSuchElementException when the two tokenizations cannot be aligned.
            int endIndex = overlappingCons.stream()
                    .max(Comparator.comparing(Constituent::getEndSpan))
                    .orElseThrow(() -> new AnnotatorException(
                            "Cannot add view '" + viewName + "': no constituent of view '"
                                    + ViewNames.TOKENS + "' overlaps character span ["
                                    + beginCharOffset + ", " + endCharOffset + "]"))
                    .getEndSpan();

            vu.addConstituent(new Constituent(trueCase, viewName, ta, endIndex - 1, endIndex));
        }
    }
    ta.addView(viewName, vu);
}
} // closes the enclosing type declaration, whose header lies outside this block
/**
 * Runs {@code pipeline} over the raw text of {@code ta} and adds a token-label view named
 * {@code viewName} whose labels are the values the pipeline stores under
 * {@code CoreAnnotations.TrueCaseTextAnnotation} for each of its tokens.
 *
 * <p>The pipeline tokenizes independently of {@code ta}, so each pipeline token is aligned by
 * character offsets: the constituents of the {@code ViewNames.TOKENS} view overlapping the
 * token's character span are collected, and the label is attached to the single-token span
 * {@code [endIndex - 1, endIndex)}, where {@code endIndex} is the largest end span among them.
 *
 * @param ta the TextAnnotation to annotate; its {@code ViewNames.TOKENS} view is used for
 *        the alignment
 * @throws AnnotatorException if a pipeline token overlaps no constituent of the TOKENS view
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation document = new Annotation(ta.text);
    pipeline.annotate(document);

    TokenLabelView vu = new TokenLabelView(viewName, ta);
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String trueCase = token.get(CoreAnnotations.TrueCaseTextAnnotation.class);

            int beginCharOffset = token.beginPosition();
            // NOTE(review): presumably endPosition() is exclusive, hence the -1 to address the
            // token's last character -- confirm against the CoreLabel documentation.
            int endCharOffset = token.endPosition() - 1;

            List<Constituent> overlappingCons = ta.getView(ViewNames.TOKENS)
                    .getConstituentsOverlappingCharSpan(beginCharOffset, endCharOffset);

            // Fail with the declared exception type and the offending offsets instead of a
            // bare NoSuchElementException when the two tokenizations cannot be aligned.
            int endIndex = overlappingCons.stream()
                    .max(Comparator.comparing(Constituent::getEndSpan))
                    .orElseThrow(() -> new AnnotatorException(
                            "Cannot add view '" + viewName + "': no constituent of view '"
                                    + ViewNames.TOKENS + "' overlaps character span ["
                                    + beginCharOffset + ", " + endCharOffset + "]"))
                    .getEndSpan();

            vu.addConstituent(new Constituent(trueCase, viewName, ta, endIndex - 1, endIndex));
        }
    }
    ta.addView(viewName, vu);
}
} // closes the enclosing type declaration, whose header lies outside this block
/**
 * Annotates the TextAnnotation with a POS view and adds that view to the TextAnnotation.
 *
 * <p>The record is converted to LBJava tokens, each token is tagged with {@code tagger}, and
 * the resulting label is attached to the span of the constituent at the same position in the
 * record's {@code ViewNames.TOKENS} view.
 *
 * @param record TextAnnotation to annotate
 * @throws AnnotatorException if the record lacks either the {@code tokensfield} view or the
 *         {@code sentencesfield} view
 */
@Override
public void addView(TextAnnotation record) throws AnnotatorException {
    // Both prerequisite views are needed, so the record is rejected when EITHER is missing
    // (the previous && only rejected records that were missing both).
    if (!record.hasView(tokensfield) || !record.hasView(sentencesfield)) {
        throw new AnnotatorException("Record must be tokenized and sentence split first");
    }

    long startTime = System.currentTimeMillis();
    List<Token> input = LBJavaUtils.recordToLBJTokens(record);
    List<Constituent> tokens = record.getView(ViewNames.TOKENS).getConstituents();
    TokenLabelView posView =
            new TokenLabelView(ViewNames.POS, getAnnotatorName(), record, 1.0);

    // The LBJava tokens and the TOKENS-view constituents are walked in lockstep.
    // NOTE(review): this assumes recordToLBJTokens yields one token per TOKENS constituent,
    // in the same order -- confirm against LBJavaUtils.
    int tcounter = 0;
    for (Token lbjtoken : input) {
        Constituent token = tokens.get(tcounter);
        // Tag each token once; the earlier extra call discarded its result.
        Constituent label = new Constituent(tagger.discreteValue(lbjtoken), ViewNames.POS,
                record, token.getStartSpan(), token.getEndSpan());
        posView.addConstituent(label);
        tcounter++;
    }

    long endTime = System.currentTimeMillis();
    logger.debug("Tagged input in {}ms", endTime - startTime);

    record.addView(ViewNames.POS, posView);
}
/**
 * Annotates the TextAnnotation with a POS view and adds that view to the TextAnnotation.
 *
 * <p>The record is converted to LBJava tokens, each token is tagged with {@code tagger}, and
 * the resulting label is attached to the span of the constituent at the same position in the
 * record's {@code ViewNames.TOKENS} view.
 *
 * @param record TextAnnotation to annotate
 * @throws AnnotatorException if the record lacks either the {@code tokensfield} view or the
 *         {@code sentencesfield} view
 */
@Override
public void addView(TextAnnotation record) throws AnnotatorException {
    // Both prerequisite views are needed, so the record is rejected when EITHER is missing
    // (the previous && only rejected records that were missing both).
    if (!record.hasView(tokensfield) || !record.hasView(sentencesfield)) {
        throw new AnnotatorException("Record must be tokenized and sentence split first");
    }

    long startTime = System.currentTimeMillis();
    List<Token> input = LBJavaUtils.recordToLBJTokens(record);
    List<Constituent> tokens = record.getView(ViewNames.TOKENS).getConstituents();
    TokenLabelView posView =
            new TokenLabelView(ViewNames.POS, getAnnotatorName(), record, 1.0);

    // The LBJava tokens and the TOKENS-view constituents are walked in lockstep.
    // NOTE(review): this assumes recordToLBJTokens yields one token per TOKENS constituent,
    // in the same order -- confirm against LBJavaUtils.
    int tcounter = 0;
    for (Token lbjtoken : input) {
        Constituent token = tokens.get(tcounter);
        // Tag each token once; the earlier extra call discarded its result.
        Constituent label = new Constituent(tagger.discreteValue(lbjtoken), ViewNames.POS,
                record, token.getStartSpan(), token.getEndSpan());
        posView.addConstituent(label);
        tcounter++;
    }

    long endTime = System.currentTimeMillis();
    logger.debug("Tagged input in {}ms", endTime - startTime);

    record.addView(ViewNames.POS, posView);
}
public TextAnnotation(String corpusId, String id, String text, IntPair[] characterOffsets, String[] tokens, int[] sentenceEndPositions) { super(); // if the string is non-empty, the position of the last element should equal to the number of tokens if (tokens.length > 0 && sentenceEndPositions[sentenceEndPositions.length - 1] != tokens.length) throw new IllegalArgumentException("Invalid sentence boundary. " + "Last element should be the number of tokens"); this.corpusId = corpusId; this.id = id; this.text = text; this.symtab = new SymbolTable(); this.setTokens(tokens, characterOffsets); SpanLabelView view = new SpanLabelView(ViewNames.SENTENCE, "UserSpecified", this, 1d); int start = 0; for (int s : sentenceEndPositions) { view.addSpanLabel(start, s, ViewNames.SENTENCE, 1d); start = s; } this.addView(ViewNames.SENTENCE, view); // Add a TOKENS view in order to access tokens the same way as everything else in the // sentence TokenLabelView tokenLabelView = new TokenLabelView(ViewNames.TOKENS, "UserSpecified", this, 1d); for (int i = 0; i < tokens.length; i++) { tokenLabelView.addConstituent(new Constituent("", ViewNames.TOKENS, this, i, i + 1)); } this.addView(ViewNames.TOKENS, tokenLabelView); }
public TextAnnotation(String corpusId, String id, String text, IntPair[] characterOffsets, String[] tokens, int[] sentenceEndPositions) { super(); // if the string is non-empty, the position of the last element should equal to the number of tokens if (tokens.length > 0 && sentenceEndPositions[sentenceEndPositions.length - 1] != tokens.length) throw new IllegalArgumentException("Invalid sentence boundary. " + "Last element should be the number of tokens"); this.corpusId = corpusId; this.id = id; this.text = text; this.symtab = new SymbolTable(); this.setTokens(tokens, characterOffsets); SpanLabelView view = new SpanLabelView(ViewNames.SENTENCE, "UserSpecified", this, 1d); int start = 0; for (int s : sentenceEndPositions) { view.addSpanLabel(start, s, ViewNames.SENTENCE, 1d); start = s; } this.addView(ViewNames.SENTENCE, view); // Add a TOKENS view in order to access tokens the same way as everything else in the // sentence TokenLabelView tokenLabelView = new TokenLabelView(ViewNames.TOKENS, "UserSpecified", this, 1d); for (int i = 0; i < tokens.length; i++) { tokenLabelView.addConstituent(new Constituent("", ViewNames.TOKENS, this, i, i + 1)); } this.addView(ViewNames.TOKENS, tokenLabelView); }