/**
 * The default way to create a {@link TextAnnotation} from pre-tokenized text.
 *
 * <p>The raw text is reconstructed by joining each sentence's tokens with a single
 * space and terminating every sentence with the platform line separator; the
 * character offsets come from {@code tokenizeTextSpan}, which must agree with that
 * reconstruction.
 *
 * @param corpusId identifier of the corpus this document belongs to
 * @param textId identifier of this document within the corpus
 * @param tokenizedSentences A list of sentences, each one being a list of tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromTokens(String corpusId, String textId,
        List<String[]> tokenizedSentences) {
    Tokenization tokenization = tokenizeTextSpan(tokenizedSentences);
    StringBuilder rawText = new StringBuilder();
    for (String[] sentenceTokens : tokenizedSentences) {
        rawText.append(StringUtils.join(sentenceTokens, ' '));
        rawText.append(System.lineSeparator());
    }
    return new TextAnnotation(corpusId, textId, rawText.toString(),
            tokenization.getCharacterOffsets(), tokenization.getTokens(),
            tokenization.getSentenceEndTokenIndexes());
}
// NOTE(review): this method is a byte-for-byte duplicate of the createTextAnnotationFromTokens
// definition immediately above — presumably the same method pulled in twice during extraction.
// If both copies live in the same class this will not compile; candidate for deduplication.
/** * The default way to create a {@link TextAnnotation} from pre-tokenized text. * * @param tokenizedSentences A list of sentences, each one being a list of tokens * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views. */ public static TextAnnotation createTextAnnotationFromTokens(String corpusId, String textId, List<String[]> tokenizedSentences) { Tokenization tokenization = tokenizeTextSpan(tokenizedSentences); StringBuilder text = new StringBuilder(); for (String[] sentenceTokens : tokenizedSentences) text.append(StringUtils.join(sentenceTokens, ' ')) .append(System.lineSeparator()); return new TextAnnotation(corpusId, textId, text.toString(), tokenization.getCharacterOffsets(), tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes()); }
// Fragment (enclosing method signature is outside this view): builds a TextAnnotation with
// empty corpus and document ids from precomputed character offsets (offs), token surface
// strings (surfs), and sentence-end token indexes (ends), then returns it.
TextAnnotation ta = new TextAnnotation("", "", text, offs, surfs, ends); return ta;
// NOTE(review): duplicate of the identical fragment above — same TextAnnotation construction
// with empty corpus/document ids from precomputed offsets, surfaces, and sentence ends.
TextAnnotation ta = new TextAnnotation("", "", text, offs, surfs, ends); return ta;
/**
 * Creates a {@link TextAnnotation} for text that has already been tokenized,
 * delegating entirely to the tokenization's character offsets, token surface
 * forms, and sentence-end token indexes.
 *
 * @param corpusId identifier of the corpus this document belongs to
 * @param textId identifier of this document within the corpus
 * @param text the raw document text the tokenization was computed over
 * @param tokenization precomputed token/sentence segmentation of {@code text}
 * @return the assembled {@link TextAnnotation}
 * @throws IllegalArgumentException if the tokenization is inconsistent with the text
 */
@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
        Tokenizer.Tokenization tokenization) throws IllegalArgumentException {
    return new TextAnnotation(
            corpusId,
            textId,
            text,
            tokenization.getCharacterOffsets(),
            tokenization.getTokens(),
            tokenization.getSentenceEndTokenIndexes());
}
/**
 * Builds a {@link TextAnnotation} from raw text plus a precomputed
 * {@link Tokenization}; all token and sentence boundary information is taken
 * directly from the tokenization.
 *
 * @param corpusId identifier of the corpus this document belongs to
 * @param textId identifier of this document within the corpus
 * @param text the raw document text the tokenization was computed over
 * @param tokenization precomputed token/sentence segmentation of {@code text}
 * @return the assembled {@link TextAnnotation}
 * @throws IllegalArgumentException if the tokenization is inconsistent with the text
 */
@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
        Tokenization tokenization) throws IllegalArgumentException {
    return new TextAnnotation(
            corpusId,
            textId,
            text,
            tokenization.getCharacterOffsets(),
            tokenization.getTokens(),
            tokenization.getSentenceEndTokenIndexes());
}
// NOTE(review): byte-for-byte duplicate of the createTextAnnotation(Tokenizer.Tokenization)
// override at the top of this group — likely the same method extracted twice.
@Override public TextAnnotation createTextAnnotation(String corpusId, String textId, String text, Tokenizer.Tokenization tokenization) throws IllegalArgumentException { return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(), tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes()); }
// NOTE(review): byte-for-byte duplicate of the createTextAnnotation(Tokenization) override
// above — differs from the Tokenizer.Tokenization variants only in the parameter's qualified name.
@Override public TextAnnotation createTextAnnotation(String corpusId, String textId, String text, Tokenization tokenization) throws IllegalArgumentException { return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(), tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes()); }
public TextAnnotation getTextAnnotation(String text){ Annotation document = new Annotation(text); pipeline.annotate(document); List<CoreLabel> tokens = new ArrayList<>(); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); int[] sen_ends = new int[sentences.size()]; int sen_idx = 0; for (CoreMap sentence : sentences) { for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { tokens.add(token); } sen_ends[sen_idx++] = tokens.size(); } String[] surfaces = new String[tokens.size()]; IntPair[] tokenCharOffsets = new IntPair[tokens.size()]; for(int i = 0; i < tokens.size(); i++){ surfaces[i] = tokens.get(i).originalText(); tokenCharOffsets[i] = new IntPair(tokens.get(i).beginPosition(), tokens.get(i).endPosition()); // System.out.println(surfaces[i]); // System.out.println(tokenCharOffsets[i]); } // System.out.println(sen_ends[0]); TextAnnotation ta = new TextAnnotation("", "", text, tokenCharOffsets, surfaces, sen_ends); return ta; }
// NOTE(review): byte-for-byte duplicate of the getTextAnnotation(String) method above,
// including its commented-out debug prints — likely the same method extracted twice.
public TextAnnotation getTextAnnotation(String text){ Annotation document = new Annotation(text); pipeline.annotate(document); List<CoreLabel> tokens = new ArrayList<>(); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); int[] sen_ends = new int[sentences.size()]; int sen_idx = 0; for (CoreMap sentence : sentences) { for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { tokens.add(token); } sen_ends[sen_idx++] = tokens.size(); } String[] surfaces = new String[tokens.size()]; IntPair[] tokenCharOffsets = new IntPair[tokens.size()]; for(int i = 0; i < tokens.size(); i++){ surfaces[i] = tokens.get(i).originalText(); tokenCharOffsets[i] = new IntPair(tokens.get(i).beginPosition(), tokens.get(i).endPosition()); // System.out.println(surfaces[i]); // System.out.println(tokenCharOffsets[i]); } // System.out.println(sen_ends[0]); TextAnnotation ta = new TextAnnotation("", "", text, tokenCharOffsets, surfaces, sen_ends); return ta; }
// Fragment (enclosing method not visible): after asserting the offset list matches the token
// count, slices the source annotation's text from the first token's start to the last token's
// end (both shifted by firstCharOffset), then builds a new TextAnnotation reusing the source's
// corpusId/id. The single sentence-end entry {tokenSize} makes the result one sentence spanning
// all tokens. NOTE(review): the assert is disabled unless the JVM runs with -ea.
assert tokensPairs.size() == tokenSize; String text = ta.getText().substring(tokensPairs.get(0).getFirst() + firstCharOffset, tokensPairs.get(tokensPairs.size()-1).getSecond() + firstCharOffset); TextAnnotation newTA = new TextAnnotation(ta.corpusId, ta.id, text, tokensPairs.toArray(new IntPair[tokenSize]), tokens.toArray(new String[tokenSize]), new int[]{tokenSize});
// NOTE(review): duplicate of the identical fragment above — same single-sentence
// TextAnnotation construction from a token sub-span of the source annotation.
assert tokensPairs.size() == tokenSize; String text = ta.getText().substring(tokensPairs.get(0).getFirst() + firstCharOffset, tokensPairs.get(tokensPairs.size()-1).getSecond() + firstCharOffset); TextAnnotation newTA = new TextAnnotation(ta.corpusId, ta.id, text, tokensPairs.toArray(new IntPair[tokenSize]), tokens.toArray(new String[tokenSize]), new int[]{tokenSize});
// Fragment (surrounding statement context not visible): constructs a TextAnnotation whose
// sentence-end indexes come from the second element of a Pair named `sentences` — presumably
// produced by a sentence-reading helper in the enclosing method; verify against that caller.
new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());
// NOTE(review): duplicate of the identical construction fragment above.
new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());
/**
 * Deserializes a {@link TextAnnotation} from its protobuf representation.
 *
 * <p>The base annotation is rebuilt from the proto's text, token list, and sentence
 * data (token character offsets are recomputed from the text). Each serialized view
 * is then restored as a top-k view; the SENTENCE view gets special handling — the
 * auto-created one is removed before the serialized one is added, and the
 * annotation's sentence structure is refreshed afterwards. Finally, all proto
 * properties are copied over as attributes.
 *
 * @param taImpl the protobuf message to deserialize
 * @return the reconstructed annotation
 * @throws Exception if a serialized view cannot be deserialized
 */
public static TextAnnotation readTextAnnotation(TextAnnotationProto taImpl) throws Exception {
    String text = taImpl.getText();
    String[] tokens = taImpl.getTokensList().toArray(new String[0]);
    Pair<Pair<String, Double>, int[]> sentenceInfo = readSentences(taImpl.getSentences());
    IntPair[] tokenOffsets = TokenUtils.getTokenOffsets(text, tokens);

    TextAnnotation ta = new TextAnnotation(taImpl.getCorpusId(), taImpl.getId(), text,
            tokenOffsets, tokens, sentenceInfo.getSecond());

    for (ViewProto viewProto : taImpl.getViewsList()) {
        String viewName = viewProto.getViewName();
        boolean isSentenceView = viewName.equals(ViewNames.SENTENCE);

        List<View> alternatives = new ArrayList<>();
        for (ViewDataProto viewData : viewProto.getViewDataList()) {
            alternatives.add(readViewData(viewData, ta));
        }

        // The serialized SENTENCE view replaces the one implied by the constructor,
        // so drop the existing view first and rebuild sentence structure afterwards.
        if (isSentenceView) {
            ta.removeView(viewName);
        }
        ta.addTopKView(viewName, alternatives);
        if (isSentenceView) {
            ta.setSentences();
        }
    }

    taImpl.getPropertiesMap().forEach(ta::addAttribute);
    return ta;
}
// NOTE(review): byte-for-byte duplicate of the readTextAnnotation(TextAnnotationProto)
// method above — likely the same deserializer extracted twice.
public static TextAnnotation readTextAnnotation(TextAnnotationProto taImpl) throws Exception { String corpusId = taImpl.getCorpusId(); String id = taImpl.getId(); String text = taImpl.getText(); String[] tokens = taImpl.getTokensList().toArray(new String[0]); Pair<Pair<String, Double>, int[]> sentences = readSentences(taImpl.getSentences()); IntPair[] offsets = TokenUtils.getTokenOffsets(text, tokens); TextAnnotation ta = new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond()); for (ViewProto view : taImpl.getViewsList()) { String viewName = view.getViewName(); List<View> topKViews = new ArrayList<>(); for (ViewDataProto viewData : view.getViewDataList()) { topKViews.add(readViewData(viewData, ta)); } if (viewName.equals(ViewNames.SENTENCE)) ta.removeView(viewName); ta.addTopKView(viewName, topKViews); if (viewName.equals(ViewNames.SENTENCE)) ta.setSentences(); } for (Map.Entry<String, String> entry: taImpl.getPropertiesMap().entrySet()) { ta.addAttribute(entry.getKey(), entry.getValue()); } return ta; }
// Fragment (surrounding statement context not visible): constructs a TextAnnotation from
// precomputed offsets, tokens, and sentence-end positions supplied by the enclosing method.
new TextAnnotation(corpusId, textId, text, offsets, tokens, sentenceEndPositions);
// NOTE(review): duplicate of the identical construction fragment above.
new TextAnnotation(corpusId, textId, text, offsets, tokens, sentenceEndPositions);
// Fragment (method signature precedes this view; tail is truncated mid-statement):
// tokenizes the raw text with the configured tokenizer, assembles a TextAnnotation from the
// resulting offsets/tokens/sentence ends, then begins constructing a SpanLabelView —
// presumably to be populated and attached to the annotation; verify against the full method.
throws IllegalArgumentException { Tokenizer.Tokenization tokenization = tokenizer.tokenizeTextSpan(text); TextAnnotation ta = new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(), tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes()); SpanLabelView view =
// NOTE(review): duplicate of the identical truncated fragment above — tokenize, build
// TextAnnotation, then start building a SpanLabelView (statement cut off).
throws IllegalArgumentException { Tokenizer.Tokenization tokenization = tokenizer.tokenizeTextSpan(text); TextAnnotation ta = new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(), tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes()); SpanLabelView view =