/**
 * Builds a {@link TextAnnotation} from text that has already been split into sentences and
 * tokens. Delegates to the three-argument overload, passing the empty string for both of its
 * leading identifier arguments.
 *
 * @param tokenizedSentences the sentences, each given as an array of its tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromTokens(List<String[]> tokenizedSentences) {
    // No identifiers are supplied by the caller, so both are left blank.
    final String blankCorpusId = "";
    final String blankTextId = "";
    return createTextAnnotationFromTokens(blankCorpusId, blankTextId, tokenizedSentences);
}
/**
 * Builds a {@link TextAnnotation} from text that has already been split into sentences and
 * tokens. Delegates to the three-argument overload, passing the empty string for both of its
 * leading identifier arguments.
 *
 * @param tokenizedSentences the sentences, each given as an array of its tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromTokens(List<String[]> tokenizedSentences) {
    // No identifiers are supplied by the caller, so both are left blank.
    final String blankCorpusId = "";
    final String blankTextId = "";
    return createTextAnnotationFromTokens(blankCorpusId, blankTextId, tokenizedSentences);
}
/**
 * Creates a single-sentence {@link TextAnnotation} from a string whose tokens are separated by
 * whitespace.
 *
 * <p>Surrounding whitespace is trimmed and the text is split on runs of whitespace, so leading
 * or repeated separators do not produce empty tokens. An empty (or all-whitespace) input still
 * yields one empty token, as {@code "".split(...)} returns a one-element array.
 *
 * @param text the sentence, with tokens separated by whitespace
 * @return the result of {@code BasicTextAnnotationBuilder.createTextAnnotationFromTokens} applied
 *         to a one-sentence list holding the tokens of {@code text}
 */
public static TextAnnotation createFromTokenizedString(String text) {
    // A plain split(" ") emits "" for a leading space and for every extra consecutive space;
    // trimming first and splitting on \s+ avoids those phantom tokens.
    String[] tokens = text.trim().split("\\s+");
    return BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
            Collections.singletonList(tokens));
}
/**
 * Creates a single-sentence {@link TextAnnotation} from a string whose tokens are separated by
 * whitespace.
 *
 * <p>Surrounding whitespace is trimmed and the text is split on runs of whitespace, so leading
 * or repeated separators do not produce empty tokens. An empty (or all-whitespace) input still
 * yields one empty token, as {@code "".split(...)} returns a one-element array.
 *
 * @param text the sentence, with tokens separated by whitespace
 * @return the result of {@code BasicTextAnnotationBuilder.createTextAnnotationFromTokens} applied
 *         to a one-sentence list holding the tokens of {@code text}
 */
public static TextAnnotation createFromTokenizedString(String text) {
    // A plain split(" ") emits "" for a leading space and for every extra consecutive space;
    // trimming first and splitting on \s+ avoids those phantom tokens.
    String[] tokens = text.trim().split("\\s+");
    return BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
            Collections.singletonList(tokens));
}
/**
 * Builds a {@link TextAnnotation} from pre-tokenized sentences and then hands it to
 * {@code addViewsFromAnnotatorService} before returning it.
 *
 * @param text the sentences, each given as an array of its tokens
 * @return the annotation after {@code addViewsFromAnnotatorService} has been applied to it
 * @throws AnnotatorException declared by this method; presumably raised while adding views
 */
public TextAnnotation preProcess(List<String[]> text) throws AnnotatorException {
    final TextAnnotation annotation =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens(text);
    addViewsFromAnnotatorService(annotation);
    return annotation;
}
/** * A way to create a {@link TextAnnotation} from pre-tokenized text from Python * * @param tokenizedSentences A list of sentences, each one being an list of tokens * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views. */ public static TextAnnotation createTextAnnotationFromListofListofTokens(List<List<Object>> tokenizedSentences) { // This function takes List<List<Object>> to be able to run with cogcomp-nlpy (using pyjnius) // Convert the inner lists to String arrays // Call the default TextAnnotation builder function List<String[]> tokenizedSentences_formatted = new ArrayList<String[]>(); // Converting inner list to array for (List<Object> sentence : tokenizedSentences) { String[] sentence_array = new String[sentence.size()]; int token_idx = 0; for (Object w : sentence) { sentence_array[token_idx] = (String) w; token_idx += 1; } tokenizedSentences_formatted.add(sentence_array); } return createTextAnnotationFromTokens("", "", tokenizedSentences_formatted); }
/**
 * Builds a {@link TextAnnotation} from pre-tokenized sentences and then hands it to
 * {@code addViewsFromAnnotatorService} before returning it.
 *
 * @param text the sentences, each given as an array of its tokens
 * @return the annotation after {@code addViewsFromAnnotatorService} has been applied to it
 * @throws AnnotatorException declared by this method; presumably raised while adding views
 */
public TextAnnotation preProcess(List<String[]> text) throws AnnotatorException {
    final TextAnnotation annotation =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens(text);
    addViewsFromAnnotatorService(annotation);
    return annotation;
}
/** * A way to create a {@link TextAnnotation} from pre-tokenized text from Python * * @param tokenizedSentences A list of sentences, each one being an list of tokens * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views. */ public static TextAnnotation createTextAnnotationFromListofListofTokens(List<List<Object>> tokenizedSentences) { // This function takes List<List<Object>> to be able to run with cogcomp-nlpy (using pyjnius) // Convert the inner lists to String arrays // Call the default TextAnnotation builder function List<String[]> tokenizedSentences_formatted = new ArrayList<String[]>(); // Converting inner list to array for (List<Object> sentence : tokenizedSentences) { String[] sentence_array = new String[sentence.size()]; int token_idx = 0; for (Object w : sentence) { sentence_array[token_idx] = (String) w; token_idx += 1; } tokenizedSentences_formatted.add(sentence_array); } return createTextAnnotationFromTokens("", "", tokenizedSentences_formatted); }
TextAnnotation annotate(String corpusId, String sentId, String[] tokens) throws AnnotatorException { // Ignore the root token List<String[]> words = Collections.singletonList(Arrays.copyOfRange(tokens, 1, tokens.length)); TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId, sentId, words); ta.addView(pos); ta.addView(lemma); ta.addView(chunk); return ta; }
TextAnnotation annotate(String corpusId, String sentId, String[] tokens) throws AnnotatorException { // Ignore the root token List<String[]> words = Collections.singletonList(Arrays.copyOfRange(tokens, 1, tokens.length)); TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId, sentId, words); ta.addView(pos); ta.addView(lemma); ta.addView(chunk); return ta; }
/**
 * Small demo: disables two weapon gazetteers, builds a two-sentence {@link TextAnnotation},
 * lets {@code gazetteersInstance} add its view, and prints both the annotation and that view.
 *
 * @param args unused
 * @throws EdisonException declared by this method
 * @throws AnnotatorException declared by this method
 */
public static void main(String[] args) throws EdisonException, AnnotatorException {
    gazetteersInstance.ignoreGazetteer("Weapons.gz");
    gazetteersInstance.ignoreGazetteer("Weapons.Missile.gz");

    String[] firstSentence = "I live in Chicago , Illinois .".split("\\s+");
    String[] secondSentence = "I met George Bush .".split("\\s+");
    List<String[]> sentences = Arrays.asList(firstSentence, secondSentence);

    TextAnnotation annotation =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens(sentences);
    gazetteersInstance.addView(annotation);

    System.out.println(annotation.toString());

    String gazetteerViewName = gazetteersInstance.getViewName();
    System.out.println(annotation.getView(gazetteerViewName).toString());
}
/**
 * Turns one line of bracketed text into a {@link TextAnnotation} carrying a POS view.
 *
 * <p>The first and last characters of {@code line} are dropped, the remainder is cut into
 * pieces with {@code splitWordsPattern}, and each piece is split with {@code whitespacePattern}
 * into a tag (element 0) and a word (element 1). The words form a single tokenized sentence;
 * the tags are attached as {@link ViewNames#POS} labels with score 1.0.
 *
 * @param line The bracketed string to be processed
 * @param lineId The ID of the {@link TextAnnotation}
 * @return A {@link TextAnnotation} with a populated {@link ViewNames#POS} view
 */
public TextAnnotation createTextAnnotation(String line, String lineId) {
    String inner = line.substring(1, line.length() - 1);
    String[] pairs = splitWordsPattern.split(inner);

    final int count = pairs.length;
    String[] tokens = new String[count];
    String[] tags = new String[count];
    for (int i = 0; i < count; i++) {
        String[] tagAndWord = whitespacePattern.split(pairs[i]);
        tokens[i] = tagAndWord[1];
        tags[i] = tagAndWord[0];
    }

    List<String[]> tokenizedSentences = Collections.singletonList(tokens);
    TextAnnotation annotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
            corpusName, lineId, tokenizedSentences);

    TokenLabelView posView = new TokenLabelView(ViewNames.POS, annotation);
    int tokenId = 0;
    for (String tag : tags) {
        posView.addTokenLabel(tokenId, tag, 1.0);
        tokenId++;
    }
    annotation.addView(ViewNames.POS, posView);
    return annotation;
}
/**
 * Turns one line of bracketed text into a {@link TextAnnotation} carrying a POS view.
 *
 * <p>The first and last characters of {@code line} are dropped, the remainder is cut into
 * pieces with {@code splitWordsPattern}, and each piece is split with {@code whitespacePattern}
 * into a tag (element 0) and a word (element 1). The words form a single tokenized sentence;
 * the tags are attached as {@link ViewNames#POS} labels with score 1.0.
 *
 * @param line The bracketed string to be processed
 * @param lineId The ID of the {@link TextAnnotation}
 * @return A {@link TextAnnotation} with a populated {@link ViewNames#POS} view
 */
public TextAnnotation createTextAnnotation(String line, String lineId) {
    String inner = line.substring(1, line.length() - 1);
    String[] pairs = splitWordsPattern.split(inner);

    final int count = pairs.length;
    String[] tokens = new String[count];
    String[] tags = new String[count];
    for (int i = 0; i < count; i++) {
        String[] tagAndWord = whitespacePattern.split(pairs[i]);
        tokens[i] = tagAndWord[1];
        tags[i] = tagAndWord[0];
    }

    List<String[]> tokenizedSentences = Collections.singletonList(tokens);
    TextAnnotation annotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
            corpusName, lineId, tokenizedSentences);

    TokenLabelView posView = new TokenLabelView(ViewNames.POS, annotation);
    int tokenId = 0;
    for (String tag : tags) {
        posView.addTokenLabel(tokenId, tag, 1.0);
        tokenId++;
    }
    annotation.addView(ViewNames.POS, posView);
    return annotation;
}
/**
 * Small demo: disables two weapon gazetteers, builds a two-sentence {@link TextAnnotation},
 * lets {@code gazetteersInstance} add its view, and prints both the annotation and that view.
 *
 * @param args unused
 * @throws EdisonException declared by this method
 * @throws AnnotatorException declared by this method
 */
public static void main(String[] args) throws EdisonException, AnnotatorException {
    gazetteersInstance.ignoreGazetteer("Weapons.gz");
    gazetteersInstance.ignoreGazetteer("Weapons.Missile.gz");

    String[] firstSentence = "I live in Chicago , Illinois .".split("\\s+");
    String[] secondSentence = "I met George Bush .".split("\\s+");
    List<String[]> sentences = Arrays.asList(firstSentence, secondSentence);

    TextAnnotation annotation =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens(sentences);
    gazetteersInstance.addView(annotation);

    System.out.println(annotation.toString());

    String gazetteerViewName = gazetteersInstance.getViewName();
    System.out.println(annotation.getView(gazetteerViewName).toString());
}
Collections.singletonList(tokens.toArray(new String[tokens.size()])); TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId, String.valueOf(taId), tokenizedSentence); addGoldView(ta, labels);
Collections.singletonList(tokens.toArray(new String[tokens.size()])); TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId, String.valueOf(taId), tokenizedSentence); addGoldView(ta, labels);
BasicTextAnnotationBuilder.createTextAnnotationFromTokens(PENN_TREEBANK_WSJ, id, Collections.singletonList(text));
BasicTextAnnotationBuilder.createTextAnnotationFromTokens(PENN_TREEBANK_WSJ, id, Collections.singletonList(text));
/**
 * Builds a fixture {@link TextAnnotation} for the sentence "I do ." with hand-written POS, NER
 * (empty), SHALLOW_PARSE and LEMMA views, each label carrying score 1.
 *
 * @return the annotation with the four views attached
 */
protected TextAnnotation initializeDummySentenceVerb() {
    final String[] tokens = {"I", "do", "."};
    final String[] posTags = {"PRP", "VBP", "."};
    final String[] lemmas = {"i", "do", "."};

    List<String[]> sentences = new ArrayList<>();
    sentences.add(tokens);
    TextAnnotation annotation =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens("", "", sentences);

    // Part-of-speech tags, one per token.
    TokenLabelView posView = new TokenLabelView(ViewNames.POS, "Test", annotation, 1.0);
    for (int i = 0; i < posTags.length; i++) {
        posView.addTokenLabel(i, posTags[i], 1d);
    }
    annotation.addView(ViewNames.POS, posView);

    // NER view is registered but deliberately left without any spans.
    SpanLabelView nerView = new SpanLabelView(ViewNames.NER, "test", annotation, 1d);
    annotation.addView(ViewNames.NER, nerView);

    // Two chunks: "I" as NP and "do" as VP.
    SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "test", annotation, 1d);
    chunkView.addSpanLabel(0, 1, "NP", 1d);
    chunkView.addSpanLabel(1, 2, "VP", 1d);
    annotation.addView(ViewNames.SHALLOW_PARSE, chunkView);

    // Lemmas, one per token.
    TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, "test", annotation, 1d);
    for (int i = 0; i < lemmas.length; i++) {
        lemmaView.addTokenLabel(i, lemmas[i], 1d);
    }
    annotation.addView(ViewNames.LEMMA, lemmaView);

    return annotation;
}
/**
 * Builds a fixture {@link TextAnnotation} for the sentence "I do ." with hand-written POS, NER
 * (empty), SHALLOW_PARSE and LEMMA views, each label carrying score 1.
 *
 * @return the annotation with the four views attached
 */
protected TextAnnotation initializeDummySentenceVerb() {
    final String[] tokens = {"I", "do", "."};
    final String[] posTags = {"PRP", "VBP", "."};
    final String[] lemmas = {"i", "do", "."};

    List<String[]> sentences = new ArrayList<>();
    sentences.add(tokens);
    TextAnnotation annotation =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens("", "", sentences);

    // Part-of-speech tags, one per token.
    TokenLabelView posView = new TokenLabelView(ViewNames.POS, "Test", annotation, 1.0);
    for (int i = 0; i < posTags.length; i++) {
        posView.addTokenLabel(i, posTags[i], 1d);
    }
    annotation.addView(ViewNames.POS, posView);

    // NER view is registered but deliberately left without any spans.
    SpanLabelView nerView = new SpanLabelView(ViewNames.NER, "test", annotation, 1d);
    annotation.addView(ViewNames.NER, nerView);

    // Two chunks: "I" as NP and "do" as VP.
    SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "test", annotation, 1d);
    chunkView.addSpanLabel(0, 1, "NP", 1d);
    chunkView.addSpanLabel(1, 2, "VP", 1d);
    annotation.addView(ViewNames.SHALLOW_PARSE, chunkView);

    // Lemmas, one per token.
    TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, "test", annotation, 1d);
    for (int i = 0; i < lemmas.length; i++) {
        lemmaView.addTokenLabel(i, lemmas[i], 1d);
    }
    annotation.addView(ViewNames.LEMMA, lemmaView);

    return annotation;
}