public StringSequenceAnnotationPattern(Class textKey, Set<List<String>> targets, boolean ignoreCase) { this.textKey = textKey; phraseTable = new PhraseTable(false, ignoreCase, false); for (List<String> target:targets) { phraseTable.addPhrase(target); if (maxNodes < 0 || target.size() > maxNodes) maxNodes = target.size(); } }
} else if (node instanceof Phrase) { Phrase phrase = (Phrase) node; int matchedTokenEnd = checkWordListMatch( phrase, wordList, 0, wordList.size(), i, true); int matchedTokenEnd = checkWordListMatch( phrase, wordList, 0, wordList.size(), i, true); int matchedTokenEnd = checkWordListMatch( phrase, wordList, 0, wordList.size(), wordList.size(), true); return (matchedTokenEnd >= 0)? phrase:null;
/**
 * Empties this object's phrase table by delegating to {@code phraseTable.clear()}.
 */
// NOTE(review): the second closing brace on the next line presumably ends an enclosing
// scope whose opening is outside this excerpt -- confirm before moving this method.
public void clear() { phraseTable.clear(); } }
/**
 * Adds a phrase supplied as raw text. The text is converted with
 * {@link #toNormalizedWordList(String)} and the work is handed to the four-argument
 * {@code addPhrase} overload.
 *
 * @param phraseText raw text of the phrase
 * @param tag        tag forwarded unchanged to the four-argument overload
 * @param phraseData data forwarded unchanged to the four-argument overload
 * @return whatever the four-argument {@code addPhrase} overload returns
 */
public boolean addPhrase(String phraseText, String tag, Object phraseData) {
  return addPhrase(phraseText, tag, toNormalizedWordList(phraseText), phraseData);
}
/**
 * Searches a token range of the given text for phrase matches.
 *
 * @param text              input text; converted with {@link #toNormalizedWordList(String)}
 * @param tokenStart        start index forwarded to the token-based overload
 * @param tokenEnd          end index forwarded to the token-based overload
 * @param needNormalization not consulted by this method; the token-based overload is
 *                          always called with {@code false} because the word list built
 *                          here has already gone through {@code toNormalizedWordList}
 * @return the result of the token-based {@code findMatches} overload
 */
public List<PhraseMatch> findMatches(String text, int tokenStart, int tokenEnd, boolean needNormalization) {
  return findMatches(toNormalizedWordList(text), tokenStart, tokenEnd, false);
}
/**
 * Finds all spans ({@code PhraseMatch}) in a segment of text that correspond to a
 * phrase in the table.
 *
 * @param text Input text to search over
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(String text) {
  final WordList normalizedTokens = toNormalizedWordList(text);
  final int tokenCount = normalizedTokens.size();
  // The word list is already normalized, so normalization is switched off downstream.
  return findAllMatches(normalizedTokens, 0, tokenCount, false);
}
/**
 * Adds each text in the collection as a phrase by calling
 * {@code addPhrase(text, null)} once per element, in iteration order.
 *
 * @param phraseTexts raw phrase texts to add
 */
public void addPhrases(Collection<String> phraseTexts) {
  for (String text : phraseTexts) {
    addPhrase(text, null);
  }
}
/**
 * Finds all phrase matches within a token range, without restricting the search to
 * a list of acceptable phrases ({@code null} is passed for that argument).
 *
 * @param tokens            tokens to search over
 * @param tokenStart        start index forwarded to {@code findMatches}
 * @param tokenEnd          end index forwarded to {@code findMatches}
 * @param needNormalization forwarded unchanged to {@code findMatches}
 * @return the result of the seven-argument {@code findMatches}
 */
public List<PhraseMatch> findAllMatches(WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = true;    // find all
  final boolean matchEnd = false;  // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEnd);
}
/**
 * Converts raw phrase text into a word list of normalized words.
 * The text is split with {@code splitText}, each piece is normalized with
 * {@link #getNormalizedForm(String)}, and pieces whose normalized form is empty
 * are dropped.
 *
 * @param phraseText raw text to convert
 * @return a {@code StringList} wrapping the retained normalized words, in order
 */
public WordList toNormalizedWordList(String phraseText) {
  final String[] pieces = splitText(phraseText);
  final List<String> kept = new ArrayList<>(pieces.length);
  for (int idx = 0; idx < pieces.length; idx++) {
    final String normalizedWord = getNormalizedForm(pieces[idx]);
    if (normalizedWord.length() == 0) {
      continue;
    }
    kept.add(normalizedWord);
  }
  return new StringList(kept);
}
for (int i = tokenStart; i < tokenEnd; i++) { String word = tokens.getWord(i); word = getNormalizedForm(word); if (word.length() != 0) { normalized.add(word); List<PhraseMatch> matched = findMatchesNormalized(acceptablePhrases, new StringList(normalized), 0, normalized.size(), findAll, matchEnd); for (PhraseMatch pm:matched) { return findMatchesNormalized(acceptablePhrases, tokens, tokenStart, tokenEnd, findAll, matchEnd);
int matchedTokenEnd = checkWordListMatch( oldphrase, wordList, 0, wordList.size(), i+1, true); if (matchedTokenEnd >= 0) { int matchedTokenEnd = checkWordListMatch( oldphrase, wordList, 0, wordList.size(), i, true); if (matchedTokenEnd >= 0) { if (obj instanceof Phrase) { Phrase oldphrase = (Phrase) obj; addPhrase(newMap, oldphrase, i+1); } else { throw new RuntimeException("Unexpected class in list " + obj.getClass() + " while converting list to map"); Phrase oldphrase = (Phrase) tree.get(PHRASE_END); if (oldphrase != null) { int matchedTokenEnd = checkWordListMatch( oldphrase, wordList, 0, wordList.size(), wordList.size(), true); if (matchedTokenEnd >= 0) {
/**
 * Finds all phrase matches within a token range, passing the given list of
 * acceptable phrases through to {@code findMatches}.
 *
 * @param acceptablePhrases forwarded unchanged as the first argument of {@code findMatches}
 * @param tokens            tokens to search over
 * @param tokenStart        start index forwarded to {@code findMatches}
 * @param tokenEnd          end index forwarded to {@code findMatches}
 * @param needNormalization forwarded unchanged to {@code findMatches}
 * @return the result of the seven-argument {@code findMatches}
 */
public List<PhraseMatch> findAllMatches(List<Phrase> acceptablePhrases, WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = true;    // find all
  final boolean matchEnd = false;  // don't need to match end exactly
  return findMatches(acceptablePhrases, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEnd);
}
/**
 * Returns the normalized form of a word, consulting {@code normalizedCache} first
 * and storing a freshly computed form in it on a miss.
 *
 * <p>Both the cache lookup and the cache update run under this object's monitor.
 * Previously only the update was guarded, so a lookup could run concurrently with
 * another thread's {@code put}; for a map that is not itself thread-safe that is
 * a data race.
 * NOTE(review): the declared type of {@code normalizedCache} is not visible here;
 * if it is already a concurrent map the extra lock is merely redundant.
 *
 * @param word word to normalize
 * @return the cached normalized form, or the result of {@code createNormalizedForm(word)}
 *         when the cache had no entry
 */
public String getNormalizedForm(String word) {
  String normalized;
  synchronized (this) {
    normalized = normalizedCache.get(word);
  }
  if (normalized == null) {
    // Compute outside the lock so a slow normalization does not block other threads;
    // two threads may both compute the same form, and the later put simply overwrites.
    normalized = createNormalizedForm(word);
    synchronized (this) {
      normalizedCache.put(word, normalized);
    }
  }
  return normalized;
}
/**
 * Finds all spans ({@code PhraseMatch}) in a list of tokens that correspond to a
 * phrase in the table. The whole list is searched and normalization is requested
 * from the range-based overload.
 *
 * @param tokens List of tokens to search over
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(WordList tokens) {
  final int tokenCount = tokens.size();
  return findAllMatches(tokens, 0, tokenCount, true);
}
/**
 * Reports whether {@code get(key)} yields a non-null value.
 *
 * @param key key to look up
 * @return {@code true} if {@code get(key)} is not {@code null}, {@code false} otherwise
 */
public boolean containsKey(Object key) {
  final Object value = get(key);
  return value != null;
}
/**
 * Finds all spans ({@code PhraseMatch}) in a segment of text that correspond to a
 * phrase in the table, filtered by the list of acceptable phrases.
 *
 * @param acceptablePhrases What phrases to look for (need to be subset of phrases already in table)
 * @param text Input text to search over
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(List<Phrase> acceptablePhrases, String text) {
  final WordList normalizedTokens = toNormalizedWordList(text);
  final int tokenCount = normalizedTokens.size();
  // The word list is already normalized, so normalization is switched off downstream.
  return findAllMatches(acceptablePhrases, normalizedTokens, 0, tokenCount, false);
}
/**
 * Searches the whole of the given text for phrase matches.
 * The text is converted with {@link #toNormalizedWordList(String)} and the full
 * token range is handed to the range-based {@code findMatches} overload with
 * normalization disabled.
 *
 * @param text input text to search over
 * @return the result of the range-based {@code findMatches} overload
 */
public List<PhraseMatch> findMatches(String text) {
  final WordList normalizedTokens = toNormalizedWordList(text);
  final int tokenCount = normalizedTokens.size();
  return findMatches(normalizedTokens, 0, tokenCount, false);
}
/**
 * Adds a phrase given as a token list, delegating to the two-argument
 * {@code addPhrase} overload with {@code null} as the second argument.
 *
 * @param tokens tokens of the phrase
 * @return whatever the two-argument {@code addPhrase} overload returns
 */
public boolean addPhrase(List<String> tokens) {
  final boolean added = addPhrase(tokens, null);
  return added;
}
/**
 * Finds all phrase matches within a token range. No list of acceptable phrases is
 * supplied ({@code null} is passed), every match is requested, and matches are not
 * required to end exactly at the range end.
 *
 * @param tokens            tokens to search over
 * @param tokenStart        start index forwarded to {@code findMatches}
 * @param tokenEnd          end index forwarded to {@code findMatches}
 * @param needNormalization forwarded unchanged to {@code findMatches}
 * @return the result of the seven-argument {@code findMatches}
 */
public List<PhraseMatch> findAllMatches(WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = true;    // find all
  final boolean matchEnd = false;  // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEnd);
}
/**
 * Turns raw phrase text into a {@code StringList} of normalized words.
 * Each piece produced by {@code splitText} is passed through
 * {@link #getNormalizedForm(String)}; pieces that normalize to the empty string
 * are left out.
 *
 * @param phraseText raw text to convert
 * @return word list of the non-empty normalized words, in their original order
 */
public WordList toNormalizedWordList(String phraseText) {
  final String[] rawWords = splitText(phraseText);
  final List<String> normalizedWords = new ArrayList<>(rawWords.length);
  for (String rawWord : rawWords) {
    final String normalizedWord = getNormalizedForm(rawWord);
    if (normalizedWord.length() == 0) {
      continue;
    }
    normalizedWords.add(normalizedWord);
  }
  return new StringList(normalizedWords);
}