/**
 * Renders a word list as a single string with the words in order,
 * separated by single spaces.
 *
 * @param wordList Word list whose words are concatenated
 * @return The concatenated words (empty string if the list has no words)
 */
public static String toString(WordList wordList) {
  final StringBuilder joined = new StringBuilder();
  int index = 0;
  while (index < wordList.size()) {
    // The separator is only emitted once the buffer already holds text,
    // so nothing is written in front of the first non-empty word.
    if (joined.length() != 0) {
      joined.append(" ");
    }
    joined.append(wordList.getWord(index));
    index++;
  }
  return joined.toString();
}
private synchronized void addPhrase(Map<String,Object> tree, Phrase phrase, int wordIndex) { String word = (phrase.wordList.size() <= wordIndex)? PHRASE_END:phrase.wordList.getWord(wordIndex); Object node = tree.get(word); if (node == null) { tree.put(word, phrase); } else if (node instanceof Phrase) { // create list with this phrase and other and put it here List list = new ArrayList(2); list.add(phrase); list.add(node); tree.put(word, list); } else if (node instanceof Map) { addPhrase((Map<String,Object>) node, phrase, wordIndex+1); } else if (node instanceof List) { ((List) node).add(phrase); } else { throw new RuntimeException("Unexpected class " + node.getClass() + " while adding word " + wordIndex + "(" + word + ") in phrase " + phrase.getText()); } }
for (int i = 0; i < wordList.size(); i++) { String word = wordList.getWord(i); Object node = tree.get(word); Phrase phrase = (Phrase) node; int matchedTokenEnd = checkWordListMatch( phrase, wordList, 0, wordList.size(), i, true); phrase, wordList, 0, wordList.size(), i, true); if (phrase != null) { int matchedTokenEnd = checkWordListMatch( phrase, wordList, 0, wordList.size(), wordList.size(), true); return (matchedTokenEnd >= 0)? phrase:null; } else {
/**
 * Searches a piece of text for phrase matches.
 * The text is converted with {@code toNormalizedWordList} and the complete
 * resulting token range is searched; since the tokens come from that
 * conversion, {@code false} is passed for the normalization flag.
 *
 * @param text Input text to search over
 * @return List of matched spans
 */
public List<PhraseMatch> findMatches(String text) {
  final WordList normalized = toNormalizedWordList(text);
  final int start = 0;
  final int end = normalized.size();
  return findMatches(normalized, start, end, false);
}
/**
 * Searches a range of tokens for phrase matches.
 * Delegates to the general {@code findMatches} with {@code null} as its first
 * argument (presumably meaning "no restriction on acceptable phrases" -
 * confirm against that overload), without asking for all matches and without
 * requiring an exact match of the end.
 *
 * @param tokens List of tokens to search over
 * @param tokenStart Start of the token range, passed through unchanged
 * @param tokenEnd End of the token range, passed through unchanged
 * @param needNormalization Normalization flag, passed through unchanged
 * @return List of matched spans
 */
public List<PhraseMatch> findMatches(WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = false;          // don't need to find all
  final boolean matchEndExactly = false;  // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEndExactly);
}
/**
 * Searches a range of tokens and asks for all phrase matches.
 * Delegates to the general {@code findMatches} with {@code null} as its first
 * argument (presumably meaning "no restriction on acceptable phrases" -
 * confirm against that overload), requesting all matches and not requiring an
 * exact match of the end.
 *
 * @param tokens List of tokens to search over
 * @param tokenStart Start of the token range, passed through unchanged
 * @param tokenEnd End of the token range, passed through unchanged
 * @param needNormalization Normalization flag, passed through unchanged
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = true;           // find all
  final boolean matchEndExactly = false;  // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEndExactly);
}
/**
 * Finds, in a segment of text, the spans (PhraseMatch) that correspond to a
 * phrase in the table, restricted to the given list of acceptable phrases.
 * The text is converted with {@code toNormalizedWordList} and the complete
 * resulting token range is searched.
 *
 * @param acceptablePhrases What phrases to look for (need to be subset of phrases already in table)
 * @param text Input text to search over
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(List<Phrase> acceptablePhrases, String text) {
  final WordList normalized = toNormalizedWordList(text);
  final int start = 0;
  final int end = normalized.size();
  // Tokens come from toNormalizedWordList, so the normalization flag is false.
  return findAllMatches(acceptablePhrases, normalized, start, end, false);
}
/**
 * Searches a range of tokens for phrase matches.
 * Delegates to the general {@code findMatches} with {@code null} as its first
 * argument (presumably meaning "no restriction on acceptable phrases" -
 * confirm against that overload), without asking for all matches and without
 * requiring an exact match of the end.
 *
 * @param tokens List of tokens to search over
 * @param tokenStart Start of the token range, passed through unchanged
 * @param tokenEnd End of the token range, passed through unchanged
 * @param needNormalization Normalization flag, passed through unchanged
 * @return List of matched spans
 */
public List<PhraseMatch> findMatches(WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = false;          // don't need to find all
  final boolean matchEndExactly = false;  // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEndExactly);
}
/**
 * Searches a range of tokens for phrase matches.
 * Delegates to the general {@code findMatches} with {@code null} as its first
 * argument (presumably meaning "no restriction on acceptable phrases" -
 * confirm against that overload), without asking for all matches and without
 * requiring an exact match of the end.
 *
 * @param tokens List of tokens to search over
 * @param tokenStart Start of the token range, passed through unchanged
 * @param tokenEnd End of the token range, passed through unchanged
 * @param needNormalization Normalization flag, passed through unchanged
 * @return List of matched spans
 */
public List<PhraseMatch> findMatches(WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = false;          // don't need to find all
  final boolean matchEndExactly = false;  // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEndExactly);
}
/**
 * Renders a word list as a single string with the words in order,
 * separated by single spaces.
 *
 * @param wordList Word list whose words are concatenated
 * @return The concatenated words (empty string if the list has no words)
 */
public static String toString(WordList wordList) {
  final StringBuilder joined = new StringBuilder();
  int index = 0;
  while (index < wordList.size()) {
    // The separator is only emitted once the buffer already holds text,
    // so nothing is written in front of the first non-empty word.
    if (joined.length() != 0) {
      joined.append(" ");
    }
    joined.append(wordList.getWord(index));
    index++;
  }
  return joined.toString();
}
boolean newPhraseAdded = false; // True if the phrase was a new phrase boolean oldPhraseNewFormAdded = false; // True if the phrase already exists, and this was new form added to old phrase for (int i = wordIndex; i < wordList.size(); i++) { String word = Interner.globalIntern(wordList.getWord(i)); Object node = tree.get(word); oldphrase, wordList, 0, wordList.size(), i+1, true); if (matchedTokenEnd >= 0) { oldPhraseNewFormAdded = oldphrase.addForm(phraseText); oldphrase, wordList, 0, wordList.size(), i, true); if (matchedTokenEnd >= 0) { oldPhraseNewFormAdded = oldphrase.addForm(phraseText); if (wordList.size() == 0) { log.warn(phraseText + " not added"); } else { if (oldphrase != null) { int matchedTokenEnd = checkWordListMatch( oldphrase, wordList, 0, wordList.size(), wordList.size(), true); if (matchedTokenEnd >= 0) { oldPhraseNewFormAdded = oldphrase.addForm(phraseText);
protected int checkWordListMatch(Phrase phrase, WordList tokens, int tokenStart, int tokenEnd, int checkStart, boolean matchEnd) { if (checkStart < tokenStart) return -1; int i; int phraseSize = phrase.wordList.size(); for (i = checkStart; i < tokenEnd && i - tokenStart < phraseSize; i++) { String word = tokens.getWord(i); String phraseWord = phrase.wordList.getWord(i - tokenStart); if (!phraseWord.equals(word)) { return -1; } } if (i - tokenStart == phraseSize) { // All tokens in phrase has been matched! if (matchEnd) { return (i == tokenEnd)? i:-1; } else { return i; } } else { return -1; } }
/**
 * Finds, in a segment of text, the spans (PhraseMatch) that correspond to a
 * phrase in the table, restricted to the given list of acceptable phrases.
 * The text is converted with {@code toNormalizedWordList} and the complete
 * resulting token range is searched.
 *
 * @param acceptablePhrases What phrases to look for (need to be subset of phrases already in table)
 * @param text Input text to search over
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(List<Phrase> acceptablePhrases, String text) {
  final WordList normalized = toNormalizedWordList(text);
  final int start = 0;
  final int end = normalized.size();
  // Tokens come from toNormalizedWordList, so the normalization flag is false.
  return findAllMatches(acceptablePhrases, normalized, start, end, false);
}
/**
 * Searches a range of tokens for phrase matches.
 * Delegates to the general {@code findMatches} with {@code null} as its first
 * argument (presumably meaning "no restriction on acceptable phrases" -
 * confirm against that overload), without asking for all matches and without
 * requiring an exact match of the end.
 *
 * @param tokens List of tokens to search over
 * @param tokenStart Start of the token range, passed through unchanged
 * @param tokenEnd End of the token range, passed through unchanged
 * @param needNormalization Normalization flag, passed through unchanged
 * @return List of matched spans
 */
public List<PhraseMatch> findMatches(WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = false;          // don't need to find all
  final boolean matchEndExactly = false;  // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEndExactly);
}
/**
 * Searches a piece of text for phrase matches.
 * The text is converted with {@code toNormalizedWordList} and the complete
 * resulting token range is searched; since the tokens come from that
 * conversion, {@code false} is passed for the normalization flag.
 *
 * @param text Input text to search over
 * @return List of matched spans
 */
public List<PhraseMatch> findMatches(String text) {
  final WordList normalized = toNormalizedWordList(text);
  final int start = 0;
  final int end = normalized.size();
  return findMatches(normalized, start, end, false);
}
/**
 * Finds, in a list of tokens, the spans (PhraseMatch) that correspond to a
 * phrase in the table. The complete token list is searched and {@code true}
 * is passed for the normalization flag of the range-based overload.
 *
 * @param tokens List of tokens to search over
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(WordList tokens) {
  final int start = 0;
  final int end = tokens.size();
  final boolean needNormalization = true;
  return findAllMatches(tokens, start, end, needNormalization);
}
/**
 * Searches a range of tokens and asks for all phrase matches.
 * Delegates to the general {@code findMatches} with {@code null} as its first
 * argument (presumably meaning "no restriction on acceptable phrases" -
 * confirm against that overload), requesting all matches and not requiring an
 * exact match of the end.
 *
 * @param tokens List of tokens to search over
 * @param tokenStart Start of the token range, passed through unchanged
 * @param tokenEnd End of the token range, passed through unchanged
 * @param needNormalization Normalization flag, passed through unchanged
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(WordList tokens, int tokenStart, int tokenEnd, boolean needNormalization) {
  final boolean findAll = true;           // find all
  final boolean matchEndExactly = false;  // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEndExactly);
}
/**
 * Finds, in a segment of text, the spans (PhraseMatch) that correspond to a
 * phrase in the table. The text is converted with
 * {@code toNormalizedWordList} and the complete resulting token range is
 * searched; since the tokens come from that conversion, {@code false} is
 * passed for the normalization flag.
 *
 * @param text Input text to search over
 * @return List of all matched spans
 */
public List<PhraseMatch> findAllMatches(String text) {
  final WordList normalized = toNormalizedWordList(text);
  final int start = 0;
  final int end = normalized.size();
  return findAllMatches(normalized, start, end, false);
}