/**
 * Writes the body of this term to the given buffer: the index prefix
 * (emitted by {@code appendIndexString} when one is needed) followed by
 * the string value provided by the subclass through {@code stringValue()}.
 *
 * @param buffer the builder the textual form is appended to
 */
protected final void appendBodyString(StringBuilder buffer) {
    // The prefix must come first; the value is appended right after it
    appendIndexString(buffer);
    buffer.append(stringValue());
}
/**
 * Encodes this item into the given buffer by delegating to {@code encodeThis}.
 *
 * @param buffer the buffer handed on to {@code encodeThis}
 * @return always 1 (presumably the number of items written - confirm against the caller's contract)
 */
public final int encode(ByteBuffer buffer) {
    encodeThis(buffer);
    return 1;
}
/**
 * Two matches are equal when they are at the same position and wrap equal items.
 *
 * @param o the object to compare with
 * @return true if {@code o} is a Match with the same position and an equal item
 */
public boolean equals(Object o) {
    if (o instanceof Match) {
        Match that = (Match) o;
        // Position is compared first, then the item, as the cheaper check comes first
        return that.position == position && that.item.equals(item);
    }
    return false;
}
/**
 * Combines the hash of the item's indexed string (weight 17) with the hash
 * of its index name (weight 33). The position is not part of the hash.
 *
 * @return the hash code of this match
 */
public int hashCode() {
    int indexedStringHash = item.getIndexedString().hashCode();
    int indexNameHash = item.getIndexName().hashCode();
    return 17 * indexedStringHash + 33 * indexNameHash;
}
index=termItem.getIndexName(); else if (!termItem.getIndexName().equals(index)) break; String lowercased = toLowerCase(termItem.stringValue()); boolean matched=state.tryDeltaWord(lowercased); if (!matched && ignorePluralForm) {
/**
 * Creates a match for a flattened item.
 *
 * @param item the flattened item that was matched; it supplies the wrapped item,
 *             and from there the parent, as well as the position
 * @param replaceValue the string to replace this match by; when null, the indexed
 *                     string of the wrapped item is used instead
 */
public Match(FlattenedItem item, String replaceValue) {
    this.item = item.getItem();
    this.replaceValue = (replaceValue != null) ? replaceValue
                                               : item.getItem().getIndexedString();
    this.parent = this.item.getParent();
    this.position = item.getPosition();
}
/**
 * Registers the next {@code numberOfTerms} items of the evaluation as matches
 * (each replaced by its own indexed string) and advances the evaluation past them.
 *
 * @param numberOfTerms how many consecutive items to consume
 * @param e the evaluation whose current item is read and advanced
 * @return the string values of the consumed items, joined by single spaces
 */
private String matchTerms(int numberOfTerms, RuleEvaluation e) {
    StringBuilder joined = new StringBuilder();
    for (int remaining = numberOfTerms; remaining > 0; remaining--) {
        FlattenedItem current = e.currentItem();
        String indexed = e.currentItem().getItem().getIndexedString();
        e.addMatch(current, indexed);

        joined.append(e.currentItem().getItem().stringValue());
        if (remaining > 1) { // separator between terms, none after the last
            joined.append(" ");
        }
        e.next();
    }
    return joined.toString();
}
/**
 * Accent-drops a single word term in place, or removes it from the iteration
 * if nothing is left of the word after accent dropping.
 *
 * Nothing happens unless the term is a {@code WordItem}, is normalizable, and its
 * index is a non-attribute index with normalizing turned on.
 *
 * @param language the language passed to the accent dropping transformer
 * @param indexFacts used to look up the index of the term
 * @param term the term to normalize
 * @param i the iterator currently positioned at the term; used to remove it when it becomes empty
 */
private void normalizeWord(Language language, IndexFacts.Session indexFacts, TermItem term, ListIterator<Item> i) {
    boolean candidate = term instanceof WordItem && term.isNormalizable();
    if ( ! candidate) return;

    Index index = indexFacts.getIndex(term.getIndexName());
    if (index.isAttribute() || ! index.getNormalize()) return;

    WordItem word = (WordItem) term;
    String normalized = linguistics.getTransformer().accentDrop(word.getWord(), language);
    if (normalized.isEmpty()) {
        i.remove();
    } else {
        word.setWord(normalized);
    }
}
/**
 * Walks the given item tree and, for every term whose index has literal boost
 * enabled, adds a lowercased copy of the term's raw word to {@code rankTerms},
 * targeting the index named {@code <index name>_literal}.
 *
 * For a {@code NotItem} only the positive item is visited; for any other
 * composite all children are visited.
 *
 * @param rankTerms the rank item collecting the literal terms
 * @param item the root of the (sub)tree to visit, may be null
 * @param indexFacts used to look up the index of each term
 */
private void addLiterals(RankItem rankTerms, Item item, IndexFacts.Session indexFacts) {
    if (item == null) return;

    if (item instanceof NotItem) {
        Item positive = ((NotItem) item).getPositiveItem();
        addLiterals(rankTerms, positive, indexFacts);
        return;
    }

    if (item instanceof CompositeItem) {
        Iterator<Item> children = ((CompositeItem) item).getItemIterator();
        while (children.hasNext()) {
            addLiterals(rankTerms, children.next(), indexFacts);
        }
        return;
    }

    if (item instanceof TermItem) {
        TermItem term = (TermItem) item;
        Index index = indexFacts.getIndex(term.getIndexName());
        if ( ! index.getLiteralBoost()) return;
        rankTerms.addItem(new WordItem(toLowerCase(term.getRawWord()), index.getName() + "_literal"));
    }
}
/**
 * Checks whether the index name of the given term agrees with the required label.
 * The required label is this condition's own label, or the current label of the
 * evaluation when this has none.
 *
 * A term with an empty index name matches when no label is required at all;
 * otherwise the index name must equal the label.
 *
 * @param evaluationTerm the term whose index name is checked
 * @param e the evaluation supplying the fallback label and the tracing facility
 * @return true if the label requirement is met
 */
protected boolean labelMatches(TermItem evaluationTerm, RuleEvaluation e) {
    String termIndex = evaluationTerm.getIndexName();

    String required = getLabel();
    if (required == null) {
        required = e.getCurrentLabel();
    }

    boolean unlabeledMatch = required == null && "".equals(termIndex);
    if (unlabeledMatch || termIndex.equals(required)) {
        return true;
    }

    if (e.getTraceLevel() >= 4) {
        e.trace(4, "'" + this + "' does not match, label of " + e.currentItem() + " was required to be " + required);
    }
    return false;
}
/**
 * Rejects phrases in which the same term is repeated too many times in a row.
 *
 * Phrases with at most MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE items are not inspected.
 * Otherwise the items are scanned in order, comparing the indexed string of each term
 * with that of the preceding term. Each time they are equal a repeat counter is
 * incremented; a differing term resets the counter, and an item which is not a
 * TermItem resets both the counter and the remembered previous term.
 * When the counter reaches MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE the rejection
 * metric is incremented and the phrase is rejected.
 *
 * NOTE(review): the exception message spells "ocurrences" with a single 'c'; it is left
 * as is here since callers or tests may match on the text - confirm before correcting.
 *
 * @param phrase the phrase to check
 * @throws IllegalArgumentException if a term is repeated consecutively too many times
 */
private void repeatedConsecutiveTermsInPhraseCheck(PhraseItem phrase) { if (phrase.getItemCount() > MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE) { String prev = null; int repeatedCount = 0; for (int i = 0; i < phrase.getItemCount(); ++i) { Item item = phrase.getItem(i); if (item instanceof TermItem) { TermItem term = (TermItem) item; String current = term.getIndexedString(); if (prev != null) { if (prev.equals(current)) { repeatedCount++; if (repeatedCount >= MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE) { repeatedConsecutiveTermsInPhraseRejections.add(); throw new IllegalArgumentException("More than " + MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE + " ocurrences of term '" + current + "' in a row detected in phrase : " + phrase.toString()); } } else { repeatedCount = 0; } } prev = current; } else { prev = null; repeatedCount = 0; } } } } private static final class Count {
private Substring getOffsets(BlockItem b) { if (b instanceof TermItem) { return b.getOrigin(); } else if (b instanceof CompositeItem) { Item i = ((CompositeItem) b).getItem(0); if (i instanceof TermItem) { return ((TermItem) i).getOrigin(); // this should always be the case } else { getLogger().log(LogLevel.WARNING, "Weird, BlockItem '" + b + "' was a composite containing " + i.getClass().getName() + ", expected TermItem."); } } return null; }
protected void annotatePhrase(PhraseMatcher.Phrase phrase,Query query,int traceLevel) { for (StringTokenizer tokens=new StringTokenizer(phrase.getData(),"|",false) ; tokens.hasMoreTokens(); ) { String token=tokens.nextToken(); int semicolonIndex=token.indexOf(";"); String annotation=token; String value=""; if (semicolonIndex>0) { annotation=token.substring(0,semicolonIndex); value=token.substring(semicolonIndex+1); } // Annotate all matched items phrase.getItem(0).addAnnotation(annotation,phrase); if (traceLevel>=4) query.trace(" Annotating '" + phrase + "' as " + annotation + (value.equals("") ? "" :"=" + value),false,1); } }
/**
 * Discloses the properties of the superclass followed by the properties
 * {@code fromSegmented}, {@code segmentIndex}, {@code stemmed} and {@code words}
 * of this item, in that order.
 *
 * @param discloser the receiver of the disclosed properties
 */
@Override
public void disclose(Discloser discloser) {
    super.disclose(discloser);

    discloser.addProperty("fromSegmented", fromSegmented);
    discloser.addProperty("segmentIndex", segmentIndex);
    discloser.addProperty("stemmed", stemmed);
    discloser.addProperty("words", words);
}
/**
 * Converts a term item to a dispatch form named after the item type,
 * initialized by {@code ItemInitializer} and with the term's value as its child.
 *
 * @param item the item to convert; must be a {@code TermItem}
 * @param itemIdMapper passed on to {@code ItemInitializer.initializeForm}
 * @return the resulting form
 * @throws ClassCastException if the item is not a {@code TermItem}
 */
@Override
public DispatchForm itemToForm(Item item, ItemIdMapper itemIdMapper) {
    TermItem term = (TermItem) item;

    DispatchForm result = new DispatchForm(term.getItemType().name());
    ItemInitializer.initializeForm(result, item, itemIdMapper);
    result.addChild(getValue(term));
    return result;
}
private boolean automataMatch(RuleEvaluation e) { FlattenedItem current=e.currentItem(); if (current==null) return false; Object annotation=current.getItem().getAnnotation(conditionName); if (annotation==null) return false; if (! (annotation instanceof PhraseMatcher.Phrase)) return false; PhraseMatcher.Phrase phrase=(PhraseMatcher.Phrase)annotation; Choicepoint choicePoint=e.getChoicepoint(this,true); boolean matches=automataMatchPhrase(phrase,e); if (!matches && e.isInNegation()) { // TODO: Temporary hack! Works for single items only e.addMatch(current,null); } if ((!matches && !e.isInNegation() || (matches && e.isInNegation()))) choicePoint.backtrackPosition(); return matches; }
/**
 * Appends this item on the form {@code fieldName:{index1:term1 index2:term2 ...}},
 * where each child contributes its index name and indexed string, and children
 * are separated by single spaces.
 *
 * @param buffer the builder the textual form is appended to
 * @throws ClassCastException if a child is not a {@code TermItem}
 */
@Override
protected void appendBodyString(StringBuilder buffer) {
    buffer.append(fieldName);
    buffer.append(':');
    buffer.append('{');

    Iterator<Item> children = getItemIterator();
    while (children.hasNext()) {
        TermItem child = (TermItem) children.next();
        buffer.append(child.getIndexName());
        buffer.append(':');
        buffer.append(child.getIndexedString());
        if (children.hasNext()) { // separator between children, none after the last
            buffer.append(' ');
        }
    }

    buffer.append('}');
}
private boolean rewriteToNGramMatching(Item item, int indexInParent, IndexFacts.Session indexFacts, Query query) { boolean rewritten = false; if (item instanceof SegmentItem) { // handle CJK segmented terms which should be grams instead SegmentItem segments = (SegmentItem)item; Index index = indexFacts.getIndex(segments.getIndexName()); if (index.isNGram()) { Item grams = splitToGrams(segments, toLowerCase(segments.getRawWord()), index.getGramSize(), query); replaceItemByGrams(item, grams, indexInParent); rewritten = true; } } else if (item instanceof CompositeItem) { CompositeItem composite = (CompositeItem)item; for (int i=0; i<composite.getItemCount(); i++) rewritten = rewriteToNGramMatching(composite.getItem(i), i, indexFacts, query) || rewritten; } else if (item instanceof TermItem) { TermItem term = (TermItem)item; Index index = indexFacts.getIndex(term.getIndexName()); if (index.isNGram()) { Item grams = splitToGrams(term,term.stringValue(), index.getGramSize(), query); replaceItemByGrams(item, grams, indexInParent); rewritten = true; } } return rewritten; }
/**
 * Replaces the {@code length} terms starting at {@code startIndex} in the owner by a
 * single phrase containing those terms.
 *
 * The phrase takes the place of the first term and gets its index name; each following
 * term is removed from the owner and added to the phrase. Every term is passed to
 * {@code replace(term, position)} before it is added.
 *
 * @throws ClassCastException if any of the affected items is not a {@code TermItem}
 */
public void replace() {
    PhraseItem phrase = new PhraseItem();

    TermItem first = (TermItem) owner.setItem(startIndex, phrase);
    replace(first, 0);
    phrase.setIndexName(first.getIndexName());
    phrase.addItem(first);

    int offset = 1;
    while (offset < length) {
        // Following terms shift down as they are removed, so the index stays the same
        TermItem following = (TermItem) owner.removeItem(startIndex + 1);
        replace(following, offset);
        phrase.addItem(following);
        offset++;
    }
}
/**
 * Rejects phrases in which the same term occurs too many times, consecutively or not.
 *
 * Phrases with at most MAX_REPEATED_TERMS_IN_PHRASE items are not inspected. Otherwise
 * the occurrences of each term are counted by indexed string; items which are not
 * {@code TermItem}s are ignored. When a term that has already been seen
 * MAX_REPEATED_TERMS_IN_PHRASE times is encountered again, the rejection metric is
 * incremented and the phrase is rejected.
 *
 * @param phrase the phrase to check
 * @throws IllegalArgumentException if a term occurs more than MAX_REPEATED_TERMS_IN_PHRASE times
 */
private void repeatedTermsInPhraseCheck(PhraseItem phrase) {
    if (phrase.getItemCount() <= MAX_REPEATED_TERMS_IN_PHRASE) return;

    Map<String, Count> occurrences = new HashMap<>();
    for (int i = 0; i < phrase.getItemCount(); ++i) {
        Item item = phrase.getItem(i);
        if ( ! (item instanceof TermItem)) continue;

        String indexed = ((TermItem) item).getIndexedString();
        Count seen = occurrences.get(indexed);
        if (seen == null) { // first occurrence of this term
            occurrences.put(indexed, new Count(1));
            continue;
        }

        if (seen.get() >= MAX_REPEATED_TERMS_IN_PHRASE) {
            repeatedTermsInPhraseRejections.add();
            throw new IllegalArgumentException("Phrase contains more than " + MAX_REPEATED_TERMS_IN_PHRASE +
                                               " occurrences of term '" + indexed +
                                               "' in phrase : " + phrase.toString());
        }
        seen.inc();
    }
}