/**
 * Returns the given block unchanged unless it is an unstemmed word block from the
 * query whose index is configured for stemming, in which case the stemmed form is returned.
 */
private Item checkBlock(BlockItem b, StemContext context) {
    // Prefix matches and non-word blocks are never stemmed.
    if (b instanceof PrefixItem || !b.isWords()) return (Item) b;

    if (b.isFromQuery() && !b.isStemmed()) {
        Index index = context.indexFacts.getIndex(b.getIndexName());
        if (index.getStemMode() != StemMode.NONE)
            return stem(b, context, index);
    }
    return (Item) b;
}
private Substring getOffsets(BlockItem b) { if (b instanceof TermItem) { return b.getOrigin(); } else if (b instanceof CompositeItem) { Item i = ((CompositeItem) b).getItem(0); if (i instanceof TermItem) { return ((TermItem) i).getOrigin(); // this should always be the case } else { getLogger().log(LogLevel.WARNING, "Weird, BlockItem '" + b + "' was a composite containing " + i.getClass().getName() + ", expected TermItem."); } } return null; }
/** Creates a phrase segment over the given block, searching the given index. */
private CompositeItem phraseSegment(BlockItem current, String indexName) {
    PhraseSegmentItem segment =
            new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true);
    segment.setIndexName(indexName);
    return segment;
}
/**
 * Creates the item representing a single-word segment. Outside phrases, when the
 * index has literal boost or stems everything, the original word (weight 1.0) and
 * its stems (weight 0.7) are combined into a WordAlternativesItem — but only when
 * that actually yields more than one alternative. Otherwise a plain single-stem
 * word item is returned.
 */
private TaggableItem singleWordSegment(BlockItem current, StemList segment, Index index, Substring substring, boolean insidePhrase) {
    String indexName = current.getIndexName();
    boolean mayUseAlternatives =
            !insidePhrase && (index.getLiteralBoost() || index.getStemMode() == StemMode.ALL);
    if (mayUseAlternatives) {
        List<Alternative> candidates = new ArrayList<>(segment.size() + 1);
        candidates.add(new Alternative(current.stringValue(), 1.0d)); // the literal form, fully weighted
        for (String stem : segment)
            candidates.add(new Alternative(stem, 0.7d));              // each stem, down-weighted
        WordAlternativesItem alternatives =
                new WordAlternativesItem(indexName, current.isFromQuery(), substring, candidates);
        if (alternatives.getAlternatives().size() > 1)
            return alternatives;
    }
    return singleStemSegment((Item) current, segment.get(0), indexName, substring);
}
// NOTE(review): this method is truncated in the visible chunk — its body continues
// beyond what is shown here; only the setup portion is documented.
/**
 * Stems the given block using the index's stem mode and the context language.
 */
private Item stem(BlockItem current, StemContext context, Index index) {
    Item blockAsItem = (Item)current;
    CompositeItem composite;
    // One StemList per segment of the input text, per the configured stemmer.
    List<StemList> segments = linguistics.getStemmer().stem(current.stringValue(), index.getStemMode(), context.language);
    String indexName = current.getIndexName();
    // Origin offsets of the block, used to preserve substring provenance on the stemmed items.
    Substring substring = getOffsets(current);
/** Creates an AND segment over the string value of the given block. */
private AndSegmentItem createAndSegment(BlockItem current) {
    String text = current.stringValue();
    return new AndSegmentItem(text, true, true);
}
/**
 * Chooses the composite to hold a segmented CJK block: a phrase segment or an AND
 * segment, per the block's segmenting rule. LANGUAGE_DEFAULT resolves to a phrase
 * when the parent is a phrase (or the block already is a phrase segment), and to
 * AND otherwise.
 *
 * @throws IllegalArgumentException if the segmenting rule is unknown
 */
private CompositeItem chooseCompositeForCJK(BlockItem current, CompositeItem parent, String indexName) {
    // Fetch the rule once and dispatch in a single switch instead of the previous
    // if + nested switch, which re-read getSegmentingRule() and duplicated branches.
    SegmentingRule rule = current.getSegmentingRule();
    switch (rule) {
        case LANGUAGE_DEFAULT:
            if (parent instanceof PhraseItem || current instanceof PhraseSegmentItem)
                return phraseSegment(current, indexName);
            return createAndSegment(current);
        case PHRASE:
            return phraseSegment(current, indexName);
        case BOOLEAN_AND:
            return createAndSegment(current);
        default:
            throw new IllegalArgumentException(
                    "Unknown segmenting rule: " + rule +
                    ". This is a bug in Vespa, as the implementation has gotten out of sync." +
                    " Please create a ticket as soon as possible.");
    }
}
// NOTE(review): fragment of a larger loop not fully visible here — `deleteCandidate`,
// `check`, `composite` and `i` are declared in the surrounding (unseen) code.
BlockItem blockDeletionCandidate = (BlockItem) deleteCandidate;
BlockItem blockToCheck = (BlockItem) check;
// Presumably deduplication: remove the candidate at index i when its string value
// matches the item being checked — TODO confirm against the enclosing loop.
if (blockDeletionCandidate.stringValue().equals(blockToCheck.stringValue())) {
    composite.removeItem(i);
    break;
/** * Splits the given item into n-grams and adds them as a CompositeItem containing WordItems searching the * index of the input term. If the result is a single gram, that single WordItem is returned rather than the AndItem * * @param term the term to split, must be an item which implement the IndexedItem and BlockItem "mixins" * @param text the text of the item, just stringValue() if the item is a TermItem * @param gramSize the gram size to split to * @param query the query in which this rewriting is done * @return the root of the query subtree produced by this, containing the split items */ protected Item splitToGrams(Item term, String text, int gramSize, Query query) { String index = ((HasIndexItem)term).getIndexName(); CompositeItem gramsItem = createGramRoot(query); gramsItem.setIndexName(index); Substring origin = ((BlockItem)term).getOrigin(); for (Iterator<GramSplitter.Gram> i = getGramSplitter().split(text,gramSize); i.hasNext(); ) { GramSplitter.Gram gram = i.next(); WordItem gramWord = new WordItem(gram.extractFrom(text), index, false, origin); gramWord.setWeight(term.getWeight()); gramWord.setProtected(true); gramsItem.addItem(gramWord); } return gramsItem.getItemCount()==1 ? gramsItem.getItem(0) : gramsItem; // return the AndItem, or just the single gram if not multiple }
// NOTE(review): fragment of a larger method not fully visible here — `item`, `token`
// and `buffer` are declared in the surrounding (unseen) code.
Substring termSubstring = ((BlockItem) item).getOrigin();
// Spans from the original term's start up to the current token's start within the
// same superstring. The pre-existing XXX below questions whether that end offset
// is always valid.
Substring substring = new Substring(termSubstring.start, token.substring.start, termSubstring.getSuperstring()); // XXX: Unsafe end?
String str = buffer.toString();