/**
 * Adds a subitem to this segment phrase.
 * <p>
 * Only {@link WordItem} instances are accepted; they are handed on to
 * {@code addWordItem}. Every other kind of item (including {@code null}
 * and phrase items) is rejected.
 *
 * @param item the item to add
 * @throws IllegalArgumentException if the given item is not a WordItem
 */
public void addItem(Item item) {
    // Guard first: anything that is not a plain word cannot live in a segment phrase.
    if (!(item instanceof WordItem)) {
        throw new IllegalArgumentException("Can not add " + item + " to a segment phrase");
    }
    addWordItem((WordItem) item);
}
/**
 * Writes the body of this item to the given buffer: first whatever
 * {@code appendIndexString} emits, then whatever
 * {@code appendContentsString} emits.
 *
 * @param buffer the buffer to append to
 */
protected void appendBodyString(StringBuilder buffer) {
    appendIndexString(buffer);
    appendContentsString(buffer);
}
/**
 * Encodes this item into the buffer by first calling {@code encodeThis}
 * and then {@code encodeContent(buffer, 1)}.
 *
 * @param buffer the buffer to encode into
 * @return the value returned by {@code encodeContent(buffer, 1)}
 *         (presumably the number of encoded items, this one included - confirm
 *         against {@code encodeContent})
 */
@Override
public int encode(ByteBuffer buffer) {
    encodeThis(buffer);
    int encoded = encodeContent(buffer, 1);
    return encoded;
}
/**
 * Appends the words of a phrase segment to {@code destination} as a
 * comma-separated list of double-quoted, escaped strings.
 *
 * @param destination the builder receiving the serialized words
 * @param segment the segment whose subitems are serialized
 * @throws IllegalArgumentException if a subitem is not a {@link WordItem}
 */
private static void serializeWords(StringBuilder destination, PhraseSegmentItem segment) {
    for (int position = 0; position < segment.getItemCount(); position++) {
        // The separator goes in front of every entry except the first one.
        if (position != 0) {
            destination.append(", ");
        }

        Item subitem = segment.getItem(position);
        if (!(subitem instanceof WordItem)) {
            throw new IllegalArgumentException("Serializing of " + subitem.getClass().getSimpleName()
                    + " in phrases not implemented, please report this as a bug.");
        }

        destination.append('"');
        String indexed = ((WordItem) subitem).getIndexedString();
        escape(indexed, destination).append('"');
    }
}
static boolean serialize(StringBuilder destination, Item item, boolean includeField) { PhraseSegmentItem phrase = (PhraseSegmentItem) item; Substring origin = phrase.getOrigin(); String image; int offset; destination.append(normalizeIndexName(phrase.getIndexName())).append(" contains "); image = phrase.getRawWord(); offset = 0; length = image.length(); destination.append(", ").append(annotations); if (phrase.getSegmentingRule() == SegmentingRule.BOOLEAN_AND) { destination.append(", ").append('"').append(AND_SEGMENTING).append("\": true");
/**
 * Segments a piece of word data and wraps the result in a query item.
 * <p>
 * The text to segment is the origin substring of {@code ast} when one is
 * available, otherwise {@code wordData}. The language is the explicit
 * {@code language} argument, or the language of the query currently being
 * parsed when that argument is {@code null}.
 * <ul>
 *   <li>No segments: a {@link WordItem} holding the unsegmented {@code wordData}.</li>
 *   <li>One segment, or a parent for which {@code phraseArgumentSupported}
 *       is false: a {@link WordItem} holding the first segment only.</li>
 *   <li>Otherwise: a locked {@link PhraseSegmentItem} for {@code field}
 *       containing one prepared word per segment.</li>
 * </ul>
 *
 * @param field the index name set on a produced phrase segment and passed to {@code prepareWord}
 * @param ast the operator node the word originates from
 * @param wordData the raw word data
 * @param fromQuery passed through to the created items
 * @param parent the class passed to {@code phraseArgumentSupported}
 * @param language the language to segment in, or null to use the current query's
 * @return the word or phrase segment item
 */
@NonNull
private TaggableItem segment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
                             boolean fromQuery, Class<?> parent, Language language) {
    Substring origin = getOrigin(ast);
    Language usedLanguage = (language != null) ? language : currentlyParsing.getLanguage();
    String toSegment = (origin != null) ? origin.getValue() : wordData;

    List<String> words = segmenter.segment(toSegment, usedLanguage);

    if (words.isEmpty()) {
        return new WordItem(wordData, fromQuery);
    }
    if (words.size() == 1 || !phraseArgumentSupported(parent)) {
        return new WordItem(words.get(0), fromQuery);
    }

    PhraseSegmentItem phrase = new PhraseSegmentItem(toSegment, fromQuery, false);
    phrase.setIndexName(field);
    for (String word : words) {
        WordItem wordSegment = new WordItem(word, fromQuery);
        prepareWord(field, ast, wordSegment);
        phrase.addItem(wordSegment);
    }
    phrase.lock();
    return phrase;
}
/** * Creates a phrase containing the same words and state (as pertinent) as * the given SegmentAndItem. */ public PhraseSegmentItem(AndSegmentItem segAnd) { super(segAnd.getRawWord(), segAnd.stringValue(), segAnd.isFromQuery(), segAnd.isStemmed(), segAnd.getOrigin()); if (segAnd.getItemCount() > 0) { WordItem w = (WordItem) segAnd.getItem(0); setIndexName(w.getIndexName()); for (Iterator<Item> i = segAnd.getItemIterator(); i.hasNext();) { WordItem word = (WordItem) i.next(); addWordItem(word); } } }
/**
 * Creates an empty {@link PhraseSegmentItem} for the given block item, using
 * its raw word and string value, with both boolean constructor arguments set
 * to {@code true}, and assigns it the given index name.
 *
 * @param current the block item supplying raw word and string value
 * @param indexName the index name to set on the new segment
 * @return the new phrase segment
 */
private CompositeItem phraseSegment(BlockItem current, String indexName) {
    CompositeItem segment =
            new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true);
    segment.setIndexName(indexName);
    return segment;
}
/**
 * Encodes the content of this item by delegating to
 * {@code encodeContent(buffer, 0)}.
 *
 * @param buffer the buffer to encode into
 * @return whatever the two-argument {@code encodeContent} returns
 */
public int encodeContent(ByteBuffer buffer) {
    int encoded = encodeContent(buffer, 0);
    return encoded;
}
/**
 * Adds a word to this item after overwriting the word's index name with
 * this item's own index name.
 *
 * @param word the word to re-index and add
 */
private void addWordItem(WordItem word) {
    word.setIndexName(getIndexName());
    super.addItem(word);
}
/**
 * Returns the subitem at the given position, cast to a word item.
 *
 * @param index the (0-based) index of the item to return
 * @return the subitem at {@code index} as a {@link WordItem}
 * @throws IndexOutOfBoundsException if there is no subitem at index
 * @throws ClassCastException if the subitem at index is not a WordItem
 */
public WordItem getWordItem(int index) {
    Item subitem = getItem(index);
    return (WordItem) subitem;
}
/**
 * Writes the body of this item: the index string followed by the subitems,
 * separated by single spaces and wrapped in double quotes.
 * <p>
 * Word items contribute their word, phrase segments contribute their
 * contents string, and any other item contributes its {@code toString()}.
 *
 * @param buffer the buffer to append to
 */
protected void appendBodyString(StringBuilder buffer) {
    appendIndexString(buffer);
    buffer.append("\"");

    Iterator<Item> children = getItemIterator();
    while (children.hasNext()) {
        Item child = children.next();
        if (child instanceof WordItem) {
            buffer.append(((WordItem) child).getWord());
        } else if (child instanceof PhraseSegmentItem) {
            ((PhraseSegmentItem) child).appendContentsString(buffer);
        } else {
            buffer.append(child.toString());
        }

        // Separator between entries only, never after the last one.
        if (children.hasNext()) {
            buffer.append(" ");
        }
    }

    buffer.append("\"");
}
PhraseSegmentItem phraseSegmentDeletionCandidate = (PhraseSegmentItem) deleteCandidate; PhraseSegmentItem phraseSegmentToCheck = (PhraseSegmentItem) check; if (phraseSegmentDeletionCandidate.getIndexedString().equals(phraseSegmentToCheck.getIndexedString())) { composite.removeItem(i); break;
CompositeItem composite = new PhraseSegmentItem(token.toString(), normalizedToken, true, false, token.substring); int n = 0; for (String segment : segments) {
public int encode(ByteBuffer buffer) { encodeThis(buffer); int itemCount = 1; for (Iterator<Item> i = getItemIterator(); i.hasNext();) { Item subitem = i.next(); if (subitem instanceof PhraseSegmentItem) { PhraseSegmentItem seg = (PhraseSegmentItem) subitem; // "What encode does, minus what encodeThis does" itemCount += seg.encodeContent(buffer); } else { itemCount += subitem.encode(buffer); } } return itemCount; }
/** * Convert segment items into their mutable counterpart, do not update query tree. * Non-segment items are returned directly. * * @return a mutable CompositeItem instance */ private CompositeItem convertSegmentItem(CompositeItem item) { if (!(item instanceof SegmentItem)) { return item; } CompositeItem converted = null; if (item instanceof AndSegmentItem) { converted = new AndItem(); } else if (item instanceof PhraseSegmentItem) { PhraseItem p = new PhraseItem(); PhraseSegmentItem old = (PhraseSegmentItem) item; p.setIndexName(old.getIndexName()); converted = p; } else { // TODO: Do something else than nothing for unknowns? return item; } for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { converted.addItem(i.next()); } return converted; }
phrase = new PhraseSegmentItem(origin.getValue(), origin.getValue(), true, !stem, origin); } else { phrase = new AndSegmentItem(origin.getValue(), true, !stem);