WeightedPhraseInfo first = itr.next(); work.add( first ); int workEndOffset = first.getEndOffset(); while ( itr.hasNext() ) { WeightedPhraseInfo current = itr.next(); if ( current.getStartOffset() <= workEndOffset ) { workEndOffset = Math.max( workEndOffset, current.getEndOffset() ); work.add( current ); } else { work.set( 0, current ); } else { phraseList.add( new WeightedPhraseInfo( work ) ); work.clear(); work.add( current ); workEndOffset = current.getEndOffset(); phraseList.add( work.get( 0 ) ); } else { phraseList.add( new WeightedPhraseInfo( work ) ); work.clear();
/**
 * Appends the given phrase to {@code phraseList} unless its offsets overlap
 * a phrase that is already in the list.
 *
 * @param wpi the candidate phrase; it is dropped without further action when
 *            any existing entry reports an offset overlap with it
 */
void addIfNoOverlap( WeightedPhraseInfo wpi ){
  boolean overlapsExisting = false;
  for( WeightedPhraseInfo accepted : phraseList ){
    if( accepted.isOffsetOverlap( wpi ) ){
      // The first overlapping entry is enough to reject the candidate.
      overlapsExisting = true;
      break;
    }
  }
  if( !overlapsExisting ){
    phraseList.add( wpi );
  }
}
public void addIfNoOverlap( WeightedPhraseInfo wpi ){ for( WeightedPhraseInfo existWpi : getPhraseList() ){ if( existWpi.isOffsetOverlap( wpi ) ) { // WeightedPhraseInfo.addIfNoOverlap() dumps the second part of, for example, hyphenated words (social-economics). // The result is that all informations in TermInfo are lost and not available for further operations. existWpi.getTermsInfos().addAll( wpi.getTermsInfos() ); return; } } getPhraseList().add( wpi ); }
fieldTermStack.push( ti ); if( currMap.isValidTermOrPhrase( phraseCandidate ) ){ addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); currMap = fieldQuery.searchPhrase( field, phraseCandidate ); if( currMap != null ){ addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); break;
while((phraseInfo = queue.top()) != null){ if( phraseInfo.getStartOffset() < startOffset ) { queue.removeTop(); continue; final int currentPhraseStartOffset = phraseInfo.getStartOffset(); int currentPhraseEndOffset = phraseInfo.getEndOffset(); int spanStart = Math.max(currentPhraseStartOffset - margin, startOffset); int spanEnd = Math.max(currentPhraseEndOffset, spanStart + fragCharSize); if (phraseInfo.getEndOffset() <= spanEnd) { currentPhraseEndOffset = phraseInfo.getEndOffset(); if (acceptPhrase(queue.removeTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) { wpil.add(phraseInfo);
fieldTermStack.push( ti ); if( currMap.isValidTermOrPhrase( phraseCandidate ) ){ addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); currMap = fieldQuery.searchPhrase( field, phraseCandidate ); if( currMap != null ){ addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); break;
if( phraseInfo.getStartOffset() < startOffset ) continue; int st = phraseInfo.getStartOffset() - MARGIN < startOffset ? startOffset : phraseInfo.getStartOffset() - MARGIN; int en = st + fragCharSize; if( phraseInfo.getEndOffset() > en ) en = phraseInfo.getEndOffset(); startOffset = en; if( phraseInfo.getEndOffset() <= en ) wpil.add( phraseInfo ); else
for ( TermInfo ti : phraseInfo.getTermsInfos()) { if ( distinctTerms.add( ti.getText() ) ) phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost(); length++; tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum(), phraseTotalBoost ) );
/**
 * Two phrase infos are equal when they are of the same class and have the
 * same start offset, end offset and boost.
 *
 * @param obj the object to compare against; may be {@code null}
 * @return {@code true} if {@code obj} is this instance or an instance of the
 *         same class with identical start offset, end offset and boost
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (obj == null || getClass() != obj.getClass()) {
    return false;
  }
  WeightedPhraseInfo that = (WeightedPhraseInfo) obj;
  // Short-circuit evaluation keeps the comparison order: start, end, boost.
  return getStartOffset() == that.getStartOffset()
      && getEndOffset() == that.getEndOffset()
      && getBoost() == that.getBoost();
}
/**
 * A predicate to decide if the given {@link WeightedPhraseInfo} should be
 * accepted as a highlighted phrase or if it should be discarded.
 * <p>
 * The default implementation accepts every phrase that has at most one term
 * offset, and accepts a phrase with more term offsets only when its match
 * length does not exceed the fragment character size.
 *
 * @param info the phrase info to accept
 * @param matchLength the match length of the current phrase
 * @param fragCharSize the configured fragment character size
 * @return <code>true</code> if this phrase info should be accepted as a highlight phrase
 */
protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
  if (info.getTermsOffsets().size() <= 1) {
    // Phrases with at most one term offset are always kept, regardless of length.
    return true;
  }
  return matchLength <= fragCharSize;
}
/**
 * Computes a hash code from the start offset, the end offset and the boost.
 * <p>
 * This class's {@code equals} compares boosts numerically, so a boost of
 * {@code 0.0} and a boost of {@code -0.0} are considered equal. Their raw bit
 * patterns differ, however, so negative zero is normalised to positive zero
 * before hashing to guarantee that equal objects produce equal hash codes.
 * All other boost values hash exactly as before.
 *
 * @return the hash code of this phrase info
 */
@Override
public int hashCode() {
  final int prime = 31;
  int result = 1;
  result = prime * result + getStartOffset();
  result = prime * result + getEndOffset();
  double boostForHash = getBoost();
  if ( boostForHash == 0.0d ) {
    // Matches both 0.0 and -0.0; collapse them onto the same bit pattern.
    boostForHash = 0.0d;
  }
  long b = Double.doubleToLongBits( boostForHash );
  result = prime * result + ( int )( b ^ ( b >>> 32 ) );
  return result;
}
/**
 * Tests whether the offset range of this phrase overlaps the offset range of
 * another phrase.
 * <p>
 * The ranges are considered overlapping when either range's start offset
 * falls inside the other range (start inclusive, end exclusive), or either
 * range's end offset falls inside the other range (start exclusive, end
 * inclusive).
 *
 * @param other the phrase to compare against
 * @return {@code true} if the two offset ranges overlap
 */
public boolean isOffsetOverlap( WeightedPhraseInfo other ){
  final int start = getStartOffset();
  final int end = getEndOffset();
  final int otherStart = other.getStartOffset();
  final int otherEnd = other.getEndOffset();
  return ( start <= otherStart && otherStart < end )
      || ( start < otherEnd && otherEnd <= end )
      || ( otherStart <= start && start < otherEnd )
      || ( otherStart < end && end <= otherEnd );
}
/**
 * Builds one fragment from the given phrases and appends it to the fragment
 * info list.
 * <p>
 * Each phrase contributes one {@code SubInfo} carrying its text, term
 * offsets, sequence number and boost; the fragment's total boost is the sum
 * of the phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList the phrases that fall into the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> subInfos = new ArrayList<>();
  float boostSum = 0;
  for( WeightedPhraseInfo phrase : phraseInfoList ){
    SubInfo sub = new SubInfo(
        phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() );
    subInfos.add( sub );
    boostSum += phrase.getBoost();
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, boostSum ) );
}
/**
 * Renders this phrase as {@code text(boost)(offsets...)}, where the term
 * offsets are appended back to back without a separator.
 *
 * @return the string form of this phrase info
 */
@Override
public String toString(){
  StringBuilder out = new StringBuilder();
  out.append( getText() );
  out.append( '(' );
  out.append( boost );
  out.append( ")(" );
  for( Toffs offsets : termsOffsets ){
    out.append( offsets );
  }
  out.append( ')' );
  return out.toString();
}
/**
 * Reports whether this phrase and {@code other} cover overlapping offset
 * ranges.
 * <p>
 * An overlap is reported when a start offset of one range lies within the
 * other range (start inclusive, end exclusive) or an end offset of one range
 * lies within the other range (start exclusive, end inclusive).
 *
 * @param other the phrase whose offsets are checked against this phrase
 * @return {@code true} if the offset ranges overlap, {@code false} otherwise
 */
public boolean isOffsetOverlap( WeightedPhraseInfo other ){
  final int thisStart = getStartOffset();
  final int thisEnd = getEndOffset();
  final int thatStart = other.getStartOffset();
  final int thatEnd = other.getEndOffset();

  final boolean thatStartInsideThis = thisStart <= thatStart && thatStart < thisEnd;
  final boolean thatEndInsideThis = thisStart < thatEnd && thatEnd <= thisEnd;
  final boolean thisStartInsideThat = thatStart <= thisStart && thisStart < thatEnd;
  final boolean thisEndInsideThat = thatStart < thisEnd && thisEnd <= thatEnd;

  return thatStartInsideThis || thatEndInsideThis || thisStartInsideThat || thisEndInsideThat;
}
/**
 * Orders phrase infos by start offset, then by end offset, then by boost.
 * <p>
 * Offsets are compared with {@link Integer#compare(int, int)} rather than by
 * subtraction, because the difference of two ints can overflow and yield a
 * result with the wrong sign. Callers must rely only on the sign of the
 * returned value, not on its magnitude.
 *
 * @param other the phrase info to compare against
 * @return a negative value, zero, or a positive value as this phrase info
 *         sorts before, equal to, or after {@code other}
 */
@Override
public int compareTo( WeightedPhraseInfo other ) {
  int diff = Integer.compare( getStartOffset(), other.getStartOffset() );
  if ( diff != 0 ) {
    return diff;
  }
  diff = Integer.compare( getEndOffset(), other.getEndOffset() );
  if ( diff != 0 ) {
    return diff;
  }
  // Boost is the final tie-breaker; signum maps the difference to -1, 0 or 1.
  return (int) Math.signum( getBoost() - other.getBoost() );
}