/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && containsBrokenAnalysis(fieldType.indexAnalyzer())) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using solely the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { @Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
public WeightedFragInfo( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ){ this.startOffset = startOffset; this.endOffset = endOffset; subInfos = new ArrayList<SubInfo>(); for( WeightedPhraseInfo phraseInfo : phraseInfoList ){ SubInfo subInfo = new SubInfo( phraseInfo.text, phraseInfo.termsOffsets, phraseInfo.seqnum ); subInfos.add( subInfo ); totalBoost += phraseInfo.boost; } }
@Override public String toString(){ StringBuilder sb = new StringBuilder(); sb.append( "subInfos=(" ); for( SubInfo si : subInfos ) sb.append( si.toString() ); sb.append( ")/" ).append( totalBoost ).append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' ); return sb.toString(); }
Toffs firstToffs = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0); if (fragInfo.getStartOffset() >= fieldEnd || firstToffs.getStartOffset() >= fieldEnd) { continue; SubInfo subInfo = subInfoIterator.next(); List<Toffs> toffsList = new ArrayList<>(); Iterator<Toffs> toffsIterator = subInfo.getTermsOffsets().iterator(); while (toffsIterator.hasNext()) { Toffs toffs = toffsIterator.next(); subInfos.add(new SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum(), subInfo.getBoost())); boost += subInfo.getBoost(); if (subInfo.getTermsOffsets().isEmpty()) { subInfoIterator.remove();
length++; tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum(), phraseTotalBoost ) ); float subInfoBoost = tempSubInfo.getBoost() * norm; realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(), tempSubInfo.getSeqnum(), subInfoBoost )); totalBoost += subInfoBoost;
/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && containsBrokenAnalysis(fieldType.indexAnalyzer())) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using solely the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { @Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert mapper.fieldType().names().indexName().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && (containsBrokenAnalysis(mapper.fieldType().indexAnalyzer()))) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using soley the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { @Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && containsBrokenAnalysis(fieldType.indexAnalyzer())) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using soley the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { @Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {StringIndexOutOfBoundsException} in the {FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && (containsBrokenAnalysis(mapper.fieldType().indexAnalyzer()))) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using soley the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, (o1, o2) -> { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && (containsBrokenAnalysis(mapper.fieldType().indexAnalyzer()))) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using soley the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { @Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, String[] preTags, String[] postTags, Encoder encoder ){ StringBuilder fragment = new StringBuilder(); final int s = fragInfo.getStartOffset(); int[] modifiedStartOffset = { s }; String src = getFragmentSourceMSO( buffer, index, values, s, fragInfo.getEndOffset(), modifiedStartOffset ); int srcIndex = 0; for( SubInfo subInfo : fragInfo.getSubInfos() ){ for( Toffs to : subInfo.getTermsOffsets() ){ fragment .append( encoder.encodeText( src.substring( srcIndex, to.getStartOffset() - modifiedStartOffset[0] ) ) ) .append( getPreTag( preTags, subInfo.getSeqnum() ) ) .append( encoder.encodeText( src.substring( to.getStartOffset() - modifiedStartOffset[0], to.getEndOffset() - modifiedStartOffset[0] ) ) ) .append( getPostTag( postTags, subInfo.getSeqnum() ) ); srcIndex = to.getEndOffset() - modifiedStartOffset[0]; } } fragment.append( encoder.encodeText( src.substring( srcIndex ) ) ); return fragment.toString(); }
@Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } });
@Override public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) { float totalBoost = 0; List<SubInfo> subInfos = new ArrayList<>(); for( WeightedPhraseInfo phraseInfo : phraseInfoList ){ subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum(), phraseInfo.getBoost() ) ); totalBoost += phraseInfo.getBoost(); } getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) ); }
@Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } });
@Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } });
@Override public String toString(){ StringBuilder sb = new StringBuilder(); sb.append( "subInfos=(" ); for( SubInfo si : subInfos ) sb.append( si.toString() ); sb.append( ")/" ).append( totalBoost ).append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' ); return sb.toString(); }
@Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } });
@Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } });