/**
 * Works around analysis chains that emit inconsistent positions and offsets, which can otherwise
 * lead to a {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}.
 *
 * @param fieldType mapped field type whose index analyzer is checked via {@code containsBrokenAnalysis}
 * @param values    field instances being highlighted; only consulted by the sanity assertion
 * @param fragInfo  fragment info to inspect, must not be {@code null}
 * @return {@code fragInfo} itself when it has no sub-infos or the analyzer is not flagged; otherwise a
 *         new {@link WeightedFragInfo} whose sub-infos are ordered by start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name();

    // Fast path: nothing to reorder, or the index analyzer is not one of the known offenders.
    if (fragInfo.getSubInfos().isEmpty() || !containsBrokenAnalysis(fieldType.indexAnalyzer())) {
        return fragInfo;
    }

    /*
     * Special case: a broken analysis chain (WDF for instance) was used to create the term vectors at
     * index time, so the offsets may be out of order. The FastVectorHighlighter orders sub-infos by
     * position only; to avoid a StringIndexOutOfBoundsException they are re-sorted here by the start
     * offset of their first term. This is really a Lucene problem and should be fixed there rather
     * than being papered over by this hack.
     */
    final Comparator<SubInfo> byFirstTermStart = new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo left, SubInfo right) {
            return FragmentBuilderHelper.compare(
                left.getTermsOffsets().get(0).getStartOffset(),
                right.getTermsOffsets().get(0).getStartOffset()
            );
        }
    };
    final List<SubInfo> orderedSubInfos = fragInfo.getSubInfos();
    CollectionUtil.introSort(orderedSubInfos, byFirstTermStart);

    // The fragment must start no later than its earliest highlighted term.
    final int earliestTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int fixedStart = Math.min(earliestTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(fixedStart, fragInfo.getEndOffset(), orderedSubInfos, fragInfo.getTotalBoost());
}
/**
 * Renders one fragment as text, wrapping every matched term in its pre/post tag and passing all
 * text taken from the source through the encoder.
 *
 * @param buffer   forwarded to {@code getFragmentSourceMSO}
 * @param index    forwarded to {@code getFragmentSourceMSO}
 * @param values   forwarded to {@code getFragmentSourceMSO}
 * @param fragInfo fragment whose start/end offsets and sub-infos drive the output
 * @param preTags  candidate opening tags, selected through {@code getPreTag} by sequence number
 * @param postTags candidate closing tags, selected through {@code getPostTag} by sequence number
 * @param encoder  encoder applied to every piece of source text that is appended
 * @return the assembled fragment
 */
protected String makeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
                              String[] preTags, String[] postTags, Encoder encoder) {
    final int requestedStart = fragInfo.getStartOffset();
    // One-element array handed to getFragmentSourceMSO and read back afterwards; every term offset
    // below is made relative to the value it holds after that call.
    final int[] modifiedStartOffset = new int[] { requestedStart };
    final String source = getFragmentSourceMSO(buffer, index, values, requestedStart, fragInfo.getEndOffset(), modifiedStartOffset);
    final int base = modifiedStartOffset[0];

    final StringBuilder out = new StringBuilder();
    int cursor = 0; // index in source of the first character not yet emitted
    for (SubInfo subInfo : fragInfo.getSubInfos()) {
        for (Toffs offsets : subInfo.getTermsOffsets()) {
            final int matchStart = offsets.getStartOffset() - base;
            final int matchEnd = offsets.getEndOffset() - base;

            // text between the previous match and this one
            out.append(encoder.encodeText(source.substring(cursor, matchStart)));
            // the match itself, wrapped in its tags
            out.append(getPreTag(preTags, subInfo.getSeqnum()));
            out.append(encoder.encodeText(source.substring(matchStart, matchEnd)));
            out.append(getPostTag(postTags, subInfo.getSeqnum()));

            cursor = matchEnd;
        }
    }
    // whatever follows the last match
    out.append(encoder.encodeText(source.substring(cursor)));
    return out.toString();
}
fieldEnd += field.stringValue().length() + 1; // + 1 for going to next field with same name. if (fragInfo.getStartOffset() >= fieldStart && fragInfo.getEndOffset() >= fieldStart && fragInfo.getStartOffset() <= fieldEnd && fragInfo.getEndOffset() <= fieldEnd) { fieldNameToFragInfos.get(field.name()).add(fragInfo); continue fragInfos; if (fragInfo.getEndOffset() > fieldStart && fragInfo.getEndOffset() < fieldEnd) { fragEnd = fragInfo.getEndOffset();
/**
 * Works around analysis chains that emit inconsistent positions and offsets, which can otherwise
 * lead to a {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}.
 *
 * @param mapper   field mapper whose field type supplies the index analyzer that is checked
 * @param values   field instances being highlighted; only consulted by the sanity assertion
 * @param fragInfo fragment info to inspect, must not be {@code null}
 * @return {@code fragInfo} itself when it has no sub-infos or the analyzer is not flagged; otherwise a
 *         new {@link WeightedFragInfo} whose sub-infos are ordered by start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert mapper.fieldType().names().indexName().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name();

    // Fast path: nothing to reorder, or the index analyzer is not one of the known offenders.
    if (fragInfo.getSubInfos().isEmpty() || !containsBrokenAnalysis(mapper.fieldType().indexAnalyzer())) {
        return fragInfo;
    }

    /*
     * Special case: a broken analysis chain (WDF for instance) was used to create the term vectors at
     * index time, so the offsets may be out of order. The FastVectorHighlighter orders sub-infos by
     * position only; to avoid a StringIndexOutOfBoundsException they are re-sorted here by the start
     * offset of their first term. This is really a Lucene problem and should be fixed there rather
     * than being papered over by this hack.
     */
    final Comparator<SubInfo> byFirstTermStart = new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo left, SubInfo right) {
            return FragmentBuilderHelper.compare(
                left.getTermsOffsets().get(0).getStartOffset(),
                right.getTermsOffsets().get(0).getStartOffset()
            );
        }
    };
    final List<SubInfo> orderedSubInfos = fragInfo.getSubInfos();
    CollectionUtil.introSort(orderedSubInfos, byFirstTermStart);

    // The fragment must start no later than its earliest highlighted term.
    final int earliestTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int fixedStart = Math.min(earliestTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(fixedStart, fragInfo.getEndOffset(), orderedSubInfos, fragInfo.getTotalBoost());
}
/**
 * Repairs a fragment produced from an analysis chain with inconsistent positions and offsets, a
 * situation that can cause a {@link StringIndexOutOfBoundsException} in the
 * {@link FastVectorHighlighter}.
 *
 * @param fieldType mapped field type whose index analyzer is checked via {@code containsBrokenAnalysis}
 * @param values    field instances being highlighted; only consulted by the sanity assertion
 * @param fragInfo  fragment info to inspect, must not be {@code null}
 * @return the given {@code fragInfo} when no fix is required, otherwise a new
 *         {@link WeightedFragInfo} with offset-ordered sub-infos and an adjusted start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name();

    final boolean needsFix = !fragInfo.getSubInfos().isEmpty() && containsBrokenAnalysis(fieldType.indexAnalyzer());
    if (!needsFix) {
        return fragInfo;
    }

    /*
     * Broken analysis such as WDF, when used for term-vector creation at index time, can scramble the
     * offsets. The FastVectorHighlighter relies solely on positions, so the sub-infos are re-sorted
     * by offset here to prevent a StringIndexOutOfBoundsException. Ideally this would be fixed in
     * Lucene itself instead of in this workaround.
     */
    final List<SubInfo> sorted = fragInfo.getSubInfos();
    final Comparator<SubInfo> offsetOrder = new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo a, SubInfo b) {
            final int aStart = a.getTermsOffsets().get(0).getStartOffset();
            final int bStart = b.getTermsOffsets().get(0).getStartOffset();
            return FragmentBuilderHelper.compare(aStart, bStart);
        }
    };
    CollectionUtil.introSort(sorted, offsetOrder);

    // After sorting, the first sub-info holds the smallest term start offset; never start after it.
    final int firstTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int newStart = Math.min(firstTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(newStart, fragInfo.getEndOffset(), sorted, fragInfo.getTotalBoost());
}
/**
 * Compensates for analysis chains whose positions and offsets disagree, which may otherwise end in a
 * {@link StringIndexOutOfBoundsException} inside the {@link FastVectorHighlighter}.
 *
 * @param fieldType mapped field type whose index analyzer is checked via {@code containsBrokenAnalysis}
 * @param values    field instances being highlighted; only consulted by the sanity assertion
 * @param fragInfo  fragment info to inspect, must not be {@code null}
 * @return {@code fragInfo} unchanged when it is empty of sub-infos or the analyzer is not flagged;
 *         otherwise a new {@link WeightedFragInfo} built from the offset-sorted sub-infos
 */
public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name();

    if (fragInfo.getSubInfos().isEmpty()) {
        // No highlighted terms, hence nothing that could be out of order.
        return fragInfo;
    }
    if (!containsBrokenAnalysis(fieldType.indexAnalyzer())) {
        // Analyzer is not flagged as producing inconsistent offsets.
        return fragInfo;
    }

    /*
     * Index-time term vectors built with a broken analysis chain (e.g. WDF) may carry offsets that
     * do not follow position order. Since the FastVectorHighlighter orders by position only, the
     * sub-infos are re-sorted by the start offset of their first term to avoid a
     * StringIndexOutOfBoundsException. This is a Lucene issue and belongs there, not in this hack.
     */
    final List<SubInfo> subInfosByOffset = fragInfo.getSubInfos();
    CollectionUtil.introSort(subInfosByOffset, new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo first, SubInfo second) {
            return FragmentBuilderHelper.compare(
                first.getTermsOffsets().get(0).getStartOffset(),
                second.getTermsOffsets().get(0).getStartOffset()
            );
        }
    });

    // Pull the fragment start back if the earliest term begins before it.
    final int leadingTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int adjustedStart = Math.min(leadingTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(adjustedStart, fragInfo.getEndOffset(), subInfosByOffset, fragInfo.getTotalBoost());
}
/**
 * Repairs a fragment produced from an analysis chain with inconsistent positions and offsets, a
 * situation that can cause a {@link StringIndexOutOfBoundsException} in the
 * {@link FastVectorHighlighter}.
 *
 * @param mapper   field mapper whose field type supplies the index analyzer that is checked
 * @param values   field instances being highlighted; only consulted by the sanity assertion
 * @param fragInfo fragment info to inspect, must not be {@code null}
 * @return the given {@code fragInfo} when no fix is required, otherwise a new
 *         {@link WeightedFragInfo} with offset-ordered sub-infos and an adjusted start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name();

    final boolean needsFix = !fragInfo.getSubInfos().isEmpty() && containsBrokenAnalysis(mapper.fieldType().indexAnalyzer());
    if (!needsFix) {
        return fragInfo;
    }

    /*
     * Broken analysis such as WDF, when used for term-vector creation at index time, can scramble the
     * offsets. The FastVectorHighlighter relies solely on positions, so the sub-infos are re-sorted
     * by offset here to prevent a StringIndexOutOfBoundsException. Ideally this would be fixed in
     * Lucene itself instead of in this workaround.
     */
    final List<SubInfo> sorted = fragInfo.getSubInfos();
    final Comparator<SubInfo> offsetOrder = new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo a, SubInfo b) {
            final int aStart = a.getTermsOffsets().get(0).getStartOffset();
            final int bStart = b.getTermsOffsets().get(0).getStartOffset();
            return FragmentBuilderHelper.compare(aStart, bStart);
        }
    };
    CollectionUtil.introSort(sorted, offsetOrder);

    // After sorting, the first sub-info holds the smallest term start offset; never start after it.
    final int firstTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int newStart = Math.min(firstTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(newStart, fragInfo.getEndOffset(), sorted, fragInfo.getTotalBoost());
}
/**
 * Compensates for analysis chains whose positions and offsets disagree, which may otherwise end in a
 * {@link StringIndexOutOfBoundsException} inside the {@link FastVectorHighlighter}.
 *
 * @param mapper   field mapper whose field type supplies the index analyzer that is checked
 * @param values   field instances being highlighted; only consulted by the sanity assertion
 * @param fragInfo fragment info to inspect, must not be {@code null}
 * @return {@code fragInfo} unchanged when it is empty of sub-infos or the analyzer is not flagged;
 *         otherwise a new {@link WeightedFragInfo} built from the offset-sorted sub-infos
 */
public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name();

    // Fast path: nothing to reorder, or the index analyzer is not one of the known offenders.
    if (fragInfo.getSubInfos().isEmpty() || !containsBrokenAnalysis(mapper.fieldType().indexAnalyzer())) {
        return fragInfo;
    }

    /*
     * Index-time term vectors built with a broken analysis chain (e.g. WDF) may carry offsets that
     * do not follow position order. Since the FastVectorHighlighter orders by position only, the
     * sub-infos are re-sorted by the start offset of their first term to avoid a
     * StringIndexOutOfBoundsException. This is a Lucene issue and belongs there, not in this hack.
     */
    final Comparator<SubInfo> byFirstTermStart = (left, right) -> FragmentBuilderHelper.compare(
        left.getTermsOffsets().get(0).getStartOffset(),
        right.getTermsOffsets().get(0).getStartOffset()
    );
    final List<SubInfo> subInfosByOffset = fragInfo.getSubInfos();
    CollectionUtil.introSort(subInfosByOffset, byFirstTermStart);

    // Pull the fragment start back if the earliest term begins before it.
    final int leadingTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int adjustedStart = Math.min(leadingTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(adjustedStart, fragInfo.getEndOffset(), subInfosByOffset, fragInfo.getTotalBoost());
}