/**
 * Concatenates the string form of every fragment into one string.
 *
 * <p>Consecutive fragments are separated by {@code opts.fragSep}. When {@code opts.joinHi} is set,
 * each fragment's text is first passed through {@code replaceAll(opts.patternExpr, "$1")}, so every
 * match of that expression is replaced by its first capture group.
 *
 * @param frags fragments to join, in output order
 * @param opts  options supplying {@code joinHi}, {@code patternExpr} and {@code fragSep}
 * @return the joined text; an empty string when {@code frags} has no elements
 */
private String frags2string(final TextFragment[] frags, final HighlightOpts opts) {
    final StringBuilder joined = new StringBuilder();
    for (int i = 0; i < frags.length; i++) {
        final TextFragment fragment = frags[i];
        final String text = fragment.toString();
        log.trace("found fragment {}", fragment);
        // The separator goes between fragments only, never before the first one.
        if (i > 0) {
            joined.append(opts.fragSep);
        }
        if (opts.joinHi) {
            joined.append(text.replaceAll(opts.patternExpr, "$1"));
        } else {
            joined.append(text);
        }
    }
    return joined.toString();
}
/**
 * Joins the textual form of the given fragments.
 *
 * <p>{@code opts.fragSep} is inserted between neighbouring fragments. If {@code opts.joinHi} is true,
 * each fragment's text is rewritten with {@code replaceAll(opts.patternExpr, "$1")} before it is
 * appended; otherwise the text is appended unchanged.
 *
 * @param frags fragments to join, in output order
 * @param opts  options supplying {@code joinHi}, {@code patternExpr} and {@code fragSep}
 * @return the joined text; an empty string when {@code frags} has no elements
 */
private String frags2string(final TextFragment[] frags, final HighlightOpts opts) {
    final StringBuilder joined = new StringBuilder();
    for (int i = 0; i < frags.length; i++) {
        final TextFragment fragment = frags[i];
        final String text = fragment.toString();
        log.trace("found fragment {}", fragment);
        // The separator goes between fragments only, never before the first one.
        if (i > 0) {
            joined.append(opts.fragSep);
        }
        if (opts.joinHi) {
            joined.append(text.replaceAll(opts.patternExpr, "$1"));
        } else {
            joined.append(text);
        }
    }
    return joined.toString();
}
/** * Highlights chosen terms in a text, extracting the most relevant sections. The document text is analysed in chunks to record * hit statistics across the document. After accumulating stats, the fragments with the highest scores are returned as an * array of strings in order of score (contiguous fragments are merged into one in their original order to improve * readability) * * @param text * text to highlight terms in * @param maxNumFragments * the maximum number of fragments. * * @return highlighted text fragments (between 0 and maxNumFragments number of fragments) * @throws InvalidTokenOffsetsException * thrown if any token's endOffset exceeds the provided text's length */ public final String[] getBestFragments(TokenStream tokenStream, String text, int maxNumFragments) throws IOException, InvalidTokenOffsetsException { maxNumFragments = Math.max(1, maxNumFragments); // sanity check TextFragment[] frag = getBestTextFragments(tokenStream, text, true, maxNumFragments); // Get text ArrayList<String> fragTexts = new ArrayList<String>(); for (int i = 0; i < frag.length; i++) { if ((frag[i] != null) && (frag[i].getScore() > 0)) { fragTexts.add(frag[i].toString()); } } return fragTexts.toArray(new String[0]); }
/**
 * Attaches highlight text and the hit score to a record that was added to the result set.
 *
 * <p>For every entry returned by {@code queryContext.getFragments()} the string forms of all
 * non-null fragments with a score greater than zero are concatenated (no separator) and stored
 * under the key {@code "$" + fieldKey + "_hl"}. The hit's score is stored under {@code "$score"}.
 * The resulting map is set as the record id's context.
 *
 * @param queryContext source of the per-key highlight fragments
 * @param recordId     record id that receives the context map
 * @param ret          document of the hit; not used by this implementation
 * @param score        Lucene hit whose {@code score} is exposed as {@code "$score"}
 */
@Override
public void onRecordAddedToResultSet(OLuceneQueryContext queryContext, OContextualRecordId recordId, Document ret,
    final ScoreDoc score) {
    // A plain HashMap is used instead of double-brace initialisation: the anonymous subclass that
    // idiom creates keeps a hidden reference to the enclosing instance (and the captured
    // arguments) for as long as the context map is reachable from the record.
    final HashMap<String, Object> context = new HashMap<>();

    final HashMap<String, TextFragment[]> fragmentsByKey = queryContext.getFragments();
    fragmentsByKey.forEach((key, fragments) -> {
        final StringBuilder highlighted = new StringBuilder();
        for (final TextFragment fragment : fragments) {
            // Skip empty slots and fragments without any hit.
            if (fragment != null && fragment.getScore() > 0) {
                highlighted.append(fragment.toString());
            }
        }
        context.put("$" + key + "_hl", highlighted.toString());
    });

    // Added after the highlight entries, matching the original put order.
    context.put("$score", score.score);

    recordId.setContext(context);
}
// Append the string form of the i-th fragment to fragTexts.
fragTexts.add(frag[i].toString());
private String getExcerpt(Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc) throws IOException { StringBuilder excerpt = new StringBuilder(); for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) { String name = field.name(); // only full text or analyzed fields if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) { String text = field.stringValue(); TokenStream tokenStream = analyzer.tokenStream(name, text); try { TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 2); if (textFragments != null && textFragments.length > 0) { for (TextFragment fragment : textFragments) { if (excerpt.length() > 0) { excerpt.append("..."); } excerpt.append(fragment.toString()); } break; } } catch (InvalidTokenOffsetsException e) { LOG.error("higlighting failed", e); } } } return excerpt.toString(); }
private String getExcerpt(Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc) throws IOException { StringBuilder excerpt = new StringBuilder(); for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) { String name = field.name(); // only full text or analyzed fields if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) { String text = field.stringValue(); TokenStream tokenStream = analyzer.tokenStream(name, text); try { TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 2); if (textFragments != null && textFragments.length > 0) { for (TextFragment fragment : textFragments) { if (excerpt.length() > 0) { excerpt.append("..."); } excerpt.append(fragment.toString()); } break; } } catch (InvalidTokenOffsetsException e) { LOG.error("higlighting failed", e); } } } return excerpt.toString(); }
// Store the fragment's string form in columnNameToExcerpts under the key columnName.
columnNameToExcerpts.put(columnName, fragment.toString());
// Repeats the statement above with the same key and value expressions.
columnNameToExcerpts.put(columnName, fragment.toString());
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments ) throws IOException { Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); highlighter.setTextFragmenter( new OneLineFragmenter() ); tokenStream.reset(); maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check TextFragment[] frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); // Get text ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments ); for ( int i = 0; i < frag.length; i++ ) { if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) { fragTexts.add( frag[i].toString() ); } } return fragTexts; }
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments ) throws IOException { Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); highlighter.setTextFragmenter( new OneLineFragmenter() ); maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check TextFragment[] frag; // Get text ArrayList<String> fragTexts = new ArrayList<>( maxNumFragments ); try { frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); for ( int i = 0; i < frag.length; i++ ) { if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) { fragTexts.add( frag[i].toString() ); } } } catch ( InvalidTokenOffsetsException e ) { // empty? } return fragTexts; }
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments ) throws IOException { Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); highlighter.setTextFragmenter( new OneLineFragmenter() ); maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check TextFragment[] frag; // Get text ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments ); try { frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); for ( int i = 0; i < frag.length; i++ ) { if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) { fragTexts.add( frag[i].toString() ); } } } catch ( InvalidTokenOffsetsException e ) { // empty? } return fragTexts; }
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments ) throws IOException { Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); highlighter.setTextFragmenter( new OneLineFragmenter() ); maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check TextFragment[] frag; // Get text ArrayList<String> fragTexts = new ArrayList<>( maxNumFragments ); try { frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); for ( int i = 0; i < frag.length; i++ ) { if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) { fragTexts.add( frag[i].toString() ); } } } catch ( InvalidTokenOffsetsException e ) { // empty? } return fragTexts; }
for (TextFragment fragment: frags) { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString());
/**
 * Adds one highlight result per applicable highlighter to a scored result.
 *
 * <p>For each highlighter, the requested index field is mapped to a stored field name through
 * {@code indexConfig}; highlighters without a stored field name are skipped. The stored value(s)
 * are read from the Mongo document and passed to {@code LumongoUtil.handleLists}, whose callback
 * tokenizes the value's string form with {@code perFieldAnalyzer} and adds every non-null fragment
 * with a score greater than zero to the result. The result is added to {@code srBuilder} whether
 * or not any fragment was found.
 *
 * @param highlighterList highlighters to apply
 * @param srBuilder       scored-result builder that receives the highlight results
 * @param doc             Mongo document the stored field values are read from
 * @throws RuntimeException wrapping any exception raised while fetching or adding fragments
 */
private void handleHighlight(List<LumongoHighlighter> highlighterList, ScoredResult.Builder srBuilder,
    org.bson.Document doc) {
    for (LumongoHighlighter highlighter : highlighterList) {
        HighlightRequest request = highlighter.getHighlight();
        String indexField = request.getField();
        String storedFieldName = indexConfig.getStoredFieldName(indexField);
        if (storedFieldName == null) {
            // No stored field to read from; nothing to highlight for this request.
            continue;
        }

        HighlightResult.Builder resultBuilder = HighlightResult.newBuilder();
        resultBuilder.setField(storedFieldName);

        Object storedValues = ResultHelper.getValueFromMongoDocument(doc, storedFieldName);

        LumongoUtil.handleLists(storedValues, (value) -> {
            String content = value.toString();
            TokenStream tokenStream = perFieldAnalyzer.tokenStream(indexField, content);
            try {
                TextFragment[] best = highlighter
                    .getBestTextFragments(tokenStream, content, false, request.getNumberOfFragments());
                for (int i = 0; i < best.length; i++) {
                    TextFragment fragment = best[i];
                    if (fragment != null && fragment.getScore() > 0) {
                        resultBuilder.addFragments(fragment.toString());
                    }
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });

        srBuilder.addHighlightResult(resultBuilder);
    }
}