private void getSuggestion(Term term, IndexReader ir, List<String> result) throws IOException { if (term == null) { return; } String[] toks = TABSPACE.split(term.text(), 0); for (String tok : toks) { //TODO below seems to be case insensitive ... for refs/defs this is bad SuggestWord[] words = checker.suggestSimilar(new Term(term.field(), tok), SPELLCHECK_SUGGEST_WORD_COUNT, ir, SuggestMode.SUGGEST_ALWAYS); for (SuggestWord w : words) { result.add(w.string); } } }
/**
 * Creates the per-search weight for the enclosing {@link TermQuery}.
 *
 * @param searcher    searcher the query will execute against
 * @param needsScores whether real scoring statistics must be gathered
 * @param boost       boost folded into the similarity weight
 * @param termStates  pre-resolved term contexts; required when scoring
 * @throws IOException if statistics cannot be read from the index
 * @throws IllegalStateException if scoring is requested without term states
 */
public TermWeight(IndexSearcher searcher, boolean needsScores, float boost, TermContext termStates) throws IOException {
    super(TermQuery.this);
    if (needsScores && termStates == null) {
        throw new IllegalStateException("termStates are required when scores are needed");
    }
    this.needsScores = needsScores;
    this.termStates = termStates;
    this.similarity = searcher.getSimilarity(needsScores);

    final CollectionStatistics collStats;
    final TermStatistics tStats;
    if (needsScores) {
        collStats = searcher.collectionStatistics(term.field());
        tStats = searcher.termStatistics(term, termStates);
    } else {
        // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
        final int maxDoc = searcher.getIndexReader().maxDoc();
        collStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
        tStats = new TermStatistics(term.bytes(), maxDoc, -1);
    }
    this.stats = similarity.computeWeight(boost, collStats, tStats);
}
// NOTE(review): this method appears truncated AND corrupted in this chunk:
// the "} else builder.add(...)" after the for-loop is not valid Java (an
// "else" cannot follow a loop, and "i" is out of scope there), and the method
// ends mid-body after the assert. Recover the original from version control
// before editing — the code below is preserved byte-for-byte.
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new BaseSimilarity());
    BooleanQuery query = null;
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    // One SHOULD clause per hash: any matching hash makes a candidate.
    for (int i = 0; i < hashes.length; i++) {
        builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD));
    // NOTE(review): stray duplicated clause below — likely a merge/paste artifact.
    } else builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD));
    query = builder.build();
    TopDocs docs = searcher.search(query, maxResultsHashBased);
    double tmpScore;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        // Re-reads the stored document three times per hit — presumably could be
        // hoisted into one getBinaryValue call; verify against the full file.
        feature.setByteArrayRepresentation(reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length);
        tmpScore = queryFeature.getDistance(feature);
        assert (tmpScore >= 0);
// Builds (for the cache) a DocIdSet of all documents that have NO term in
// entry.field: it first sets a bit for every document matched by any term in
// the field, then flips the whole bitset.
@Override
protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
    Entry entry = entryKey;
    String field = entry.field;
    // Fast path: presumably docFreq of the field-only Term counts docs carrying
    // the field, so equality with maxDoc means no doc lacks it — TODO confirm
    // against this Lucene version's Term(String) semantics.
    if (reader.maxDoc() == reader.docFreq(new Term(field))) {
        return DocIdSet.EMPTY_DOCIDSET;
    }
    OpenBitSet res = new OpenBitSet(reader.maxDoc());
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms (new Term (field));
    try {
        do {
            Term term = termEnum.term();
            // NOTE(review): reference comparison (!=) on field names relies on
            // old-Lucene interning of field strings — do not "fix" to equals()
            // without confirming; it is the idiom this API expects.
            if (term==null || term.field() != field) break;
            termDocs.seek (termEnum);
            while (termDocs.next()) {
                res.fastSet(termDocs.doc());
            }
        } while (termEnum.next());
    } finally {
        termDocs.close();
        termEnum.close();
    }
    // Invert: bits now mark documents WITHOUT any term in the field.
    res.flip(0, reader.maxDoc());
    return res;
}
}
/**
 * Runs a hash-based image search seeded from the global feature stored for
 * {@code doc}, either via binary doc values or the document's stored field.
 *
 * @param doc    the query document (its identifier or stored feature is used)
 * @param reader reader for the index being searched
 * @return hits from the delegated hash search, or null when the feature class
 *         cannot be instantiated or (doc-values path) the document is not found
 * @throws IOException if index access fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    GlobalFeature queryFeature = null;
    try {
        queryFeature = feature.getClass().newInstance();
    } catch (InstantiationException | IllegalAccessException e) {
        // NOTE(review): error is swallowed and null returned — callers must null-check.
        e.printStackTrace();
        return null;
    }
    if (useDocValues) {
        // Side effect: (re)assigns the docValues field for this search.
        docValues = MultiDocValues.getBinaryValues(reader, featureFieldName);
        // find the id of the document in the reader, then do search ... TODO: find another way instead of calling the searcher every time.
        TopDocs topDocs = searcher.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, doc.get(DocumentBuilder.FIELD_NAME_IDENTIFIER))), 1);
        if (topDocs.totalHits > 0) {
            int docID = topDocs.scoreDocs[0].doc;
            // advanceExact must precede binaryValue(); order matters here.
            docValues.advanceExact(docID);
            queryFeature.setByteArrayRepresentation(docValues.binaryValue().bytes, docValues.binaryValue().offset, docValues.binaryValue().length);
            return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, searcher.getIndexReader());
        }
    } else {
        // Stored-field path: read the feature bytes directly off the document.
        queryFeature.setByteArrayRepresentation(doc.getBinaryValue(featureFieldName).bytes, doc.getBinaryValue(featureFieldName).offset, doc.getBinaryValue(featureFieldName).length);
        return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, reader);
    }
    return null;
}
private BooleanQuery buildLineageQuery(final Collection<String> flowFileUuids) { // Create a query for all Events related to the FlowFiles of interest. We do this by adding all ID's as // "SHOULD" clauses and then setting the minimum required to 1. final BooleanQuery lineageQuery; if (flowFileUuids == null || flowFileUuids.isEmpty()) { lineageQuery = null; } else { lineageQuery = new BooleanQuery(); for (final String flowFileUuid : flowFileUuids) { lineageQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD); } lineageQuery.setMinimumNumberShouldMatch(1); } return lineageQuery; }
// NOTE(review): fragment — the method signature starts before this chunk and the
// braces do not balance (the "} else {" below has no visible matching "if", and
// several blocks are never closed). Preserved byte-for-byte; review against the
// full file before editing.
boolean stops, boolean clusters, boolean corners) {
    BooleanQuery query = new BooleanQuery();
    BooleanQuery termQuery = new BooleanQuery();
    // Prefix match on the name field for plain query strings.
    termQuery.add(new PrefixQuery(new Term("name", queryString)), BooleanClause.Occur.SHOULD);
} else {
    List<String> list = new ArrayList<String>();
    PhraseQuery phraseQuery = new PhraseQuery();
    // Quoted token: strip surrounding quotes and phrase-match the lowercased words.
    for (String phraseToken : token.substring(1, token.length() - 1).split(" ")) {
        phraseQuery.add(new Term("name", phraseToken.toLowerCase()));
    // Fuzzy name match and exact code match for the raw token.
    termQuery.add(new FuzzyQuery(new Term("name", token)), BooleanClause.Occur.SHOULD);
    termQuery.add(new TermQuery(new Term("code", token)), BooleanClause.Occur.SHOULD);
    // Category filter — presumably gated on the stops/clusters/corners flags in
    // the full method; verify against the complete source.
    typeQuery.add(new TermQuery(new Term("category", Category.STOP.name())), BooleanClause.Occur.SHOULD);
    typeQuery.add(new TermQuery(new Term("category", Category.CLUSTER.name())), BooleanClause.Occur.SHOULD);
    typeQuery.add(new TermQuery(new Term("category", Category.CORNER.name())), BooleanClause.Occur.SHOULD);
/**
 * Locates the indexed counterpart of {@code doc} by its identifier field and
 * delegates to the docID-based search.
 *
 * @param doc    document whose first identifier value is looked up
 * @param reader reader for the index to query
 * @return hits seeded from the matching document, or null when no match exists
 * @throws IOException if the lookup fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    final IndexSearcher searcher = new IndexSearcher(reader);
    final String identifier = doc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
    final TermQuery idQuery = new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, identifier));
    final TopDocs topDocs = searcher.search(idQuery, 1);
    if (topDocs.totalHits > 0) {
        return search(topDocs.scoreDocs[0].doc);
    }
    return null;
}
/**
 * Writes a document to contain the serialized version of {@code settings},
 * with a {@link QueryBuilder#OBJUID} value set to
 * {@link #INDEX_ANALYSIS_SETTINGS_OBJUID}. An existing version of the
 * document is first deleted.
 * @param writer a defined, target instance
 * @param settings a defined instance
 * @throws IOException if I/O error occurs while writing Lucene
 */
public void write(IndexWriter writer, IndexAnalysisSettings settings) throws IOException {
    // Serialize first so a serialization failure leaves the index untouched.
    final byte[] serialized = settings.serialize();

    // Drop any stale copy keyed by the same OBJUID before adding the new one.
    writer.deleteDocuments(new Term(QueryBuilder.OBJUID, INDEX_ANALYSIS_SETTINGS_OBJUID));

    final Document doc = new Document();
    doc.add(new StringField(QueryBuilder.OBJUID, INDEX_ANALYSIS_SETTINGS_OBJUID, Field.Store.NO));
    doc.add(new StoredField(QueryBuilder.OBJSER, serialized));
    doc.add(new StoredField(QueryBuilder.OBJVER, INDEX_ANALYSIS_SETTINGS_OBJVER));
    writer.addDocument(doc);
}
/**
 * Queries the full-text index for a driver whose truck carries an item with
 * the given description and projects out the truck id.
 *
 * @param session         active full-text session to query through
 * @param itemDescription exact item description to match
 * @return the first matching truck id, or null when nothing matches
 */
private Long findTruckIdFromIndex(FullTextSession session, String itemDescription) {
    final TermQuery descriptionQuery =
            new TermQuery( new Term( "truck.items.description", itemDescription ) );
    final FullTextQuery query = session.createFullTextQuery( descriptionQuery, Driver.class );
    query.setProjection( "truck.id" );
    @SuppressWarnings("unchecked")
    final List<Object[]> rows = query.list();
    return rows.isEmpty() ? null : (Long) rows.get( 0 )[0];
}
// NOTE(review): fragment of a larger method — `en`, `processedKey`, `stopWords`,
// and `searcher` are defined outside this chunk, and the outer for-loop is never
// closed here. Preserved byte-for-byte.
BooleanQuery.Builder builder = new BooleanQuery.Builder();
String pkey = Token.getKeyForClass(PatternsAnnotations.ProcessedTextAnnotation.class);
// MUST-clause per value, skipping stopwords when the key is the processed one.
for(String en2: en.getValue()){
    if(!processedKey || !stopWords.contains(en2.toLowerCase()))
        builder.add(new BooleanClause(new TermQuery(new Term(en.getKey(), en2)), BooleanClause.Occur.MUST));
// NOTE(review): Integer.MAX_VALUE as the hit limit can be very expensive on
// large indexes — confirm this is intended.
BooleanQuery query = builder.build();
TopDocs tp = searcher.search(query, Integer.MAX_VALUE);
Set<String> sentids = new HashSet<>();
if (tp.totalHits > 0) {
    for (ScoreDoc s : tp.scoreDocs) {
        int docId = s.doc;
        Document d = searcher.doc(docId);
        sentids.add(d.get("sentid"));
// NOTE(review): fragment — begins inside an if/else whose condition is outside
// this chunk, and the braces do not balance here; setMinimumNumberShouldMatch
// appears inside the for-loop, but that may be an artifact of truncation.
// Verify against the full file. Preserved byte-for-byte.
flowFileIdQuery = null;
} else {
    flowFileIdQuery = new BooleanQuery();
    // One SHOULD clause per FlowFile UUID; at least one must match.
    for (final String flowFileUuid : flowFileUuids) {
        flowFileIdQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD);
        flowFileIdQuery.setMinimumNumberShouldMatch(1);
        final TopDocs uuidQueryTopDocs = searcher.getIndexSearcher().search(flowFileIdQuery, MAX_QUERY_RESULTS);
        final long searchEnd = System.nanoTime();
        final Set<ProvenanceEventRecord> recs = docsToEventConverter.convert(uuidQueryTopDocs, searcher.getIndexSearcher().getIndexReader());
/**
 * Loads the serialized per-token pattern map for the sentence with the given
 * id from the index.
 *
 * Fixes over the original: the {@code ObjectInputStream} is now closed via
 * try-with-resources (it was leaked), and the two identical catch blocks are
 * collapsed into a multi-catch.
 *
 * SECURITY NOTE(review): this Java-deserializes bytes stored in the index;
 * only safe if the index contents are trusted.
 *
 * @param sentId id of the sentence whose patterns are requested
 * @return the deserialized token-index-to-patterns map, or null if the hit
 *         list was unexpectedly empty
 * @throws RuntimeException if no document matches, or on I/O /
 *         deserialization failure (wrapping the cause)
 */
@Override
public Map<Integer, Set<E>> getPatternsForAllTokens(String sentId) {
    try {
        TermQuery query = new TermQuery(new Term("sentid", sentId));
        TopDocs tp = searcher.search(query, 1);
        if (tp.totalHits > 0) {
            for (ScoreDoc s : tp.scoreDocs) {
                Document d = searcher.doc(s.doc);
                byte[] st = d.getBinaryValue("patterns").bytes;
                try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(st))) {
                    @SuppressWarnings("unchecked")
                    Map<Integer, Set<E>> patterns = (Map<Integer, Set<E>>) ois.readObject();
                    return patterns;
                }
            }
        } else {
            throw new RuntimeException("Why no patterns for sentid " + sentId + ". Number of documents in index are " + size());
        }
    } catch (IOException | ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
    return null;
}
/**
 * Populates the list of categories by reading the values from the
 * categoryField in the index.
 *
 * Fixes over the original: {@code te.term()} is checked for null (the original
 * dereferenced it unconditionally and threw NullPointerException on an index
 * with no terms at/after the seek point), and the {@code TermEnum} is now
 * closed in a finally block (it was leaked).
 *
 * @throws IOException if reading the term enumeration fails
 */
protected void scanCategories() throws IOException {
    TermEnum te = indexReader.terms(new Term(categoryFieldName));
    final Set<String> c = categories;
    try {
        do {
            Term term = te.term();
            // Stop at enumeration end (null) or once we leave the category field.
            if (term == null || !term.field().equals(categoryFieldName)) {
                break;
            }
            c.add(term.text());
        } while (te.next());
    } finally {
        te.close();
    }
    log.info("Scanned " + c.size() + " categories from index");
}
@Override public long getTotalTokenCount() { LuceneSearcher luceneSearcher = getLuceneSearcher(1); try { RegexpQuery query = new RegexpQuery(new Term("totalTokenCount", ".*")); TopDocs docs = luceneSearcher.searcher.search(query, 1000); // Integer.MAX_VALUE might cause OOE on wrong index if (docs.totalHits == 0) { throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + luceneSearcher.directory); } else if (docs.totalHits > 1000) { throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits + " in " + luceneSearcher.directory); } else { long result = 0; for (ScoreDoc scoreDoc : docs.scoreDocs) { long tmp = Long.parseLong(luceneSearcher.reader.document(scoreDoc.doc).get("totalTokenCount")); if (tmp > result) { // due to the way FrequencyIndexCreator adds these totalTokenCount fields, we must not sum them, // but take the largest one: result = tmp; } } return result; } } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Builds a "contains" query for a string path: each converted term is matched
 * with a leading+trailing wildcard; multiple terms are ANDed together.
 *
 * @param operation  the contains operation (path, value)
 * @param metadata   query metadata used when converting the value
 * @param ignoreCase unused here; case handling happens during conversion
 * @return a WildcardQuery for a single term, or a MUST-combined BooleanQuery
 *         for several
 */
protected Query stringContains(Operation<?> operation, QueryMetadata metadata, boolean ignoreCase) {
    verifyArguments(operation);
    final Path<?> path = getPath(operation.getArg(0));
    final String field = toField(path);
    final String[] terms = convertEscaped(path, operation.getArg(1), metadata);
    if (terms.length > 1) {
        final BooleanQuery conjunction = new BooleanQuery();
        for (final String term : terms) {
            conjunction.add(new WildcardQuery(new Term(field, "*" + term + "*")), Occur.MUST);
        }
        return conjunction;
    }
    return new WildcardQuery(new Term(field, "*" + terms[0] + "*"));
}
// NOTE(review): fragment — `field`, `termsRef`, and `collector` are defined
// outside this chunk.
// NOTE(review): the loop variable leafReaderContext is never used, so each
// iteration re-runs the same whole-index search once per leaf segment —
// presumably either a single search(query, collector) call or a per-leaf
// search was intended; confirm against the full method.
for ( LeafReaderContext leafReaderContext : searcher.getIndexReader().leaves() )
    searcher.search( new TermQuery( new Term( field, termsRef ) ), collector );