private void getSuggestion(Term term, IndexReader ir, List<String> result) throws IOException { if (term == null) { return; } String[] toks = TABSPACE.split(term.text(), 0); for (String tok : toks) { //TODO below seems to be case insensitive ... for refs/defs this is bad SuggestWord[] words = checker.suggestSimilar(new Term(term.field(), tok), SPELLCHECK_SUGGEST_WORD_COUNT, ir, SuggestMode.SUGGEST_ALWAYS); for (SuggestWord w : words) { result.add(w.string); } } }
/**
 * Creates the per-search weight for the enclosing {@link TermQuery}.
 *
 * @param searcher    searcher the query will execute against
 * @param needsScores whether real scoring statistics must be gathered
 * @param boost       boost folded into the similarity weight
 * @param termStates  pre-resolved term contexts; required when scoring
 * @throws IOException if statistics cannot be read from the index
 * @throws IllegalStateException if scoring is requested without term states
 */
public TermWeight(IndexSearcher searcher, boolean needsScores, float boost, TermContext termStates) throws IOException {
    super(TermQuery.this);
    if (needsScores && termStates == null) {
        throw new IllegalStateException("termStates are required when scores are needed");
    }
    this.needsScores = needsScores;
    this.termStates = termStates;
    this.similarity = searcher.getSimilarity(needsScores);

    final CollectionStatistics collStats;
    final TermStatistics tStats;
    if (needsScores) {
        collStats = searcher.collectionStatistics(term.field());
        tStats = searcher.termStatistics(term, termStates);
    } else {
        // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
        final int maxDoc = searcher.getIndexReader().maxDoc();
        collStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
        tStats = new TermStatistics(term.bytes(), maxDoc, -1);
    }
    this.stats = similarity.computeWeight(boost, collStats, tStats);
}
// NOTE(review): this method appears truncated AND corrupted in this chunk:
// the "} else builder.add(...)" after the for-loop is not valid Java (an
// "else" cannot follow a loop, and "i" is out of scope there), and the method
// ends mid-body after the assert. Recover the original from version control
// before editing — the code below is preserved byte-for-byte.
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new BaseSimilarity());
    BooleanQuery query = null;
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    // One SHOULD clause per hash: any matching hash makes a candidate.
    for (int i = 0; i < hashes.length; i++) {
        builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD));
    // NOTE(review): stray duplicated clause below — likely a merge/paste artifact.
    } else builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD));
    query = builder.build();
    TopDocs docs = searcher.search(query, maxResultsHashBased);
    double tmpScore;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        // Re-reads the stored document three times per hit — presumably could be
        // hoisted into one getBinaryValue call; verify against the full file.
        feature.setByteArrayRepresentation(reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length);
        tmpScore = queryFeature.getDistance(feature);
        assert (tmpScore >= 0);
// Builds (for the cache) a DocIdSet of all documents that have NO term in
// entry.field: it first sets a bit for every document matched by any term in
// the field, then flips the whole bitset.
@Override
protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
    Entry entry = entryKey;
    String field = entry.field;
    // Fast path: presumably docFreq of the field-only Term counts docs carrying
    // the field, so equality with maxDoc means no doc lacks it — TODO confirm
    // against this Lucene version's Term(String) semantics.
    if (reader.maxDoc() == reader.docFreq(new Term(field))) {
        return DocIdSet.EMPTY_DOCIDSET;
    }
    OpenBitSet res = new OpenBitSet(reader.maxDoc());
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms (new Term (field));
    try {
        do {
            Term term = termEnum.term();
            // NOTE(review): reference comparison (!=) on field names relies on
            // old-Lucene interning of field strings — do not "fix" to equals()
            // without confirming; it is the idiom this API expects.
            if (term==null || term.field() != field) break;
            termDocs.seek (termEnum);
            while (termDocs.next()) {
                res.fastSet(termDocs.doc());
            }
        } while (termEnum.next());
    } finally {
        termDocs.close();
        termEnum.close();
    }
    // Invert: bits now mark documents WITHOUT any term in the field.
    res.flip(0, reader.maxDoc());
    return res;
}
}
/**
 * Runs a hash-based image search seeded from the global feature stored for
 * {@code doc}, either via binary doc values or the document's stored field.
 *
 * @param doc    the query document (its identifier or stored feature is used)
 * @param reader reader for the index being searched
 * @return hits from the delegated hash search, or null when the feature class
 *         cannot be instantiated or (doc-values path) the document is not found
 * @throws IOException if index access fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    GlobalFeature queryFeature = null;
    try {
        queryFeature = feature.getClass().newInstance();
    } catch (InstantiationException | IllegalAccessException e) {
        // NOTE(review): error is swallowed and null returned — callers must null-check.
        e.printStackTrace();
        return null;
    }
    if (useDocValues) {
        // Side effect: (re)assigns the docValues field for this search.
        docValues = MultiDocValues.getBinaryValues(reader, featureFieldName);
        // find the id of the document in the reader, then do search ... TODO: find another way instead of calling the searcher every time.
        TopDocs topDocs = searcher.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, doc.get(DocumentBuilder.FIELD_NAME_IDENTIFIER))), 1);
        if (topDocs.totalHits > 0) {
            int docID = topDocs.scoreDocs[0].doc;
            // advanceExact must precede binaryValue(); order matters here.
            docValues.advanceExact(docID);
            queryFeature.setByteArrayRepresentation(docValues.binaryValue().bytes, docValues.binaryValue().offset, docValues.binaryValue().length);
            return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, searcher.getIndexReader());
        }
    } else {
        // Stored-field path: read the feature bytes directly off the document.
        queryFeature.setByteArrayRepresentation(doc.getBinaryValue(featureFieldName).bytes, doc.getBinaryValue(featureFieldName).offset, doc.getBinaryValue(featureFieldName).length);
        return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, reader);
    }
    return null;
}
private BooleanQuery buildLineageQuery(final Collection<String> flowFileUuids) { // Create a query for all Events related to the FlowFiles of interest. We do this by adding all ID's as // "SHOULD" clauses and then setting the minimum required to 1. final BooleanQuery lineageQuery; if (flowFileUuids == null || flowFileUuids.isEmpty()) { lineageQuery = null; } else { lineageQuery = new BooleanQuery(); for (final String flowFileUuid : flowFileUuids) { lineageQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD); } lineageQuery.setMinimumNumberShouldMatch(1); } return lineageQuery; }
// NOTE(review): fragment — the method signature starts before this chunk and the
// braces do not balance (the "} else {" below has no visible matching "if", and
// several blocks are never closed). Preserved byte-for-byte; review against the
// full file before editing.
boolean stops, boolean clusters, boolean corners) {
    BooleanQuery query = new BooleanQuery();
    BooleanQuery termQuery = new BooleanQuery();
    // Prefix match on the name field for plain query strings.
    termQuery.add(new PrefixQuery(new Term("name", queryString)), BooleanClause.Occur.SHOULD);
} else {
    List<String> list = new ArrayList<String>();
    PhraseQuery phraseQuery = new PhraseQuery();
    // Quoted token: strip surrounding quotes and phrase-match the lowercased words.
    for (String phraseToken : token.substring(1, token.length() - 1).split(" ")) {
        phraseQuery.add(new Term("name", phraseToken.toLowerCase()));
    // Fuzzy name match and exact code match for the raw token.
    termQuery.add(new FuzzyQuery(new Term("name", token)), BooleanClause.Occur.SHOULD);
    termQuery.add(new TermQuery(new Term("code", token)), BooleanClause.Occur.SHOULD);
    // Category filter — presumably gated on the stops/clusters/corners flags in
    // the full method; verify against the complete source.
    typeQuery.add(new TermQuery(new Term("category", Category.STOP.name())), BooleanClause.Occur.SHOULD);
    typeQuery.add(new TermQuery(new Term("category", Category.CLUSTER.name())), BooleanClause.Occur.SHOULD);
    typeQuery.add(new TermQuery(new Term("category", Category.CORNER.name())), BooleanClause.Occur.SHOULD);
/**
 * Locates the indexed counterpart of {@code doc} by its identifier field and
 * delegates to the docID-based search.
 *
 * @param doc    document whose first identifier value is looked up
 * @param reader reader for the index to query
 * @return hits seeded from the matching document, or null when no match exists
 * @throws IOException if the lookup fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    final IndexSearcher searcher = new IndexSearcher(reader);
    final String identifier = doc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
    final TermQuery idQuery = new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, identifier));
    final TopDocs topDocs = searcher.search(idQuery, 1);
    if (topDocs.totalHits > 0) {
        return search(topDocs.scoreDocs[0].doc);
    }
    return null;
}
/**
 * Writes a document to contain the serialized version of {@code settings},
 * with a {@link QueryBuilder#OBJUID} value set to
 * {@link #INDEX_ANALYSIS_SETTINGS_OBJUID}. An existing version of the
 * document is first deleted.
 * @param writer a defined, target instance
 * @param settings a defined instance
 * @throws IOException if I/O error occurs while writing Lucene
 */
public void write(IndexWriter writer, IndexAnalysisSettings settings) throws IOException {
    // Serialize first so a serialization failure leaves the index untouched.
    final byte[] serialized = settings.serialize();

    // Drop any stale copy keyed by the same OBJUID before adding the new one.
    writer.deleteDocuments(new Term(QueryBuilder.OBJUID, INDEX_ANALYSIS_SETTINGS_OBJUID));

    final Document doc = new Document();
    doc.add(new StringField(QueryBuilder.OBJUID, INDEX_ANALYSIS_SETTINGS_OBJUID, Field.Store.NO));
    doc.add(new StoredField(QueryBuilder.OBJSER, serialized));
    doc.add(new StoredField(QueryBuilder.OBJVER, INDEX_ANALYSIS_SETTINGS_OBJVER));
    writer.addDocument(doc);
}
/**
 * Queries the full-text index for a driver whose truck carries an item with
 * the given description and projects out the truck id.
 *
 * @param session         active full-text session to query through
 * @param itemDescription exact item description to match
 * @return the first matching truck id, or null when nothing matches
 */
private Long findTruckIdFromIndex(FullTextSession session, String itemDescription) {
    final TermQuery descriptionQuery =
            new TermQuery( new Term( "truck.items.description", itemDescription ) );
    final FullTextQuery query = session.createFullTextQuery( descriptionQuery, Driver.class );
    query.setProjection( "truck.id" );
    @SuppressWarnings("unchecked")
    final List<Object[]> rows = query.list();
    return rows.isEmpty() ? null : (Long) rows.get( 0 )[0];
}
// NOTE(review): fragment of a larger method — `en`, `processedKey`, `stopWords`,
// and `searcher` are defined outside this chunk, and the outer for-loop is never
// closed here. Preserved byte-for-byte.
BooleanQuery.Builder builder = new BooleanQuery.Builder();
String pkey = Token.getKeyForClass(PatternsAnnotations.ProcessedTextAnnotation.class);
// MUST-clause per value, skipping stopwords when the key is the processed one.
for(String en2: en.getValue()){
    if(!processedKey || !stopWords.contains(en2.toLowerCase()))
        builder.add(new BooleanClause(new TermQuery(new Term(en.getKey(), en2)), BooleanClause.Occur.MUST));
// NOTE(review): Integer.MAX_VALUE as the hit limit can be very expensive on
// large indexes — confirm this is intended.
BooleanQuery query = builder.build();
TopDocs tp = searcher.search(query, Integer.MAX_VALUE);
Set<String> sentids = new HashSet<>();
if (tp.totalHits > 0) {
    for (ScoreDoc s : tp.scoreDocs) {
        int docId = s.doc;
        Document d = searcher.doc(docId);
        sentids.add(d.get("sentid"));
// NOTE(review): fragment — begins inside an if/else whose condition is outside
// this chunk, and the braces do not balance here; setMinimumNumberShouldMatch
// appears inside the for-loop, but that may be an artifact of truncation.
// Verify against the full file. Preserved byte-for-byte.
flowFileIdQuery = null;
} else {
    flowFileIdQuery = new BooleanQuery();
    // One SHOULD clause per FlowFile UUID; at least one must match.
    for (final String flowFileUuid : flowFileUuids) {
        flowFileIdQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD);
        flowFileIdQuery.setMinimumNumberShouldMatch(1);
        final TopDocs uuidQueryTopDocs = searcher.getIndexSearcher().search(flowFileIdQuery, MAX_QUERY_RESULTS);
        final long searchEnd = System.nanoTime();
        final Set<ProvenanceEventRecord> recs = docsToEventConverter.convert(uuidQueryTopDocs, searcher.getIndexSearcher().getIndexReader());
/**
 * Loads the serialized per-token pattern map for the sentence with the given
 * id from the index.
 *
 * Fixes over the original: the {@code ObjectInputStream} is now closed via
 * try-with-resources (it was leaked), and the two identical catch blocks are
 * collapsed into a multi-catch.
 *
 * SECURITY NOTE(review): this Java-deserializes bytes stored in the index;
 * only safe if the index contents are trusted.
 *
 * @param sentId id of the sentence whose patterns are requested
 * @return the deserialized token-index-to-patterns map, or null if the hit
 *         list was unexpectedly empty
 * @throws RuntimeException if no document matches, or on I/O /
 *         deserialization failure (wrapping the cause)
 */
@Override
public Map<Integer, Set<E>> getPatternsForAllTokens(String sentId) {
    try {
        TermQuery query = new TermQuery(new Term("sentid", sentId));
        TopDocs tp = searcher.search(query, 1);
        if (tp.totalHits > 0) {
            for (ScoreDoc s : tp.scoreDocs) {
                Document d = searcher.doc(s.doc);
                byte[] st = d.getBinaryValue("patterns").bytes;
                try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(st))) {
                    @SuppressWarnings("unchecked")
                    Map<Integer, Set<E>> patterns = (Map<Integer, Set<E>>) ois.readObject();
                    return patterns;
                }
            }
        } else {
            throw new RuntimeException("Why no patterns for sentid " + sentId + ". Number of documents in index are " + size());
        }
    } catch (IOException | ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
    return null;
}
/**
 * Populates the list of categories by reading the values from the
 * categoryField in the index.
 *
 * Fixes over the original: {@code te.term()} is checked for null (the original
 * dereferenced it unconditionally and threw NullPointerException on an index
 * with no terms at/after the seek point), and the {@code TermEnum} is now
 * closed in a finally block (it was leaked).
 *
 * @throws IOException if reading the term enumeration fails
 */
protected void scanCategories() throws IOException {
    TermEnum te = indexReader.terms(new Term(categoryFieldName));
    final Set<String> c = categories;
    try {
        do {
            Term term = te.term();
            // Stop at enumeration end (null) or once we leave the category field.
            if (term == null || !term.field().equals(categoryFieldName)) {
                break;
            }
            c.add(term.text());
        } while (te.next());
    } finally {
        te.close();
    }
    log.info("Scanned " + c.size() + " categories from index");
}
@Override public long getTotalTokenCount() { LuceneSearcher luceneSearcher = getLuceneSearcher(1); try { RegexpQuery query = new RegexpQuery(new Term("totalTokenCount", ".*")); TopDocs docs = luceneSearcher.searcher.search(query, 1000); // Integer.MAX_VALUE might cause OOE on wrong index if (docs.totalHits == 0) { throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + luceneSearcher.directory); } else if (docs.totalHits > 1000) { throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits + " in " + luceneSearcher.directory); } else { long result = 0; for (ScoreDoc scoreDoc : docs.scoreDocs) { long tmp = Long.parseLong(luceneSearcher.reader.document(scoreDoc.doc).get("totalTokenCount")); if (tmp > result) { // due to the way FrequencyIndexCreator adds these totalTokenCount fields, we must not sum them, // but take the largest one: result = tmp; } } return result; } } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Builds a "contains" query for a string path: each converted term is matched
 * with a leading+trailing wildcard; multiple terms are ANDed together.
 *
 * @param operation  the contains operation (path, value)
 * @param metadata   query metadata used when converting the value
 * @param ignoreCase unused here; case handling happens during conversion
 * @return a WildcardQuery for a single term, or a MUST-combined BooleanQuery
 *         for several
 */
protected Query stringContains(Operation<?> operation, QueryMetadata metadata, boolean ignoreCase) {
    verifyArguments(operation);
    final Path<?> path = getPath(operation.getArg(0));
    final String field = toField(path);
    final String[] terms = convertEscaped(path, operation.getArg(1), metadata);
    if (terms.length > 1) {
        final BooleanQuery conjunction = new BooleanQuery();
        for (final String term : terms) {
            conjunction.add(new WildcardQuery(new Term(field, "*" + term + "*")), Occur.MUST);
        }
        return conjunction;
    }
    return new WildcardQuery(new Term(field, "*" + terms[0] + "*"));
}
// NOTE(review): fragment — `field`, `termsRef`, and `collector` are defined
// outside this chunk.
// NOTE(review): the loop variable leafReaderContext is never used, so each
// iteration re-runs the same whole-index search once per leaf segment —
// presumably either a single search(query, collector) call or a per-leaf
// search was intended; confirm against the full method.
for ( LeafReaderContext leafReaderContext : searcher.getIndexReader().leaves() )
    searcher.search( new TermQuery( new Term( field, termsRef ) ), collector );