org.apache.lucene.search.similarities.DefaultSimilarity.<init> java code examples

/**
 * Constructor requiring an IndexReader.
 *
 * <p>Delegates to the two-argument constructor, passing a newly created
 * {@link DefaultSimilarity} as the second argument.
 *
 * @param ir the index reader handed on to the two-argument constructor
 */
public MoreLikeThis(IndexReader ir) {
 this(ir, new DefaultSimilarity());
}

/**
 * Constructor requiring an IndexReader.
 *
 * <p>Delegates to the two-argument constructor, passing a newly created
 * {@link DefaultSimilarity} as the second argument.
 *
 * @param ir the index reader handed on to the two-argument constructor
 */
public MoreLikeThis(IndexReader ir) {
 this(ir, new DefaultSimilarity());
}

/**
 * Constructor requiring an IndexReader.
 *
 * <p>Delegates to the two-argument constructor, passing a newly created
 * {@link DefaultSimilarity} as the second argument.
 *
 * @param ir the index reader handed on to the two-argument constructor
 */
public XMoreLikeThis(IndexReader ir) {
  this(ir, new DefaultSimilarity());
}

/**
 * Returns the configured similarity, falling back to a new
 * {@link DefaultSimilarity} when none has been set.
 *
 * @return the {@code similarity} field if it is non-null, otherwise a fresh
 *         {@code DefaultSimilarity}
 */
public Similarity getSimilarity() {
 if (similarity != null) {
  return similarity;
 }
 // Nothing configured: hand out a default implementation.
 return new DefaultSimilarity();
}

/**
 * Returns the configured similarity, falling back to a new
 * {@link DefaultSimilarity} when none has been set.
 *
 * @return the {@code similarity} field if it is non-null, otherwise a fresh
 *         {@code DefaultSimilarity}
 */
public Similarity getSimilarity() {
 if (similarity != null) {
  return similarity;
 }
 // Nothing configured: hand out a default implementation.
 return new DefaultSimilarity();
}

 /**
  * Per-field similarity selector: the {@code "author"} field is scored with
  * {@link CustomAuthorSimilarity}, every other field with
  * {@link DefaultSimilarity}.
  */
 public class MyCustomSimilarity extends PerFieldSimilarityWrapper {

   /**
    * Picks the similarity for the given field.
    *
    * @param fieldName name of the field being scored
    * @return a new {@code CustomAuthorSimilarity} for {@code "author"},
    *         otherwise a new {@code DefaultSimilarity}
    */
   @Override
   public Similarity get(String fieldName) {
     return fieldName.equals("author")
         ? new CustomAuthorSimilarity()
         : new DefaultSimilarity();
   }
 }

/**
 * Creates a WikiIndex with the given search settings.
 *
 * @param maxHits     value stored in the {@code maxHits} field
 * @param indexPath   value stored in the {@code indexPath} field
 * @param searchField value stored in the {@code searchField} field
 * @param approximate when {@code true} an {@code ApproximateSimilarity} is used,
 *                    otherwise a {@code DefaultSimilarity}
 */
public WikiIndex(int maxHits, String indexPath, String searchField, boolean approximate) {
  this.maxHits = maxHits;
  this.indexPath = indexPath;
  this.searchField = searchField;

  // Choose the scoring implementation from the flag.
  if (approximate) {
    this.similarity = new ApproximateSimilarity();
  } else {
    this.similarity = new DefaultSimilarity();
  }
}

/**
 * Creates a WikiIndex with the given search settings.
 *
 * @param maxHits     value stored in the {@code maxHits} field
 * @param indexPath   value stored in the {@code indexPath} field
 * @param searchField value stored in the {@code searchField} field
 * @param approximate when {@code true} an {@code ApproximateSimilarity} is used,
 *                    otherwise a {@code DefaultSimilarity}
 */
public WikiIndex(int maxHits, String indexPath, String searchField, boolean approximate) {
  this.maxHits = maxHits;
  this.indexPath = indexPath;
  this.searchField = searchField;

  // Choose the scoring implementation from the flag.
  if (approximate) {
    this.similarity = new ApproximateSimilarity();
  } else {
    this.similarity = new DefaultSimilarity();
  }
}

/**
 * Convenience overload that computes idf values with a {@link DefaultSimilarity}.
 * DefaultSimilarity computes idf as
 * {@code 1 + log(numDocs / (docFreq + 1))}.
 *
 * @param reader index reader forwarded to the three-argument overload
 * @param field  field name forwarded to the three-argument overload
 * @return the map produced by the three-argument {@code getIdfs} overload
 * @throws IOException if the three-argument overload throws it
 */
public static Map<String, Float> getIdfs(IndexReader reader, String field)
    throws IOException {
  // Same static type as the original inline argument, so the same overload is chosen.
  final DefaultSimilarity defaultSimilarity = new DefaultSimilarity();
  return getIdfs(reader, field, defaultSimilarity);
}

/**
 * Builds a ZoieSystem by delegating to the overload that also takes an analyzer
 * and a similarity, supplying a {@code StandardAnalyzer} for
 * {@code Version.LUCENE_43} and a {@code DefaultSimilarity}.
 *
 * @deprecated use {@link ZoieSystem#buildDefaultInstance(File, ZoieIndexableInterpreter, ZoieConfig)}
 * @param idxDir            forwarded unchanged to the delegate overload
 * @param interpreter       forwarded unchanged to the delegate overload
 * @param batchSize         forwarded unchanged to the delegate overload
 * @param batchDelay        forwarded unchanged to the delegate overload
 * @param realtime          forwarded unchanged to the delegate overload
 * @param versionComparator forwarded unchanged to the delegate overload
 * @return the instance produced by the delegate overload
 */
@Deprecated
public static <D> ZoieSystem<IndexReader, D> buildDefaultInstance(File idxDir,
  ZoieIndexableInterpreter<D> interpreter, int batchSize, long batchDelay, boolean realtime,
  Comparator<String> versionComparator) {
 // Defaults are created in the same order as the original inline arguments.
 final StandardAnalyzer defaultAnalyzer = new StandardAnalyzer(Version.LUCENE_43);
 final DefaultSimilarity defaultSimilarity = new DefaultSimilarity();
 return buildDefaultInstance(idxDir, interpreter, defaultAnalyzer, defaultSimilarity,
  batchSize, batchDelay, realtime, versionComparator);
}

/**
 * Builds a ZoieSystem by delegating to the overload that also takes an analyzer
 * and a similarity, supplying a {@code StandardAnalyzer} for
 * {@code Version.LUCENE_43} and a {@code DefaultSimilarity}.
 *
 * @deprecated use {@link ZoieSystem#buildDefaultInstance(File, ZoieIndexableInterpreter, ZoieConfig)}
 * @param idxDir            forwarded unchanged to the delegate overload
 * @param interpreter       forwarded unchanged to the delegate overload
 * @param batchSize         forwarded unchanged to the delegate overload
 * @param batchDelay        forwarded unchanged to the delegate overload
 * @param realtime          forwarded unchanged to the delegate overload
 * @param versionComparator forwarded unchanged to the delegate overload
 * @return the instance produced by the delegate overload
 */
@Deprecated
public static <D> ZoieSystem<IndexReader, D> buildDefaultInstance(File idxDir,
  ZoieIndexableInterpreter<D> interpreter, int batchSize, long batchDelay, boolean realtime,
  Comparator<String> versionComparator) {
 // Defaults are created in the same order as the original inline arguments.
 final StandardAnalyzer defaultAnalyzer = new StandardAnalyzer(Version.LUCENE_43);
 final DefaultSimilarity defaultSimilarity = new DefaultSimilarity();
 return buildDefaultInstance(idxDir, interpreter, defaultAnalyzer, defaultSimilarity,
  batchSize, batchDelay, realtime, versionComparator);
}

/**
 * Creates a filter over the given term vectors.
 *
 * @param termVectorsByField stored in the {@code fields} field
 * @param topLevelFields     stored in the {@code topLevelFields} field
 * @param selectedFields     stored in the {@code selectedFields} field
 * @param dfs                stored in the {@code dfs} field; may be {@code null}
 */
public TermVectorsFilter(Fields termVectorsByField, Fields topLevelFields, Set<String> selectedFields, @Nullable AggregatedDfs dfs) {
  // Freshly created working state.
  this.similarity = new DefaultSimilarity();
  this.scoreTerms = new HashMap<>();
  this.sizes = AtomicLongMap.create();

  // Caller-supplied inputs.
  this.fields = termVectorsByField;
  this.topLevelFields = topLevelFields;
  this.selectedFields = selectedFields;
  this.dfs = dfs;
}

 // Prints the idf of every term in every field exposed by the reader.
 DefaultSimilarity similarity = new DefaultSimilarity();
int docnum = reader.numDocs();
Fields fields = MultiFields.getFields(reader);
// MultiFields.getFields may return null when the reader has no fields; skip the loop then.
if (fields != null) {
  for (String field : fields) {
    Terms terms = fields.terms(field);
    // Fields.terms may return null for a field without terms; nothing to print for it.
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    while (termsEnum.next() != null) {
      // Kept as double so the printed representation matches the original output.
      double idf = similarity.idf(termsEnum.docFreq(), docnum);
      System.out.println(field + ":" + termsEnum.term().utf8ToString() + " idf=" + idf);
    }
  }
}

 String field;
FieldsEnum fieldsiterator;
TermsEnum termsiterator;
// To simplify, you can rely on DefaultSimilarity to calculate tf and idf for you.
DefaultSimilarity freqcalculator = new DefaultSimilarity();
// numDocs and maxDoc are not the same thing; the loop below runs over maxDoc
// and skips deleted documents explicitly.
int numDocs = reader.numDocs();
int maxDoc = reader.maxDoc();

for (int i = 0; i < maxDoc; i++) {
  if (reader.isDeleted(i)) {
    continue;
  }
  // NOTE(review): getTermVectors(i) presumably returns null for documents
  // without stored term vectors -- confirm and guard if so.
  fieldsiterator = reader.getTermVectors(i).iterator();
  // next() yields the field name, or null once the fields are exhausted.
  while ((field = fieldsiterator.next()) != null) {
    termsiterator = fieldsiterator.terms().iterator();
    // next() yields the term, or null once the terms are exhausted.
    while (termsiterator.next() != null) {
      // i = document id, field = field name
      // String representation of the current term
      String termtext = termsiterator.term().utf8ToString();
      // Document frequency as reported by the terms enum. Despite the variable
      // name this is a raw docFreq, not an idf value.
      // The original author had not tested this; if it doesn't work,
      // idfalternate below should.
      int idf = termsiterator.docFreq();
      // idf computed by DefaultSimilarity from the reader's document frequency;
      // idf(...) returns a float, so it cannot be stored in an int.
      float idfalternate = freqcalculator.idf(reader.docFreq(field, termsiterator.term()), numDocs);
    }
  }
}

/**
 * Builds the byte[] key for a term/document entry: the token text, then the
 * compact encoding of the local term weight, then the encoded document id.
 *
 * @param obj the entry to serialize; cast to {@code ITermDocKey}
 * @return the key produced by the key builder
 */
@Override
public byte[] serializeKey(final Object obj) {
  final ITermDocKey termDocKey = (ITermDocKey) obj;
  final String token = termDocKey.getToken();
  final double localWeight = termDocKey.getLocalTermWeight();

  /*
   * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html
   *
   * For more information on the round-trip of normalized term weight.
   */
  final DefaultSimilarity normEncoder = new DefaultSimilarity();
  // Held in an explicit long so the same append(...) overload is selected below.
  final long compactWeight = normEncoder.encodeNormValue((float) localWeight);

  final IV docId = (IV) termDocKey.getDocId();

  final IKeyBuilder builder = getKeyBuilder();
  builder.reset();
  // the token text (or its successor as desired).
  builder.appendText(token, true/* unicode */, false/* successor */);
  builder.append(compactWeight);
  IVUtility.encode(builder, docId);
  final byte[] key = builder.getKey();

  if (log.isDebugEnabled()) {
    log.debug("{" + token + "," + docId + "}, key=" + BytesUtil.toString(key));
  }
  return key;
}

/**
 * Rewrites this query by configuring an {@code XMoreLikeThis} from this
 * query's settings and returning the query created from it.
 *
 * @param reader the index reader passed to {@code XMoreLikeThis}
 * @return the result of {@code createQuery} for the configured instance
 * @throws IOException declared by the overridden method
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  // Fall back to DefaultSimilarity when no similarity has been configured.
  XMoreLikeThis mlt =
      new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);

  // Field selection and analysis.
  mlt.setFieldNames(moreLikeFields);
  mlt.setAnalyzer(analyzer);

  // Term selection thresholds.
  mlt.setMinTermFreq(minTermFrequency);
  mlt.setMinDocFreq(minDocFreq);
  mlt.setMaxDocFreq(maxDocFreq);
  mlt.setMaxQueryTerms(maxQueryTerms);
  mlt.setMinWordLen(minWordLen);
  mlt.setMaxWordLen(maxWordLen);
  mlt.setStopWords(stopWords);

  // Boosting.
  mlt.setBoost(boostTerms);
  mlt.setBoostFactor(boostTermsFactor);

  final boolean hasUnlike = this.unlikeText != null || this.unlikeFields != null;
  if (hasUnlike) {
    handleUnlike(mlt, this.unlikeText, this.unlikeFields);
  }

  return createQuery(mlt);
}

/**
 * Builds the byte[] key for a term/document entry: the token text, then the
 * compact encoding of the local term weight, then the encoded document id.
 *
 * @param obj the entry to serialize; cast to {@code ITermDocKey}
 * @return the key produced by the key builder
 */
@Override
public byte[] serializeKey(final Object obj) {
  final ITermDocKey termDocKey = (ITermDocKey) obj;
  final String token = termDocKey.getToken();
  final double localWeight = termDocKey.getLocalTermWeight();

  /*
   * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html
   *
   * For more information on the round-trip of normalized term weight.
   */
  final DefaultSimilarity normEncoder = new DefaultSimilarity();
  // Held in an explicit long so the same append(...) overload is selected below.
  final long compactWeight = normEncoder.encodeNormValue((float) localWeight);

  final IV docId = (IV) termDocKey.getDocId();

  final IKeyBuilder builder = getKeyBuilder();
  builder.reset();
  // the token text (or its successor as desired).
  builder.appendText(token, true/* unicode */, false/* successor */);
  builder.append(compactWeight);
  IVUtility.encode(builder, docId);
  final byte[] key = builder.getKey();

  if (log.isDebugEnabled()) {
    log.debug("{" + token + "," + docId + "}, key=" + BytesUtil.toString(key));
  }
  return key;
}

// DefaultSimilarity instance created at declaration; final, so the reference is never reassigned.
final DefaultSimilarity similarity = new DefaultSimilarity();

/**
 * Creates a RandomIndexWriter by delegating to the four-argument overload with
 * a config that uses the given codec, a log merge policy and a
 * {@code DefaultSimilarity}.
 *
 * @param dir      directory passed to the delegate overload
 * @param analyzer analyzer used for the writer config and passed to the delegate
 * @param codec    codec set on the writer config and passed to the delegate
 * @return the writer produced by the four-argument overload
 * @throws IOException if the delegate overload throws it
 */
protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
                            final Analyzer analyzer,
                            final Codec codec)
throws IOException {
 return newRandomIndexWriter(
   dir,
   analyzer,
   codec,
   newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
     .setCodec(codec)
     .setMergePolicy(newLogMergePolicy())
     .setSimilarity(new DefaultSimilarity()));
}

/**
 * Creates a RandomIndexWriter by delegating to the four-argument overload with
 * a config that uses the given codec, a log merge policy and a
 * {@code DefaultSimilarity}.
 *
 * @param dir      directory passed to the delegate overload
 * @param analyzer analyzer used for the writer config and passed to the delegate
 * @param codec    codec set on the writer config and passed to the delegate
 * @return the writer produced by the four-argument overload
 * @throws IOException if the delegate overload throws it
 */
protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
                            final Analyzer analyzer,
                            final Codec codec)
throws IOException {
 return newRandomIndexWriter(
   dir,
   analyzer,
   codec,
   newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
     .setCodec(codec)
     .setMergePolicy(newLogMergePolicy())
     .setSimilarity(new DefaultSimilarity()));
}

Popular methods of DefaultSimilarity

Popular in Java

Start an intent from android
putExtra (Intent)
getSupportFragmentManager (FragmentActivity)
getSystemService (Context)
HttpServer (com.sun.net.httpserver)
This class implements a simple HTTP server. A HttpServer is bound to an IP address and port number a
ByteBuffer (java.nio)
A buffer for bytes. A byte buffer can be created in either one of the following ways: * #allocate
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
BorderLayout (java.awt)
A border layout lays out a container, arranging and resizing its components to fit in five regions:
Container (java.awt)
A generic Abstract Window Toolkit(AWT) container object is a component that can contain other AWT co
JLabel (javax.swing)
CodeWhisperer alternatives

How to use the org.apache.lucene.search.similarities.DefaultSimilarity constructor

Best Java code snippets using org.apache.lucene.search.similarities.DefaultSimilarity.<init> (Showing top 20 results out of 315)

How to use
org.apache.lucene.search.similarities.DefaultSimilarity
constructor