/**
 * Returns the total number of features known to this learner. The default implementation
 * simply reports the size of the lexicon; implementations whose lexicon may be unpopulated
 * (as appears to happen with SVM, for example) should override this method.
 *
 * @return The number of features.
 **/
protected int getNumberFeatures() {
    return this.lexicon.size();
}
/**
 * Returns the size of the lexicon after any pruning that may have taken place, or 0 if the
 * lexicon's location isn't known.
 *
 * @return The pruned lexicon size, or 0 when no lexicon is available.
 **/
public int getPrunedLexiconSize() {
    boolean lexiconEmpty = lexicon == null || lexicon.size() == 0;
    if (readLexiconOnDemand && lexiconEmpty) {
        // Read just the pruned-size header from the lexicon file instead of the whole lexicon.
        ExceptionlessInputStream stream =
                ExceptionlessInputStream.openCompressedStream(lexFilePath);
        int prunedSize = Lexicon.readPrunedSize(stream);
        stream.close();
        return prunedSize;
    }
    if (lexicon == null)
        return 0;
    return lexicon.getCutoff();
}
/**
 * Returns this learner's feature lexicon after discarding any feature counts it may have been
 * storing. This method is likely only useful when the lexicon and its counts are currently
 * stored on disk and {@link #readLexiconOnDemand(String)} or {@link #readLexiconOnDemand(URL)}
 * has already been called, in which case the lexicon is read from disk without wasting time
 * loading the counts.
 *
 * @return The lexicon, with no feature counts attached.
 **/
public Lexicon getLexiconDiscardCounts() {
    boolean notYetLoaded = lexicon == null || lexicon.size() == 0;
    if (readLexiconOnDemand && notYetLoaded) {
        // Load from disk, skipping the stored feature counts entirely.
        lexicon = Lexicon.readLexicon(lexFilePath, false);
    } else {
        // Lexicon is already in memory; drop whatever counts it is holding.
        lexicon.countFeatures(Lexicon.CountPolicy.none);
    }
    return lexicon;
}
/**
 * Retrieves a feature from the given lexicon by its identifier. Use this method together with
 * getFeatureId or convert so that the off-by-one adjustment is applied consistently.
 *
 * @param lexicon The lexicon to query.
 * @param id The one-based feature identifier.
 * @return The feature stored at the corresponding zero-based lexicon index.
 */
public static edu.illinois.cs.cogcomp.lbjava.classify.Feature getFeature(Lexicon lexicon, int id) {
    // External ids are one-based, while lookupKey expects a zero-based key.
    int index = id - 1;
    return lexicon.lookupKey(index);
}
if (labelLexicon.size() > 2 || solverType.equals("MCSVM_CS")) { newLabelLexicon = new Lexicon(); boolean same = true; for (int i = 0; i < allExamples.size(); i++) { Feature label = labelLexicon.lookupKey(allLabels.get(i)); int newLabel = newLabelLexicon.lookup(label, true); same &= newLabel == allLabels.get(i); allLabels.set(i, newLabel); if (same && newLabelLexicon.size() == labelLexicon.size()) newLabelLexicon = labelLexicon; else if (newLabelLexicon.size() > labelLexicon.size()) { System.err .println("LBJava ERROR: SupportVectorMachine: new label lexicon is too big!"); System.exit(1); } else { int N = newLabelLexicon.size(); predictions = new FVector(N); for (int i = 0; i < N; ++i) numClasses = newLabelLexicon.size(); for (int i = 0; i < numClasses && !conjunctiveLabels; ++i) conjunctiveLabels = newLabelLexicon.lookupKey(i).isConjunctive(); new DiscretePrimitiveStringFeature(labeler.containingPackage, labeler.name, "", allowableValues[1], (short) 1, (short) 2); int p = newLabelLexicon.lookup(f); int positive = 0;
/** * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures() */ @Override protected int[] identifyUselessFeatures() { // compile the whitelist HashSet<Feature> whitelist = compileWhitelist(lexicon); // look at each feature in the lexicon, any with zero weights can be safely discarded. int [] all = new int [this.lexicon.size()]; int count = 0; for (Object e : lexicon.getMap().entrySet()) { @SuppressWarnings("unchecked") Entry<Feature, Integer> entry = (Entry<Feature, Integer>) e; if (!whitelist.contains(entry.getKey())) { int fi = entry.getValue(); double wt = getWeight(fi); if (wt < this.threshold) { all[count] = fi; count++; } } } int[] useless = new int[count]; System.arraycopy(all, 0, useless, 0, count); Arrays.sort(useless); return useless; }
(short) labeler.allowableValues().length); if (labelLexicon.contains(f)) { int key = labelLexicon.lookup(f); score = ((BiasedRandomWeightVector) network.get(key)).dot(exampleFeatures, ((BiasedRandomWeightVector) network.get(l)).dot(exampleFeatures, exampleValues); result.put(labelLexicon.lookupKey(l).getStringValue(), score);
return (Object[]) example; if (readLexiconOnDemand && (lexicon == null || lexicon.size() == 0)) { readLexicon(lexFilePath); readLexiconOnDemand = false; Lexicon.CountPolicy countPolicy = lexicon.getCountPolicy(); int labelIndex = -1; Feature label = labelVector.getFeature(f); if (label.isDiscrete()) labelArray[f] = labelLexicon.lookup(label, true); else labelArray[f] = labelLexicon.lookup(label.getFeatureKey(labelLexicon), true); labelValues[f] += label.getStrength(); createPrediction(labelArray[f]); Feature feature = featureVector.getFeature(f); exampleArrayFeatures[f] = lexicon.lookup(feature.getFeatureKey(lexicon, training, labelIndex), training, labelIndex); exampleArrayValues[f] += feature.getStrength();
/**
 * Sets the value of {@link #parentLexicon} and makes sure that any features marked for removal
 * in this lexicon are the identical objects also present in the parent. This is useful in
 * particular just after lexicons have been read from disk.
 *
 * @param p The new parent lexicon.
 **/
public void setParent(Lexicon p) {
    parentLexicon = p;
    int size = lexiconInv.size();
    for (int index = 0; index < size; ++index) {
        Feature feature = lexiconInv.get(index);
        // Only non-null features flagged for removal (negative parent id) need re-aliasing.
        if (feature == null || parents.get(index) >= 0)
            continue;
        Feature canonical = p.lookupKey(p.lookup(feature));
        if (canonical == null) {
            System.err.println("LBJava ERROR: Can't find feature " + feature
                    + " in parent lexicon.");
            new Exception().printStackTrace();
            System.exit(1);
        }
        // Swap in the parent's identical instance so both lexicons share one object.
        lexiconInv.set(index, canonical);
        if (lexicon != null)
            lexicon.put(canonical, lexicon.remove(feature));
    }
}
Lexicon lexicon = preExtractLearner.getLexicon(); if (!policy.isNone() && lexicon.getCountPolicy() == Lexicon.CountPolicy.none) throw new IllegalArgumentException( "LBJava ERROR: BatchTrainer.pruneDataset: Can't prune with policy '" + policy afp.setIncludePruned(true); int[] swapMap = lexicon.prune(policy); && lexicon.isPruned(featureIndexes[unpruned - 1], labelIndexes[0], policy)) --unpruned; if (lexicon.isPruned(featureIndexes[i], labelIndexes[0], policy)) { int t = featureIndexes[i]; featureIndexes[i] = featureIndexes[--unpruned]; lexiconSize = lexicon.getCutoff(); preExtractLearner.saveLexicon();
+ "training with per class feature counts."); lazyMapCreation(); Integer I = (Integer) lexicon.get(f); return getCutoff(); lexicon.put(f, new Integer(key)); lexiconInv.add(f); incrementCount(key, label); return key; incrementCount(index, label); return index;
/**
 * <!-- lookup(Feature,boolean) --> Looks up a feature's index by delegating to
 * <code>lookup(f, training, -1)</code>. See {@link #lookup(Feature,boolean,int)} for more
 * details.
 *
 * @param f The feature to look up.
 * @param training Whether or not the learner is currently training.
 * @return The integer key that the feature maps to.
 **/
public int lookup(Feature f, boolean training) {
    return this.lookup(f, training, -1);
}
/**
 * Sets the labeler.
 *
 * @param l A labeling classifier.
 **/
public void setLabeler(Classifier l) {
    // An LTU requires exactly one binary-valued label classifier.
    boolean binary = l != null && l.allowableValues().length == 2;
    if (!binary) {
        System.err.println("Error: " + name
                + ": An LTU must be given a single binary label classifier.");
        new Exception().printStackTrace();
        System.exit(1);
    }
    super.setLabeler(l);
    allowableValues = l.allowableValues();

    // Rebuild the label lexicon so it contains exactly the two allowable label values,
    // then create the corresponding predictions in the same order.
    labelLexicon.clear();
    for (short value = 0; value < 2; ++value)
        labelLexicon.lookup(new DiscretePrimitiveStringFeature(l.containingPackage, l.name, "",
                allowableValues[value], value, (short) 2), true);
    createPrediction(0);
    createPrediction(1);
}
/**
 * This function gets the feature from the lexicon using the id. Use this function if you use
 * either getFeatureId or convert to avoid off-by-one errors.
 *
 * @param lexicon The lexicon in which to look up the feature.
 * @param id The one-based id of the desired feature.
 * @return The feature whose zero-based lexicon key is <code>id - 1</code>.
 */
public static edu.illinois.cs.cogcomp.lbjava.classify.Feature getFeature(Lexicon lexicon, int id) {
    // Ids handed out elsewhere are one-based, while lookupKey expects zero-based keys.
    return lexicon.lookupKey(id - 1);
}