/**
 * Adds the given features to the feature set held by this object.
 * <p>
 * If no feature set has been created yet, a new {@link TcFeatureSet} is built from the
 * provided features; otherwise every provided feature is appended to the existing set.
 *
 * @param features
 *            the features to add
 */
public void setFeatures(TcFeature... features)
{
    if (this.features != null) {
        // A set already exists: extend it instead of replacing it.
        for (TcFeature feature : features) {
            this.features.add(feature);
        }
        return;
    }

    this.features = new TcFeatureSet(features);
}
/**
 * Verifies that at least one feature has been configured.
 *
 * @throws IllegalStateException
 *             if the feature set is {@code null} or empty
 */
protected void checkFeatureSpace()
{
    boolean hasFeatures = features != null && !features.isEmpty();
    if (hasFeatures) {
        return;
    }

    throw new IllegalStateException("The feature space contains no feature extractors");
}
/**
 * Builds a feature set from {@code names}, names it {@code LEFTOUT_FE} followed by the
 * discriminator value of {@code names[i]}, and then calls {@code remove(i)} on it.
 *
 * @param names
 *            all features
 * @param i
 *            index of the feature to leave out
 * @return the named feature set after the removal
 */
private static TcFeatureSet getFeatureNamesMinusOne(TcFeature[] names, int i)
{
    TcFeatureSet remaining = new TcFeatureSet(names);

    TcFeature leftOut = names[i];
    remaining.setFeatureSetName(LEFTOUT_FE + leftOut.getDiscriminatorValue());

    remaining.remove(i);
    return remaining;
}
/**
 * @return a feature set containing a single {@code WordNGram} feature created without
 *         additional parameters
 */
public TcFeatureSet getFeatureSet()
{
    TcFeature wordNGram = TcFeatureFactory.create(WordNGram.class);
    return new TcFeatureSet(wordNGram);
}
/**
 * Creates a feature set and adds the given features in the order provided.
 *
 * @param features
 *            the features to add
 */
public TcFeatureSet(TcFeature... features)
{
    for (int idx = 0; idx < features.length; idx++) {
        add(features[idx]);
    }
}
/**
 * Returns the explicit feature set name if one is set; otherwise the discriminator values
 * of all contained features joined by {@code ", "}.
 */
@Override
public Object getDiscriminatorValue()
{
    if (featureSetName != null) {
        return featureSetName;
    }

    StringBuilder joined = new StringBuilder();
    int count = size();
    for (int pos = 0; pos < count; pos++) {
        // Separator goes in front of every element but the first.
        if (pos > 0) {
            joined.append(", ");
        }
        joined.append(get(pos).getDiscriminatorValue());
    }
    return joined.toString();
}
/**
 * @return a feature set containing a single {@code DiffNrOfTokensPairFeatureExtractor}
 *         feature created without additional parameters
 */
public TcFeatureSet getFeatureSet()
{
    TcFeature tokenCountDiff = TcFeatureFactory.create(DiffNrOfTokensPairFeatureExtractor.class);
    return new TcFeatureSet(tokenCountDiff);
}
/**
 * Creates a named feature set and adds the given features in the order provided.
 *
 * @param featureSetName
 *            the name stored for this feature set
 * @param features
 *            the features to add
 */
public TcFeatureSet(String featureSetName, TcFeature... features)
{
    this.featureSetName = featureSetName;

    for (int idx = 0; idx < features.length; idx++) {
        add(features[idx]);
    }
}
/**
 * Returns the explicit feature set name if one is set; otherwise the discriminator values
 * of all contained features joined by {@code ", "}.
 */
@Override
public Object getDiscriminatorValue()
{
    if (featureSetName != null) {
        return featureSetName;
    }

    StringBuilder text = new StringBuilder();
    final int total = this.size();
    int index = 0;
    while (index < total) {
        TcFeature current = get(index);
        text.append(current.getDiscriminatorValue());
        index++;
        // No trailing separator after the last element.
        if (index >= size()) {
            continue;
        }
        text.append(", ");
    }
    return text.toString();
}
/**
 * Configures the experiment with the given features, wrapped in a single
 * {@link TcFeatureSet}. Any feature sets configured earlier are discarded. To use several
 * feature sets, call {@link #featureSets(TcFeatureSet...)} instead.
 *
 * @param features
 *            one or more features
 * @return the builder object
 * @throws NullPointerException
 *             if {@code features} is {@code null}
 */
public ExperimentBuilder features(TcFeature... features)
{
    if (features == null) {
        throw new NullPointerException("The features are null");
    }

    // Drop whatever was configured before, then register exactly one set.
    this.featureSets = new ArrayList<>();

    TcFeatureSet singleSet = new TcFeatureSet();
    for (int idx = 0; idx < features.length; idx++) {
        singleSet.add(features[idx]);
    }
    this.featureSets.add(singleSet);

    return this;
}
/**
 * @return a feature set containing a single {@code CharacterNGram} feature created without
 *         additional parameters
 */
public TcFeatureSet getFeatureSet()
{
    TcFeature characterNGram = TcFeatureFactory.create(CharacterNGram.class);
    return new TcFeatureSet(characterNGram);
}
/**
 * Builds a feature set from {@code names}, names it {@code LEFTOUT_FE} followed by the
 * discriminator value of {@code names[i]}, and then calls {@code remove(i)} on it.
 *
 * @param names
 *            all features
 * @param i
 *            index of the feature to leave out
 * @return the named feature set after the removal
 */
private static TcFeatureSet getFeatureNamesMinusOne(TcFeature[] names, int i)
{
    TcFeatureSet reduced = new TcFeatureSet(names);

    TcFeature excluded = names[i];
    reduced.setFeatureSetName(LEFTOUT_FE + excluded.getDiscriminatorValue());

    reduced.remove(i);
    return reduced;
}
public TcFeatureSet(String featureSetName, TcFeature... features) { this.featureSetName = featureSetName; for (TcFeature f : features) { add(f); } }
/**
 * Rejects feature sets that are missing or empty.
 *
 * @param featureSet
 *            the feature set to validate
 * @throws NullPointerException
 *             if {@code featureSet} is {@code null}
 * @throws IllegalStateException
 *             if {@code featureSet} contains no features
 */
protected void sanityCheckFeatureSet(TcFeatureSet featureSet)
{
    if (featureSet != null) {
        if (!featureSet.isEmpty()) {
            return;
        }
        throw new IllegalStateException("The provided feature set contains no features");
    }

    throw new NullPointerException("The provided feature set is null");
}
/**
 * Configures the experiment with the given features, wrapped in a single
 * {@link TcFeatureSet}. Any feature sets configured earlier are discarded. To use several
 * feature sets, call {@link #featureSets(TcFeatureSet...)} instead.
 *
 * @param features one or more features
 * @return the builder object
 * @throws NullPointerException if {@code features} is {@code null}
 */
public ExperimentBuilder features(TcFeature... features)
{
    if (features == null) {
        throw new NullPointerException("The features are null");
    }

    // Drop whatever was configured before, then register exactly one set.
    this.featureSets = new ArrayList<>();

    TcFeatureSet wrapper = new TcFeatureSet();
    int position = 0;
    while (position < features.length) {
        wrapper.add(features[position]);
        position++;
    }
    this.featureSets.add(wrapper);

    return this;
}
/**
 * @return a feature set with a {@code TokenRatioPerDocument} feature followed by a
 *         {@code WordNGram} feature configured with top-k 600, minimum n 1 and maximum n 3
 */
public TcFeatureSet getFeatureSet()
{
    TcFeature tokenRatio = TcFeatureFactory.create(TokenRatioPerDocument.class);
    TcFeature wordNGrams = TcFeatureFactory.create(WordNGram.class,
            WordNGram.PARAM_NGRAM_USE_TOP_K, 600,
            WordNGram.PARAM_NGRAM_MIN_N, 1,
            WordNGram.PARAM_NGRAM_MAX_N, 3);

    return new TcFeatureSet(tokenRatio, wordNGrams);
}
/**
 * Creates a feature set and adds the given features in the order provided.
 *
 * @param features
 *            the features to add
 */
public TcFeatureSet(TcFeature... features)
{
    int position = 0;
    while (position < features.length) {
        add(features[position]);
        position++;
    }
}
/**
 * Rejects feature sets that are missing or empty.
 *
 * @param featureSet
 *            the feature set to validate
 * @throws NullPointerException
 *             if {@code featureSet} is {@code null}
 * @throws IllegalStateException
 *             if {@code featureSet} contains no features
 */
private void sanityCheckFeatureSet(TcFeatureSet featureSet)
{
    boolean missing = featureSet == null;
    if (missing) {
        throw new NullPointerException("The provided feature set is null");
    }

    boolean populated = !featureSet.isEmpty();
    if (populated) {
        return;
    }
    throw new IllegalStateException("The provided feature set contains no features");
}
/*
 * Builds a feature set of eleven features, added in this order:
 *   1. TargetSurfaceFormContextFeature x3, with PARAM_RELATIVE_TARGET_ANNOTATION_INDEX
 *      set to -1, 0 and +1.
 *   2. TokenContext x3, with PARAM_TARGET_INDEX set to -1, 0 and +1, each with
 *      PARAM_NGRAM_USE_TOP_K = 1000.
 *   3. IsFirstLetterCapitalized, created without parameters.
 *   4. LuceneCharacterNGram x4, one per n in 1..4 (PARAM_NGRAM_MIN_N == PARAM_NGRAM_MAX_N);
 *      PARAM_NGRAM_USE_TOP_K is 50 for n = 1 and 750 for n = 2, 3 and 4.
 *
 * NOTE(review): create(...) is presumably a statically imported TcFeatureFactory.create;
 * confirm against the file's imports.
 */
TcFeatureSet set = new TcFeatureSet(); set.add(create(TargetSurfaceFormContextFeature.class, TargetSurfaceFormContextFeature.PARAM_RELATIVE_TARGET_ANNOTATION_INDEX, -1)); set.add(create(TargetSurfaceFormContextFeature.class, TargetSurfaceFormContextFeature.PARAM_RELATIVE_TARGET_ANNOTATION_INDEX, 0)); set.add(create(TargetSurfaceFormContextFeature.class, TargetSurfaceFormContextFeature.PARAM_RELATIVE_TARGET_ANNOTATION_INDEX, +1)); set.add(create(TokenContext.class, TokenContext.PARAM_TARGET_INDEX, -1, TokenContext.PARAM_NGRAM_USE_TOP_K, 1000)); set.add(create(TokenContext.class, TokenContext.PARAM_TARGET_INDEX, 0, TokenContext.PARAM_NGRAM_USE_TOP_K, 1000)); set.add(create(TokenContext.class, TokenContext.PARAM_TARGET_INDEX, +1, TokenContext.PARAM_NGRAM_USE_TOP_K, 1000)); set.add(create(IsFirstLetterCapitalized.class)); set.add(create(LuceneCharacterNGram.class, LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 1, LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 1, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 50)); set.add(create(LuceneCharacterNGram.class, LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 2, LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 2, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 750)); set.add(create(LuceneCharacterNGram.class, LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 3, LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 3, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 750)); set.add(create(LuceneCharacterNGram.class, LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 4, LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 4, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 750));
/**
 * @return a feature set with a {@code TokenRatioPerDocument} feature followed by a
 *         {@code CharacterNGram} feature configured with top-k 50
 */
public TcFeatureSet getFeatureSet()
{
    TcFeature tokenRatio = TcFeatureFactory.create(TokenRatioPerDocument.class);
    TcFeature characterNGrams = TcFeatureFactory.create(CharacterNGram.class,
            CharacterNGram.PARAM_NGRAM_USE_TOP_K, 50);

    return new TcFeatureSet(tokenRatio, characterNGrams);
}