opennlp.tools.namefind.NameSampleTypeFilter java code examples

// Wrap the current stream in a NameSampleTypeFilter built from nameTypes and
// rebind sampleStream to the wrapper, so later reads go through the filter.
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

/**
 * Filters the sample text on both the person and the organization type and checks
 * that the first sample keeps spans of exactly those two types: two person spans
 * and one organization span.
 */
@Test
public void testMultiFilter() throws IOException {
 filter = new NameSampleTypeFilter(new String[] {person, organization}, sampleStream(text));
 final Span[] names = filter.read().getNames();
 // Bucket the surviving spans by their type so each type can be counted.
 final Map<String, List<Span>> spansByType =
   Arrays.stream(names).collect(Collectors.groupingBy(Span::getType));
 Assert.assertEquals(2, spansByType.size());
 Assert.assertEquals(2, spansByType.get(person).size());
 Assert.assertEquals(1, spansByType.get(organization).size());
}

// Wrap the current stream in a NameSampleTypeFilter built from nameTypes and
// rebind sampleStream to the wrapper, so later reads go through the filter.
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

/**
 * Filters the sample text with an empty list of types and checks that the first
 * sample read from the filter carries no names at all.
 */
@Test
public void testNoFilter() throws IOException {
 filter = new NameSampleTypeFilter(new String[] {}, sampleStream(text));
 final Span[] remaining = filter.read().getNames();
 Assert.assertEquals(0, remaining.length);
}

// Wrap the current stream in a NameSampleTypeFilter built from nameTypes and
// rebind sampleStream to the wrapper, so later reads go through the filter.
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

/**
 * Filters the sample text on the organization type only and checks that the first
 * sample keeps a single span and that this span is of the organization type.
 */
@Test
public void testSingleFilter() throws IOException {
 filter = new NameSampleTypeFilter(new String[] {organization}, sampleStream(text));
 final NameSample sample = filter.read();
 Assert.assertEquals(1, sample.getNames().length);
 // getNames() is queried again, as a separate call, for the type check.
 Assert.assertEquals(organization, sample.getNames()[0].getType());
}

/**
 * Runs a 5-fold cross validation over the name sample stream and asserts the
 * resulting F-measure.
 *
 * @param params the training parameters handed to the cross validator
 * @param type the single name type to keep in the samples, or {@code null} to
 *     evaluate on the unfiltered stream
 * @param expectedScore the expected F-measure, compared with a tolerance of 0.001
 * @throws IOException if reading the samples or evaluating fails
 */
private void crossEval(TrainingParameters params, String type, double expectedScore)
  throws IOException {
 try (ObjectStream<NameSample> samples = createNameSampleStream()) {
  TokenNameFinderCrossValidator validator = new TokenNameFinderCrossValidator("eng", null,
    params, new TokenNameFinderFactory());
  // Only wrap the stream in a type filter when a type was requested.
  ObjectStream<NameSample> evaluationSamples = (type == null)
    ? samples
    : new NameSampleTypeFilter(new String[] {type}, samples);
  validator.evaluate(evaluationSamples, 5);
  double actualScore = validator.getFMeasure().getFMeasure();
  Assert.assertEquals(expectedScore, actualScore, 0.001d);
 }
}

/**
 * Construct an evaluator. It takes from the properties a model,
 * a testset and the format of the testset. Every other parameter
 * set in the training, e.g., beamsize, decoding, etc., is serialized
 * in the model.
 *
 * <p>The properties read are {@code language}, {@code clearFeatures},
 * {@code model}, {@code testset}, {@code corpusFormat} and {@code types}.
 * When {@code types} is present and differs from
 * {@code Flags.DEFAULT_NE_TYPES}, it is split on commas and the test samples
 * are restricted to those name types.
 *
 * @param props the properties parameter
 * @throws IOException if the test set or the model file cannot be read
 */
public Evaluate(final Properties props) throws IOException {
 
 final String lang = props.getProperty("language");
 final String clearFeatures = props.getProperty("clearFeatures");
 final String model = props.getProperty("model");
 final String testSet = props.getProperty("testset");
 final String corpusFormat = props.getProperty("corpusFormat");
 final String netypes = props.getProperty("types");
 
 testSamples = AbstractTrainer.getNameStream(testSet, clearFeatures, corpusFormat);
 // Compare string contents rather than references: a value read from the
 // properties is a different object than the constant even when the text is
 // equal. A missing "types" property means no type filtering instead of a
 // NullPointerException on split.
 if (netypes != null && !netypes.equals(Flags.DEFAULT_NE_TYPES)) {
  final String[] neTypes = netypes.split(",");
  testSamples = new NameSampleTypeFilter(neTypes, testSamples);
 }
 // Close the model file as soon as the model has been deserialized; the
 // original left the stream open.
 try (FileInputStream modelStream = new FileInputStream(model)) {
  nercModels.putIfAbsent(lang, new TokenNameFinderModel(modelStream));
 }
 nameFinder = new NameFinderME(nercModels.get(lang));
}

/**
 * Builds a trainer from the training parameters: language, feature-clearing
 * options, corpus format, beam size and sequence codec come from {@code Flags},
 * while the train and test set locations come from the {@code TrainSet} and
 * {@code TestSet} settings. When a {@code Types} setting is present, both sample
 * streams are restricted to the comma-separated name types it lists.
 *
 * @param params the training parameters
 * @throws IOException
 *           io exception
 */
public AbstractTrainer(final TrainingParameters params) throws IOException {

 this.lang = Flags.getLanguage(params);
 this.clearTrainingFeatures = Flags.getClearTrainingFeatures(params);
 this.clearEvaluationFeatures = Flags.getClearEvaluationFeatures(params);
 this.corpusFormat = Flags.getCorpusFormat(params);
 this.trainData = params.getSettings().get("TrainSet");
 this.testData = params.getSettings().get("TestSet");
 trainSamples = getNameStream(trainData, clearTrainingFeatures, corpusFormat);
 testSamples = getNameStream(testData, clearEvaluationFeatures, corpusFormat);
 this.beamSize = Flags.getBeamsize(params);
 this.sequenceCodec = Flags.getSequenceCodec(params);

 // Optional restriction of both streams to the configured name types.
 final String typesSetting = params.getSettings().get("Types");
 if (typesSetting != null) {
  final String[] selectedTypes = typesSetting.split(",");
  trainSamples = new NameSampleTypeFilter(selectedTypes, trainSamples);
  testSamples = new NameSampleTypeFilter(selectedTypes, testSamples);
 }
}

/**
 * Builds a cross validator from the training parameters. Language, corpus
 * format, beam size, number of folds and sequence codec are read through
 * {@code Flags}; the training set location comes from the {@code TrainSet}
 * setting. When a {@code Types} setting is present, the training samples are
 * restricted to the comma-separated name types it lists. Finally the name
 * factory and the evaluation listeners are set up from the same parameters.
 *
 * @param params the training parameters
 * @throws IOException if the training samples cannot be read
 */
public CrossValidator(final TrainingParameters params) throws IOException {

 this.lang = Flags.getLanguage(params);
 final String clearTrainingFeatures = Flags.getClearTrainingFeatures(params);
 this.corpusFormat = Flags.getCorpusFormat(params);
 this.trainData = params.getSettings().get("TrainSet");
 trainSamples = AbstractTrainer.getNameStream(trainData, clearTrainingFeatures, corpusFormat);
 this.beamSize = Flags.getBeamsize(params);
 this.folds = Flags.getFolds(params);
 final String codecName = getSequenceCodec(Flags.getSequenceCodec(params));
 this.sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(codecName);

 // Optional restriction of the training stream to the configured name types.
 final String typesSetting = params.getSettings().get("Types");
 if (typesSetting != null) {
  trainSamples = new NameSampleTypeFilter(typesSetting.split(","), trainSamples);
 }
 createNameFactory(params);
 getEvalListeners(params);
}

// The same statement repeated: each occurrence wraps the current stream in a
// NameSampleTypeFilter built from nameTypes and rebinds sampleStream to it.
// NOTE(review): these look like separate excerpts of one usage pattern rather
// than a deliberate six-fold wrapping — confirm against the original sources.
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);

Javadoc

A stream which removes Name Samples which do not have a certain type.

Most used methods

Popular in Java

Finding current android device location
setContentView (Activity)
getExternalFilesDir (Context)
getSystemService (Context)
SecureRandom (java.security)
This class generates cryptographically secure pseudo-random numbers. It is best to invoke SecureRand
Collections (java.util)
This class consists exclusively of static methods that operate on or return collections. It contains
AtomicInteger (java.util.concurrent.atomic)
An int value that may be updated atomically. See the java.util.concurrent.atomic package specificati
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
Logger (org.apache.log4j)
This is the central class in the log4j package. Most logging operations, except configuration, are d
JComboBox (javax.swing)
Best IntelliJ plugins

How to use NameSampleTypeFilter in opennlp.tools.namefind

Best Java code snippets using opennlp.tools.namefind.NameSampleTypeFilter (Showing top 16 results out of 315)

How to use
NameSampleTypeFilter
in
opennlp.tools.namefind