opennlp.tools.namefind.NameContextGenerator java code examples

/**
 * Creates a new name finder event stream using the specified data stream and context generator.
 * @param dataStream The data stream of events.
 * @param type null or overrides the type parameter in the provided samples
 * @param contextGenerator The context generator used to generate features for the event stream.
 */
public NameFinderEventStream(ObjectStream<NameSample> dataStream, String type,
               NameContextGenerator contextGenerator, SequenceCodec<String> codec) {
 super(dataStream);
 this.codec = codec;
 if (codec == null) {
  this.codec = new BioCodec();
 }
 this.contextGenerator = contextGenerator;
 this.contextGenerator.addFeatureGenerator(
   new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
 this.defaultType = type;
}

/**
 * Forgets all adaptive data which was collected during previous calls to one
 * of the find methods.
 *
 * <p>This method is typically called at the end of a document. It simply
 * delegates to the context generator's {@code clearAdaptiveData()}.
 */
public void clearAdaptiveData() {
 contextGenerator.clearAdaptiveData();
}

/**
 * Generates one event per outcome of a sentence.
 *
 * <p>The event at position {@code i} pairs {@code outcomes[i]} with the context that
 * {@code cg} produces for that position. Once all events are built, the adaptive data of
 * {@code cg} is updated with the sentence and its outcomes.
 *
 * @param sentence The tokens of the sentence.
 * @param outcomes The outcome of each token; its length determines the number of events.
 * @param cg The context generator which produces the context of each position.
 *
 * @return The generated events, in position order.
 */
public static List<Event> generateEvents(String[] sentence, String[] outcomes,
                     NameContextGenerator cg) {
 final int eventCount = outcomes.length;
 List<Event> result = new ArrayList<>(eventCount);

 int position = 0;
 while (position < eventCount) {
  String[] context = cg.getContext(position, sentence, outcomes, null);
  result.add(new Event(outcomes[position], context));
  position++;
 }

 cg.updateAdaptiveData(sentence, outcomes);
 return result;
}

/**
 * Finds names in the given token sequence, typically a sentence, and returns
 * the token spans of the names that were identified.
 *
 * <p>The outcomes of the best sequence are also handed to the context generator
 * as adaptive data.
 *
 * @param tokens The tokens or words of the sequence, typically a sentence.
 * @param additionalContext Features which are based on context outside of the
 *     sentence but which should also be used.
 *
 * @return The spans of the identified names, after they were passed through {@code setProbs}.
 */
public Span[] find(String[] tokens, String[][] additionalContext) {
 additionalContextFeatureGenerator.setCurrentContext(additionalContext);
 bestSequence = model.bestSequence(tokens, additionalContext, contextGenerator, sequenceValidator);

 List<String> outcomes = bestSequence.getOutcomes();
 String[] outcomeArray = outcomes.toArray(new String[outcomes.size()]);
 contextGenerator.updateAdaptiveData(tokens, outcomeArray);

 return setProbs(seqCodec.decode(outcomes));
}

/**
 * Reads the next sample from the underlying stream and turns it into a sequence
 * with one event per token of the sample's sentence.
 *
 * @return The sequence built from the next sample, or {@code null} when the underlying
 *     stream returned {@code null}.
 *
 * @throws IOException Declared by this method; the underlying stream's {@code read()}
 *     is the call made here that presumably raises it.
 */
@Override
public Sequence read() throws IOException {
 NameSample sample = psi.read();
 if (sample == null) {
  return null;
 }

 String[] sentence = sample.getSentence();
 String[] tags = seqCodec.encode(sample.getNames(), sentence.length);
 Event[] events = new Event[sentence.length];

 for (int pos = 0; pos < sentence.length; pos++) {
  // it is safe to pass the tags as previous tags because
  // the context generator does not look for non predicted tags
  String[] context = pcg.getContext(pos, sentence, useOutcomes ? tags : null, null);
  events[pos] = new Event(tags[pos], context);
 }

 return new Sequence<>(events,sample);
}

/**
 * Finds names in the given token sequence, typically a sentence, and returns
 * the token spans of the names that were identified.
 *
 * <p>The outcomes of the best sequence are also handed to the context generator
 * as adaptive data.
 *
 * @param tokens The tokens or words of the sequence, typically a sentence.
 * @param additionalContext Features which are based on context outside of the
 *     sentence but which should also be used.
 *
 * @return The spans of the identified names, after they were passed through {@code setProbs}.
 */
public Span[] find(String[] tokens, String[][] additionalContext) {
 additionalContextFeatureGenerator.setCurrentContext(additionalContext);
 bestSequence = model.bestSequence(tokens, additionalContext, contextGenerator, sequenceValidator);

 List<String> outcomes = bestSequence.getOutcomes();
 String[] outcomeArray = outcomes.toArray(new String[outcomes.size()]);
 contextGenerator.updateAdaptiveData(tokens, outcomeArray);

 return setProbs(seqCodec.decode(outcomes));
}

/**
 * Reads the next sample from the underlying stream and turns it into a sequence
 * with one event per token of the sample's sentence.
 *
 * @return The sequence built from the next sample, or {@code null} when the underlying
 *     stream returned {@code null}.
 *
 * @throws IOException Declared by this method; the underlying stream's {@code read()}
 *     is the call made here that presumably raises it.
 */
@Override
public Sequence read() throws IOException {
 NameSample sample = psi.read();
 if (sample == null) {
  return null;
 }

 String[] sentence = sample.getSentence();
 String[] tags = seqCodec.encode(sample.getNames(), sentence.length);
 Event[] events = new Event[sentence.length];

 for (int pos = 0; pos < sentence.length; pos++) {
  // it is safe to pass the tags as previous tags because
  // the context generator does not look for non predicted tags
  String[] context = pcg.getContext(pos, sentence, useOutcomes ? tags : null, null);
  events[pos] = new Event(tags[pos], context);
 }

 return new Sequence<>(events,sample);
}

/**
 * Generates one event per outcome of a sentence.
 *
 * <p>The event at position {@code i} pairs {@code outcomes[i]} with the context that
 * {@code cg} produces for that position. Once all events are built, the adaptive data of
 * {@code cg} is updated with the sentence and its outcomes.
 *
 * @param sentence The tokens of the sentence.
 * @param outcomes The outcome of each token; its length determines the number of events.
 * @param cg The context generator which produces the context of each position.
 *
 * @return The generated events, in position order.
 */
public static List<Event> generateEvents(String[] sentence, String[] outcomes,
                     NameContextGenerator cg) {
 final int eventCount = outcomes.length;
 List<Event> result = new ArrayList<>(eventCount);

 int position = 0;
 while (position < eventCount) {
  String[] context = cg.getContext(position, sentence, outcomes, null);
  result.add(new Event(outcomes[position], context));
  position++;
 }

 cg.updateAdaptiveData(sentence, outcomes);
 return result;
}

/**
 * Converts a name sample into the events for its tokens.
 *
 * <p>The adaptive data of the context generator is cleared first when the sample asks
 * for it, and the names are passed to {@code overrideType} when a default type is set.
 * The names are then encoded into outcomes with the codec and the events are produced
 * by {@code generateEvents}.
 *
 * @param sample The sample to create the events for.
 *
 * @return An iterator over the events generated for the sample.
 */
@Override
protected Iterator<Event> createEvents(NameSample sample) {
 if (sample.isClearAdaptiveDataSet()) {
  contextGenerator.clearAdaptiveData();
 }

 Span[] names = sample.getNames();
 if (this.defaultType != null) {
  overrideType(names);
 }

 // Fetch the sentence once instead of once per token; the accessor may build
 // a new array on every call, which would make a per-token lookup quadratic.
 String[] sentence = sample.getSentence();
 String[] outcomes = codec.encode(names, sentence.length);

 additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext());

 // Hand a private copy of the tokens to the event generation.
 String[] tokens = sentence.clone();
 return generateEvents(tokens, outcomes, contextGenerator).iterator();
}

public NameFinderME(TokenNameFinderModel model) {
 TokenNameFinderFactory factory = model.getFactory();
 seqCodec = factory.createSequenceCodec();
 sequenceValidator = seqCodec.createSequenceValidator();
 this.model = model.getNameFinderSequenceModel();
 contextGenerator = factory.createContextGenerator();
 // TODO: We should deprecate this. And come up with a better solution!
 contextGenerator.addFeatureGenerator(
     new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}

/**
 * Finds names in the given token sequence, typically a sentence, and returns
 * the token spans of the names that were identified.
 *
 * <p>The outcomes of the best sequence are also handed to the context generator
 * as adaptive data.
 *
 * @param tokens The tokens or words of the sequence, typically a sentence.
 * @param additionalContext Features which are based on context outside of the
 *     sentence but which should also be used.
 *
 * @return The spans of the identified names, after they were passed through {@code setProbs}.
 */
public Span[] find(String[] tokens, String[][] additionalContext) {
 additionalContextFeatureGenerator.setCurrentContext(additionalContext);
 bestSequence = model.bestSequence(tokens, additionalContext, contextGenerator, sequenceValidator);

 List<String> outcomes = bestSequence.getOutcomes();
 String[] outcomeArray = outcomes.toArray(new String[outcomes.size()]);
 contextGenerator.updateAdaptiveData(tokens, outcomeArray);

 return setProbs(seqCodec.decode(outcomes));
}

/**
 * Reads the next sample from the underlying stream and turns it into a sequence
 * with one event per token of the sample's sentence.
 *
 * @return The sequence built from the next sample, or {@code null} when the underlying
 *     stream returned {@code null}.
 *
 * @throws IOException Declared by this method; the underlying stream's {@code read()}
 *     is the call made here that presumably raises it.
 */
@Override
public Sequence read() throws IOException {
 NameSample sample = psi.read();
 if (sample == null) {
  return null;
 }

 String[] sentence = sample.getSentence();
 String[] tags = seqCodec.encode(sample.getNames(), sentence.length);
 Event[] events = new Event[sentence.length];

 for (int pos = 0; pos < sentence.length; pos++) {
  // it is safe to pass the tags as previous tags because
  // the context generator does not look for non predicted tags
  String[] context = pcg.getContext(pos, sentence, useOutcomes ? tags : null, null);
  events[pos] = new Event(tags[pos], context);
 }

 return new Sequence<>(events,sample);
}

/**
 * Generates one event per outcome of a sentence.
 *
 * <p>The event at position {@code i} pairs {@code outcomes[i]} with the context that
 * {@code cg} produces for that position. Once all events are built, the adaptive data of
 * {@code cg} is updated with the sentence and its outcomes.
 *
 * @param sentence The tokens of the sentence.
 * @param outcomes The outcome of each token; its length determines the number of events.
 * @param cg The context generator which produces the context of each position.
 *
 * @return The generated events, in position order.
 */
public static List<Event> generateEvents(String[] sentence, String[] outcomes,
                     NameContextGenerator cg) {
 final int eventCount = outcomes.length;
 List<Event> result = new ArrayList<>(eventCount);

 int position = 0;
 while (position < eventCount) {
  String[] context = cg.getContext(position, sentence, outcomes, null);
  result.add(new Event(outcomes[position], context));
  position++;
 }

 cg.updateAdaptiveData(sentence, outcomes);
 return result;
}

/**
 * Forgets all adaptive data which was collected during previous calls to one
 * of the find methods.
 *
 * <p>This method is typically called at the end of a document. It simply
 * delegates to the context generator's {@code clearAdaptiveData()}.
 */
public void clearAdaptiveData() {
 contextGenerator.clearAdaptiveData();
}

/**
 * Creates a new name finder event stream using the specified data stream and context generator.
 * @param dataStream The data stream of events.
 * @param type null or overrides the type parameter in the provided samples
 * @param contextGenerator The context generator used to generate features for the event stream.
 */
public NameFinderEventStream(ObjectStream<NameSample> dataStream, String type,
               NameContextGenerator contextGenerator, SequenceCodec<String> codec) {
 super(dataStream);
 this.codec = codec;
 if (codec == null) {
  this.codec = new BioCodec();
 }
 this.contextGenerator = contextGenerator;
 this.contextGenerator.addFeatureGenerator(
   new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
 this.defaultType = type;
}

/**
 * Forgets all adaptive data which was collected during previous calls to one
 * of the find methods.
 *
 * <p>This method is typically called at the end of a document. It simply
 * delegates to the context generator's {@code clearAdaptiveData()}.
 */
public void clearAdaptiveData() {
 contextGenerator.clearAdaptiveData();
}

/**
 * Creates a new name finder event stream using the specified data stream and context generator.
 * @param dataStream The data stream of events.
 * @param type null or overrides the type parameter in the provided samples
 * @param contextGenerator The context generator used to generate features for the event stream.
 */
public NameFinderEventStream(ObjectStream<NameSample> dataStream, String type,
               NameContextGenerator contextGenerator, SequenceCodec<String> codec) {
 super(dataStream);
 this.codec = codec;
 if (codec == null) {
  this.codec = new BioCodec();
 }
 this.contextGenerator = contextGenerator;
 this.contextGenerator.addFeatureGenerator(
   new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
 this.defaultType = type;
}

/**
 * Converts a name sample into the events for its tokens.
 *
 * <p>The adaptive data of the context generator is cleared first when the sample asks
 * for it, and the names are passed to {@code overrideType} when a default type is set.
 * The names are then encoded into outcomes with the codec and the events are produced
 * by {@code generateEvents}.
 *
 * @param sample The sample to create the events for.
 *
 * @return An iterator over the events generated for the sample.
 */
@Override
protected Iterator<Event> createEvents(NameSample sample) {
 if (sample.isClearAdaptiveDataSet()) {
  contextGenerator.clearAdaptiveData();
 }

 Span[] names = sample.getNames();
 if (this.defaultType != null) {
  overrideType(names);
 }

 // Fetch the sentence once instead of once per token; the accessor may build
 // a new array on every call, which would make a per-token lookup quadratic.
 String[] sentence = sample.getSentence();
 String[] outcomes = codec.encode(names, sentence.length);

 additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext());

 // Hand a private copy of the tokens to the event generation.
 String[] tokens = sentence.clone();
 return generateEvents(tokens, outcomes, contextGenerator).iterator();
}

public NameFinderME(TokenNameFinderModel model) {
 TokenNameFinderFactory factory = model.getFactory();
 seqCodec = factory.createSequenceCodec();
 sequenceValidator = seqCodec.createSequenceValidator();
 this.model = model.getNameFinderSequenceModel();
 contextGenerator = factory.createContextGenerator();
 // TODO: We should deprecate this. And come up with a better solution!
 contextGenerator.addFeatureGenerator(
     new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}

/**
 * Converts a name sample into the events for its tokens.
 *
 * <p>The adaptive data of the context generator is cleared first when the sample asks
 * for it, and the names are passed to {@code overrideType} when a default type is set.
 * The names are then encoded into outcomes with the codec and the events are produced
 * by {@code generateEvents}.
 *
 * @param sample The sample to create the events for.
 *
 * @return An iterator over the events generated for the sample.
 */
@Override
protected Iterator<Event> createEvents(NameSample sample) {
 if (sample.isClearAdaptiveDataSet()) {
  contextGenerator.clearAdaptiveData();
 }

 Span[] names = sample.getNames();
 if (this.defaultType != null) {
  overrideType(names);
 }

 // Fetch the sentence once instead of once per token; the accessor may build
 // a new array on every call, which would make a per-token lookup quadratic.
 String[] sentence = sample.getSentence();
 String[] outcomes = codec.encode(names, sentence.length);

 additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext());

 // Hand a private copy of the tokens to the event generation.
 String[] tokens = sentence.clone();
 return generateEvents(tokens, outcomes, contextGenerator).iterator();
}

Javadoc

Interface for generating the context for a name finder by specifying a set of feature generators.

Most used methods

addFeatureGenerator
Adds a feature generator to this set of feature generators.
clearAdaptiveData
Informs all the feature generators for a name finder that the context of the adaptive data (typicall
getContext
updateAdaptiveData
Informs all the feature generators for a name finder that the specified tokens have been classified

Popular in Java

Reading from database using SQL prepared statement
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem.This may contain path information depending
findViewById (Activity)
getSupportFragmentManager (FragmentActivity)
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
Dictionary (java.util)
Note: Do not use this class since it is obsolete. Please use the Map interface for new implementatio
TreeMap (java.util)
Walk the nodes of the tree left-to-right or right-to-left. Note that in descending iterations, next
XPath (javax.xml.xpath)
XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
GridLayout (java.awt)
The GridLayout class is a layout manager that lays out a container's components in a rectangular gri
Top plugins for Android Studio

How to use NameContextGenerator in opennlp.tools.namefind

Best Java code snippets using opennlp.tools.namefind.NameContextGenerator (Showing top 20 results out of 315)

How to use
NameContextGenerator
in
opennlp.tools.namefind