/**
 * Creates a new name finder event stream using the specified data stream,
 * context generator and sequence codec.
 *
 * <p>As part of construction a {@code WindowFeatureGenerator} wrapping the
 * additional-context feature generator is added to the supplied
 * {@code contextGenerator}.
 *
 * @param dataStream The data stream of events.
 * @param type {@code null}, or a type that overrides the type of the names in the
 *     provided samples.
 * @param contextGenerator The context generator used to generate features for the
 *     event stream.
 * @param codec The sequence codec to use; when {@code null}, a new {@code BioCodec}
 *     is used instead.
 */
public NameFinderEventStream(ObjectStream<NameSample> dataStream, String type,
    NameContextGenerator contextGenerator, SequenceCodec<String> codec) {
  super(dataStream);

  // Fall back to a fresh BioCodec when the caller did not supply a codec.
  this.codec = (codec != null) ? codec : new BioCodec();
  this.defaultType = type;

  this.contextGenerator = contextGenerator;
  this.contextGenerator.addFeatureGenerator(
      new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}
/**
 * Discards all adaptive data gathered during earlier calls to one of the
 * find methods by delegating to the context generator.
 *
 * <p>This method is typically called at the end of a document.
 */
public void clearAdaptiveData() {
  this.contextGenerator.clearAdaptiveData();
}
/**
 * Builds one training event per entry of {@code outcomes}.
 *
 * <p>For every index the features are obtained from
 * {@code cg.getContext(index, sentence, outcomes, null)} and paired with the
 * outcome at that index. Once all events are built, the context generator's
 * adaptive data is updated with the sentence and its outcomes.
 *
 * @param sentence the tokens of the sentence.
 * @param outcomes the outcome for each token; its length determines the number of events.
 * @param cg the context generator that supplies the features.
 *
 * @return the events, in token order.
 */
public static List<Event> generateEvents(String[] sentence, String[] outcomes,
    NameContextGenerator cg) {
  final int count = outcomes.length;
  final List<Event> result = new ArrayList<>(count);

  int position = 0;
  while (position < count) {
    final String outcome = outcomes[position];
    final String[] features = cg.getContext(position, sentence, outcomes, null);
    result.add(new Event(outcome, features));
    position++;
  }

  cg.updateAdaptiveData(sentence, outcomes);
  return result;
}
/**
 * Generates name tags for the given sequence, typically a sentence, and returns
 * the token spans of any identified names.
 *
 * <p>The best sequence found by the model is kept in {@code bestSequence}, and its
 * outcomes are used to update the adaptive data of the context generator before
 * they are decoded into spans.
 *
 * @param tokens an array of the tokens or words of the sequence, typically a sentence.
 * @param additionalContext features which are based on context outside of the
 *     sentence but which should also be used.
 *
 * @return an array of spans for each of the names identified.
 */
public Span[] find(String[] tokens, String[][] additionalContext) {
  additionalContextFeatureGenerator.setCurrentContext(additionalContext);

  bestSequence = model.bestSequence(tokens, additionalContext, contextGenerator,
      sequenceValidator);

  final List<String> outcomes = bestSequence.getOutcomes();
  final String[] outcomeArray = outcomes.toArray(new String[outcomes.size()]);
  contextGenerator.updateAdaptiveData(tokens, outcomeArray);

  // Decode the outcomes into spans, then pass them through setProbs.
  return setProbs(seqCodec.decode(outcomes));
}
@Override public Sequence read() throws IOException { NameSample sample = psi.read(); if (sample != null) { String[] sentence = sample.getSentence(); String[] tags = seqCodec.encode(sample.getNames(), sentence.length); Event[] events = new Event[sentence.length]; for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context; if (useOutcomes) { context = pcg.getContext(i, sentence, tags, null); } else { context = pcg.getContext(i, sentence, null, null); } events[i] = new Event(tags[i], context); } return new Sequence<>(events,sample); } else { return null; } }
/**
 * Generates name tags for the given sequence, typically a sentence, and returns
 * the token spans of any identified names.
 *
 * <p>The best sequence found by the model is kept in {@code bestSequence}, and its
 * outcomes are used to update the adaptive data of the context generator before
 * they are decoded into spans.
 *
 * @param tokens an array of the tokens or words of the sequence, typically a sentence.
 * @param additionalContext features which are based on context outside of the
 *     sentence but which should also be used.
 *
 * @return an array of spans for each of the names identified.
 */
public Span[] find(String[] tokens, String[][] additionalContext) {
  additionalContextFeatureGenerator.setCurrentContext(additionalContext);

  bestSequence = model.bestSequence(tokens, additionalContext, contextGenerator,
      sequenceValidator);

  final List<String> outcomes = bestSequence.getOutcomes();
  final String[] outcomeArray = outcomes.toArray(new String[outcomes.size()]);
  contextGenerator.updateAdaptiveData(tokens, outcomeArray);

  // Decode the outcomes into spans, then pass them through setProbs.
  return setProbs(seqCodec.decode(outcomes));
}
@Override public Sequence read() throws IOException { NameSample sample = psi.read(); if (sample != null) { String[] sentence = sample.getSentence(); String[] tags = seqCodec.encode(sample.getNames(), sentence.length); Event[] events = new Event[sentence.length]; for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context; if (useOutcomes) { context = pcg.getContext(i, sentence, tags, null); } else { context = pcg.getContext(i, sentence, null, null); } events[i] = new Event(tags[i], context); } return new Sequence<>(events,sample); } else { return null; } }
/**
 * Builds one training event per entry of {@code outcomes}.
 *
 * <p>For every index the features are obtained from
 * {@code cg.getContext(index, sentence, outcomes, null)} and paired with the
 * outcome at that index. Once all events are built, the context generator's
 * adaptive data is updated with the sentence and its outcomes.
 *
 * @param sentence the tokens of the sentence.
 * @param outcomes the outcome for each token; its length determines the number of events.
 * @param cg the context generator that supplies the features.
 *
 * @return the events, in token order.
 */
public static List<Event> generateEvents(String[] sentence, String[] outcomes,
    NameContextGenerator cg) {
  final int count = outcomes.length;
  final List<Event> result = new ArrayList<>(count);

  int position = 0;
  while (position < count) {
    final String outcome = outcomes[position];
    final String[] features = cg.getContext(position, sentence, outcomes, null);
    result.add(new Event(outcome, features));
    position++;
  }

  cg.updateAdaptiveData(sentence, outcomes);
  return result;
}
@Override protected Iterator<Event> createEvents(NameSample sample) { if (sample.isClearAdaptiveDataSet()) { contextGenerator.clearAdaptiveData(); } Span[] names = sample.getNames(); if (!Objects.isNull(this.defaultType)) { overrideType(names); } String[] outcomes = codec.encode(names, sample.getSentence().length); // String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length); additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext()); String[] tokens = new String[sample.getSentence().length]; for (int i = 0; i < sample.getSentence().length; i++) { tokens[i] = sample.getSentence()[i]; } return generateEvents(tokens, outcomes, contextGenerator).iterator(); }
public NameFinderME(TokenNameFinderModel model) { TokenNameFinderFactory factory = model.getFactory(); seqCodec = factory.createSequenceCodec(); sequenceValidator = seqCodec.createSequenceValidator(); this.model = model.getNameFinderSequenceModel(); contextGenerator = factory.createContextGenerator(); // TODO: We should deprecate this. And come up with a better solution! contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); }
/**
 * Generates name tags for the given sequence, typically a sentence, and returns
 * the token spans of any identified names.
 *
 * <p>The best sequence found by the model is kept in {@code bestSequence}, and its
 * outcomes are used to update the adaptive data of the context generator before
 * they are decoded into spans.
 *
 * @param tokens an array of the tokens or words of the sequence, typically a sentence.
 * @param additionalContext features which are based on context outside of the
 *     sentence but which should also be used.
 *
 * @return an array of spans for each of the names identified.
 */
public Span[] find(String[] tokens, String[][] additionalContext) {
  additionalContextFeatureGenerator.setCurrentContext(additionalContext);

  bestSequence = model.bestSequence(tokens, additionalContext, contextGenerator,
      sequenceValidator);

  final List<String> outcomes = bestSequence.getOutcomes();
  final String[] outcomeArray = outcomes.toArray(new String[outcomes.size()]);
  contextGenerator.updateAdaptiveData(tokens, outcomeArray);

  // Decode the outcomes into spans, then pass them through setProbs.
  return setProbs(seqCodec.decode(outcomes));
}
@Override public Sequence read() throws IOException { NameSample sample = psi.read(); if (sample != null) { String[] sentence = sample.getSentence(); String[] tags = seqCodec.encode(sample.getNames(), sentence.length); Event[] events = new Event[sentence.length]; for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context; if (useOutcomes) { context = pcg.getContext(i, sentence, tags, null); } else { context = pcg.getContext(i, sentence, null, null); } events[i] = new Event(tags[i], context); } return new Sequence<>(events,sample); } else { return null; } }
/**
 * Builds one training event per entry of {@code outcomes}.
 *
 * <p>For every index the features are obtained from
 * {@code cg.getContext(index, sentence, outcomes, null)} and paired with the
 * outcome at that index. Once all events are built, the context generator's
 * adaptive data is updated with the sentence and its outcomes.
 *
 * @param sentence the tokens of the sentence.
 * @param outcomes the outcome for each token; its length determines the number of events.
 * @param cg the context generator that supplies the features.
 *
 * @return the events, in token order.
 */
public static List<Event> generateEvents(String[] sentence, String[] outcomes,
    NameContextGenerator cg) {
  final int count = outcomes.length;
  final List<Event> result = new ArrayList<>(count);

  int position = 0;
  while (position < count) {
    final String outcome = outcomes[position];
    final String[] features = cg.getContext(position, sentence, outcomes, null);
    result.add(new Event(outcome, features));
    position++;
  }

  cg.updateAdaptiveData(sentence, outcomes);
  return result;
}
/**
 * Discards all adaptive data gathered during earlier calls to one of the
 * find methods by delegating to the context generator.
 *
 * <p>This method is typically called at the end of a document.
 */
public void clearAdaptiveData() {
  this.contextGenerator.clearAdaptiveData();
}
/**
 * Creates a new name finder event stream using the specified data stream,
 * context generator and sequence codec.
 *
 * <p>As part of construction a {@code WindowFeatureGenerator} wrapping the
 * additional-context feature generator is added to the supplied
 * {@code contextGenerator}.
 *
 * @param dataStream The data stream of events.
 * @param type {@code null}, or a type that overrides the type of the names in the
 *     provided samples.
 * @param contextGenerator The context generator used to generate features for the
 *     event stream.
 * @param codec The sequence codec to use; when {@code null}, a new {@code BioCodec}
 *     is used instead.
 */
public NameFinderEventStream(ObjectStream<NameSample> dataStream, String type,
    NameContextGenerator contextGenerator, SequenceCodec<String> codec) {
  super(dataStream);

  // Fall back to a fresh BioCodec when the caller did not supply a codec.
  this.codec = (codec != null) ? codec : new BioCodec();
  this.defaultType = type;

  this.contextGenerator = contextGenerator;
  this.contextGenerator.addFeatureGenerator(
      new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}
/**
 * Discards all adaptive data gathered during earlier calls to one of the
 * find methods by delegating to the context generator.
 *
 * <p>This method is typically called at the end of a document.
 */
public void clearAdaptiveData() {
  this.contextGenerator.clearAdaptiveData();
}
/**
 * Creates a new name finder event stream using the specified data stream,
 * context generator and sequence codec.
 *
 * <p>As part of construction a {@code WindowFeatureGenerator} wrapping the
 * additional-context feature generator is added to the supplied
 * {@code contextGenerator}.
 *
 * @param dataStream The data stream of events.
 * @param type {@code null}, or a type that overrides the type of the names in the
 *     provided samples.
 * @param contextGenerator The context generator used to generate features for the
 *     event stream.
 * @param codec The sequence codec to use; when {@code null}, a new {@code BioCodec}
 *     is used instead.
 */
public NameFinderEventStream(ObjectStream<NameSample> dataStream, String type,
    NameContextGenerator contextGenerator, SequenceCodec<String> codec) {
  super(dataStream);

  // Fall back to a fresh BioCodec when the caller did not supply a codec.
  this.codec = (codec != null) ? codec : new BioCodec();
  this.defaultType = type;

  this.contextGenerator = contextGenerator;
  this.contextGenerator.addFeatureGenerator(
      new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}
@Override protected Iterator<Event> createEvents(NameSample sample) { if (sample.isClearAdaptiveDataSet()) { contextGenerator.clearAdaptiveData(); } Span[] names = sample.getNames(); if (!Objects.isNull(this.defaultType)) { overrideType(names); } String[] outcomes = codec.encode(names, sample.getSentence().length); // String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length); additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext()); String[] tokens = new String[sample.getSentence().length]; for (int i = 0; i < sample.getSentence().length; i++) { tokens[i] = sample.getSentence()[i]; } return generateEvents(tokens, outcomes, contextGenerator).iterator(); }
public NameFinderME(TokenNameFinderModel model) { TokenNameFinderFactory factory = model.getFactory(); seqCodec = factory.createSequenceCodec(); sequenceValidator = seqCodec.createSequenceValidator(); this.model = model.getNameFinderSequenceModel(); contextGenerator = factory.createContextGenerator(); // TODO: We should deprecate this. And come up with a better solution! contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); }
@Override protected Iterator<Event> createEvents(NameSample sample) { if (sample.isClearAdaptiveDataSet()) { contextGenerator.clearAdaptiveData(); } Span[] names = sample.getNames(); if (!Objects.isNull(this.defaultType)) { overrideType(names); } String[] outcomes = codec.encode(names, sample.getSentence().length); // String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length); additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext()); String[] tokens = new String[sample.getSentence().length]; for (int i = 0; i < sample.getSentence().length; i++) { tokens[i] = sample.getSentence()[i]; } return generateEvents(tokens, outcomes, contextGenerator).iterator(); }