Refine search
@Test public void testOnlyWithNamesTypeOverride() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]); Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]); Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/**
 * Finds names in the given array of tokens.
 *
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
    Span[] spans = nameFinder.find(tokens);
    String[] entityNames = Span.spansToStrings(spans, tokens);

    Map<String, Set<String>> namesByType = new HashMap<>();
    if (entityNames != null && entityNames.length > 0) {
        // All names found by this finder share the single configured type.
        namesByType.put(nameType, new HashSet<>(Arrays.asList(entityNames)));
    }

    // Reset document-scoped adaptive data so previous calls don't bias the next.
    nameFinder.clearAdaptiveData();
    return namesByType;
}
}
// NOTE(review): fragment — the enclosing method, loop, and declarations of
// nameFinders/nameFinder/names are outside this view.
nameFinders[i] = new NameFinderME(model);
// Reset document-scoped adaptive context between documents.
nameFinder.clearAdaptiveData();
// Where spans from different models overlap, keep only the best one.
Span[] reducedNames = NameFinderME.dropOverlappingSpans(
    names.toArray(new Span[names.size()]));
/**
 * Annotates the document's sentences with OpenNLP named entities.
 * Every loaded model for the document's language is run over each sentence;
 * each found span becomes an attached annotation carrying the entity type
 * and the model's confidence for that span.
 */
@Override
public void annotate(Document document) {
    Collection<TokenNameFinderModel> models = loadModels(document.getLanguage());
    for (Annotation sentence : document.sentences()) {
        List<Annotation> tokenList = sentence.tokens();
        String[] tokens = tokenList.stream().map(Object::toString).toArray(String[]::new);
        for (TokenNameFinderModel model : models) {
            NameFinderME finder = new NameFinderME(model);
            opennlp.tools.util.Span[] spans = finder.find(tokens);
            double[] probs = finder.probs(spans);
            for (int i = 0; i < spans.length; i++) {
                opennlp.tools.util.Span span = spans[i];
                document.annotationBuilder()
                    .type(OPENNLP_ENTITY)
                    // Span end is exclusive, so the last covered token is getEnd() - 1.
                    .bounds(tokenList.get(span.getStart()).union(tokenList.get(span.getEnd() - 1)))
                    // Locale.ROOT keeps the type name stable regardless of the JVM's
                    // default locale (e.g. avoids the Turkish dotless-i problem).
                    .attribute(Types.ENTITY_TYPE,
                        EntityType.create(span.getType().toUpperCase(java.util.Locale.ROOT)))
                    .attribute(Types.CONFIDENCE, probs[i])
                    .createAttached();
            }
        }
    }
}
// NOTE(review): fragment — the enclosing method and several declarations
// (text, sentenceSpans, contextElements, tokenSpans, log) are outside this view.
NameFinderME finder = new NameFinderME(nameFinderModel);
Tokenizer tokenizer = openNLP.getTokenizer(language);
Map<String,List<NameOccurrence>> nameOccurrences =
    new LinkedHashMap<String,List<NameOccurrence>>();
for (int i = 0; i < sentenceSpans.length; i++) {
    String sentence = sentenceSpans[i].getCoveredText(text).toString().trim();
    // NOTE(review): i - 1 and i + 1 go out of bounds for the first/last
    // sentence — presumably guarded in code elided from this fragment; verify.
    CharSequence previousSentence = sentenceSpans[i - 1].getCoveredText(text);
    contextElements.add(previousSentence.toString().trim());
    CharSequence nextSentence = sentenceSpans[i + 1].getCoveredText(text);
    contextElements.add(nextSentence.toString().trim());
    String[] tokens = Span.spansToStrings(tokenSpans, sentence);
    Span[] nameSpans = finder.find(tokens);
    double[] probs = finder.probs();
    // Reset adaptive context before the next document.
    finder.clearAdaptiveData();
log.debug("{} name occurrences found: {}", nameOccurrences.size(), nameOccurrences);
return nameOccurrences;
@Override @SuppressWarnings("all") //<start id="maxent.examples.features"/> public Collection extractFeatures(String[] text) { NameFinderME[] finders = factory.getNameFinders(); //<co id="nffg.engine"/> String[] modelNames = factory.getModelNames(); Collection<String> features = new ArrayList<String>(); StringBuilder builder = new StringBuilder(); for (int i=0; i < finders.length; i++) { Span[] spans = finders[i].find(text); //<co id="nffg.find"/> String model = modelNames[i]; for (int j=0; j < spans.length; j++) { int start = spans[j].getStart(); //<co id="nffg.combine"/> int end = spans[j].getEnd(); builder.setLength(0); builder.append(model).append("="); for (int k = start; k < end; k++ ) { builder.append(text[k]).append('_'); } builder.setLength(builder.length()-1); features.add(builder.toString()); //<co id="nffg.return"/> } } return features; } /*
/**
 * Runs the name finder over the Leipzig sample lines and checks that the
 * digest of all found spans matches the expected reference hash.
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash)
    throws Exception {
    MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
    TokenNameFinder finder = new NameFinderME(model);

    try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
        for (LeipzigTestSample sample = samples.read(); sample != null;
             sample = samples.read()) {
            for (Span name : finder.find(sample.getText())) {
                // Type, start, and end are concatenated as text (not summed);
                // this string must stay identical or the reference hash breaks.
                digest.update((name.getType() + name.getStart() + name.getEnd())
                    .getBytes(StandardCharsets.UTF_8));
            }
        }
    }

    Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
}
// NOTE(review): fragment — the enclosing method and declarations of
// plainText, sentenceSpan, tokens, name, tokenPositionsWithinSentence
// are outside this view.
String sentence = plainText.substring(sentenceSpan.getStart(), sentenceSpan.getEnd());
Span names[] = nameFinder.find(tokens);
// Map token-index span back to character offsets; span end is exclusive,
// so the last covered token is name.getEnd() - 1.
int beginningOfFirstWord = tokenPositionsWithinSentence[name.getStart()].getStart();
int endOfLastWord = tokenPositionsWithinSentence[name.getEnd() - 1].getEnd();
// Reset adaptive context before processing the next document.
nameFinder.clearAdaptiveData();
// NOTE(review): fragment — braces are unbalanced in this view; the enclosing
// method, the expression this exception feeds, and the stream building
// `tokens` are elided.
new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));
NameFinderME finder = new NameFinderME(model);
.toArray(String[]::new);
for (Span prediction : finder.find(tokens)) {
    String label = prediction.getType();
    // Skip spans carrying the placeholder "default" type.
    if ("default".equals(label)) {
        continue;
    // Convert token-index span to character offsets (span end is exclusive).
    int begin = tokenAnnotations.get(prediction.getStart()).getBegin();
    int end = tokenAnnotations.get(prediction.getEnd() - 1).getEnd();
    AnnotationFS annotation = aCas.createAnnotation(predictionType, begin, end);
    annotation.setDoubleValue(confidenceFeature, prediction.getProb());
// NOTE(review): fragment — the loop declaring i, and tokens/probabilityList,
// are outside this view.
final Span nameSpans[] = model.getNameFinder().find(tokens);
final double[] spanProbs = model.getNameFinder().probs(nameSpans);
log.debug("Span: " + nameSpans[i].toString());
// NOTE(review): only the span's FIRST token is logged/stored here — the full
// covered text would run through getEnd() - 1; confirm this is intentional.
log.debug("Covered text is: " + tokens[nameSpans[i].getStart()]);
log.debug("Probability is: " + spanProbs[i]);
probabilityList.add(new Probability(tokens[nameSpans[i].getStart()], spanProbs[i]));
// Reset adaptive context so earlier documents don't bias the next one.
model.getNameFinder().clearAdaptiveData();
// NOTE(review): fragment — tokens/spans/foundNames/finders/tokenTypes are
// declared outside this view; the inner `Span[] spans` appears to shadow or
// redeclare the outer `spans` — verify scoping against the full file.
tokens = Span.spansToStrings(spans, text);
foundNames = new Span[finders.length][];
for (int i = 0; i < finders.length; i++) {
    foundNames[i] = finders[i].find(tokens);
    Span[] spans = foundNames[i];
    for (int j = 0; j < spans.length; j++) {
        int start = spans[j].getStart();
        int end = spans[j].getEnd();
        // Mark every token covered by this finder's span (end is exclusive).
        for (int k = start; k < end; k++) {
            tokenTypes[k][i] = true;
// NOTE(review): fragment — the if-branch this `else` pairs with, and the
// declarations of names/tokens/probabilities/entity, are outside this view.
// Resolve overlapping spans, keeping the best-scoring one per overlap.
opennlp.tools.util.Span reducedNames[] = NameFinderME.dropOverlappingSpans(names);
String[] extractedEntities = opennlp.tools.util.Span.spansToStrings(reducedNames, tokens);
probabilities = ((NameFinderME) nameFinder).probs(reducedNames);
} else {
    entity.setSpan(new ai.idylnlp.model.entity.Span(name.getStart(), name.getEnd()));
    entity.setContext(entityExtractionRequest.getContext());
    entity.setExtractionDate(System.currentTimeMillis());
/**
 * Creates OpenNLP name finder.
 *
 * @param nameType the entity type recognised by the given NER model
 * @param nerModelPath path to ner model
 */
public OpenNLPNameFinder(String nameType, String nerModelPath) {
    this.nameTypes = Collections.singleton(nameType);
    this.nameType = nameType;

    InputStream modelStream =
        getClass().getClassLoader().getResourceAsStream(nerModelPath);
    try {
        if (modelStream == null) {
            // Model resource missing: finder stays unavailable but construction succeeds.
            LOG.warn("Couldn't find model from {} using class loader", nerModelPath);
        } else {
            this.nameFinder = new NameFinderME(new TokenNameFinderModel(modelStream));
            this.available = true;
        }
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
    } finally {
        IOUtils.closeQuietly(modelStream);
    }
    LOG.info("{} NER : Available for service ? {}", nameType, available);
}
// NOTE(review): fragment — allSpans/numericSpans/name/tokenIds are declared
// outside this view.
SpanUtils.concatenateSpans(allSpans, numericSpans);
// Resolve overlaps across the combined span list, keeping the best span each.
Span[] allSpansArray = NameFinderME.dropOverlappingSpans(allSpans
    .toArray(new Span[allSpans.size()]));
List<Name> names = new ArrayList<Name>();
// Token-index range of this name; end is exclusive, matching copyOfRange.
Integer startIndex = name.getSpan().getStart();
Integer endIndex = name.getSpan().getEnd();
List<String> wfIds = Arrays
    .asList(Arrays.copyOfRange(tokenIds, startIndex, endIndex));
// NOTE(review): fragment — slot/combinedNameSampleStream/trainingParams/
// tokenNameFinderModels/tokens are declared outside this view.
TokenNameFinderModel tokenNameFinderModel = NameFinderME.train("en", slot,
    combinedNameSampleStream, trainingParams, new TokenNameFinderFactory());
combinedNameSampleStream.close();
tokenNameFinderModels.add(tokenNameFinderModel);
NameFinderME[] nameFinderMEs = new NameFinderME[tokenNameFinderModels.size()];
for (int i = 0; i < tokenNameFinderModels.size(); i++) {
    nameFinderMEs[i] = new NameFinderME(tokenNameFinderModels.get(i));
// NOTE(review): `nameFinderME` (singular) is not declared in this fragment and
// the loop index i is reused below — likely meant nameFinderMEs[i]; verify
// against the full file.
Span[] spans = nameFinderME.find(tokens);
String[] names = Span.spansToStrings(spans, tokens);
for (int i = 0; i < spans.length; i++) {
    if(i > 0) {
        System.out.print(", ");
    }
    System.out.print(spans[i].getType() + ": '" + names[i] + "' ");
// NOTE(review): fragment — the enclosing test method, `names`, and the
// DEFAULT constant are outside this view.
params.put(TrainingParameters.CUTOFF_PARAM, 1);
// Train with a null type so spans fall back to the default type constant.
TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null,
    sampleStream, params,
    TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
TokenNameFinder nameFinder = new NameFinderME(nameFinderModel);
Assert.assertEquals(new Span(0, 1, DEFAULT), names[0]);
// NOTE(review): names[0] is asserted twice with different expected spans —
// one of these must fail; the second check presumably targets names[1] (and
// the next names[2]). Verify against the full test.
Assert.assertEquals(new Span(1, 2, DEFAULT), names[0]);
Assert.assertEquals(new Span(4, 6, DEFAULT), names[1]);