public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * Finds names in the given array of tokens.
 *
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names (empty map when no names found)
 */
public Map<String, Set<String>> findNames(String[] tokens) {
    String[] names;
    // OpenNLP name finders are not thread safe and accumulate adaptive data.
    // Guard the shared instance (consistent with the synchronized usage in
    // getAllNameEntitiesfromInput) and always clear the adaptive data, even if
    // find() fails, so one bad document cannot degrade later detections.
    synchronized (nameFinder) {
        try {
            Span[] nameSpans = nameFinder.find(tokens);
            names = Span.spansToStrings(nameSpans, tokens);
        } finally {
            nameFinder.clearAdaptiveData();
        }
    }
    Map<String, Set<String>> result = new HashMap<>();
    if (names != null && names.length > 0) {
        result.put(nameType, new HashSet<>(Arrays.asList(names)));
    }
    return result;
}
}
protected void documentDone(CAS cas) { // TODO: Create confidence FS // contains String name type // contains Double prob if (documentConfidenceType != null) { FeatureStructure confidenceFS = cas.createFS(documentConfidenceType); confidenceFS.setDoubleValue(documentConfidenceFeature, documentConfidence.mean()); confidenceFS.setStringValue(documentConfidenceNameTypeFeature, mNameType.getName()); cas.addFsToIndexes(confidenceFS); } // Clears the adaptive data which was created for the current document mNameFinder.clearAdaptiveData(); documentConfidence = new Mean(); }
// Reset the finder's adaptive data so earlier documents do not bias later detections.
nameFinder.clearAdaptiveData();
/**
 * Clears the adaptive data of every wrapped name finder, typically at a
 * document boundary so one document's names do not bias the next.
 *
 * @param finders the finders whose adaptive data should be reset
 */
private static void clearPrevTokenMaps(TreebankNameFinder[] finders) {
    for (TreebankNameFinder finder : finders) {
        finder.nameFinder.clearAdaptiveData();
    }
}
/**
 * Resets the adaptive data held by each finder in the array; intended to be
 * called between documents so adaptive state does not leak across them.
 *
 * @param finders the finders to reset
 */
private static void clearPrevTokenMaps(TreebankNameFinder[] finders) {
    for (TreebankNameFinder finder : finders) {
        finder.nameFinder.clearAdaptiveData();
    }
}
/**
 * Forgets all adaptive data which was collected during previous calls to one
 * of the find methods. This method is typically called at the end of a
 * document.
 *
 * From Apache OpenNLP documentation: "After every document clearAdaptiveData
 * must be called to clear the adaptive data in the feature generators. Not
 * calling clearAdaptiveData can lead to a sharp drop in the detection rate
 * after a few documents."
 */
public final void clearAdaptiveData() {
    // Delegates directly to the wrapped OpenNLP name finder.
    nameFinder.clearAdaptiveData();
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * Finds names in the given array of tokens.
 *
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names (empty map when no names found)
 */
public Map<String, Set<String>> findNames(String[] tokens) {
    String[] names;
    // OpenNLP name finders are not thread safe and accumulate adaptive data.
    // Guard the shared instance (consistent with the synchronized usage in
    // getAllNameEntitiesfromInput) and always clear the adaptive data, even if
    // find() fails, so one bad document cannot degrade later detections.
    synchronized (nameFinder) {
        try {
            Span[] nameSpans = nameFinder.find(tokens);
            names = Span.spansToStrings(nameSpans, tokens);
        } finally {
            nameFinder.clearAdaptiveData();
        }
    }
    Map<String, Set<String>> result = new HashMap<>();
    if (names != null && names.length > 0) {
        result.put(nameType, new HashSet<>(Arrays.asList(names)));
    }
    return result;
}
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * Finds names in the given array of tokens.
 *
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names (empty map when no names found)
 */
public Map<String, Set<String>> findNames(String[] tokens) {
    String[] names;
    // OpenNLP name finders are not thread safe and accumulate adaptive data.
    // Guard the shared instance (consistent with the synchronized usage in
    // getAllNameEntitiesfromInput) and always clear the adaptive data, even if
    // find() fails, so one bad document cannot degrade later detections.
    synchronized (nameFinder) {
        try {
            Span[] nameSpans = nameFinder.find(tokens);
            names = Span.spansToStrings(nameSpans, tokens);
        } finally {
            nameFinder.clearAdaptiveData();
        }
    }
    Map<String, Set<String>> result = new HashMap<>();
    if (names != null && names.length > 0) {
        result.put(nameType, new HashSet<>(Arrays.asList(names)));
    }
    return result;
}
}
// Forget per-document adaptive data before the next document is processed.
personFinder.clearAdaptiveData();
// NOTE(review): writers are flushed but not closed here — presumably closed
// by the owner of these writers; verify against the surrounding lifecycle.
blacklistWriter.flush();
knownEntityWriter.flush();
// The finder is reused across documents; clear its adaptive data so this
// document's names do not bias the next run.
finder.clearAdaptiveData();
log.debug("{} name occurrences found: {}", nameOccurrences.size(), nameOccurrences);
return nameOccurrences;
// Reset the model's name finder so adaptive data does not carry over to later input.
model.getNameFinder().clearAdaptiveData();
// The finder is reused across documents; clear its adaptive data so this
// document's names do not bias the next run.
finder.clearAdaptiveData();
log.debug("{} name occurrences found: {}", nameOccurrences.size(), nameOccurrences);
return nameOccurrences;
protected void documentDone(CAS cas) { // TODO: Create confidence FS // contains String name type // contains Double prob if (documentConfidenceType != null) { FeatureStructure confidenceFS = cas.createFS(documentConfidenceType); confidenceFS.setDoubleValue(documentConfidenceFeature, documentConfidence.mean()); confidenceFS.setStringValue(documentConfidenceNameTypeFeature, mNameType.getName()); cas.addFsToIndexes(confidenceFS); } // Clears the adaptive data which was created for the current document mNameFinder.clearAdaptiveData(); documentConfidence = new Mean(); }
// Reset the finder's adaptive data so earlier documents do not bias later detections.
nameFinder.clearAdaptiveData();
// Reset the finder's adaptive data so earlier documents do not bias later detections.
nameFinder.clearAdaptiveData();