/**
 * Auxiliary method to print spans.
 *
 * @param spans
 *          the span list
 * @param toks
 *          the tokens array
 * @return the spans as string
 */
private String print(List<Span> spans, String[] toks) {
  Span[] spanArray = spans.toArray(new Span[spans.size()]);
  String[] coveredText = Span.spansToStrings(spanArray, toks);
  return Arrays.toString(coveredText);
}
/**
 * Detects sentences in the given text.
 *
 * @param s the text to segment
 * @return the detected sentences as substrings of {@code s}
 */
public String[] sentDetect(String s) {
  Span[] sentenceSpans = sentPosDetect(s);
  return Span.spansToStrings(sentenceSpans, s);
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * Finds names from given array of tokens.
 *
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  Map<String, Set<String>> result = new HashMap<>();
  Span[] nameSpans = nameFinder.find(tokens);
  String[] names = Span.spansToStrings(nameSpans, tokens);
  if (names != null && names.length > 0) {
    Set<String> nameSet = new HashSet<>(Arrays.asList(names));
    result.put(nameType, nameSet);
  }
  nameFinder.clearAdaptiveData();
  return result;
}
}
/**
 * Detokenizes the reference sample's tokens and scores the resulting text
 * against the original sample text via the F-measure accumulator.
 *
 * @param reference the gold-standard token sample
 * @return a new sample pairing the detokenized text with the reference spans
 */
@Override
protected TokenSample processSample(TokenSample reference) {
  String[] referenceTokens =
      Span.spansToStrings(reference.getTokenSpans(), reference.getText());
  String detokenized = detokenizer.detokenize(referenceTokens, null);

  ArrayList<String> predictionList = new ArrayList<>();
  predictionList.add(detokenized);
  ArrayList<String> referenceList = new ArrayList<>();
  referenceList.add(reference.getText());

  fmeasure.updateScores(referenceList.toArray(), predictionList.toArray());

  return new TokenSample(detokenized, reference.getTokenSpans());
}
// Materialize the covered text of each token span over the sentence.
String[] tokens = Span.spansToStrings(tokenSpans, sentenceText);
// Build a NameSample from the token texts and name spans; the final flag is
// true only for the very first sample added (samples.size() == 0).
samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, samples.size() == 0));
/**
 * Auxiliary method to print spans.
 *
 * @param spans
 *          the span list
 * @param toks
 *          the tokens array
 * @return the spans as string
 */
private String print(List<Span> spans, String[] toks) {
  final Span[] asArray = spans.toArray(new Span[spans.size()]);
  return Arrays.toString(Span.spansToStrings(asArray, toks));
}
/**
 * Auxiliary method to print spans.
 *
 * @param spans
 *          the span list
 * @param toks
 *          the tokens array
 * @return the spans as string
 */
private String print(List<Span> spans, String[] toks) {
  Span[] spanArr = new Span[spans.size()];
  spanArr = spans.toArray(spanArr);
  String[] covered = Span.spansToStrings(spanArr, toks);
  return Arrays.toString(covered);
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  Span[] tokenSpans = tokenizePos(s);
  return Span.spansToStrings(tokenSpans, s);
}
}
@Test
public void testRegions() throws IOException {
  InputStreamFactory factory =
      new ResourceAsStreamFactory(getClass(), "/opennlp/tools/chunker/output.txt");
  DummyChunkSampleStream predictedSample = new DummyChunkSampleStream(
      new PlainTextByLineStream(factory, StandardCharsets.UTF_8), false);

  // First two samples: only the phrase counts are checked.
  ChunkSample first = predictedSample.read();
  Assert.assertEquals(15,
      Span.spansToStrings(first.getPhrasesAsSpanList(), first.getSentence()).length);

  ChunkSample second = predictedSample.read();
  Assert.assertEquals(10,
      Span.spansToStrings(second.getPhrasesAsSpanList(), second.getSentence()).length);

  // Third sample: check the exact covered text of every phrase.
  ChunkSample third = predictedSample.read();
  String[] phrases =
      Span.spansToStrings(third.getPhrasesAsSpanList(), third.getSentence());
  String[] expected = {"United", "'s directors", "voted", "themselves",
      "their spouses", "lifetime access", "to"};
  Assert.assertEquals(expected.length, phrases.length);
  for (int i = 0; i < expected.length; i++) {
    Assert.assertEquals(expected[i], phrases[i]);
  }

  predictedSample.close();
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  return Span.spansToStrings(this.tokenizePos(s), s);
}
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  final Span[] positions = tokenizePos(s);
  final String[] tokens = Span.spansToStrings(positions, s);
  return tokens;
}
}
/**
 * Detects sentences in the given text.
 *
 * @param s the text to segment
 * @return the detected sentences as substrings of {@code s}
 */
public String[] sentDetect(String s) {
  final Span[] positions = sentPosDetect(s);
  final String[] sentences = Span.spansToStrings(positions, s);
  return sentences;
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  Span[] spans = tokenizePos(s);
  return Span.spansToStrings(spans, s);
}
/**
 * Detects sentences in the given text.
 *
 * @param s the text to segment
 * @return the detected sentences as substrings of {@code s}
 */
public String[] sentDetect(String s) {
  return Span.spansToStrings(this.sentPosDetect(s), s);
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  final String[] tokens = Span.spansToStrings(tokenizePos(s), s);
  return tokens;
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * Finds names from given array of tokens.
 *
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  Span[] detected = nameFinder.find(tokens);
  String[] names = Span.spansToStrings(detected, tokens);
  Map<String, Set<String>> result = new HashMap<>();
  if (names != null && names.length > 0) {
    result.put(nameType, new HashSet<>(Arrays.asList(names)));
  }
  nameFinder.clearAdaptiveData();
  return result;
}
}
/**
 * Detokenizes the reference sample's tokens and scores the resulting text
 * against the original sample text via the F-measure accumulator.
 *
 * @param reference the gold-standard token sample
 * @return a new sample pairing the detokenized text with the reference spans
 */
@Override
protected TokenSample processSample(TokenSample reference) {
  String[] goldTokens =
      Span.spansToStrings(reference.getTokenSpans(), reference.getText());
  String rebuiltText = detokenizer.detokenize(goldTokens, null);

  ArrayList<String> predicted = new ArrayList<>();
  ArrayList<String> expected = new ArrayList<>();
  predicted.add(rebuiltText);
  expected.add(reference.getText());

  fmeasure.updateScores(expected.toArray(), predicted.toArray());

  return new TokenSample(rebuiltText, reference.getTokenSpans());
}