/**
 * Auxiliary method to print spans.
 *
 * @param spans
 *          the span list
 * @param toks
 *          the tokens array
 * @return the spans as string
 */
private String print(List<Span> spans, String[] toks) {
  Span[] spanArray = spans.toArray(new Span[spans.size()]);
  String[] coveredText = Span.spansToStrings(spanArray, toks);
  return Arrays.toString(coveredText);
}
/**
 * Detects sentences in the given text.
 *
 * @param s the text to segment
 * @return the detected sentences as substrings of {@code s}
 */
public String[] sentDetect(String s) {
  Span[] sentenceSpans = sentPosDetect(s);
  return Span.spansToStrings(sentenceSpans, s);
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * Finds names from given array of tokens.
 *
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  Map<String, Set<String>> result = new HashMap<>();
  Span[] nameSpans = nameFinder.find(tokens);
  String[] names = Span.spansToStrings(nameSpans, tokens);
  if (names != null && names.length > 0) {
    Set<String> nameSet = new HashSet<>(Arrays.asList(names));
    result.put(nameType, nameSet);
  }
  nameFinder.clearAdaptiveData();
  return result;
}
}
/**
 * Detokenizes the reference sample's tokens and scores the resulting text
 * against the original sample text via the F-measure accumulator.
 *
 * @param reference the gold-standard token sample
 * @return a new sample pairing the detokenized text with the reference spans
 */
@Override
protected TokenSample processSample(TokenSample reference) {
  String[] referenceTokens =
      Span.spansToStrings(reference.getTokenSpans(), reference.getText());
  String detokenized = detokenizer.detokenize(referenceTokens, null);

  ArrayList<String> predictionList = new ArrayList<>();
  predictionList.add(detokenized);
  ArrayList<String> referenceList = new ArrayList<>();
  referenceList.add(reference.getText());

  fmeasure.updateScores(referenceList.toArray(), predictionList.toArray());

  return new TokenSample(detokenized, reference.getTokenSpans());
}
// Materialize the covered text of each token span over the sentence.
String[] tokens = Span.spansToStrings(tokenSpans, sentenceText);
// Build a NameSample from the token texts and name spans; the final flag is
// true only for the very first sample added (samples.size() == 0).
samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, samples.size() == 0));
/**
 * Auxiliary method to print spans.
 *
 * @param spans
 *          the span list
 * @param toks
 *          the tokens array
 * @return the spans as string
 */
private String print(List<Span> spans, String[] toks) {
  final Span[] asArray = spans.toArray(new Span[spans.size()]);
  return Arrays.toString(Span.spansToStrings(asArray, toks));
}
/**
 * Auxiliary method to print spans.
 *
 * @param spans
 *          the span list
 * @param toks
 *          the tokens array
 * @return the spans as string
 */
private String print(List<Span> spans, String[] toks) {
  Span[] spanArr = new Span[spans.size()];
  spanArr = spans.toArray(spanArr);
  String[] covered = Span.spansToStrings(spanArr, toks);
  return Arrays.toString(covered);
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  Span[] tokenSpans = tokenizePos(s);
  return Span.spansToStrings(tokenSpans, s);
}
}
@Test
public void testRegions() throws IOException {
  InputStreamFactory factory =
      new ResourceAsStreamFactory(getClass(), "/opennlp/tools/chunker/output.txt");
  DummyChunkSampleStream predictedSample = new DummyChunkSampleStream(
      new PlainTextByLineStream(factory, StandardCharsets.UTF_8), false);

  // First two samples: only the phrase counts are checked.
  ChunkSample first = predictedSample.read();
  Assert.assertEquals(15,
      Span.spansToStrings(first.getPhrasesAsSpanList(), first.getSentence()).length);

  ChunkSample second = predictedSample.read();
  Assert.assertEquals(10,
      Span.spansToStrings(second.getPhrasesAsSpanList(), second.getSentence()).length);

  // Third sample: check the exact covered text of every phrase.
  ChunkSample third = predictedSample.read();
  String[] phrases =
      Span.spansToStrings(third.getPhrasesAsSpanList(), third.getSentence());
  String[] expected = {"United", "'s directors", "voted", "themselves",
      "their spouses", "lifetime access", "to"};
  Assert.assertEquals(expected.length, phrases.length);
  for (int i = 0; i < expected.length; i++) {
    Assert.assertEquals(expected[i], phrases[i]);
  }

  predictedSample.close();
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  return Span.spansToStrings(this.tokenizePos(s), s);
}
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  final Span[] positions = tokenizePos(s);
  final String[] tokens = Span.spansToStrings(positions, s);
  return tokens;
}
}
/**
 * Detects sentences in the given text.
 *
 * @param s the text to segment
 * @return the detected sentences as substrings of {@code s}
 */
public String[] sentDetect(String s) {
  final Span[] positions = sentPosDetect(s);
  final String[] sentences = Span.spansToStrings(positions, s);
  return sentences;
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  Span[] spans = tokenizePos(s);
  return Span.spansToStrings(spans, s);
}
/**
 * Detects sentences in the given text.
 *
 * @param s the text to segment
 * @return the detected sentences as substrings of {@code s}
 */
public String[] sentDetect(String s) {
  return Span.spansToStrings(this.sentPosDetect(s), s);
}
/**
 * Tokenizes the given text.
 *
 * @param s the text to tokenize
 * @return the token substrings of {@code s}
 */
public String[] tokenize(String s) {
  final String[] tokens = Span.spansToStrings(tokenizePos(s), s);
  return tokens;
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * Finds names from given array of tokens.
 *
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  Span[] detected = nameFinder.find(tokens);
  String[] names = Span.spansToStrings(detected, tokens);
  Map<String, Set<String>> result = new HashMap<>();
  if (names != null && names.length > 0) {
    result.put(nameType, new HashSet<>(Arrays.asList(names)));
  }
  nameFinder.clearAdaptiveData();
  return result;
}
}
/**
 * Detokenizes the reference sample's tokens and scores the resulting text
 * against the original sample text via the F-measure accumulator.
 *
 * @param reference the gold-standard token sample
 * @return a new sample pairing the detokenized text with the reference spans
 */
@Override
protected TokenSample processSample(TokenSample reference) {
  String[] goldTokens =
      Span.spansToStrings(reference.getTokenSpans(), reference.getText());
  String rebuiltText = detokenizer.detokenize(goldTokens, null);

  ArrayList<String> predicted = new ArrayList<>();
  ArrayList<String> expected = new ArrayList<>();
  predicted.add(rebuiltText);
  expected.add(reference.getText());

  fmeasure.updateScores(expected.toArray(), predicted.toArray());

  return new TokenSample(rebuiltText, reference.getTokenSpans());
}