/**
 * Splits {@code text} into sentences using the bundled OpenNLP sentence model.
 *
 * @param text the raw text to segment
 * @return the detected sentences, in document order
 * @throws Exception if the model resource cannot be loaded or parsed
 */
private String[] testOpenNLP(String text) throws Exception {
  try (InputStream modelIn = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    // getResourceAsStream returns null when the resource is absent; fail fast
    // with a descriptive message instead of an opaque NPE inside SentenceModel.
    if (modelIn == null) {
      throw new IllegalStateException("Missing model resource: " + RESOURCES_EN_SENT_BIN);
    }
    SentenceModel model = new SentenceModel(modelIn);
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
    return sentenceDetector.sentDetect(text);
  }
}
/**
 * Detects sentence boundaries in {@code text} and returns them as spans
 * (character offsets) rather than extracted strings.
 *
 * @param text the raw text to segment
 * @return one {@link Span} per detected sentence
 * @throws Exception if the model resource cannot be loaded or parsed
 */
private Span[] testOpenNLPPosition(String text) throws Exception {
  try (InputStream modelIn = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    // Null stream means the classpath resource is missing; report it clearly
    // instead of letting SentenceModel throw a NullPointerException.
    if (modelIn == null) {
      throw new IllegalStateException("Missing model resource: " + RESOURCES_EN_SENT_BIN);
    }
    SentenceModel model = new SentenceModel(modelIn);
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
    return sentenceDetector.sentPosDetect(text);
  }
}
/** * Creates a new {@link BratNameSampleStream}. * @param sentModel a {@link SentenceModel} model * @param tokenModel a {@link TokenizerModel} model * @param samples a {@link BratDocument} {@link ObjectStream} * @param nameTypes the name types to use or null if all name types */ public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples, Set<String> nameTypes) { super(samples); // TODO: We can pass in custom validators here ... this.parser = new BratDocumentParser(new SentenceDetectorME(sentModel), new TokenizerME(tokenModel), nameTypes); }
/**
 * Creates a new {@link BratNameSampleStream} that accepts all name types.
 *
 * @param sentModel a {@link SentenceModel} model
 * @param tokenModel a {@link TokenizerModel} model
 * @param samples a {@link BratDocument} {@link ObjectStream}
 */
public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel,
    ObjectStream<BratDocument> samples) {
  // Delegate to the full constructor instead of duplicating its body;
  // a null nameTypes set means "use all name types".
  this(sentModel, tokenModel, samples, null);
}
/**
 * Initializes the current instance with the given context.
 * <p>
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  final SentenceModel sentModel;
  try {
    // The sentence model is shared via a UIMA resource rather than loaded here.
    SentenceModelResource resource =
        (SentenceModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER);
    sentModel = resource.getModel();
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }

  sentenceDetector = new SentenceDetectorME(sentModel);
}
// Build the detector from the sentence model file named on the command line.
// NOTE(review): assumes args[sentenceModelIndex] is a readable model file —
// TODO confirm the caller validates the index/path before this point.
sentenceDetector = new SentenceDetectorME(
    new SentenceModel(new File(args[sentenceModelIndex])));
new SentenceDetectorME(model), errorListener);
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
@Test
public void evalSentenceModel() throws Exception {
  // Hash of all detected sentences must match a known-good value, so this
  // test pins the model's exact segmentation behavior.
  MessageDigest hash = MessageDigest.getInstance(HASH_ALGORITHM);
  SentenceDetector detector = new SentenceDetectorME(new SentenceModel(
      new File(getOpennlpDataDir(), "models-sf/en-sent.bin")));

  // Concatenate the whole Leipzig corpus into one space-joined string.
  StringBuilder corpus = new StringBuilder();
  try (ObjectStream<LeipzigTestSample> batches = new LeipzigTestSampleStream(25,
      SimpleTokenizer.INSTANCE, new MarkableFileInputStreamFactory(
          new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt")))) {
    for (LeipzigTestSample batch = batches.read(); batch != null; batch = batches.read()) {
      corpus.append(String.join(" ", batch.getText())).append(" ");
    }
  }

  for (String sentence : detector.sentDetect(corpus.toString())) {
    hash.update(sentence.getBytes(StandardCharsets.UTF_8));
  }

  Assert.assertEquals(new BigInteger("228544068397077998410949364710969159291"),
      new BigInteger(1, hash.digest()));
}
// Detector instance backed by the sentence model loaded above.
SentenceDetectorME sdetector = new SentenceDetectorME(model);
sentDetector = new SentenceDetectorME(new SentenceModel(params.getSentenceDetectorModel())); } catch (IOException e) { throw new TerminateToolException(-1, "Failed to load sentence detector model!", e);
@Override
protected SentenceDetectorME produceResource(InputStream aStream) throws Exception {
  // Deserialize the sentence model from the stream and wrap it in a detector.
  return new SentenceDetectorME(new SentenceModel(aStream));
}
};
/**
 * For serialization.
 */
public OpenNlpSentenceTokenizer() {
  // Model is intentionally left unset by this no-arg (serialization) constructor.
  sentenceModel = null;
  // The supplier reads this.sentenceModel lazily, on each thread's first get(),
  // so a model assigned later (e.g. after deserialization) is picked up.
  // NOTE(review): if get() runs while sentenceModel is still null,
  // SentenceDetectorME construction will fail — TODO confirm the field is
  // always populated before first use.
  sentenceTokenizer = ThreadLocal.withInitial(() -> new SentenceDetectorME(this.sentenceModel));
  readBufferSize = 1024;
  textBufferSize = 4096;
}
public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples) { super(samples); // TODO: We can pass in custom validators here ... this.parser = new BratDocumentParser(new SentenceDetectorME(sentModel), new TokenizerME(tokenModel)); }
@Override
public List<RawSentence> tokenize( final String sentenceSource ) {
  // Nothing to split when the input is null or empty.
  if ( Strings.isNullOrEmpty(sentenceSource) ) {
    return Collections.emptyList();
  }
  final SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);
  final Span[] spans = detector.sentPosDetect(sentenceSource);
  return Arrays.stream(spans)
      .map(s -> {
        final String covered = s.getCoveredText(sentenceSource).toString();
        return new RawSentence(covered, s.getStart(), s.getEnd());
      })
      .collect(Collectors.toList());
}
}
/**
 * Loads the default sentence-detector model and wraps it in a
 * {@link SentenceDetectorME}.
 *
 * @return a ready-to-use {@link SentenceDetector}
 * @throws IOException if the model resource cannot be read
 */
public static SentenceDetector getDefaultSentenceDetector() throws IOException {
  // try-with-resources: the model stream was previously never closed (leak).
  try (java.io.InputStream modelIn = getResourceAsStream(sentDetectorModelFile)) {
    return new SentenceDetectorME(new SentenceModel(modelIn));
  }
}
/**
 * Loads the default sentence-detector model and wraps it in a
 * {@link SentenceDetectorME}.
 *
 * @return a ready-to-use {@link SentenceDetector}
 * @throws IOException if the model resource cannot be read
 */
public static SentenceDetector getDefaultSentenceDetector() throws IOException {
  // try-with-resources: the model stream was previously never closed (leak).
  try (java.io.InputStream modelIn = getResourceAsStream(sentDetectorModelFile)) {
    return new SentenceDetectorME(new SentenceModel(modelIn));
  }
}
/** * Builds an {@link ApacheExtractor} by instantiating the OpenNLP * Name Finder and Tokenizer. * * @throws IOException */ public ApacheExtractor() throws IOException { nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel))); tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel))); sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel))); }
/** * Builds an {@link ApacheExtractor} by instantiating the OpenNLP * Name Finder and Tokenizer. * * @throws IOException */ public ApacheExtractor() throws IOException { nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel))); tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel))); sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel))); }
// Detector built from the already-loaded sentence model.
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);