/**
 * Splits {@code text} into sentences using the bundled OpenNLP sentence model.
 *
 * @param text the raw text to segment
 * @return the detected sentences, in document order
 * @throws Exception if the model resource cannot be loaded or parsed
 */
private String[] testOpenNLP(String text) throws Exception {
  try (InputStream modelIn = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    // getResourceAsStream returns null when the resource is absent; fail fast
    // with a descriptive message instead of an opaque NPE inside SentenceModel.
    if (modelIn == null) {
      throw new IllegalStateException("Missing model resource: " + RESOURCES_EN_SENT_BIN);
    }
    SentenceModel model = new SentenceModel(modelIn);
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
    return sentenceDetector.sentDetect(text);
  }
}
/**
 * Detects sentence boundaries in {@code text} and returns them as spans
 * (character offsets) rather than extracted strings.
 *
 * @param text the raw text to segment
 * @return one {@link Span} per detected sentence
 * @throws Exception if the model resource cannot be loaded or parsed
 */
private Span[] testOpenNLPPosition(String text) throws Exception {
  try (InputStream modelIn = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    // Null stream means the classpath resource is missing; report it clearly
    // instead of letting SentenceModel throw a NullPointerException.
    if (modelIn == null) {
      throw new IllegalStateException("Missing model resource: " + RESOURCES_EN_SENT_BIN);
    }
    SentenceModel model = new SentenceModel(modelIn);
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
    return sentenceDetector.sentPosDetect(text);
  }
}
/** * Creates a new {@link BratNameSampleStream}. * @param sentModel a {@link SentenceModel} model * @param tokenModel a {@link TokenizerModel} model * @param samples a {@link BratDocument} {@link ObjectStream} * @param nameTypes the name types to use or null if all name types */ public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples, Set<String> nameTypes) { super(samples); // TODO: We can pass in custom validators here ... this.parser = new BratDocumentParser(new SentenceDetectorME(sentModel), new TokenizerME(tokenModel), nameTypes); }
/**
 * Creates a new {@link BratNameSampleStream} that accepts all name types.
 *
 * @param sentModel a {@link SentenceModel} model
 * @param tokenModel a {@link TokenizerModel} model
 * @param samples a {@link BratDocument} {@link ObjectStream}
 */
public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel,
    ObjectStream<BratDocument> samples) {
  // Delegate to the full constructor instead of duplicating its body;
  // a null nameTypes set means "use all name types".
  this(sentModel, tokenModel, samples, null);
}
/**
 * Initializes the current instance with the given context.
 * <p>
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  final SentenceModel sentModel;
  try {
    // The sentence model is shared via a UIMA resource rather than loaded here.
    SentenceModelResource resource =
        (SentenceModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER);
    sentModel = resource.getModel();
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }

  sentenceDetector = new SentenceDetectorME(sentModel);
}
// Build the detector from the sentence model file named on the command line.
// NOTE(review): assumes args[sentenceModelIndex] is a readable model file —
// TODO confirm the caller validates the index/path before this point.
sentenceDetector = new SentenceDetectorME(
    new SentenceModel(new File(args[sentenceModelIndex])));
new SentenceDetectorME(model), errorListener);
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
@Test
public void evalSentenceModel() throws Exception {
  // Hash of all detected sentences must match a known-good value, so this
  // test pins the model's exact segmentation behavior.
  MessageDigest hash = MessageDigest.getInstance(HASH_ALGORITHM);
  SentenceDetector detector = new SentenceDetectorME(new SentenceModel(
      new File(getOpennlpDataDir(), "models-sf/en-sent.bin")));

  // Concatenate the whole Leipzig corpus into one space-joined string.
  StringBuilder corpus = new StringBuilder();
  try (ObjectStream<LeipzigTestSample> batches = new LeipzigTestSampleStream(25,
      SimpleTokenizer.INSTANCE, new MarkableFileInputStreamFactory(
          new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt")))) {
    for (LeipzigTestSample batch = batches.read(); batch != null; batch = batches.read()) {
      corpus.append(String.join(" ", batch.getText())).append(" ");
    }
  }

  for (String sentence : detector.sentDetect(corpus.toString())) {
    hash.update(sentence.getBytes(StandardCharsets.UTF_8));
  }

  Assert.assertEquals(new BigInteger("228544068397077998410949364710969159291"),
      new BigInteger(1, hash.digest()));
}
// Detector instance backed by the sentence model loaded above.
SentenceDetectorME sdetector = new SentenceDetectorME(model);
sentDetector = new SentenceDetectorME(new SentenceModel(params.getSentenceDetectorModel())); } catch (IOException e) { throw new TerminateToolException(-1, "Failed to load sentence detector model!", e);
@Override
protected SentenceDetectorME produceResource(InputStream aStream) throws Exception {
  // Deserialize the sentence model from the stream and wrap it in a detector.
  return new SentenceDetectorME(new SentenceModel(aStream));
}
};
/**
 * For serialization.
 */
public OpenNlpSentenceTokenizer() {
  // Model is intentionally left unset by this no-arg (serialization) constructor.
  sentenceModel = null;
  // The supplier reads this.sentenceModel lazily, on each thread's first get(),
  // so a model assigned later (e.g. after deserialization) is picked up.
  // NOTE(review): if get() runs while sentenceModel is still null,
  // SentenceDetectorME construction will fail — TODO confirm the field is
  // always populated before first use.
  sentenceTokenizer = ThreadLocal.withInitial(() -> new SentenceDetectorME(this.sentenceModel));
  readBufferSize = 1024;
  textBufferSize = 4096;
}
public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples) { super(samples); // TODO: We can pass in custom validators here ... this.parser = new BratDocumentParser(new SentenceDetectorME(sentModel), new TokenizerME(tokenModel)); }
@Override
public List<RawSentence> tokenize( final String sentenceSource ) {
  // Nothing to split when the input is null or empty.
  if ( Strings.isNullOrEmpty(sentenceSource) ) {
    return Collections.emptyList();
  }
  final SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);
  final Span[] spans = detector.sentPosDetect(sentenceSource);
  return Arrays.stream(spans)
      .map(s -> {
        final String covered = s.getCoveredText(sentenceSource).toString();
        return new RawSentence(covered, s.getStart(), s.getEnd());
      })
      .collect(Collectors.toList());
}
}
/**
 * Loads the default sentence-detector model and wraps it in a
 * {@link SentenceDetectorME}.
 *
 * @return a ready-to-use {@link SentenceDetector}
 * @throws IOException if the model resource cannot be read
 */
public static SentenceDetector getDefaultSentenceDetector() throws IOException {
  // try-with-resources: the model stream was previously never closed (leak).
  try (java.io.InputStream modelIn = getResourceAsStream(sentDetectorModelFile)) {
    return new SentenceDetectorME(new SentenceModel(modelIn));
  }
}
/**
 * Loads the default sentence-detector model and wraps it in a
 * {@link SentenceDetectorME}.
 *
 * @return a ready-to-use {@link SentenceDetector}
 * @throws IOException if the model resource cannot be read
 */
public static SentenceDetector getDefaultSentenceDetector() throws IOException {
  // try-with-resources: the model stream was previously never closed (leak).
  try (java.io.InputStream modelIn = getResourceAsStream(sentDetectorModelFile)) {
    return new SentenceDetectorME(new SentenceModel(modelIn));
  }
}
/** * Builds an {@link ApacheExtractor} by instantiating the OpenNLP * Name Finder and Tokenizer. * * @throws IOException */ public ApacheExtractor() throws IOException { nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel))); tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel))); sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel))); }
/** * Builds an {@link ApacheExtractor} by instantiating the OpenNLP * Name Finder and Tokenizer. * * @throws IOException */ public ApacheExtractor() throws IOException { nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel))); tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel))); sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel))); }
// Detector built from the already-loaded sentence model.
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);