/**
 * Builds a {@link SentenceDetectorME} from the classpath resource named by
 * {@code RESOURCES_EN_SENT_BIN} and returns the result of
 * {@code sentPosDetect} for the given text.
 *
 * @param text the text handed to the sentence detector
 * @return the spans produced by {@code sentPosDetect}
 * @throws Exception if opening the resource or building the model fails
 */
private Span[] testOpenNLPPosition(String text) throws Exception {
  // try-with-resources closes the model stream once detection has finished.
  try (InputStream modelStream = getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
    return detector.sentPosDetect(text);
  }
}
/**
 * Builds a {@link SentenceDetectorME} from the classpath resource named by
 * {@code RESOURCES_EN_SENT_BIN} and returns the result of
 * {@code sentDetect} for the given text.
 *
 * @param text the text handed to the sentence detector
 * @return the strings produced by {@code sentDetect}
 * @throws Exception if opening the resource or building the model fails
 */
private String[] testOpenNLP(String text) throws Exception {
  // try-with-resources closes the model stream once detection has finished.
  try (InputStream modelStream = getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
    return detector.sentDetect(text);
  }
}
/**
 * Returns the end-of-sentence characters held by this object's factory.
 *
 * @return the factory's EOS characters, or {@code null} when no factory is available
 */
public char[] getEosCharacters() {
  if (getFactory() == null) {
    return null;
  }
  return getFactory().getEOSCharacters();
}
}
/**
 * Reads a {@link SentenceModel} from the supplied stream.
 *
 * @param modelIn the stream passed to the {@link SentenceModel} constructor
 * @return the newly constructed model
 * @throws IOException declared for failures while reading the stream
 * @throws InvalidFormatException declared for model data that is not valid
 */
@Override
protected SentenceModel loadModel(InputStream modelIn)
    throws IOException, InvalidFormatException {
  SentenceModel loaded = new SentenceModel(modelIn);
  return loaded;
}
/**
 * Runs the sentence detector on the sample's document, scores the outcome
 * against the sample's own sentences, and returns a sample carrying the
 * predicted spans.
 *
 * <p>Both the predicted and the reference spans are passed through
 * {@code trimSpans} before {@code fmeasure} is updated.
 *
 * @param sample the reference sample
 * @return a new {@link SentenceSample} with the same document and the predicted spans
 */
@Override
protected SentenceSample processSample(SentenceSample sample) {
  Span[] detected = sentenceDetector.sentPosDetect(sample.getDocument());
  Span[] predictions = trimSpans(sample.getDocument(), detected);

  Span[] expected = sample.getSentences();
  Span[] references = trimSpans(sample.getDocument(), expected);

  fmeasure.updateScores(references, predictions);

  return new SentenceSample(sample.getDocument(), predictions);
}
/**
 * Creates a sentence detector configured from the given model.
 *
 * <p>The maxent model is taken from the {@link SentenceModel} itself; the
 * context generator, the end-of-sentence scanner and the use-token-end flag
 * are taken from the model's {@link SentenceDetectorFactory}.
 *
 * @param model the {@link SentenceModel} to read the configuration from
 */
public SentenceDetectorME(SentenceModel model) {
  SentenceDetectorFactory factory = model.getFactory();

  this.model = model.getMaxentModel();
  this.cgen = factory.getSDContextGenerator();
  this.scanner = factory.getEndOfSentenceScanner();
  this.useTokenEnd = factory.isUseTokenEnd();
}
/**
 * Trains a sentence model from loose settings by wrapping them in a
 * {@link SentenceDetectorFactory} and delegating to
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}.
 *
 * <p>The factory is created with {@code null} end-of-sentence characters.
 *
 * @param languageCode the language code, passed both to the factory and to the delegate
 * @param samples the training samples
 * @param useTokenEnd the use-token-end flag forwarded to the factory
 * @param abbreviations the abbreviation dictionary forwarded to the factory
 * @param mlParams the training parameters forwarded to the delegate
 * @return the model returned by the delegate {@code train} method
 * @throws IOException if the delegate {@code train} method throws it
 * @deprecated Use
 *     {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 *     and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode,
    ObjectStream<SentenceSample> samples, boolean useTokenEnd,
    Dictionary abbreviations, TrainingParameters mlParams) throws IOException {
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
/**
 * Creates a cross validator with a default {@link SentenceDetectorFactory}.
 *
 * <p>The factory is built with the given language code, use-token-end set to
 * {@code true}, and {@code null} for both the abbreviation dictionary and the
 * end-of-sentence characters. No evaluation monitors are registered.
 *
 * @param languageCode the language code, also passed to the default factory
 * @param params the training parameters
 * @deprecated Use
 *     {@link #SDCrossValidator(String, TrainingParameters,
 *     SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)}
 *     and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params) {
  this(languageCode, params,
      new SentenceDetectorFactory(languageCode, true, null, null));
}
/**
 * Trains a sentence model with the given factory, using the language code
 * {@code "eng"}, the stream from {@code createSampleStream()} and the default
 * training parameters.
 *
 * @param factory the factory handed to {@code SentenceDetectorME.train}
 * @return the trained model
 * @throws IOException if creating the sample stream or training throws it
 */
private static SentenceModel train(SentenceDetectorFactory factory) throws IOException {
  ObjectStream<SentenceSample> samples = createSampleStream();
  TrainingParameters params = TrainingParameters.defaultParams();
  return SentenceDetectorME.train("eng", samples, factory, params);
}
/**
 * Returns the abbreviation dictionary held by this object's factory.
 *
 * @return the factory's abbreviation dictionary, or {@code null} when no factory is available
 */
public Dictionary getAbbreviations() {
  if (getFactory() == null) {
    return null;
  }
  return getFactory().getAbbreviationDictionary();
}
/**
 * Reports the use-token-end setting of this object's factory.
 *
 * @return {@code true} when no factory is available; otherwise the value of
 *     the factory's {@code isUseTokenEnd()}
 */
public boolean useTokenEnd() {
  // A missing factory defaults to true.
  if (getFactory() == null) {
    return true;
  }
  return getFactory().isUseTokenEnd();
}
/**
 * Creates a {@code DummySDContextGenerator} from this factory's abbreviation
 * dictionary (as a string set) and its end-of-sentence characters.
 *
 * @return a new context generator instance
 */
@Override
public SDContextGenerator getSDContextGenerator() {
  Dictionary abbreviations = getAbbreviationDictionary();
  return new DummySDContextGenerator(abbreviations.asStringSet(), getEOSCharacters());
}
/**
 * Initializes the factory through the superclass and then stores the
 * abbreviation dictionary wrapped in a {@code DummyDictionary}.
 *
 * @param languageCode the language code forwarded to the superclass
 * @param useTokenEnd the use-token-end flag forwarded to the superclass
 * @param abbreviationDictionary the dictionary forwarded to the superclass
 *     and wrapped into {@code dict}
 * @param eosCharacters the end-of-sentence characters forwarded to the superclass
 */
@Override
protected void init(
    String languageCode,
    boolean useTokenEnd,
    Dictionary abbreviationDictionary,
    char[] eosCharacters) {
  super.init(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters);

  // Assigned after super.init, matching the original statement order.
  this.dict = new DummyDictionary(abbreviationDictionary);
}
/**
 * Creates a {@link SentenceDetectorFactory} programmatically by passing all
 * arguments straight to {@code init}.
 *
 * @param languageCode the language code handed to {@code init}
 * @param useTokenEnd the use-token-end flag handed to {@code init}
 * @param abbreviationDictionary the abbreviation dictionary handed to {@code init}
 * @param eosCharacters the end-of-sentence characters handed to {@code init}
 */
public SentenceDetectorFactory(
    String languageCode,
    boolean useTokenEnd,
    Dictionary abbreviationDictionary,
    char[] eosCharacters) {
  init(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters);
}
/**
 * Creates a {@code DummyEOSScanner} from this factory's end-of-sentence characters.
 *
 * @return a new scanner instance
 */
@Override
public EndOfSentenceScanner getEndOfSentenceScanner() {
  char[] eosChars = getEOSCharacters();
  return new DummyEOSScanner(eosChars);
}
/**
 * Extends the superclass serializer map with a {@code DummyDictionarySerializer}
 * stored under the {@code DUMMY_DICT} key.
 *
 * @return the map obtained from the superclass, with the extra entry added
 */
@Override
@SuppressWarnings("rawtypes")
public Map<String, ArtifactSerializer> createArtifactSerializersMap() {
  Map<String, ArtifactSerializer> byName = super.createArtifactSerializersMap();
  byName.put(DUMMY_DICT, new DummyDictionarySerializer());
  return byName;
}
/**
 * Writes the dictionary to the stream by delegating to the artifact's own
 * {@code serialize} method.
 *
 * @param artifact the dictionary to write
 * @param out the stream passed to {@code artifact.serialize}
 * @throws IOException if the delegated call throws it
 */
public void serialize(DummyDictionary artifact, OutputStream out)
    throws IOException {
  artifact.serialize(out);
}
}
/**
 * Reads a {@link SentenceModel} from the supplied stream.
 *
 * @param in the stream passed to the {@link SentenceModel} constructor
 * @return the newly constructed model
 * @throws IOException declared for failures while reading the stream
 */
@Override
protected SentenceModel loadModel(InputStream in) throws IOException {
  SentenceModel loaded = new SentenceModel(in);
  return loaded;
}
}
/**
 * Creates a sentence model from loose settings by wrapping them in a new
 * {@link SentenceDetectorFactory} and delegating to
 * {@link #SentenceModel(String, MaxentModel, Map, SentenceDetectorFactory)}.
 *
 * <p>TODO: was added in 1.5.3 -> remove
 *
 * @param languageCode the language code, passed both to the factory and to the delegate
 * @param sentModel the maxent model forwarded to the delegate constructor
 * @param useTokenEnd the use-token-end flag forwarded to the factory
 * @param abbreviations the abbreviation dictionary forwarded to the factory
 * @param eosCharacters the end-of-sentence characters forwarded to the factory
 * @param manifestInfoEntries the manifest entries forwarded to the delegate constructor
 * @deprecated Use
 *     {@link #SentenceModel(String, MaxentModel, Map, SentenceDetectorFactory)}
 *     instead and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SentenceModel(String languageCode, MaxentModel sentModel,
    boolean useTokenEnd, Dictionary abbreviations, char[] eosCharacters,
    Map<String, String> manifestInfoEntries) {
  this(languageCode, sentModel, manifestInfoEntries,
      new SentenceDetectorFactory(languageCode, useTokenEnd, abbreviations,
          eosCharacters));
}
/**
 * Creates a cross validator with a default {@link SentenceDetectorFactory}
 * and the given evaluation monitors.
 *
 * <p>The factory is built with the given language code, use-token-end set to
 * {@code true}, and {@code null} for both the abbreviation dictionary and the
 * end-of-sentence characters.
 *
 * @param languageCode the language code, also passed to the default factory
 * @param params the training parameters
 * @param listeners the evaluation monitors forwarded to the delegate constructor
 * @deprecated Use
 *     {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory,
 *     SentenceDetectorEvaluationMonitor...)}
 *     instead and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params,
    SentenceDetectorEvaluationMonitor... listeners) {
  this(languageCode, params,
      new SentenceDetectorFactory(languageCode, true, null, null), listeners);
}