/**
 * Creates a cross validator backed by a default {@link SentenceDetectorFactory},
 * built as {@code new SentenceDetectorFactory(languageCode, true, null, null)}.
 *
 * @param languageCode the language code; passed both to the default factory
 *                     and to the delegated constructor
 * @param params the training parameters, passed on to the delegated constructor
 *
 * @deprecated Use
 *             {@link #SDCrossValidator(String, TrainingParameters,
 *             SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)}
 *             and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params) {
  this(languageCode, params, new SentenceDetectorFactory(languageCode, true, null, null));
}
/**
 * Creates a factory through {@code SentenceDetectorFactory.create} using the
 * class name of {@code DummySentenceDetectorFactory} and checks that the
 * returned factory hands out the dummy dictionary, context generator and
 * end-of-sentence scanner, and reports the EOS characters it was given.
 *
 * @throws IOException declared by the test; presumably raised when the
 *                     abbreviation dictionary cannot be loaded
 */
@Test
public void testCreateDummyFactory() throws IOException {
  Dictionary dic = loadAbbDictionary();
  char[] eos = {'.', '?'};

  SentenceDetectorFactory factory = SentenceDetectorFactory.create(
      DummySentenceDetectorFactory.class.getCanonicalName(), "spa", false, dic, eos);

  Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
  // assertArrayEquals reports the first differing element on failure,
  // unlike assertTrue(Arrays.equals(...)) which only reports "false".
  Assert.assertArrayEquals(eos, factory.getEOSCharacters());
}
public static SentenceDetectorFactory create(String subclassName, String languageCode, boolean useTokenEnd, Dictionary abbreviationDictionary, char[] eosCharacters) throws InvalidFormatException { if (subclassName == null) { // will create the default factory return new SentenceDetectorFactory(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters); } try { SentenceDetectorFactory theFactory = ExtensionLoader .instantiateExtension(SentenceDetectorFactory.class, subclassName); theFactory.init(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters); return theFactory; } catch (Exception e) { String msg = "Could not instantiate the " + subclassName + ". The initialization throw an exception."; System.err.println(msg); e.printStackTrace(); throw new InvalidFormatException(msg, e); } }
@Override public Map<String, String> createManifestEntries() { Map<String, String> manifestEntries = super.createManifestEntries(); manifestEntries.put(TOKEN_END_PROPERTY, Boolean.toString(isUseTokenEnd())); // EOS characters are optional if (getEOSCharacters() != null) manifestEntries.put(EOS_CHARACTERS_PROPERTY, eosCharArrayToString(getEOSCharacters())); return manifestEntries; }
/**
 * Initializes the current instance from a {@link SentenceModel}: the maxent
 * model is taken from the model itself, while the context generator, the
 * end-of-sentence scanner and the use-token-end flag come from the model's
 * factory.
 *
 * @param model the {@link SentenceModel}
 */
public SentenceDetectorME(SentenceModel model) {
  // NOTE(review): the factory is dereferenced without a null check.
  SentenceDetectorFactory factory = model.getFactory();
  this.model = model.getMaxentModel();

  this.cgen = factory.getSDContextGenerator();
  this.scanner = factory.getEndOfSentenceScanner();
  this.useTokenEnd = factory.isUseTokenEnd();
}
SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create( params.getFactory(), params.getLang(), true, abbreviations, eos); validator = new SDCrossValidator(params.getLang(), mlParams, sdFactory,
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, SentenceDetectorFactory sdFactory, TrainingParameters mlParams) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<>(); // TODO: Fix the EventStream to throw exceptions when training goes wrong ObjectStream<Event> eventStream = new SDEventStream(samples, sdFactory.getSDContextGenerator(), sdFactory.getEndOfSentenceScanner()); EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries); MaxentModel sentModel = trainer.train(eventStream); return new SentenceModel(languageCode, sentModel, manifestInfoEntries, sdFactory); }
/**
 * Creates the sentence context generator.
 * <p>
 * The abbreviation set is taken from the abbreviation dictionary, or is empty
 * when there is no dictionary. When EOS characters are available the generator
 * is created from them; otherwise it is created from the language code.
 *
 * @return the context generator produced by {@code Factory}
 */
public SDContextGenerator getSDContextGenerator() {
  Factory factory = new Factory();
  char[] eos = getEOSCharacters();
  Dictionary dictionary = getAbbreviationDictionary();

  Set<String> abbreviations = dictionary != null
      ? dictionary.asStringSet()
      : Collections.<String>emptySet();

  // No explicit EOS characters: fall back to the language-based variant.
  if (eos == null || eos.length == 0) {
    return factory.createSentenceContextGenerator(this.languageCode, abbreviations);
  }
  return factory.createSentenceContextGenerator(abbreviations, eos);
}
/**
 * Returns the end-of-sentence characters reported by the factory.
 *
 * @return the factory's EOS characters, or {@code null} when there is no
 *         factory
 */
public char[] getEosCharacters() {
  if (getFactory() == null) {
    return null;
  }
  return getFactory().getEOSCharacters();
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Creates a {@link SentenceDetectorFactory}. Use this constructor to
 * programmatically create a factory. All arguments are handed unchanged to
 * {@code init}.
 *
 * @param languageCode the language code of the factory
 * @param useTokenEnd the use-token-end setting of the factory
 * @param abbreviationDictionary the abbreviation dictionary
 * @param eosCharacters the end-of-sentence characters
 */
public SentenceDetectorFactory(String languageCode, boolean useTokenEnd,
    Dictionary abbreviationDictionary, char[] eosCharacters) {
  init(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters);
}
/**
 * Returns the abbreviation dictionary reported by the factory.
 *
 * @return the factory's abbreviation dictionary, or {@code null} when there
 *         is no factory
 */
public Dictionary getAbbreviations() {
  if (getFactory() == null) {
    return null;
  }
  return getFactory().getAbbreviationDictionary();
}
public char[] getEOSCharacters() { if (this.eosCharacters == null) { if (artifactProvider != null) { String prop = this.artifactProvider .getManifestProperty(EOS_CHARACTERS_PROPERTY); if (prop != null) { this.eosCharacters = eosStringToCharArray(prop); } } else { // get from language dependent factory Factory f = new Factory(); this.eosCharacters = f.getEOSCharacters(languageCode); } } return this.eosCharacters; }
/**
 * Initializes the current instance from a {@link SentenceModel}: the maxent
 * model is taken from the model itself, while the context generator, the
 * end-of-sentence scanner and the use-token-end flag come from the model's
 * factory.
 *
 * @param model the {@link SentenceModel}
 */
public SentenceDetectorME(SentenceModel model) {
  // NOTE(review): the factory is dereferenced without a null check.
  SentenceDetectorFactory factory = model.getFactory();
  this.model = model.getMaxentModel();

  this.cgen = factory.getSDContextGenerator();
  this.scanner = factory.getEndOfSentenceScanner();
  this.useTokenEnd = factory.isUseTokenEnd();
}
@Override public Map<String, String> createManifestEntries() { Map<String, String> manifestEntries = super.createManifestEntries(); manifestEntries.put(TOKEN_END_PROPERTY, Boolean.toString(isUseTokenEnd())); // EOS characters are optional if (getEOSCharacters() != null) manifestEntries.put(EOS_CHARACTERS_PROPERTY, eosCharArrayToString(getEOSCharacters())); return manifestEntries; }
SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create( params.getFactory(), params.getLang(), true, dict, eos); model = SentenceDetectorME.train(params.getLang(), sampleStream,
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, SentenceDetectorFactory sdFactory, TrainingParameters mlParams) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<>(); // TODO: Fix the EventStream to throw exceptions when training goes wrong ObjectStream<Event> eventStream = new SDEventStream(samples, sdFactory.getSDContextGenerator(), sdFactory.getEndOfSentenceScanner()); EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries); MaxentModel sentModel = trainer.train(eventStream); return new SentenceModel(languageCode, sentModel, manifestInfoEntries, sdFactory); }
/**
 * Creates the sentence context generator.
 * <p>
 * The abbreviation set is taken from the abbreviation dictionary, or is empty
 * when there is no dictionary. When EOS characters are available the generator
 * is created from them; otherwise it is created from the language code.
 *
 * @return the context generator produced by {@code Factory}
 */
public SDContextGenerator getSDContextGenerator() {
  Factory factory = new Factory();
  char[] eos = getEOSCharacters();
  Dictionary dictionary = getAbbreviationDictionary();

  Set<String> abbreviations = dictionary != null
      ? dictionary.asStringSet()
      : Collections.<String>emptySet();

  // No explicit EOS characters: fall back to the language-based variant.
  if (eos == null || eos.length == 0) {
    return factory.createSentenceContextGenerator(this.languageCode, abbreviations);
  }
  return factory.createSentenceContextGenerator(abbreviations, eos);
}
/**
 * Creates the end-of-sentence scanner.
 * <p>
 * When EOS characters are available the scanner is created from them;
 * otherwise it is created from the language code.
 *
 * @return the scanner produced by {@code Factory}
 */
public EndOfSentenceScanner getEndOfSentenceScanner() {
  Factory factory = new Factory();
  char[] eos = getEOSCharacters();

  // No explicit EOS characters: fall back to the language-based variant.
  if (eos == null || eos.length == 0) {
    return factory.createEndOfSentenceScanner(this.languageCode);
  }
  return factory.createEndOfSentenceScanner(eos);
}
/**
 * Runs the superclass initialization and then wraps the given abbreviation
 * dictionary in a {@code DummyDictionary}, which is stored in {@code dict}.
 *
 * @param languageCode passed through to the superclass
 * @param useTokenEnd passed through to the superclass
 * @param abbreviationDictionary passed through to the superclass and wrapped
 *                               in the {@code DummyDictionary}
 * @param eosCharacters passed through to the superclass
 */
@Override
protected void init(String languageCode, boolean useTokenEnd,
    Dictionary abbreviationDictionary, char[] eosCharacters) {
  super.init(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters);

  dict = new DummyDictionary(abbreviationDictionary);
}
/**
 * Returns the abbreviation dictionary reported by the factory.
 *
 * @return the factory's abbreviation dictionary, or {@code null} when there
 *         is no factory
 */
public Dictionary getAbbreviations() {
  if (getFactory() == null) {
    return null;
  }
  return getFactory().getAbbreviationDictionary();
}