/**
 * Looks up the abbreviation dictionary through this object's factory.
 *
 * @return the dictionary reported by the factory's
 *         {@code getAbbreviationDictionary()}, or {@code null} when
 *         {@code getFactory()} yields {@code null}
 */
public Dictionary getAbbreviations() {
  // Without a factory there is nothing to delegate to.
  if (getFactory() == null) {
    return null;
  }
  return getFactory().getAbbreviationDictionary();
}
/**
 * Creates the sentence-detection context generator for this factory.
 *
 * <p>The abbreviation dictionary, when one is available, is handed over as a
 * set of strings; otherwise an empty set is used. If end-of-sentence
 * characters are available (non-null and non-empty) the generator is created
 * from the abbreviations and those characters; otherwise it is created from
 * the language code and the abbreviations.
 *
 * @return the generator produced by a freshly constructed {@code Factory}
 */
public SDContextGenerator getSDContextGenerator() {
  Factory generatorFactory = new Factory();
  char[] eosCharacters = getEOSCharacters();
  Dictionary abbreviationDict = getAbbreviationDictionary();

  Set<String> abbreviations = abbreviationDict != null
      ? abbreviationDict.asStringSet()
      : Collections.<String>emptySet();

  // No usable end-of-sentence characters: let the language code drive creation.
  boolean eosMissing = eosCharacters == null || eosCharacters.length == 0;
  if (eosMissing) {
    return generatorFactory.createSentenceContextGenerator(this.languageCode, abbreviations);
  }
  return generatorFactory.createSentenceContextGenerator(abbreviations, eosCharacters);
}
/**
 * Creates a factory by class name via {@code SentenceDetectorFactory.create}
 * and checks that it exposes the dummy dictionary, context generator and
 * end-of-sentence scanner, and that it reports the EOS characters it was
 * given.
 *
 * @throws IOException declared by the test; presumably raised by
 *         {@code loadAbbDictionary()} - confirm against that helper
 */
@Test
public void testCreateDummyFactory() throws IOException {
  Dictionary dic = loadAbbDictionary();
  char[] eos = {'.', '?'};

  SentenceDetectorFactory factory = SentenceDetectorFactory.create(
      DummySentenceDetectorFactory.class.getCanonicalName(), "spa", false, dic, eos);

  Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
  // assertArrayEquals pinpoints the differing element on failure, whereas
  // assertTrue(Arrays.equals(...)) only reports a bare assertion error.
  Assert.assertArrayEquals(eos, factory.getEOSCharacters());
}
/**
 * Trains a model with a {@code DummySentenceDetectorFactory} and checks that
 * the model's factory exposes the dummy components and the configured EOS
 * characters, both directly after training and after the model has been
 * serialized to a byte array and read back.
 *
 * @throws IOException declared by the test; the visible calls that may raise
 *         it are loading the dictionary, training, serializing and
 *         deserializing - confirm against those helpers
 */
@Test
public void testDummyFactory() throws IOException {
  Dictionary dic = loadAbbDictionary();
  char[] eos = {'.', '?'};

  SentenceModel sdModel = train(new DummySentenceDetectorFactory("eng", true, dic, eos));

  // Factory as held by the freshly trained model.
  SentenceDetectorFactory factory = sdModel.getFactory();
  Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
  // assertArrayEquals pinpoints the differing element on failure, whereas
  // assertTrue(Arrays.equals(...)) only reports a bare assertion error.
  Assert.assertArrayEquals(eos, factory.getEOSCharacters());

  // Round-trip the model through its serialized form.
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  sdModel.serialize(out);
  ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
  SentenceModel fromSerialized = new SentenceModel(in);

  // Factory as restored from the serialized model.
  factory = fromSerialized.getFactory();
  Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
  Assert.assertArrayEquals(eos, factory.getEOSCharacters());

  // The restored factory must agree with what the original model reports.
  Assert.assertEquals(factory.getAbbreviationDictionary(), sdModel.getAbbreviations());
  Assert.assertArrayEquals(factory.getEOSCharacters(), sdModel.getEosCharacters());
}
/**
 * Trains a model with a {@code SentenceDetectorFactory} that has no
 * abbreviation dictionary and checks that the factory reports a
 * {@code null} dictionary, the default context generator and scanner, and
 * the configured EOS characters - both directly after training and after a
 * serialize/deserialize round trip.
 *
 * @throws IOException declared by the test; the visible calls that may raise
 *         it are training, serializing and deserializing - confirm against
 *         those helpers
 */
@Test
public void testNullDict() throws IOException {
  Dictionary dic = null;
  char[] eos = {'.', '?'};

  SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true, dic, eos));

  // Factory as held by the freshly trained model.
  SentenceDetectorFactory factory = sdModel.getFactory();
  Assert.assertNull(factory.getAbbreviationDictionary());
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  // assertArrayEquals pinpoints the differing element on failure, whereas
  // assertTrue(Arrays.equals(...)) only reports a bare assertion error.
  Assert.assertArrayEquals(eos, factory.getEOSCharacters());

  // Round-trip the model through its serialized form.
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  sdModel.serialize(out);
  ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
  SentenceModel fromSerialized = new SentenceModel(in);

  // Factory as restored from the serialized model.
  factory = fromSerialized.getFactory();
  Assert.assertNull(factory.getAbbreviationDictionary());
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertArrayEquals(eos, factory.getEOSCharacters());
}
/**
 * Trains a model with a {@code SentenceDetectorFactory} given neither an
 * abbreviation dictionary nor EOS characters and checks that the factory
 * reports a {@code null} dictionary, the default context generator and
 * scanner, and {@code Factory.defaultEosCharacters} - both directly after
 * training and after a serialize/deserialize round trip.
 *
 * @throws IOException declared by the test; the visible calls that may raise
 *         it are training, serializing and deserializing - confirm against
 *         those helpers
 */
@Test
public void testDefaultEOS() throws IOException {
  Dictionary dic = null;
  char[] eos = null;

  SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true, dic, eos));

  // Factory as held by the freshly trained model.
  SentenceDetectorFactory factory = sdModel.getFactory();
  Assert.assertNull(factory.getAbbreviationDictionary());
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  // assertArrayEquals pinpoints the differing element on failure, whereas
  // assertTrue(Arrays.equals(...)) only reports a bare assertion error.
  Assert.assertArrayEquals(Factory.defaultEosCharacters, factory.getEOSCharacters());

  // Round-trip the model through its serialized form.
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  sdModel.serialize(out);
  ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
  SentenceModel fromSerialized = new SentenceModel(in);

  // Factory as restored from the serialized model.
  factory = fromSerialized.getFactory();
  Assert.assertNull(factory.getAbbreviationDictionary());
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertArrayEquals(Factory.defaultEosCharacters, factory.getEOSCharacters());
}
/**
 * Looks up the abbreviation dictionary through this object's factory.
 *
 * @return the dictionary reported by the factory's
 *         {@code getAbbreviationDictionary()}, or {@code null} when
 *         {@code getFactory()} yields {@code null}
 */
public Dictionary getAbbreviations() {
  // Without a factory there is nothing to delegate to.
  if (getFactory() == null) {
    return null;
  }
  return getFactory().getAbbreviationDictionary();
}
/**
 * Looks up the abbreviation dictionary through this object's factory.
 *
 * @return the dictionary reported by the factory's
 *         {@code getAbbreviationDictionary()}, or {@code null} when
 *         {@code getFactory()} yields {@code null}
 */
public Dictionary getAbbreviations() {
  // Without a factory there is nothing to delegate to.
  if (getFactory() == null) {
    return null;
  }
  return getFactory().getAbbreviationDictionary();
}
/**
 * Creates the sentence-detection context generator for this factory.
 *
 * <p>The abbreviation dictionary, when one is available, is handed over as a
 * set of strings; otherwise an empty set is used. If end-of-sentence
 * characters are available (non-null and non-empty) the generator is created
 * from the abbreviations and those characters; otherwise it is created from
 * the language code and the abbreviations.
 *
 * @return the generator produced by a freshly constructed {@code Factory}
 */
public SDContextGenerator getSDContextGenerator() {
  Factory generatorFactory = new Factory();
  char[] eosCharacters = getEOSCharacters();
  Dictionary abbreviationDict = getAbbreviationDictionary();

  Set<String> abbreviations = abbreviationDict != null
      ? abbreviationDict.asStringSet()
      : Collections.<String>emptySet();

  // No usable end-of-sentence characters: let the language code drive creation.
  boolean eosMissing = eosCharacters == null || eosCharacters.length == 0;
  if (eosMissing) {
    return generatorFactory.createSentenceContextGenerator(this.languageCode, abbreviations);
  }
  return generatorFactory.createSentenceContextGenerator(abbreviations, eosCharacters);
}
/**
 * Creates the sentence-detection context generator for this factory.
 *
 * <p>The abbreviation dictionary, when one is available, is handed over as a
 * set of strings; otherwise an empty set is used. If end-of-sentence
 * characters are available (non-null and non-empty) the generator is created
 * from the abbreviations and those characters; otherwise it is created from
 * the language code and the abbreviations.
 *
 * @return the generator produced by a freshly constructed {@code Factory}
 */
public SDContextGenerator getSDContextGenerator() {
  Factory generatorFactory = new Factory();
  char[] eosCharacters = getEOSCharacters();
  Dictionary abbreviationDict = getAbbreviationDictionary();

  Set<String> abbreviations = abbreviationDict != null
      ? abbreviationDict.asStringSet()
      : Collections.<String>emptySet();

  // No usable end-of-sentence characters: let the language code drive creation.
  boolean eosMissing = eosCharacters == null || eosCharacters.length == 0;
  if (eosMissing) {
    return generatorFactory.createSentenceContextGenerator(this.languageCode, abbreviations);
  }
  return generatorFactory.createSentenceContextGenerator(abbreviations, eosCharacters);
}