/**
 * Trains a sentence model for the "eng" language code from the sample stream,
 * using the supplied factory and the default training parameters.
 *
 * @param factory the sentence detector factory handed to the trainer
 * @return the model returned by {@code SentenceDetectorME.train}
 * @throws IOException propagated from the sample stream creation or training calls
 */
private static SentenceModel train(SentenceDetectorFactory factory) throws IOException {
  final String languageCode = "eng";
  final SentenceModel trained = SentenceDetectorME.train(
      languageCode, createSampleStream(), factory, TrainingParameters.defaultParams());
  return trained;
}
/**
 * Trains a model with the stock {@code SentenceDetectorFactory}, a loaded abbreviation
 * dictionary and custom end-of-sentence characters, then checks the factory exposed by
 * the model both right after training and after a serialize/deserialize round trip.
 *
 * @throws IOException propagated from dictionary loading, training or (de)serialization
 */
@Test
public void testDefault() throws IOException {
  Dictionary dic = loadAbbDictionary();
  char[] eos = {'.', '?'};
  SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true, dic, eos));

  SentenceDetectorFactory factory = sdModel.getFactory();
  // A dictionary was supplied to the factory, so the trained model is expected to expose one.
  Assert.assertNotNull(factory.getAbbreviationDictionary());
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));

  // Round trip the model through an in-memory byte buffer.
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  sdModel.serialize(out);
  ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
  SentenceModel fromSerialized = new SentenceModel(in);

  // The deserialized model must expose the same configuration, including the dictionary.
  factory = fromSerialized.getFactory();
  Assert.assertNotNull(factory.getAbbreviationDictionary());
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
}
/**
 * Trains a model with the stock {@code SentenceDetectorFactory} and a {@code null}
 * abbreviation dictionary, then verifies that the factory reports no dictionary, the
 * default context generator and scanner, and the configured end-of-sentence characters,
 * both before and after a serialize/deserialize round trip.
 *
 * @throws IOException propagated from training or (de)serialization
 */
@Test
public void testNullDict() throws IOException {
  final Dictionary noAbbreviations = null;
  final char[] eosChars = new char[] {'.', '?'};

  final SentenceModel trained =
      train(new SentenceDetectorFactory("eng", true, noAbbreviations, eosChars));

  // Checks against the freshly trained model.
  final SentenceDetectorFactory trainedFactory = trained.getFactory();
  Assert.assertNull(trainedFactory.getAbbreviationDictionary());
  Assert.assertTrue(trainedFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      trainedFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(Arrays.equals(eosChars, trainedFactory.getEOSCharacters()));

  // Write the model to memory and read it back.
  final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  final SentenceModel restored =
      new SentenceModel(new ByteArrayInputStream(buffer.toByteArray()));

  // Same checks against the deserialized model.
  final SentenceDetectorFactory restoredFactory = restored.getFactory();
  Assert.assertNull(restoredFactory.getAbbreviationDictionary());
  Assert.assertTrue(restoredFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      restoredFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(Arrays.equals(eosChars, restoredFactory.getEOSCharacters()));
}
/**
 * Creates a {@code DummySentenceDetectorFactory} through
 * {@code SentenceDetectorFactory.create} using its canonical class name, and verifies
 * that the resulting factory hands out the dummy dictionary, context generator and
 * scanner types together with the supplied end-of-sentence characters.
 *
 * @throws IOException propagated from loading the abbreviation dictionary
 */
@Test
public void testCreateDummyFactory() throws IOException {
  final Dictionary abbreviations = loadAbbDictionary();
  final char[] eosChars = new char[] {'.', '?'};
  final String factoryClassName = DummySentenceDetectorFactory.class.getCanonicalName();

  final SentenceDetectorFactory created =
      SentenceDetectorFactory.create(factoryClassName, "spa", false, abbreviations, eosChars);

  Assert.assertTrue(created.getAbbreviationDictionary() instanceof DummyDictionary);
  Assert.assertTrue(created.getSDContextGenerator() instanceof DummySDContextGenerator);
  Assert.assertTrue(created.getEndOfSentenceScanner() instanceof DummyEOSScanner);
  Assert.assertTrue(Arrays.equals(eosChars, created.getEOSCharacters()));
}
/**
 * Trains a model with a {@code DummySentenceDetectorFactory} and verifies that the dummy
 * dictionary, context generator and scanner types, plus the configured end-of-sentence
 * characters, are reported by the model's factory before and after a
 * serialize/deserialize round trip. Finally cross-checks the deserialized factory against
 * the accessors of the originally trained model.
 *
 * @throws IOException propagated from dictionary loading, training or (de)serialization
 */
@Test
public void testDummyFactory() throws IOException {
  final Dictionary abbreviations = loadAbbDictionary();
  final char[] eosChars = new char[] {'.', '?'};

  final SentenceModel trained =
      train(new DummySentenceDetectorFactory("eng", true, abbreviations, eosChars));

  // Checks against the freshly trained model.
  final SentenceDetectorFactory trainedFactory = trained.getFactory();
  Assert.assertTrue(trainedFactory.getAbbreviationDictionary() instanceof DummyDictionary);
  Assert.assertTrue(trainedFactory.getSDContextGenerator() instanceof DummySDContextGenerator);
  Assert.assertTrue(trainedFactory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
  Assert.assertTrue(Arrays.equals(eosChars, trainedFactory.getEOSCharacters()));

  // Write the model to memory and read it back.
  final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  final SentenceModel restored =
      new SentenceModel(new ByteArrayInputStream(buffer.toByteArray()));

  // Same checks against the deserialized model.
  final SentenceDetectorFactory restoredFactory = restored.getFactory();
  Assert.assertTrue(restoredFactory.getAbbreviationDictionary() instanceof DummyDictionary);
  Assert.assertTrue(restoredFactory.getSDContextGenerator() instanceof DummySDContextGenerator);
  Assert.assertTrue(restoredFactory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
  Assert.assertTrue(Arrays.equals(eosChars, restoredFactory.getEOSCharacters()));

  // The deserialized factory is compared with the model that was trained originally.
  Assert.assertEquals(restoredFactory.getAbbreviationDictionary(), trained.getAbbreviations());
  Assert.assertTrue(
      Arrays.equals(restoredFactory.getEOSCharacters(), trained.getEosCharacters()));
}
/**
 * Trains a model with the stock {@code SentenceDetectorFactory} while passing
 * {@code null} for both the abbreviation dictionary and the end-of-sentence characters,
 * then verifies that the factory reports no dictionary, the default context generator and
 * scanner, and {@code Factory.defaultEosCharacters} as its end-of-sentence characters,
 * before and after a serialize/deserialize round trip.
 *
 * @throws IOException propagated from training or (de)serialization
 */
@Test
public void testDefaultEOS() throws IOException {
  // Typed null locals keep the constructor overload resolution unambiguous.
  final Dictionary noAbbreviations = null;
  final char[] noEosChars = null;

  final SentenceModel trained =
      train(new SentenceDetectorFactory("eng", true, noAbbreviations, noEosChars));

  // Checks against the freshly trained model.
  final SentenceDetectorFactory trainedFactory = trained.getFactory();
  Assert.assertNull(trainedFactory.getAbbreviationDictionary());
  Assert.assertTrue(trainedFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      trainedFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(
      Arrays.equals(Factory.defaultEosCharacters, trainedFactory.getEOSCharacters()));

  // Write the model to memory and read it back.
  final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  final SentenceModel restored =
      new SentenceModel(new ByteArrayInputStream(buffer.toByteArray()));

  // Same checks against the deserialized model.
  final SentenceDetectorFactory restoredFactory = restored.getFactory();
  Assert.assertNull(restoredFactory.getAbbreviationDictionary());
  Assert.assertTrue(restoredFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      restoredFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(
      Arrays.equals(Factory.defaultEosCharacters, restoredFactory.getEOSCharacters()));
}