/**
 * Builds a sentence-detection context generator from an explicit abbreviation
 * set and a caller-supplied array of end-of-sentence characters.
 *
 * @param abbreviations abbreviations handed to the generator unchanged
 * @param customEOSCharacters end-of-sentence characters handed to the generator unchanged
 * @return a new {@code DefaultSDContextGenerator} constructed from both arguments
 */
public SDContextGenerator createSentenceContextGenerator(
    Set<String> abbreviations, char[] customEOSCharacters) {
  final SDContextGenerator generator =
      new DefaultSDContextGenerator(abbreviations, customEOSCharacters);
  return generator;
}
/**
 * Selects a sentence-detection context generator for a language code.
 *
 * <p>{@code "th"} or {@code "tha"} yields a {@code SentenceContextGenerator};
 * {@code "pt"} or {@code "por"} yields a {@code DefaultSDContextGenerator} built
 * with {@code ptEosCharacters}; any other value, including {@code null}, yields a
 * {@code DefaultSDContextGenerator} built with {@code defaultEosCharacters}.
 *
 * @param languageCode code to dispatch on; compared with constant-first
 *     {@code equals}, so {@code null} falls through to the default branch
 * @param abbreviations abbreviations passed to {@code DefaultSDContextGenerator};
 *     not used in the {@code "th"}/{@code "tha"} branch
 * @return the generator chosen for {@code languageCode}
 */
public SDContextGenerator createSentenceContextGenerator(String languageCode,
    Set<String> abbreviations) {
  final boolean matchesTh = "th".equals(languageCode) || "tha".equals(languageCode);
  if (matchesTh) {
    return new SentenceContextGenerator();
  }

  final boolean matchesPt = "pt".equals(languageCode) || "por".equals(languageCode);
  if (matchesPt) {
    return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
  }

  // No language-specific handling applies: use the default end-of-sentence characters.
  return new DefaultSDContextGenerator(abbreviations, defaultEosCharacters);
}
/**
 * Verifies the context features produced when "Mr." and "Inc." are registered
 * as abbreviations: both expected feature lists contain {@code xabbrev}.
 */
@Test
public void testGetContextWithAbbreviations() throws Exception {
  final String sentence = "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.";
  final HashSet<String> abbreviations = new HashSet<>(Arrays.asList("Mr./Inc.".split("/")));
  final SDContextGenerator generator =
      new DefaultSDContextGenerator(abbreviations, Factory.defaultEosCharacters);

  // Index 2 is the period that ends "Mr.".
  final String[] contextAfterMr = generator.getContext(sentence, 2);
  final String[] expectedAfterMr =
      "sn/eos=./x=Mr/2/xcap/xabbrev/v=/s=/n=Smith/ncap".split("/");
  Assert.assertArrayEquals(expectedAfterMr, contextAfterMr);

  // Index 29 is the period that ends "Inc.".
  final String[] contextAfterInc = generator.getContext(sentence, 29);
  final String[] expectedAfterInc =
      "sn/eos=./x=Inc/3/xcap/xabbrev/v=RONDHUIT/vcap/s=/n=as".split("/");
  Assert.assertArrayEquals(expectedAfterInc, contextAfterInc);
}
// Unmatched in this view; presumably closes the enclosing test class.
}
/**
 * Verifies the context features produced with an empty abbreviation set: the
 * expected feature lists for the periods after "Mr" and "Inc" contain no
 * {@code xabbrev} entry.
 */
@Test
public void testGetContext() throws Exception {
  final String sentence = "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.";
  final SDContextGenerator generator =
      new DefaultSDContextGenerator(Collections.<String>emptySet(), Factory.defaultEosCharacters);

  // Index 2 is the period that ends "Mr.".
  final String[] contextAfterMr = generator.getContext(sentence, 2);
  final String[] expectedAfterMr = "sn/eos=./x=Mr/2/xcap/v=/s=/n=Smith/ncap".split("/");
  Assert.assertArrayEquals(expectedAfterMr, contextAfterMr);

  // Index 29 is the period that ends "Inc.".
  final String[] contextAfterInc = generator.getContext(sentence, 29);
  final String[] expectedAfterInc = "sn/eos=./x=Inc/3/xcap/v=RONDHUIT/vcap/s=/n=as".split("/");
  Assert.assertArrayEquals(expectedAfterInc, contextAfterInc);
}
/**
 * Builds a sentence-detection context generator from an explicit abbreviation
 * set and a caller-supplied array of end-of-sentence characters.
 *
 * @param abbreviations abbreviations handed to the generator unchanged
 * @param customEOSCharacters end-of-sentence characters handed to the generator unchanged
 * @return a new {@code DefaultSDContextGenerator} constructed from both arguments
 */
public SDContextGenerator createSentenceContextGenerator(
    Set<String> abbreviations, char[] customEOSCharacters) {
  final SDContextGenerator generator =
      new DefaultSDContextGenerator(abbreviations, customEOSCharacters);
  return generator;
}
/**
 * Builds a sentence-detection context generator from an explicit abbreviation
 * set and a caller-supplied array of end-of-sentence characters.
 *
 * @param abbreviations abbreviations handed to the generator unchanged
 * @param customEOSCharacters end-of-sentence characters handed to the generator unchanged
 * @return a new {@code DefaultSDContextGenerator} constructed from both arguments
 */
public SDContextGenerator createSentenceContextGenerator(
    Set<String> abbreviations, char[] customEOSCharacters) {
  final SDContextGenerator generator =
      new DefaultSDContextGenerator(abbreviations, customEOSCharacters);
  return generator;
}
/**
 * Selects a sentence-detection context generator for a language code.
 *
 * <p>{@code "th"} or {@code "tha"} yields a {@code SentenceContextGenerator};
 * {@code "pt"} or {@code "por"} yields a {@code DefaultSDContextGenerator} built
 * with {@code ptEosCharacters}; any other value, including {@code null}, yields a
 * {@code DefaultSDContextGenerator} built with {@code defaultEosCharacters}.
 *
 * @param languageCode code to dispatch on; compared with constant-first
 *     {@code equals}, so {@code null} falls through to the default branch
 * @param abbreviations abbreviations passed to {@code DefaultSDContextGenerator};
 *     not used in the {@code "th"}/{@code "tha"} branch
 * @return the generator chosen for {@code languageCode}
 */
public SDContextGenerator createSentenceContextGenerator(String languageCode,
    Set<String> abbreviations) {
  final boolean matchesTh = "th".equals(languageCode) || "tha".equals(languageCode);
  if (matchesTh) {
    return new SentenceContextGenerator();
  }

  final boolean matchesPt = "pt".equals(languageCode) || "por".equals(languageCode);
  if (matchesPt) {
    return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
  }

  // No language-specific handling applies: use the default end-of-sentence characters.
  return new DefaultSDContextGenerator(abbreviations, defaultEosCharacters);
}
/**
 * Selects a sentence-detection context generator for a language code.
 *
 * <p>{@code "th"} or {@code "tha"} yields a {@code SentenceContextGenerator};
 * {@code "pt"} or {@code "por"} yields a {@code DefaultSDContextGenerator} built
 * with {@code ptEosCharacters}; any other value, including {@code null}, yields a
 * {@code DefaultSDContextGenerator} built with {@code defaultEosCharacters}.
 *
 * @param languageCode code to dispatch on; compared with constant-first
 *     {@code equals}, so {@code null} falls through to the default branch
 * @param abbreviations abbreviations passed to {@code DefaultSDContextGenerator};
 *     not used in the {@code "th"}/{@code "tha"} branch
 * @return the generator chosen for {@code languageCode}
 */
public SDContextGenerator createSentenceContextGenerator(String languageCode,
    Set<String> abbreviations) {
  final boolean matchesTh = "th".equals(languageCode) || "tha".equals(languageCode);
  if (matchesTh) {
    return new SentenceContextGenerator();
  }

  final boolean matchesPt = "pt".equals(languageCode) || "por".equals(languageCode);
  if (matchesPt) {
    return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
  }

  // No language-specific handling applies: use the default end-of-sentence characters.
  return new DefaultSDContextGenerator(abbreviations, defaultEosCharacters);
}
/**
 * Initializes this component: loads the sentence model from {@code sdModelPath},
 * builds the sentence detector from it, and fills {@code skipSegmentsSet} from
 * {@code skipSegmentsArray} when that array is non-null.
 *
 * @param aContext UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if an {@link IOException} is raised while
 *     the model is being opened or loaded; the I/O failure is attached as the cause
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
  super.initialize(aContext);
  // Log the path before opening it so a failed open can still be traced to the file.
  logger.info("Sentence detector model file: " + sdModelPath);
  try (InputStream is = FileLocator.getAsStream(sdModelPath)) {
    sdmodel = new SentenceModel(is);
    EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
    DefaultSDContextGenerator cg =
        new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
    sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);
    skipSegmentsSet = new HashSet<>();
    if (skipSegmentsArray != null) {
      Collections.addAll(skipSegmentsSet, skipSegmentsArray);
    }
  } catch (IOException e) {
    // The wrapping exception keeps the cause and its stack trace, so the failure is
    // reported once by whoever handles it instead of also being dumped to stderr.
    throw new ResourceInitializationException(e);
  }
}
/**
 * Initializes this component: loads the sentence model from {@code sdModelPath},
 * builds the sentence detector from it, and fills {@code skipSegmentsSet} from
 * {@code skipSegmentsArray} when that array is non-null.
 *
 * @param aContext UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if an {@link IOException} is raised while
 *     the model is being opened or loaded; the I/O failure is attached as the cause
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
  super.initialize(aContext);
  // Log the path before opening it so a failed open can still be traced to the file.
  logger.info("Sentence detector model file: " + sdModelPath);
  try (InputStream is = FileLocator.getAsStream(sdModelPath)) {
    sdmodel = new SentenceModel(is);
    EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
    DefaultSDContextGenerator cg =
        new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
    sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);
    skipSegmentsSet = new HashSet<>();
    if (skipSegmentsArray != null) {
      Collections.addAll(skipSegmentsSet, skipSegmentsArray);
    }
  } catch (IOException e) {
    // The wrapping exception keeps the cause and its stack trace, so the failure is
    // reported once by whoever handles it instead of also being dumped to stderr.
    throw new ResourceInitializationException(e);
  }
}
/** * Reads configuration parameters. * * @throws ResourceAccessException * @throws IOException * @throws InvalidFormatException */ private void configInit() throws ResourceAccessException, InvalidFormatException, IOException { String sdModelPath = (String) context .getConfigParameterValue(SD_MODEL_FILE_PARAM); InputStream is = FileLocator.getAsStream(sdModelPath); logger.info("Sentence detector model file: " + sdModelPath); sdmodel = new SentenceModel(is); is.close(); EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl(); char[] eosc = eoss.getEndOfSentenceCharacters(); // SentenceDContextGenerator cg = new SentenceDContextGenerator(); DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eosc); sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss); skipSegmentsSet = ParamUtil.getStringParameterValuesSet( PARAM_SEGMENTS_TO_SKIP, context); // vng change begin paragraphPattern = compilePatternCheck("paragraphPattern", PARAGRAPH_PATTERN); splitPattern = compilePatternCheck("splitPattern", SPLIT_PATTERN); periodPattern = compilePatternCheck("periodPattern", PERIOD_PATTERN); acronymPattern = compilePatternCheck("acronymPattern", ACRONYM_PATTERN); // vng change end } /**
/** * Reads configuration parameters. * * @throws ResourceAccessException * @throws IOException * @throws InvalidFormatException */ private void configInit() throws ResourceAccessException, InvalidFormatException, IOException { String sdModelPath = (String) context .getConfigParameterValue(SD_MODEL_FILE_PARAM); InputStream is = FileLocator.getAsStream(sdModelPath); logger.info("Sentence detector model file: " + sdModelPath); sdmodel = new SentenceModel(is); is.close(); EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl(); char[] eosc = eoss.getEndOfSentenceCharacters(); // SentenceDContextGenerator cg = new SentenceDContextGenerator(); DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eosc); sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss); skipSegmentsSet = ParamUtil.getStringParameterValuesSet( PARAM_SEGMENTS_TO_SKIP, context); // vng change begin paragraphPattern = compilePatternCheck("paragraphPattern", PARAGRAPH_PATTERN); splitPattern = compilePatternCheck("splitPattern", SPLIT_PATTERN); periodPattern = compilePatternCheck("periodPattern", PERIOD_PATTERN); acronymPattern = compilePatternCheck("acronymPattern", ACRONYM_PATTERN); // vng change end } /**