/**
 * Verifies that the artifact map contains a maxent model usable for sentence detection.
 *
 * <p>The superclass validation runs first. Afterwards the entry stored under
 * {@code MAXENT_MODEL_ENTRY_NAME} must be a {@link MaxentModel}, and
 * {@link ModelUtil#validateOutcomes} must accept the model for the outcomes
 * {@code SentenceDetectorME.SPLIT} and {@code SentenceDetectorME.NO_SPLIT}.
 *
 * @throws InvalidFormatException if the entry is not a {@link MaxentModel}, or if its
 *     outcomes are rejected by the outcome validation
 */
@Override
protected void validateArtifactMap() throws InvalidFormatException {
  super.validateArtifactMap();

  final Object modelArtifact = artifactMap.get(MAXENT_MODEL_ENTRY_NAME);
  final boolean isMaxentModel = modelArtifact instanceof MaxentModel;
  if (!isMaxentModel) {
    throw new InvalidFormatException("Unable to find " + MAXENT_MODEL_ENTRY_NAME
        + " maxent model!");
  }

  final boolean outcomesAccepted = ModelUtil.validateOutcomes(
      getMaxentModel(), SentenceDetectorME.SPLIT, SentenceDetectorME.NO_SPLIT);
  if (!outcomesAccepted) {
    throw new InvalidFormatException("The maxent model is not compatible "
        + "with the sentence detector!");
  }
}
/** * Initializes the current instance. * * @param model the {@link SentenceModel} */ public SentenceDetectorME(SentenceModel model) { SentenceDetectorFactory sdFactory = model.getFactory(); this.model = model.getMaxentModel(); cgen = sdFactory.getSDContextGenerator(); scanner = sdFactory.getEndOfSentenceScanner(); useTokenEnd = sdFactory.isUseTokenEnd(); }
/** * @deprecated Use a {@link SentenceDetectorFactory} to extend * SentenceDetector functionality. */ public SentenceDetectorME(SentenceModel model, Factory factory) { this.model = model.getMaxentModel(); // if the model has custom EOS characters set, use this to get the context // generator and the EOS scanner; otherwise use language-specific defaults char[] customEOSCharacters = model.getEosCharacters(); if (customEOSCharacters == null) { cgen = factory.createSentenceContextGenerator(model.getLanguage(), getAbbreviations(model.getAbbreviations())); scanner = factory.createEndOfSentenceScanner(model.getLanguage()); } else { cgen = factory.createSentenceContextGenerator( getAbbreviations(model.getAbbreviations()), customEOSCharacters); scanner = factory.createEndOfSentenceScanner(customEOSCharacters); } useTokenEnd = model.useTokenEnd(); }
/**
 * Verifies that the artifact map contains a maxent model usable for sentence detection.
 *
 * <p>The superclass validation runs first. Afterwards the entry stored under
 * {@code MAXENT_MODEL_ENTRY_NAME} must be a {@link MaxentModel}, and
 * {@link ModelUtil#validateOutcomes} must accept the model for the outcomes
 * {@code SentenceDetectorME.SPLIT} and {@code SentenceDetectorME.NO_SPLIT}.
 *
 * @throws InvalidFormatException if the entry is not a {@link MaxentModel}, or if its
 *     outcomes are rejected by the outcome validation
 */
@Override
protected void validateArtifactMap() throws InvalidFormatException {
  super.validateArtifactMap();

  final Object modelArtifact = artifactMap.get(MAXENT_MODEL_ENTRY_NAME);
  final boolean isMaxentModel = modelArtifact instanceof MaxentModel;
  if (!isMaxentModel) {
    throw new InvalidFormatException("Unable to find " + MAXENT_MODEL_ENTRY_NAME
        + " maxent model!");
  }

  final boolean outcomesAccepted = ModelUtil.validateOutcomes(
      getMaxentModel(), SentenceDetectorME.SPLIT, SentenceDetectorME.NO_SPLIT);
  if (!outcomesAccepted) {
    throw new InvalidFormatException("The maxent model is not compatible "
        + "with the sentence detector!");
  }
}
/**
 * Verifies that the artifact map contains a maxent model usable for sentence detection.
 *
 * <p>The superclass validation runs first. Afterwards the entry stored under
 * {@code MAXENT_MODEL_ENTRY_NAME} must be a {@link MaxentModel}, and
 * {@link ModelUtil#validateOutcomes} must accept the model for the outcomes
 * {@code SentenceDetectorME.SPLIT} and {@code SentenceDetectorME.NO_SPLIT}.
 *
 * @throws InvalidFormatException if the entry is not a {@link MaxentModel}, or if its
 *     outcomes are rejected by the outcome validation
 */
@Override
protected void validateArtifactMap() throws InvalidFormatException {
  super.validateArtifactMap();

  final Object modelArtifact = artifactMap.get(MAXENT_MODEL_ENTRY_NAME);
  final boolean isMaxentModel = modelArtifact instanceof MaxentModel;
  if (!isMaxentModel) {
    throw new InvalidFormatException("Unable to find " + MAXENT_MODEL_ENTRY_NAME
        + " maxent model!");
  }

  final boolean outcomesAccepted = ModelUtil.validateOutcomes(
      getMaxentModel(), SentenceDetectorME.SPLIT, SentenceDetectorME.NO_SPLIT);
  if (!outcomesAccepted) {
    throw new InvalidFormatException("The maxent model is not compatible "
        + "with the sentence detector!");
  }
}
/**
 * Initializes the annotator: loads the sentence model and builds the sentence detector.
 *
 * <p>After the superclass initialization, the model is read from {@code sdModelPath}, a
 * {@link SentenceDetectorCtakes} is created from the model's maxent model together with a
 * context generator and scanner based on the characters reported by a new
 * {@link EndOfSentenceScannerImpl}, and {@code skipSegmentsSet} is filled from
 * {@code skipSegmentsArray} when that array is not {@code null}.
 *
 * @param aContext the UIMA context, passed on to the superclass initialization
 * @throws ResourceInitializationException if an {@link IOException} occurs while loading
 *     the model (the original exception is kept as the cause); may also propagate from the
 *     superclass initialization
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
  super.initialize(aContext);

  try (InputStream is = FileLocator.getAsStream(sdModelPath)) {
    logger.info("Sentence detector model file: " + sdModelPath);
    sdmodel = new SentenceModel(is);

    EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
    DefaultSDContextGenerator cg =
        new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
    sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);

    skipSegmentsSet = new HashSet<>();
    if (skipSegmentsArray != null) {
      Collections.addAll(skipSegmentsSet, skipSegmentsArray);
    }
  } catch (IOException e) {
    // No printStackTrace(): the exception is rethrown with its cause preserved, so the
    // caller decides how to report it and the stack trace is not duplicated on stderr.
    throw new ResourceInitializationException(e);
  }
}
/**
 * Initializes the annotator: loads the sentence model and builds the sentence detector.
 *
 * <p>After the superclass initialization, the model is read from {@code sdModelPath}, a
 * {@link SentenceDetectorCtakes} is created from the model's maxent model together with a
 * context generator and scanner based on the characters reported by a new
 * {@link EndOfSentenceScannerImpl}, and {@code skipSegmentsSet} is filled from
 * {@code skipSegmentsArray} when that array is not {@code null}.
 *
 * @param aContext the UIMA context, passed on to the superclass initialization
 * @throws ResourceInitializationException if an {@link IOException} occurs while loading
 *     the model (the original exception is kept as the cause); may also propagate from the
 *     superclass initialization
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
  super.initialize(aContext);

  try (InputStream is = FileLocator.getAsStream(sdModelPath)) {
    logger.info("Sentence detector model file: " + sdModelPath);
    sdmodel = new SentenceModel(is);

    EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
    DefaultSDContextGenerator cg =
        new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
    sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);

    skipSegmentsSet = new HashSet<>();
    if (skipSegmentsArray != null) {
      Collections.addAll(skipSegmentsSet, skipSegmentsArray);
    }
  } catch (IOException e) {
    // No printStackTrace(): the exception is rethrown with its cause preserved, so the
    // caller decides how to report it and the stack trace is not duplicated on stderr.
    throw new ResourceInitializationException(e);
  }
}
/** * Initializes the current instance. * * @param model the {@link SentenceModel} */ public SentenceDetectorME(SentenceModel model) { SentenceDetectorFactory sdFactory = model.getFactory(); this.model = model.getMaxentModel(); cgen = sdFactory.getSDContextGenerator(); scanner = sdFactory.getEndOfSentenceScanner(); useTokenEnd = sdFactory.isUseTokenEnd(); }
/** * Initializes the current instance. * * @param model the {@link SentenceModel} */ public SentenceDetectorME(SentenceModel model) { SentenceDetectorFactory sdFactory = model.getFactory(); this.model = model.getMaxentModel(); cgen = sdFactory.getSDContextGenerator(); scanner = sdFactory.getEndOfSentenceScanner(); useTokenEnd = sdFactory.isUseTokenEnd(); }
/** * Reads configuration parameters. * * @throws ResourceAccessException * @throws IOException * @throws InvalidFormatException */ private void configInit() throws ResourceAccessException, InvalidFormatException, IOException { String sdModelPath = (String) context .getConfigParameterValue(SD_MODEL_FILE_PARAM); InputStream is = FileLocator.getAsStream(sdModelPath); logger.info("Sentence detector model file: " + sdModelPath); sdmodel = new SentenceModel(is); is.close(); EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl(); char[] eosc = eoss.getEndOfSentenceCharacters(); // SentenceDContextGenerator cg = new SentenceDContextGenerator(); DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eosc); sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss); skipSegmentsSet = ParamUtil.getStringParameterValuesSet( PARAM_SEGMENTS_TO_SKIP, context); // vng change begin paragraphPattern = compilePatternCheck("paragraphPattern", PARAGRAPH_PATTERN); splitPattern = compilePatternCheck("splitPattern", SPLIT_PATTERN); periodPattern = compilePatternCheck("periodPattern", PERIOD_PATTERN); acronymPattern = compilePatternCheck("acronymPattern", ACRONYM_PATTERN); // vng change end } /**
/** * Reads configuration parameters. * * @throws ResourceAccessException * @throws IOException * @throws InvalidFormatException */ private void configInit() throws ResourceAccessException, InvalidFormatException, IOException { String sdModelPath = (String) context .getConfigParameterValue(SD_MODEL_FILE_PARAM); InputStream is = FileLocator.getAsStream(sdModelPath); logger.info("Sentence detector model file: " + sdModelPath); sdmodel = new SentenceModel(is); is.close(); EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl(); char[] eosc = eoss.getEndOfSentenceCharacters(); // SentenceDContextGenerator cg = new SentenceDContextGenerator(); DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eosc); sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss); skipSegmentsSet = ParamUtil.getStringParameterValuesSet( PARAM_SEGMENTS_TO_SKIP, context); // vng change begin paragraphPattern = compilePatternCheck("paragraphPattern", PARAGRAPH_PATTERN); splitPattern = compilePatternCheck("splitPattern", SPLIT_PATTERN); periodPattern = compilePatternCheck("periodPattern", PERIOD_PATTERN); acronymPattern = compilePatternCheck("acronymPattern", ACRONYM_PATTERN); // vng change end } /**
/** * @deprecated Use a {@link SentenceDetectorFactory} to extend * SentenceDetector functionality. */ public SentenceDetectorME(SentenceModel model, Factory factory) { this.model = model.getMaxentModel(); // if the model has custom EOS characters set, use this to get the context // generator and the EOS scanner; otherwise use language-specific defaults char[] customEOSCharacters = model.getEosCharacters(); if (customEOSCharacters == null) { cgen = factory.createSentenceContextGenerator(model.getLanguage(), getAbbreviations(model.getAbbreviations())); scanner = factory.createEndOfSentenceScanner(model.getLanguage()); } else { cgen = factory.createSentenceContextGenerator( getAbbreviations(model.getAbbreviations()), customEOSCharacters); scanner = factory.createEndOfSentenceScanner(customEOSCharacters); } useTokenEnd = model.useTokenEnd(); }
/** * @deprecated Use a {@link SentenceDetectorFactory} to extend * SentenceDetector functionality. */ public SentenceDetectorME(SentenceModel model, Factory factory) { this.model = model.getMaxentModel(); // if the model has custom EOS characters set, use this to get the context // generator and the EOS scanner; otherwise use language-specific defaults char[] customEOSCharacters = model.getEosCharacters(); if (customEOSCharacters == null) { cgen = factory.createSentenceContextGenerator(model.getLanguage(), getAbbreviations(model.getAbbreviations())); scanner = factory.createEndOfSentenceScanner(model.getLanguage()); } else { cgen = factory.createSentenceContextGenerator( getAbbreviations(model.getAbbreviations()), customEOSCharacters); scanner = factory.createEndOfSentenceScanner(customEOSCharacters); } useTokenEnd = model.useTokenEnd(); }