/**
 * Creates the engine, wiring an {@link MMSeg} over an empty input to a
 * {@link ComplexSeg} built on the dictionary returned by
 * {@code Dictionary.getInstance()}.
 *
 * @param mode segmentation mode, see {@link SegMode}
 */
public MmsegEngine(SegMode mode) {
    // NOTE(review): mode is never read here; a ComplexSeg is always used. Confirm this is intended.
    final StringReader emptyInput = new StringReader("");
    this.mmSeg = new MMSeg(emptyInput, new ComplexSeg(Dictionary.getInstance()));
}
maxMatch(cns, 0, chs, offsets[0], tailLens, 0); for(int aIdx=tailLens[0].size()-1; aIdx>=0; aIdx--) { maxMatch(cns, 1, chs, offsets[1], tailLens, 1); for(int bIdx=tailLens[1].size()-1; bIdx>=0; bIdx--) { tailLen[2] = maxMatch(cns, 2, chs, offsets[2]); if(sumChunkLen >= maxLen) { ck = createChunk(sen, chs, tailLen, offsets, cns); mmr.addChunk(ck); ck = createChunk(sen, chs, tailLen, offsets, cns); mmr.addChunk(ck); if(showChunk) { System.out.println("-------filter before "+rule+"----------"); printChunk(chunks); printChunk(chunks);
public Chunk seg(Sentence sen) { Chunk chunk = super.seg(sen); if(chunk != null) { List<Word> cks = new ArrayList<Word>();
maxMatch(cns, 0, chs, offsets[0], tailLens, 0); for(int aIdx=tailLens[0].size()-1; aIdx>=0; aIdx--) { maxMatch(cns, 1, chs, offsets[1], tailLens, 1); for(int bIdx=tailLens[1].size()-1; bIdx>=0; bIdx--) { tailLen[2] = maxMatch(cns, 2, chs, offsets[2]); if(sumChunkLen >= maxLen) { ck = createChunk(sen, chs, tailLen, offsets, cns); mmr.addChunk(ck); ck = createChunk(sen, chs, tailLen, offsets, cns); mmr.addChunk(ck); if(showChunk) { System.out.println("-------filter before "+rule+"----------"); printChunk(chunks); printChunk(chunks);
public Chunk seg(Sentence sen) { Chunk chunk = super.seg(sen); if(chunk != null) { List<Word> cks = new ArrayList<Word>();
/**
 * Creates the engine, wiring an {@link MMSeg} over an empty input to a
 * {@link ComplexSeg} built on the dictionary returned by
 * {@code Dictionary.getInstance()}.
 *
 * @param mode segmentation mode, see {@link SegMode}
 */
public MmsegEngine(SegMode mode) {
    // NOTE(review): mode is never read here; a ComplexSeg is always used. Confirm this is intended.
    final StringReader emptyInput = new StringReader("");
    this.mmSeg = new MMSeg(emptyInput, new ComplexSeg(Dictionary.getInstance()));
}
maxMatch(cns, 0, chs, offsets[0], tailLens, 0); for(int aIdx=tailLens[0].size()-1; aIdx>=0; aIdx--) { maxMatch(cns, 1, chs, offsets[1], tailLens, 1); for(int bIdx=tailLens[1].size()-1; bIdx>=0; bIdx--) { tailLen[2] = maxMatch(cns, 2, chs, offsets[2]); if(sumChunkLen >= maxLen) { ck = createChunk(sen, chs, tailLen, offsets, cns); mmr.addChunk(ck); ck = createChunk(sen, chs, tailLen, offsets, cns); mmr.addChunk(ck); if(showChunk) { LOGGER.info("-------filter before "+rule+"----------"); printChunk(chunks); printChunk(chunks);
public Chunk seg(Sentence sen) { Chunk chunk = super.seg(sen); if(chunk != null) { List<Word> cks = new ArrayList<Word>();
/**
 * Creates an analyzer whose segmenter is selected by {@code mode}.
 *
 * <p>The position of {@code mode} within {@code SUPPORT_MODES} picks the
 * implementation: index 0 uses {@link SimpleSeg}, index 1 uses
 * {@link ComplexSeg}, index 2 uses {@link MaxWordSeg}; each is built over
 * {@code DIC}.
 *
 * @param mode name of the segment mode; must be contained in {@code SUPPORT_MODES}
 * @throws ConfigException if {@code mode} is not contained in {@code SUPPORT_MODES}
 */
public MMSeg4JAnalyzer(String mode) {
    if (!SUPPORT_MODES.contains(mode)) {
        throw new ConfigException(
                  "Unsupported segment mode '%s' for mmseg4j analyzer, " +
                  "the available values are %s", mode, SUPPORT_MODES);
    }
    int index = SUPPORT_MODES.indexOf(mode);
    switch (index) {
        case 0:
            this.seg = new SimpleSeg(DIC);
            break;
        case 1:
            this.seg = new ComplexSeg(DIC);
            break;
        case 2:
            this.seg = new MaxWordSeg(DIC);
            break;
        default:
            // Only reachable when SUPPORT_MODES lists a mode that has no case above.
            // Report the offending mode; this.seg has not been assigned at this point.
            throw new AssertionError(String.format(
                      "Unsupported segment mode '%s'", mode));
    }
}
/**
 * Supplies the segmenter: a new {@link ComplexSeg} constructed over {@code dic}.
 *
 * @return a newly constructed {@link ComplexSeg}
 */
protected Seg newSeg() {
    final Seg complexSeg = new ComplexSeg(dic);
    return complexSeg;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Supplies the segmenter: a new {@link ComplexSeg} constructed over {@code dic}.
 *
 * @return a newly constructed {@link ComplexSeg}
 */
protected Seg newSeg() {
    final Seg complexSeg = new ComplexSeg(dic);
    return complexSeg;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Supplies the segmenter: a new {@link ComplexSeg} constructed over {@code dic}.
 *
 * @return a newly constructed {@link ComplexSeg}
 */
protected Seg newSeg() {
    final Seg complexSeg = new ComplexSeg(dic);
    return complexSeg;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Builds the segmenter: a new {@link ComplexSeg} constructed over {@code dic}.
 * A new instance is created on every call.
 *
 * @return a newly constructed {@link ComplexSeg}
 */
protected Seg getSeg() {
    final Seg complexSeg = new ComplexSeg(dic);
    return complexSeg;
}
/**
 * Builds the segmenter: a new {@link ComplexSeg} constructed over {@code dic}.
 * A new instance is created on every call.
 *
 * @return a newly constructed {@link ComplexSeg}
 */
protected Seg getSeg() {
    final Seg complexSeg = new ComplexSeg(dic);
    return complexSeg;
}
/**
 * Creates the engine, wiring an {@link MMSeg} over an empty input to a
 * {@link ComplexSeg} built on the dictionary returned by
 * {@code Dictionary.getInstance()}.
 *
 * @param mode segmentation mode, see {@link SegMode}
 */
public MmsegEngine(SegMode mode) {
    // NOTE(review): mode is never read here; a ComplexSeg is always used. Confirm this is intended.
    final StringReader emptyInput = new StringReader("");
    this.mmSeg = new MMSeg(emptyInput, new ComplexSeg(Dictionary.getInstance()));
}
private Seg newSeg(Map<String, String> args) { Seg seg = null; logger.info("create new Seg ..."); //default max-word String mode = args.get("mode"); if("simple".equals(mode)) { logger.info("use simple mode"); seg = new SimpleSeg(dic); } else if("complex".equals(mode)) { logger.info("use complex mode"); seg = new ComplexSeg(dic); } else { logger.info("use max-word mode"); seg = new MaxWordSeg(dic); } return seg; }
private Seg newSeg(Map<String, String> args) { Seg seg = null; logger.info("create new Seg ..."); //default max-word String mode = args.get("mode"); if("simple".equals(mode)) { logger.info("use simple mode"); seg = new SimpleSeg(dic); } else if("complex".equals(mode)) { logger.info("use complex mode"); seg = new ComplexSeg(dic); } else { logger.info("use max-word mode"); seg = new MaxWordSeg(dic); } return seg; }
/**
 * Creates a tokenizer factory that segments with a {@link ComplexSeg}.
 *
 * <p>The segmenter is built over the dictionary obtained from
 * {@code Dictionary.getInstance(environment.configFile())}.
 *
 * @param indexSettings passed through to {@code MMsegTokenizerFactory}
 * @param environment   passed through to the factory; also supplies {@code configFile()} for the dictionary
 * @param s             passed through to {@code MMsegTokenizerFactory}
 * @param settings      passed through to {@code MMsegTokenizerFactory}
 * @return a new {@code MMsegTokenizerFactory} using the complex segmenter
 */
public static TokenizerFactory getComplex(IndexSettings indexSettings,
                                          Environment environment,
                                          String s,
                                          Settings settings) {
    final ComplexSeg complexSeg =
            new ComplexSeg(Dictionary.getInstance(environment.configFile()));
    return new MMsegTokenizerFactory(indexSettings, environment, s, settings, complexSeg);
}
/**
 * Creates an analyzer whose segmenter is selected by {@code mode}.
 *
 * <p>The position of {@code mode} within {@code SUPPORT_MODES} picks the
 * implementation: index 0 uses {@link SimpleSeg}, index 1 uses
 * {@link ComplexSeg}, index 2 uses {@link MaxWordSeg}; each is built over
 * {@code DIC}.
 *
 * @param mode name of the segment mode; must be contained in {@code SUPPORT_MODES}
 * @throws ConfigException if {@code mode} is not contained in {@code SUPPORT_MODES}
 */
public MMSeg4JAnalyzer(String mode) {
    if (!SUPPORT_MODES.contains(mode)) {
        throw new ConfigException(
                  "Unsupported segment mode '%s' for mmseg4j analyzer, " +
                  "the available values are %s", mode, SUPPORT_MODES);
    }
    int index = SUPPORT_MODES.indexOf(mode);
    switch (index) {
        case 0:
            this.seg = new SimpleSeg(DIC);
            break;
        case 1:
            this.seg = new ComplexSeg(DIC);
            break;
        case 2:
            this.seg = new MaxWordSeg(DIC);
            break;
        default:
            // Only reachable when SUPPORT_MODES lists a mode that has no case above.
            // Report the offending mode; this.seg has not been assigned at this point.
            throw new AssertionError(String.format(
                      "Unsupported segment mode '%s'", mode));
    }
}
public MmsegTextAnalyzer(int type, String dictionaryPath, Map<String, ? extends Object> keywordDefinitions, Map<Integer, ? extends Object> lengthDefinitions){ super(dictionaryPath, keywordDefinitions, lengthDefinitions); if (dictionaryPath == null){ dict = com.chenlb.mmseg4j.Dictionary.getInstance(); }else{ dict = com.chenlb.mmseg4j.Dictionary.getInstance(dictionaryPath); } switch (type){ case TYPE_MMSEG_SIMPLE: seg = new SimpleSeg(dict); break; case TYPE_MMSEG_COMPLEX: seg = new ComplexSeg(dict); break; case TYPE_MMSEG_MAXWORD: seg = new MaxWordSeg(dict); break; default: throw new IllegalArgumentException("Supported types are: TYPE_MMSEG_SIMPLE, TYPE_MMSEG_COMPLEX, TYPE_MMSEG_MAXWORD"); } mmSeg = new MMSeg(new StringReader(""), seg); kwMatcher = keywordDefinitions == null ? null : new KeywordMatcher(keywordDefinitions); }