/**
 * Looks up the lemma stored on the supplied token.
 *
 * @param jCas the CAS the token belongs to; not consulted by this implementation
 * @param token the token annotation to read from
 * @return whatever {@code token.getLemma()} yields
 */
@Override
public String getLemma(JCas jCas, Token token) {
    final String lemma = token.getLemma();
    return lemma;
}
public UimaTokenizer(String tokens,UimaResource resource,boolean checkForLabel) { this.checkForLabel = checkForLabel; this.tokens = new ArrayList<>(); try { CAS cas = resource.process(tokens); Collection<Token> tokenList = JCasUtil.select(cas.getJCas(), Token.class); for(Token t : tokenList) { if(!checkForLabel || valid(t.getCoveredText())) if(t.getLemma() != null) this.tokens.add(t.getLemma()); else if(t.getStem() != null) this.tokens.add(t.getStem()); else this.tokens.add(t.getCoveredText()); } resource.release(cas); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); } }
public UimaTokenizer(String tokens, UimaResource resource, boolean checkForLabel) { this.checkForLabel = checkForLabel; this.tokens = new ArrayList<>(); try { CAS cas = resource.process(tokens); Collection<Token> tokenList = JCasUtil.select(cas.getJCas(), Token.class); for (Token t : tokenList) { if (!checkForLabel || valid(t.getCoveredText())) if (t.getLemma() != null) this.tokens.add(t.getLemma()); else if (t.getStem() != null) this.tokens.add(t.getStem()); else this.tokens.add(t.getCoveredText()); } resource.release(cas); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); } }
public UimaTokenizer(String tokens,UimaResource resource,boolean checkForLabel) { this.checkForLabel = checkForLabel; this.tokens = new ArrayList<>(); try { CAS cas = resource.process(tokens); Collection<Token> tokenList = JCasUtil.select(cas.getJCas(), Token.class); for(Token t : tokenList) { if(!checkForLabel || valid(t.getCoveredText())) if(t.getLemma() != null) this.tokens.add(t.getLemma()); else if(t.getStem() != null) this.tokens.add(t.getStem()); else this.tokens.add(t.getCoveredText()); } resource.release(cas); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); } }
public PosUimaTokenizer(String tokens,AnalysisEngine engine,Collection<String> allowedPosTags) { if(engine == null) PosUimaTokenizer.engine = engine; this.allowedPosTags = allowedPosTags; this.tokens = new ArrayList<>(); try { if(cas == null) cas = engine.newCAS(); cas.reset(); cas.setDocumentText(tokens); PosUimaTokenizer.engine.process(cas); for(Sentence s : JCasUtil.select(cas.getJCas(), Sentence.class)) { for(Token t : JCasUtil.selectCovered(Token.class,s)) { //add NONE for each invalid token if(valid(t)) if(t.getLemma() != null) this.tokens.add(t.getLemma()); else if(t.getStem() != null) this.tokens.add(t.getStem()); else this.tokens.add(t.getCoveredText()); else this.tokens.add("NONE"); } } } catch (Exception e) { throw new RuntimeException(e); } }
public PosUimaTokenizer(String tokens, AnalysisEngine engine, Collection<String> allowedPosTags) { if (engine == null) PosUimaTokenizer.engine = engine; this.allowedPosTags = allowedPosTags; this.tokens = new ArrayList<>(); try { if (cas == null) cas = engine.newCAS(); cas.reset(); cas.setDocumentText(tokens); PosUimaTokenizer.engine.process(cas); for (Sentence s : JCasUtil.select(cas.getJCas(), Sentence.class)) { for (Token t : JCasUtil.selectCovered(Token.class, s)) { //add NONE for each invalid token if (valid(t)) if (t.getLemma() != null) this.tokens.add(t.getLemma()); else if (t.getStem() != null) this.tokens.add(t.getStem()); else this.tokens.add(t.getCoveredText()); else this.tokens.add("NONE"); } } } catch (Exception e) { throw new RuntimeException(e); } }
public PosUimaTokenizer(String tokens,AnalysisEngine engine,Collection<String> allowedPosTags) { if(engine == null) PosUimaTokenizer.engine = engine; this.allowedPosTags = allowedPosTags; this.tokens = new ArrayList<>(); try { if(cas == null) cas = engine.newCAS(); cas.reset(); cas.setDocumentText(tokens); PosUimaTokenizer.engine.process(cas); for(Sentence s : JCasUtil.select(cas.getJCas(), Sentence.class)) { for(Token t : JCasUtil.selectCovered(Token.class,s)) { //add NONE for each invalid token if(valid(t)) if(t.getLemma() != null) this.tokens.add(t.getLemma()); else if(t.getStem() != null) this.tokens.add(t.getStem()); else this.tokens.add(t.getCoveredText()); else this.tokens.add("NONE"); } } } catch (Exception e) { throw new RuntimeException(e); } }