/** * @param aView * @return */ private String getLemmasAsStringSequence(JCas aView) { String result=""; // get all Lemmas Collection<Lemma> lemmas = JCasUtil.select(aView, Lemma.class); for(Lemma cur : lemmas ) { result += cur.getValue() + " "; } return result; }
/**
 * Convenience accessor for the value of the lemma attached to this token.
 *
 * @return the value of the linked {@link Lemma} annotation, or {@code null} when no lemma
 *         is linked to this token
 */
public String getLemmaValue() {
    Lemma linked = getLemma();
    if (linked == null) {
        return null;
    }
    return linked.getValue();
}
/**
 * Computes the type-token-ratio of a view: the number of distinct lemma values divided by
 * the total number of {@link Lemma} annotations.
 *
 * @param view
 *            the view for which the type-token-ratio is computed
 * @return the type-token-ratio, or {@code 0.0} if the view contains no lemmas
 */
private double getTypeTokenRatio(JCas view) {
    Set<String> types = new HashSet<String>();
    int tokenCount = 0;
    // Count while collecting so the view is only selected once.
    for (Lemma lemma : JCasUtil.select(view, Lemma.class)) {
        types.add(lemma.getValue());
        tokenCount++;
    }
    // Without this guard an empty view would compute 0 / 0.0 and return NaN.
    if (tokenCount == 0) {
        return 0.0;
    }
    return types.size() / (double) tokenCount;
}
}
/**
 * Returns the textual value of this token's lemma, if one is attached.
 *
 * @return the value of the {@link Lemma} annotation linked to this token; {@code null} if
 *         the token has no lemma
 */
public String getLemmaValue() {
    Lemma attached = getLemma();
    if (attached != null) {
        return attached.getValue();
    }
    return null;
}
/**
 * Returns the base form of a token, i.e. the value of its lemma annotation.
 * <p>
 * A token is not guaranteed to carry a lemma; in that case the covered text of the token is
 * returned instead of failing with a {@link NullPointerException}.
 *
 * @param token
 *            the token to look up
 * @return the lemma value, or the token's covered text when no lemma is attached
 */
@Override
public String getTokenBaseForm(Token token) {
    if (token.getLemma() == null) {
        // No lemma attached: fall back to the surface form.
        return token.getCoveredText();
    }
    return token.getLemma().getValue();
}
/**
 * Returns the base form of a token, i.e. the value of its lemma annotation.
 * <p>
 * If the token has no lemma annotation, the token's covered text is used as the base form
 * rather than letting a {@link NullPointerException} escape.
 *
 * @param token
 *            the token whose base form is requested
 * @return the lemma value, or the token's covered text when no lemma is attached
 */
@Override
public String getTokenBaseForm(Token token) {
    if (token.getLemma() != null) {
        return token.getLemma().getValue();
    }
    // No lemma attached: fall back to the surface form.
    return token.getCoveredText();
}
private static String getChunkString(JCas jcas, List<? extends Chunk> chunkList) { String chunkString = ""; if (chunkList.size() > 0) { Chunk chunk = chunkList.get(0); // get rightmost lemma in chunk List<Lemma> lemmas = JCasUtil.selectCovered(jcas, Lemma.class, chunk); Set<String> lemmaStrings = new HashSet<String>(); for (Lemma lemma : lemmas) { lemmaStrings.add(lemma.getValue()); } chunkString = StringUtils.join(lemmaStrings, "_"); // if (lemmas.size() > 0) { // chunkString = lemmas.get(lemmas.size()-1).getCoveredText(); // } } return chunkString; }
/**
 * Looks up the semantic tag registered for the lemma value of a token.
 *
 * @param token
 *            the token whose lemma value is used as lookup key
 * @return the value stored in {@code keySemanticTagMap} for the lemma value, or
 *         {@code "UNKNOWN"} if the map has no entry for it
 * @throws ResourceAccessException
 *             wrapping any exception raised during the lookup
 */
@Override
public String getSemanticTag(Token token) throws ResourceAccessException {
    try {
        final String lemmaValue = token.getLemma().getValue();
        if (!keySemanticTagMap.containsKey(lemmaValue)) {
            return "UNKNOWN";
        }
        return keySemanticTagMap.get(lemmaValue);
    } catch (Exception e) {
        throw new ResourceAccessException(e);
    }
}
/**
 * Resolves the semantic tag for a single token by using its lemma value as key into
 * {@code keySemanticTagMap}.
 *
 * @param token
 *            the token to tag
 * @return the mapped semantic tag, or {@code "UNKNOWN"} when the key is not present
 * @throws ResourceAccessException
 *             wrapping any exception raised while reading the lemma or the map
 */
@Override
public String getSemanticTag(Token token) throws ResourceAccessException {
    try {
        String key = token.getLemma().getValue();
        boolean known = keySemanticTagMap.containsKey(key);
        return known ? keySemanticTagMap.get(key) : "UNKNOWN";
    } catch (Exception e) {
        throw new ResourceAccessException(e);
    }
}
private static String getChunkString(JCas jcas, List<? extends Chunk> chunkList) { String chunkString = ""; if (chunkList.size() > 0) { Chunk chunk = chunkList.get(0); // get rightmost lemma in chunk List<Lemma> lemmas = JCasUtil.selectCovered(jcas, Lemma.class, chunk); Set<String> lemmaStrings = new HashSet<String>(); for (Lemma lemma : lemmas) { lemmaStrings.add(lemma.getValue()); } chunkString = StringUtils.join(lemmaStrings, "_"); // if (lemmas.size() > 0) { // chunkString = lemmas.get(lemmas.size()-1).getCoveredText(); // } } return chunkString; }
/**
 * Looks up the semantic tag of a multi-token expression. The lookup key is built from the
 * lemma values of the tokens, joined by single spaces.
 *
 * @param tokens
 *            the tokens forming the expression, in order
 * @return the value stored in {@code keySemanticTagMap} for the joined lemma string, or
 *         {@code "UNKNOWN"} if the map has no entry for it
 * @throws ResourceAccessException
 *             wrapping any exception raised by the map lookup
 */
@Override
public String getSemanticTag(List<Token> tokens) throws ResourceAccessException {
    // The key is assembled outside the try block, so failures here are not wrapped.
    List<String> lemmaValues = new ArrayList<String>();
    for (Token current : tokens) {
        lemmaValues.add(current.getLemma().getValue());
    }
    final String key = StringUtils.join(lemmaValues, " ");

    try {
        if (!keySemanticTagMap.containsKey(key)) {
            return "UNKNOWN";
        }
        return keySemanticTagMap.get(key);
    } catch (Exception e) {
        throw new ResourceAccessException(e);
    }
}
/**
 * Resolves the semantic tag for a sequence of tokens. The lemma values of the tokens are
 * joined with single spaces and the result is used as key into {@code keySemanticTagMap}.
 *
 * @param tokens
 *            the tokens to tag as one unit
 * @return the mapped semantic tag, or {@code "UNKNOWN"} when the key is not present
 * @throws ResourceAccessException
 *             wrapping any exception raised by the map lookup
 */
@Override
public String getSemanticTag(List<Token> tokens) throws ResourceAccessException {
    // Key construction happens before the try block; its exceptions propagate as-is.
    List<String> parts = new ArrayList<String>();
    for (Token tok : tokens) {
        parts.add(tok.getLemma().getValue());
    }
    String joinedLemmas = StringUtils.join(parts, " ");

    try {
        boolean known = keySemanticTagMap.containsKey(joinedLemmas);
        return known ? keySemanticTagMap.get(joinedLemmas) : "UNKNOWN";
    } catch (Exception e) {
        throw new ResourceAccessException(e);
    }
}
/** * Produce token lemma, return the original string if the lemma is null; * converts the string to lower case. * * @param tok * @return */ public String getTokenLemma(Token tok) { Lemma l = tok.getLemma(); // For some weird reason, Clear NLP lemma is sometimes NULL return (l!=null) ? l.getValue() : tok.getCoveredText().toLowerCase(); }
/**
 * Asserts that the values of the given lemma annotations match the expected values. Both
 * lists are printed to standard output before the comparison. Nothing is checked or printed
 * when {@code aExpected} is {@code null}.
 *
 * @param aExpected
 *            the expected lemma values, or {@code null} to skip the check
 * @param aActual
 *            the lemma annotations to verify
 */
public static void assertLemma(String[] aExpected, Collection<Lemma> aActual) {
    if (aExpected == null) {
        return;
    }

    List<String> expectedValues = asList(aExpected);
    List<String> actualValues = new ArrayList<String>();
    for (Lemma lemma : aActual) {
        actualValues.add(lemma.getValue());
    }

    final String label = "Lemmas";
    System.out.printf("%-20s - Expected: %s%n", label, asCopyableString(expectedValues));
    System.out.printf("%-20s - Actual : %s%n", label, asCopyableString(actualValues));

    String expectedText = asCopyableString(expectedValues, true);
    String actualText = asCopyableString(actualValues, true);
    assertEquals(expectedText, actualText);
}
/**
 * Compares the values of a collection of lemma annotations against an array of expected
 * values. The expected and the actual list are written to standard output first. A
 * {@code null} expectation disables the check entirely.
 *
 * @param aExpected
 *            the expected lemma values, or {@code null} to skip the check
 * @param aActual
 *            the lemma annotations whose values are compared
 */
public static void assertLemma(String[] aExpected, Collection<Lemma> aActual) {
    if (aExpected != null) {
        List<String> wanted = asList(aExpected);

        List<String> found = new ArrayList<String>();
        for (Lemma each : aActual) {
            found.add(each.getValue());
        }

        System.out.printf("%-20s - Expected: %s%n", "Lemmas", asCopyableString(wanted));
        System.out.printf("%-20s - Actual : %s%n", "Lemmas", asCopyableString(found));

        assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
    }
}
/**
 * Counts how often each lemma / part-of-speech combination occurs among the tokens of a
 * JCas. The map key has the form {@code <lemma> ### <pos>}, where every {@code #} inside the
 * lemma value is replaced by {@code \#}.
 *
 * @param text
 *            the JCas whose {@link Token} annotations are counted
 * @return a map from the lemma / POS key to its number of occurrences
 */
protected Map<String, Integer> countTokenPoses(JCas text) {
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (Iterator<Annotation> it = text.getAnnotationIndex(Token.type).iterator(); it.hasNext();) {
        Token token = (Token) it.next();

        String escapedLemma = token.getLemma().getValue().replace("#", "\\#");
        String key = escapedLemma + " ### " + token.getPos().getPosValue();

        Integer seenSoFar = counts.get(key);
        counts.put(key, seenSoFar == null ? 1 : seenSoFar + 1);
    }
    return counts;
}
/**
 * Collects the lower-cased lemma values and the POS values of all tokens covered by an
 * annotation and delegates to {@code getSubstitutions(tokens, postags)}.
 * <p>
 * A token is only used when both its lemma value and its POS annotation are available. A
 * token lacking either is reported on {@code System.err} and skipped entirely, so the two
 * lists handed to the delegate always stay aligned.
 *
 * @param jcas
 *            the JCas containing the tokens
 * @param coveringAnnotation
 *            the annotation whose covered tokens are processed
 * @return the result of {@code getSubstitutions(tokens, postags)} for the usable tokens
 */
public List<String> getSubstitutions(JCas jcas, Annotation coveringAnnotation) {
    List<String> tokens = new ArrayList<String>();
    List<String> postags = new ArrayList<String>();

    for (Token t : JCasUtil.selectCovered(jcas, Token.class, coveringAnnotation)) {
        // Check up front instead of catching NullPointerException: with the catch, a token
        // that had a lemma but no POS was added to 'tokens' only, shifting the lists
        // against each other.
        if (t.getLemma() == null || t.getLemma().getValue() == null) {
            System.err.println("Couldn't read lemma value for token \"" + t.getCoveredText() + "\"");
            continue;
        }
        if (t.getPos() == null) {
            System.err.println("Couldn't read POS value for token \"" + t.getCoveredText() + "\"");
            continue;
        }
        tokens.add(t.getLemma().getValue().toLowerCase());
        postags.add(t.getPos().getPosValue());
    }

    return getSubstitutions(tokens, postags);
}
/**
 * Indexes the dependency relations of a JCas by their dependent token.
 * <p>
 * For every {@link Dependency} an entry is stored whose key describes the dependent as
 * {@code <begin> ### <lemma> ### <pos>} and whose value describes the relation as
 * {@code <dependencyType> ## <begin> ### <lemma> ### <pos>} of the governor. Every
 * {@code #} inside a lemma value is replaced by {@code \#}.
 *
 * @param text
 *            the JCas whose dependencies are indexed
 * @return a map from the dependent description to the relation / governor description
 */
protected Map<String, String> indexLemmaDepTree(JCas text) {
    Map<String, String> index = new HashMap<String, String>();
    for (Dependency relation : JCasUtil.select(text, Dependency.class)) {
        Token dependent = relation.getDependent();
        Token governor = relation.getGovernor();

        String dependentKey = dependent.getBegin() + " ### "
                + dependent.getLemma().getValue().replace("#", "\\#") + " ### "
                + dependent.getPos().getPosValue();

        String governorEntry = relation.getDependencyType() + " ## " + governor.getBegin()
                + " ### " + governor.getLemma().getValue().replace("#", "\\#") + " ### "
                + governor.getPos().getPosValue();

        index.put(dependentKey, governorEntry);
    }
    return index;
}
}
private static NodeInfo buildNodeInfo(JCas jcas, Token tokenAnno, int serial) throws CasTreeConverterException, UnsupportedPosTagStringException { String word = tokenAnno.getCoveredText(); String lemma = tokenAnno.getLemma().getValue(); String pos = tokenAnno.getPos().getPosValue(); // We rely on the fact the NamedEntity enum values have the same names as the ones // specified in the DKPro mapping (e.g. PERSON, ORGANIZATION) eu.excitementproject.eop.common.representation.parse.representation.basic.NamedEntity namedEntity=null; List<NamedEntity> namedEntities = JCasUtil.selectCovered(NamedEntity.class, tokenAnno); switch (namedEntities.size()) { case 0: break; // if no NER - ignore and move on case 1: namedEntity = eu.excitementproject.eop.common.representation.parse.representation.basic.NamedEntity.valueOf(namedEntities.get(0).getValue()); break; default: throw new CasTreeConverterException(String.format("Got %d NamedEntity annotations for token %s", namedEntities.size(), tokenAnno)); } return new DefaultNodeInfo(word, lemma, serial, namedEntity, new DefaultSyntacticInfo(new PennPartOfSpeech(pos))); }
/**
 * Converts a token annotation into a {@code CoreLabel}.
 * <p>
 * The label receives the token's covered text as original text, the token's text as word,
 * and the token's begin and end offsets. The lemma is the value of the token's lemma
 * annotation, or the token's text if no lemma is attached. The tag is only set when the
 * token has a POS annotation.
 *
 * @param aToken
 *            the token to convert
 * @return the populated label
 */
public static CoreLabel tokenToWord(Token aToken) {
    CoreLabel label = new CoreLabel();

    label.setOriginalText(aToken.getCoveredText());
    label.setWord(aToken.getText());
    label.setBeginPosition(aToken.getBegin());
    label.setEndPosition(aToken.getEnd());

    // Tokens without a lemma annotation use their own text as lemma.
    label.setLemma(aToken.getLemma() != null ? aToken.getLemma().getValue() : aToken.getText());

    if (aToken.getPos() != null) {
        label.setTag(aToken.getPos().getPosValue());
    }

    return label;
}