/**
 * Derives and assigns the coarse-grained POS value of the given annotation
 * from its UIMA type's short name.
 *
 * <p>If the annotation's type is a subtype of {@code POS} (i.e. its full type
 * name differs from {@code POS.class.getName()}), the short name must start
 * with {@code POS_TYPE_PREFIX}; the remainder after the prefix is interned
 * and stored as the coarse value. A plain {@code POS} annotation is left
 * untouched.
 *
 * @param pos the POS annotation to update; {@code null} is silently ignored
 * @throws IllegalArgumentException if the subtype's short name does not
 *         start with {@code POS_TYPE_PREFIX}
 */
public static void assignCoarseValue(POS pos) {
    if (pos == null) {
        return;
    }
    String shortName = pos.getType().getShortName();
    if (!StringUtils.equals(pos.getType().getName(), POS.class.getName())) {
        if (!shortName.startsWith(POS_TYPE_PREFIX)) {
            // FIX: added the missing space before "of" — the message used to
            // render as e.g. "The type POS_Vof the given POS annotation ...".
            throw new IllegalArgumentException("The type " + shortName
                    + " of the given POS annotation does not fulfill the convention of starting with prefix '"
                    + POS_TYPE_PREFIX + "'");
        }
        // intern(): coarse values come from a small closed set, so share instances.
        pos.setCoarseValue(shortName.substring(POS_TYPE_PREFIX.length()).intern());
    }
}
}
private static Boolean containsOnlyNonContentPOSes(Token[] tokenArr) throws AlignmentComponentException { logger.debug("checking non content POSes only or not: "); String logline=""; Boolean nonContentPOSesOnly = true; for(Token t : tokenArr) { POS p = t.getPos(); if (p == null) { throw new AlignmentComponentException("Unable to Process this CAS: There is one (or more) token without POS annotation. The process requires POS and Lemma annotated."); } String s = p.getType().toString(); String typeString = s.substring(s.lastIndexOf(".") + 1); logline += t.getCoveredText() + "/" + typeString + ", "; if (!(isNonContentPos.containsKey(typeString)) ) { nonContentPOSesOnly = false; // break; // no need to continue. } } logger.debug(logline + " => " + nonContentPOSesOnly.toString()); return nonContentPOSesOnly; }
/** * This utility checks if the token is one of non-content token type. * (e.g. "PUNC", "PP", "O", "CONJ", "ART"). Actual definition of non content POSes * are borrowed from a static definition set in IdenticalLemmaPhraseLinker. * * @param t The token to be checked. * @return */ private boolean isNonContentToken(Token t) throws ScoringComponentException { POS p = t.getPos(); if (p == null) { throw new ScoringComponentException("The module requires POS annotated for the Tokens, to check non-content words"); } String s = p.getType().toString(); String typeString = s.substring(s.lastIndexOf(".") + 1); //String logline = t.getCoveredText() + "/" + typeString + ", "; Boolean result = IdenticalLemmaPhraseLinker.isNonContentPos.containsKey(typeString); logger.debug(t.getCoveredText() + "/" + typeString + ": isNonContentToken: " + result); return result; }
// Strip everything up to and including the last dot of the POS type string
// (e.g. "...pos.NN" -> "NN") and append it to lemPos as a "/<type>" suffix.
String s = p.getType().toString(); String typeString = s.substring(s.lastIndexOf(".") + 1); lemPos += "/" + typeString;
// NOTE(review): condition fragment — true only when source token i-1 and
// target token j-1 carry the same full POS type name AND the rule resource
// yields rules for the two base forms under their canonical POS; the
// enclosing if-statement is outside this view — confirm there.
source.get(i-1).getPos().getType().getName().equals(target.get(j-1).getPos().getType().getName()) && getRulesFromResource(getTokenBaseForm(source.get(i-1)), new ByCanonicalPartOfSpeech(source.get(i-1).getPos().getType().getShortName()), getTokenBaseForm(target.get(j-1)), new ByCanonicalPartOfSpeech(target.get(j-1).getPos().getType().getShortName())))
// Record this POS annotation's short type name (e.g. "NN") in actualMapped.
actualMapped.add(posAnnotation.getType().getShortName());
// Record this POS annotation's short type name (e.g. "NN") in actualMapped.
actualMapped.add(posAnnotation.getType().getShortName());
// Presumably counts annotations of this POS's UIMA type in the JCas —
// confirm countAnnotation's contract (defined outside this view).
int posCount = countAnnotation(aJCas, pos.getType());
// NOTE(review): this span appears spliced/garbled by extraction — e.g.
// "selectCovering(jcas, + token..." is not valid Java; left byte-identical.
// Apparent intent (to confirm against the real file): for verbs ("V"),
// nouns ("N.*") and adjectives ("ADJ") whose lemma is not an auxiliary or
// modal, look up the most frequent sense and verb syntactic behaviour in
// Uby, then write token, lemma, POS short name and semantic field info.
if ((token.getPos().getType().getShortName().equals("V") || token.getPos().getType().getShortName().matches("N.*") || token.getPos().getType().getShortName().equals("ADJ")) && !auxiliariesAndModals.contains(token.getLemma().getValue())) { mfs = getMostFrequentSense(uby.getLexicalEntries(token.getLemma().getValue(), null, null)); String syntacticBehaviour = getSyntacticBehaviour(token.getPos().getType().getShortName(),uby.getLexicalEntries(token.getLemma().getValue(), EPartOfSpeech.verb, null)); List<SemanticField> semanticFieldAnnotations = JCasUtil.selectCovering(jcas, + token.getPos().getType().getShortName() + "\n" + "\t syntax: " +syntacticBehaviour + "\n" writeTokenAndSemanticField(token.getCoveredText() + "\t" + token.getLemma().getValue() + "\t" + token.getPos().getType().getShortName() + "\n" );
// Guard: POS annotation exists but its UIMA type differs from the expected
// `type` (branch body continues outside this view).
if (pos != null && !pos.getType().equals(type)) {