/**
 * Checks whether an already annotated CAS looks like good-quality text.
 *
 * <p>The CAS is only read by this method. It is considered good when it holds at least
 * {@code minTokQty} Token annotations and its POS annotations include at least one tag
 * value starting with "NN" and at least one starting with "VB". POS annotations that
 * carry no tag value are ignored instead of causing a NullPointerException.
 *
 * @param jcas An input CAS whose Token and POS annotations are inspected.
 * @param minTokQty The minimum number of tokens present to be considered good.
 * @return true if the CAS contains a high-quality text.
 * @throws AnalysisEngineProcessException part of the declared signature; the code in this
 *         method does not throw it explicitly.
 */
public static boolean checkCAS(JCas jcas, int minTokQty) throws AnalysisEngineProcessException {
    boolean hasNoun = false;
    boolean hasVerb = false;
    for (POS p : JCasUtil.select(jcas, POS.class)) {
        String tag = p.getPosValue();
        if (tag == null) {
            // A POS annotation may exist without a tag value; it cannot match either prefix.
            continue;
        }
        if (tag.startsWith("NN")) {
            hasNoun = true;
        }
        if (tag.startsWith("VB")) {
            hasVerb = true;
        }
        if (hasNoun && hasVerb) {
            // Both conditions are settled; further POS annotations cannot change the result.
            break;
        }
    }
    Collection<Token> toks = JCasUtil.select(jcas, Token.class);
    return toks.size() >= minTokQty && hasNoun && hasVerb;
}
}
// Create a POS annotation spanning outToken, set its tag value to pos.get(i),
// add it to the CAS indexes and attach it to the token.
POS outPos = new POS(aJCas, outToken.getBegin(), outToken.getEnd()); outPos.setPosValue(pos.get(i)); outPos.addToIndexes(); outToken.setPos(outPos);
if (pos != null && !pos.getType().equals(type)) { pos.addToIndexes(); pos.setBegin(aToken.getBegin()); pos.setEnd(aToken.getEnd()); pos.setPosValue(posValue); pos.setCoarseValue(cposValue); aToken.setPos(pos);
/**
 * Derives the coarse value of a POS annotation from the short name of its type.
 *
 * <p>Nothing happens when {@code pos} is {@code null} or when its type is exactly
 * {@code POS}. For any other type, the short type name must start with
 * {@code POS_TYPE_PREFIX}; the remainder after that prefix is interned and stored as the
 * coarse value.
 *
 * @param pos the annotation to update; may be {@code null}.
 * @throws IllegalArgumentException if the type is not {@code POS} itself and its short
 *         name does not start with {@code POS_TYPE_PREFIX}.
 */
public static void assignCoarseValue(POS pos) {
    if (pos == null) {
        return;
    }

    String shortName = pos.getType().getShortName();
    if (!StringUtils.equals(pos.getType().getName(), POS.class.getName())) {
        if (!shortName.startsWith(POS_TYPE_PREFIX)) {
            // The message previously lacked the space after the type name.
            throw new IllegalArgumentException("The type " + shortName
                    + " of the given POS annotation does not fulfill the convention of starting with prefix '"
                    + POS_TYPE_PREFIX + "'");
        }
        pos.setCoarseValue(shortName.substring(POS_TYPE_PREFIX.length()).intern());
    }
}
}
/**
 * Builds a POS annotation for a single outcome and adds it to the CAS indexes.
 *
 * <p>The annotation type is obtained from {@code mappingProvider} for the given outcome,
 * and the outcome itself is stored as the annotation's POS value.
 *
 * @param aJCas the CAS in which the annotation is created.
 * @param begin begin offset of the annotation.
 * @param end end offset of the annotation.
 * @param aOutcome the outcome used both for the type lookup and as the POS value.
 * @return the created and indexed annotation.
 */
private POS createPartOfSpeechAnnotationFromOutcome(JCas aJCas, int begin, int end,
        String aOutcome) {
    Type mappedType = mappingProvider.getTagType(aOutcome);

    POS created = (POS) aJCas.getCas().createAnnotation(mappedType, begin, end);
    created.setPosValue(aOutcome);
    created.addToIndexes();

    return created;
}
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { try { for (POS pos : JCasUtil.select(aJCas, POS.class)) { String text = pos.getCoveredText().toLowerCase(); String posTag = pos.getPosValue(); String wnss = ""; try { wnss = mWordNet.getLexName(text, posTag); } catch (IllegalArgumentException e) { logger.warn("Failed to process word '" + text + "' tag: '" + posTag); } if (!wnss.isEmpty()) { WNNS annot = new WNNS(aJCas, pos.getBegin(), pos.getEnd()); annot.setSuperSense(wnss); annot.addToIndexes(); //System.out.println(annot.getSuperSense() + " " + annot.getBegin() + ":" + annot.getEnd()); } } } catch (Exception e) { throw new AnalysisEngineProcessException(e); } } }
private void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getPosTagsLayer() == null) { return; } for (int i = 0; i < aCorpusData.getPosTagsLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] posTokens = aCorpusData.getPosTagsLayer() .getTokens(aCorpusData.getPosTagsLayer().getTag(i)); String value = aCorpusData.getPosTagsLayer().getTag(i).getString(); POS outPos = new POS(aJCas); outPos.setBegin(aTokens.get(posTokens[0].getID()).getBegin()); outPos.setEnd(aTokens.get(posTokens[0].getID()).getEnd()); outPos.setPosValue(value); outPos.addToIndexes(); // Set the POS to the token aTokens.get(posTokens[0].getID()).setPos(outPos); } }
for (POS pos : JCasUtil.selectCovered(getJCas(), POS.class, token.getBegin(), token.getEnd())) { if (pos.getBegin() == token.getBegin() && pos.getEnd() == token.getEnd()) { if (pos.getPosValue().equals(this.posTag)) { duplicate = true; break; POS pos = new POS(getJCas(), token.getBegin(), token.getEnd()); pos.setPosValue(this.posTag); pos.addToIndexes(); token.setPos(pos); token.addToIndexes();
String newTag = posMap.getProperty(oldPos.getPosValue()); int begin = oldPos.getBegin(); int end = oldPos.getEnd(); newPos.setPosValue(newTag); POSUtils.assignCoarseValue(newPos); oldPos.removeFromIndexes(); newPos.addToIndexes(); t.setPos(newPos); String newTag = posMap.getProperty(pos.getPosValue()); if (newTag != null) { pos.setPosValue(newTag);
/**
 * Creates a relation between the first two POS annotations of a short text and checks
 * that the relation's source and target features hold the first and second token.
 */
@Test
public void thatRelationAttachmentBehaviorOnCreateWorks() throws Exception {
    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .");

    // Give every token a POS annotation with the same offsets.
    for (Token token : select(jcas, Token.class)) {
        POS tag = new POS(jcas, token.getBegin(), token.getEnd());
        token.setPos(tag);
        tag.addToIndexes();
    }

    RelationAdapter sut = new RelationAdapter(featureSupportRegistry, null, depLayer,
            FEAT_REL_TARGET, FEAT_REL_SOURCE,
            asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

    List<POS> allPos = new ArrayList<>(select(jcas, POS.class));
    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));

    POS first = allPos.get(0);
    POS second = allPos.get(1);
    int documentLength = jcas.getDocumentText().length();

    AnnotationFS relation = sut.add(document, username, first, second, jcas, 0,
            documentLength);

    assertThat(FSUtil.getFeature(relation, FEAT_REL_SOURCE, Token.class))
            .isEqualTo(allTokens.get(0));
    assertThat(FSUtil.getFeature(relation, FEAT_REL_TARGET, Token.class))
            .isEqualTo(allTokens.get(1));
}
// Record the annotation's POS value and, separately, the short name of its type.
actualOriginal.add(posAnnotation.getPosValue()); actualMapped.add(posAnnotation.getType().getShortName());
} else { if (type.matches(txpAnn.pos)) { ((POS) a).setPosValue(annotation.get(position));
/**
 * Copies all Sentence, Token and POS annotations from one CAS into another.
 *
 * <p>A single {@code CasCopier} is used for all three passes, which run in the order
 * Sentence, Token, POS. Every copy is added to the indexes of the destination CAS.
 *
 * @param jCasSrc the CAS to copy from.
 * @param jCasDst the CAS to copy into.
 */
private void copyAnnotations(JCas jCasSrc, JCas jCasDst) {
    CasCopier casCopier = new CasCopier(jCasSrc.getCas(), jCasDst.getCas());

    for (Sentence sentence : JCasUtil.select(jCasSrc, Sentence.class)) {
        Sentence sentenceCopy = (Sentence) casCopier.copyFs(sentence);
        sentenceCopy.addToIndexes();
    }

    for (Token token : JCasUtil.select(jCasSrc, Token.class)) {
        Token tokenCopy = (Token) casCopier.copyFs(token);
        tokenCopy.addToIndexes();
    }

    for (POS tag : JCasUtil.select(jCasSrc, POS.class)) {
        POS tagCopy = (POS) casCopier.copyFs(tag);
        tagCopy.addToIndexes();
    }
}
}
// Create an annotation of the type the mapping provider returns for tt.tag(), spanning
// token t; store the interned tag (or null when there is no tag) in its "PosValue"
// feature, index it, attach it to the token and advance the counter i.
Type posTag = posMappingProvider.getTagType(tt.tag()); POS posAnno = (POS) cas.createAnnotation(posTag, t.getBegin(), t.getEnd()); posAnno.setStringValue(posTag.getFeatureByBaseName("PosValue"), tt.tag() != null ? tt.tag().intern() : null); posAnno.addToIndexes(); t.setPos(posAnno); i++;
private static Boolean containsOnlyNonContentPOSes(Token[] tokenArr) throws AlignmentComponentException { logger.debug("checking non content POSes only or not: "); String logline=""; Boolean nonContentPOSesOnly = true; for(Token t : tokenArr) { POS p = t.getPos(); if (p == null) { throw new AlignmentComponentException("Unable to Process this CAS: There is one (or more) token without POS annotation. The process requires POS and Lemma annotated."); } String s = p.getType().toString(); String typeString = s.substring(s.lastIndexOf(".") + 1); logline += t.getCoveredText() + "/" + typeString + ", "; if (!(isNonContentPos.containsKey(typeString)) ) { nonContentPOSesOnly = false; // break; // no need to continue. } } logger.debug(logline + " => " + nonContentPOSesOnly.toString()); return nonContentPOSesOnly; }
private void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getPosTagsLayer() == null) { return; } for (int i = 0; i < aCorpusData.getPosTagsLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] posTokens = aCorpusData.getPosTagsLayer() .getTokens(aCorpusData.getPosTagsLayer().getTag(i)); String value = aCorpusData.getPosTagsLayer().getTag(i).getString(); POS outPos = new POS(aJCas); outPos.setBegin(aTokens.get(posTokens[0].getID()).getBegin()); outPos.setEnd(aTokens.get(posTokens[0].getID()).getEnd()); outPos.setPosValue(value); outPos.addToIndexes(); // Set the POS to the token aTokens.get(posTokens[0].getID()).setPos(outPos); } }
token.getEnd())) { if (pos.getBegin() == token.getBegin() && pos.getEnd() == token.getEnd()) { if (pos.getPosValue().equals(this.posTag)) { duplicate = true; break; POS pos = new POS(getJCas(), token.getBegin(), token.getEnd()); pos.setPosValue(this.posTag); pos.addToIndexes(); token.setPos(pos); token.addToIndexes();
private void annotateTokenWithTag(JCas aJCas, String aToken, String aTag, int aCurrPosInText) { if (readToken) { // Token Token token = new Token(aJCas, aCurrPosInText, aToken.length() + aCurrPosInText); token.addToIndexes(); if (readPOS) { // Tag Type posTag = posMappingProvider.getTagType(aTag); POS pos = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd()); pos.setPosValue(aTag); POSUtils.assignCoarseValue(pos); pos.addToIndexes(); // Set the POS for the Token token.setPos(pos); } } } }
/**
 * With stacking allowed on the dependency layer, creates the same relation twice between
 * the first two POS annotations and checks that both relations hold the first token as
 * source and the second token as target.
 */
@Test
public void thatRelationStackingBehaviorOnCreateDoesNotThrowException() throws Exception {
    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

    // Give every token a POS annotation with the same offsets.
    for (Token token : select(jcas, Token.class)) {
        POS tag = new POS(jcas, token.getBegin(), token.getEnd());
        token.setPos(tag);
        tag.addToIndexes();
    }

    RelationAdapter sut = new RelationAdapter(featureSupportRegistry, null, depLayer,
            FEAT_REL_TARGET, FEAT_REL_SOURCE,
            asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

    List<POS> allPos = new ArrayList<>(select(jcas, POS.class));
    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));

    POS first = allPos.get(0);
    POS second = allPos.get(1);

    depLayer.setAllowStacking(true);

    AnnotationFS firstRelation = sut.add(document, username, first, second, jcas, 0,
            jcas.getDocumentText().length());
    AnnotationFS secondRelation = sut.add(document, username, first, second, jcas, 0,
            jcas.getDocumentText().length());

    assertThat(FSUtil.getFeature(firstRelation, FEAT_REL_SOURCE, Token.class))
            .isEqualTo(allTokens.get(0));
    assertThat(FSUtil.getFeature(firstRelation, FEAT_REL_TARGET, Token.class))
            .isEqualTo(allTokens.get(1));
    assertThat(FSUtil.getFeature(secondRelation, FEAT_REL_SOURCE, Token.class))
            .isEqualTo(allTokens.get(0));
    assertThat(FSUtil.getFeature(secondRelation, FEAT_REL_TARGET, Token.class))
            .isEqualTo(allTokens.get(1));
}