/**
 * Creates a part-of-speech annotation for a tagger outcome and adds it to the CAS indexes.
 * <p>
 * The concrete annotation type is looked up via {@code mappingProvider} using the outcome as
 * the tag; the outcome is also stored as the annotation's POS value.
 *
 * @param aJCas
 *            the JCas in which the annotation is created.
 * @param begin
 *            begin offset of the annotation.
 * @param end
 *            end offset of the annotation.
 * @param aOutcome
 *            the tag used both for the type lookup and as the POS value.
 * @return the newly created and indexed annotation.
 */
private POS createPartOfSpeechAnnotationFromOutcome(JCas aJCas, int begin, int end,
        String aOutcome)
{
    // Resolve the annotation type first, then instantiate it over the requested span.
    Type mappedType = mappingProvider.getTagType(aOutcome);
    POS annotation = (POS) aJCas.getCas().createAnnotation(mappedType, begin, end);

    annotation.setPosValue(aOutcome);
    annotation.addToIndexes();

    return annotation;
}
/**
 * Copies all {@code Sentence}, {@code Token} and {@code POS} annotations (in that order) from
 * the source JCas into the destination JCas and adds each copy to the destination indexes.
 *
 * @param jCasSrc
 *            the JCas to read annotations from.
 * @param jCasDst
 *            the JCas receiving the copies.
 */
private void copyAnnotations(JCas jCasSrc, JCas jCasDst)
{
    // A single copier is shared by all three passes.
    CasCopier casCopier = new CasCopier(jCasSrc.getCas(), jCasDst.getCas());

    for (Sentence sentence : JCasUtil.select(jCasSrc, Sentence.class)) {
        ((Sentence) casCopier.copyFs(sentence)).addToIndexes();
    }

    for (Token token : JCasUtil.select(jCasSrc, Token.class)) {
        ((Token) casCopier.copyFs(token)).addToIndexes();
    }

    for (POS pos : JCasUtil.select(jCasSrc, POS.class)) {
        ((POS) casCopier.copyFs(pos)).addToIndexes();
    }
}
}
/**
 * Builds a one-sentence document with a POS annotation on every token, creates a relation
 * between the first two POS annotations and checks that the relation's source and target
 * features resolve to the first and second token respectively.
 */
@Test
public void thatRelationAttachmentBehaviorOnCreateWorks() throws Exception
{
    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .");

    // Give every token a POS annotation covering the same span.
    for (Token token : select(jcas, Token.class)) {
        POS tokenPos = new POS(jcas, token.getBegin(), token.getEnd());
        token.setPos(tokenPos);
        tokenPos.addToIndexes();
    }

    RelationAdapter sut = new RelationAdapter(featureSupportRegistry, null, depLayer,
            FEAT_REL_TARGET, FEAT_REL_SOURCE,
            asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

    List<POS> allPos = new ArrayList<>(select(jcas, POS.class));
    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));

    POS sourcePos = allPos.get(0);
    POS targetPos = allPos.get(1);

    int windowEnd = jcas.getDocumentText().length();
    AnnotationFS relation = sut.add(document, username, sourcePos, targetPos, jcas, 0,
            windowEnd);

    assertThat(FSUtil.getFeature(relation, FEAT_REL_SOURCE, Token.class))
            .isEqualTo(allTokens.get(0));
    assertThat(FSUtil.getFeature(relation, FEAT_REL_TARGET, Token.class))
            .isEqualTo(allTokens.get(1));
}
/**
 * With stacking enabled on the dependency layer, adding the same relation twice must succeed
 * and both relations must point at the first token (source) and second token (target).
 */
@Test
public void thatRelationStackingBehaviorOnCreateDoesNotThrowException() throws Exception
{
    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

    // Give every token a POS annotation covering the same span.
    for (Token token : select(jcas, Token.class)) {
        POS tokenPos = new POS(jcas, token.getBegin(), token.getEnd());
        token.setPos(tokenPos);
        tokenPos.addToIndexes();
    }

    RelationAdapter sut = new RelationAdapter(featureSupportRegistry, null, depLayer,
            FEAT_REL_TARGET, FEAT_REL_SOURCE,
            asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

    List<POS> allPos = new ArrayList<>(select(jcas, POS.class));
    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));

    POS sourcePos = allPos.get(0);
    POS targetPos = allPos.get(1);

    depLayer.setAllowStacking(true);

    // Two relations between the very same endpoints.
    AnnotationFS first = sut.add(document, username, sourcePos, targetPos, jcas, 0,
            jcas.getDocumentText().length());
    AnnotationFS second = sut.add(document, username, sourcePos, targetPos, jcas, 0,
            jcas.getDocumentText().length());

    assertThat(FSUtil.getFeature(first, FEAT_REL_SOURCE, Token.class))
            .isEqualTo(allTokens.get(0));
    assertThat(FSUtil.getFeature(first, FEAT_REL_TARGET, Token.class))
            .isEqualTo(allTokens.get(1));
    assertThat(FSUtil.getFeature(second, FEAT_REL_SOURCE, Token.class))
            .isEqualTo(allTokens.get(0));
    assertThat(FSUtil.getFeature(second, FEAT_REL_TARGET, Token.class))
            .isEqualTo(allTokens.get(1));
}
/**
 * Tags every token of every sentence in the CAS with a part-of-speech annotation.
 * <p>
 * For each sentence the covered tokens are converted into a dependency tree, the tagger
 * resource is run on that tree, and for each token a POS annotation of the mapped type is
 * created, given its coarse value, indexed and attached to the token.
 *
 * @param aJCas
 *            the JCas to process.
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
    CAS cas = aJCas.getCas();

    dictModelProvider.configure(cas);
    posTaggingModelProvider.configure(cas);
    posMappingProvider.configure(cas);

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        List<Token> tokens = selectCovered(aJCas, Token.class, sentence);
        List<String> tokenTexts = asList(toText(tokens).toArray(new String[tokens.size()]));

        DEPTree tree = NLPGetter.toDEPTree(tokenTexts);

        AbstractComponent tagger = posTaggingModelProvider.getResource();
        tagger.process(tree);

        String[] posTags = tree.getPOSTags();

        int i = 0;
        for (Token t : tokens) {
            // NOTE(review): the tag array is read with an offset of one relative to the
            // token list - presumably slot 0 belongs to an artificial root node; confirm.
            String rawTag = posTags[i + 1];

            // Intern once and use the same instance for the type lookup and for the value
            // stored in the CAS. Previously only the lookup received the interned string.
            String tag = rawTag != null ? rawTag.intern() : null;

            Type posTag = posMappingProvider.getTagType(tag);
            POS posAnno = (POS) cas.createAnnotation(posTag, t.getBegin(), t.getEnd());
            posAnno.setPosValue(tag);
            POSUtils.assignCoarseValue(posAnno);
            posAnno.addToIndexes();
            t.setPos(posAnno);

            i++;
        }
    }
}
/**
 * Tags all tokens of the document and creates one POS annotation per tagged token.
 * <p>
 * The tokens covered by the full document text are passed to {@code tagTweetTokens} together
 * with the model resource. For each result a POS annotation of the mapped type is created,
 * its value is set to the interned tag, it is indexed, and it is attached to the token.
 *
 * @param jCas
 *            the JCas to process.
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method.
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException
{
    modelProvider.configure(jCas.getCas());
    mappingProvider.configure(jCas.getCas());

    int documentLength = jCas.getDocumentText().length();
    List<Token> allTokens = selectCovered(jCas, Token.class, 0, documentLength);

    for (TaggedToken tagged : tagTweetTokens(allTokens, modelProvider.getResource())) {
        Type mappedType = mappingProvider.getTagType(tagged.tag);

        POS posAnno = (POS) jCas.getCas().createAnnotation(mappedType, tagged.getBegin(),
                tagged.getEnd());
        posAnno.setPosValue(tagged.tag.intern());
        posAnno.addToIndexes();

        tagged.token.setPos(posAnno);
    }
}
/**
 * Creates and indexes a POS annotation for a preterminal tree node.
 * <p>
 * If a POS mapping provider is available, the annotation type is looked up from the node's
 * label; otherwise a plain {@code POS} annotation is created. The label becomes the POS value
 * (interned when {@code internTags} is set and the label is not null), and the coarse value
 * is assigned before indexing.
 *
 * @param aJCas
 *            the JCas in which the annotation is created.
 * @param aPreterminal
 *            the node whose label supplies the tag.
 * @param aBegin
 *            begin offset of the annotation.
 * @param aEnd
 *            end offset of the annotation.
 * @return the newly created and indexed annotation.
 */
private POS createPOS(JCas aJCas, PennTreeNode aPreterminal, int aBegin, int aEnd)
{
    POS result;
    if (posMappingProvider == null) {
        result = new POS(aJCas, aBegin, aEnd);
    }
    else {
        Type mappedType = posMappingProvider.getTagType(aPreterminal.getLabel());
        result = (POS) aJCas.getCas().createAnnotation(mappedType, aBegin, aEnd);
    }

    String label = aPreterminal.getLabel();
    if (internTags && label != null) {
        label = label.intern();
    }
    result.setPosValue(label);

    POSUtils.assignCoarseValue(result);
    result.addToIndexes();

    return result;
}
private void annotateTokenWithTag(JCas aJCas, String aToken, String aTag, int aCurrPosInText) { if (readToken) { // Token Token token = new Token(aJCas, aCurrPosInText, aToken.length() + aCurrPosInText); token.addToIndexes(); if (readPOS) { // Tag Type posTag = posMappingProvider.getTagType(aTag); POS pos = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd()); pos.setPosValue(aTag); POSUtils.assignCoarseValue(pos); pos.addToIndexes(); // Set the POS for the Token token.setPos(pos); } } } }
private void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getPosTagsLayer() == null) { return; } for (int i = 0; i < aCorpusData.getPosTagsLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] posTokens = aCorpusData.getPosTagsLayer() .getTokens(aCorpusData.getPosTagsLayer().getTag(i)); String value = aCorpusData.getPosTagsLayer().getTag(i).getString(); POS outPos = new POS(aJCas); outPos.setBegin(aTokens.get(posTokens[0].getID()).getBegin()); outPos.setEnd(aTokens.get(posTokens[0].getID()).getEnd()); outPos.setPosValue(value); outPos.addToIndexes(); // Set the POS to the token aTokens.get(posTokens[0].getID()).setPos(outPos); } }
private void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getPosTagsLayer() == null) { return; } for (int i = 0; i < aCorpusData.getPosTagsLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] posTokens = aCorpusData.getPosTagsLayer() .getTokens(aCorpusData.getPosTagsLayer().getTag(i)); String value = aCorpusData.getPosTagsLayer().getTag(i).getString(); POS outPos = new POS(aJCas); outPos.setBegin(aTokens.get(posTokens[0].getID()).getBegin()); outPos.setEnd(aTokens.get(posTokens[0].getID()).getEnd()); outPos.setPosValue(value); outPos.addToIndexes(); // Set the POS to the token aTokens.get(posTokens[0].getID()).setPos(outPos); } }
private void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getPosTagsLayer() == null) { return; } for (int i = 0; i < aCorpusData.getPosTagsLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] posTokens = aCorpusData.getPosTagsLayer() .getTokens(aCorpusData.getPosTagsLayer().getTag(i)); String value = aCorpusData.getPosTagsLayer().getTag(i).getString(); POS outPos = new POS(aJCas); outPos.setBegin(aTokens.get(posTokens[0].getID()).getBegin()); outPos.setEnd(aTokens.get(posTokens[0].getID()).getEnd()); outPos.setPosValue(value); outPos.addToIndexes(); // Set the POS to the token aTokens.get(posTokens[0].getID()).setPos(outPos); } }
private void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getPosTagsLayer() == null) { return; } for (int i = 0; i < aCorpusData.getPosTagsLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] posTokens = aCorpusData.getPosTagsLayer() .getTokens(aCorpusData.getPosTagsLayer().getTag(i)); String value = aCorpusData.getPosTagsLayer().getTag(i).getString(); POS outPos = new POS(aJCas); outPos.setBegin(aTokens.get(posTokens[0].getID()).getBegin()); outPos.setEnd(aTokens.get(posTokens[0].getID()).getEnd()); outPos.setPosValue(value); POSUtils.assignCoarseValue(outPos); outPos.addToIndexes(); // Set the POS to the token aTokens.get(posTokens[0].getID()).setPos(outPos); } }
/**
 * With cross-sentence relations disabled, creating a relation from the first to the last POS
 * annotation of a two-sentence document must fail with a
 * {@code MultipleSentenceCoveredException}.
 */
@Test
public void thatRelationCrossSentenceBehaviorOnCreateThrowsException() throws Exception
{
    depLayer.setCrossSentence(false);

    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

    // Give every token a POS annotation covering the same span.
    for (Token token : select(jcas, Token.class)) {
        POS tokenPos = new POS(jcas, token.getBegin(), token.getEnd());
        token.setPos(tokenPos);
        tokenPos.addToIndexes();
    }

    RelationAdapter sut = new RelationAdapter(featureSupportRegistry, null, depLayer,
            FEAT_REL_TARGET, FEAT_REL_SOURCE,
            asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

    List<POS> allPos = new ArrayList<>(select(jcas, POS.class));

    // First and last annotation of the document.
    POS first = allPos.get(0);
    POS last = allPos.get(allPos.size() - 1);

    assertThatExceptionOfType(MultipleSentenceCoveredException.class)
            .isThrownBy(() -> sut.add(document, username, first, last, jcas, 0,
                    jcas.getDocumentText().length()))
            .withMessageContaining("multiple sentences");
}
/**
 * Creates a relation from the first to the last POS annotation while cross-sentence relations
 * are allowed, then disables them and checks that validation yields exactly one error
 * message (compared ignoring its {@code source} and {@code message} fields).
 */
@Test
public void thatRelationCrossSentenceBehaviorOnValidateGeneratesErrors() throws Exception
{
    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

    // Give every token a POS annotation covering the same span.
    for (Token token : select(jcas, Token.class)) {
        POS tokenPos = new POS(jcas, token.getBegin(), token.getEnd());
        token.setPos(tokenPos);
        tokenPos.addToIndexes();
    }

    RelationAdapter sut = new RelationAdapter(featureSupportRegistry, null, depLayer,
            FEAT_REL_TARGET, FEAT_REL_SOURCE,
            asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

    List<POS> allPos = new ArrayList<>(select(jcas, POS.class));

    // First and last annotation of the document.
    POS first = allPos.get(0);
    POS last = allPos.get(allPos.size() - 1);

    // Create the relation while it is still permitted ...
    depLayer.setCrossSentence(true);
    sut.add(document, username, first, last, jcas, 0, jcas.getDocumentText().length());

    // ... then forbid it and validate.
    depLayer.setCrossSentence(false);
    assertThat(sut.validate(jcas))
            .extracting(Pair::getLeft)
            .usingElementComparatorIgnoringFields("source", "message")
            .containsExactly(LogMessage.error(null, ""));
}
/**
 * Parses the rules in {@code symbolic1.rules}, reads a CoNLL 2006 document, adds a POS
 * annotation with value {@code "pronoun"} at offsets 8-9 and checks that the possible values
 * generated for the {@code value} feature of the first lemma are exactly {@code "good"}.
 */
@Test
public void testSimpleSymbolicRules() throws Exception
{
    ParsedConstraints constraints;
    // Close the rules file once parsing is done; previously the stream was never closed.
    try (FileInputStream rules = new FileInputStream(
            "src/test/resources/rules/symbolic1.rules")) {
        ConstraintsGrammar parser = new ConstraintsGrammar(rules);
        Parse p = parser.Parse();
        constraints = p.accept(new ParserVisitor());
    }

    JCas jcas = JCasFactory.createJCas();

    CollectionReader reader = createReader(Conll2006Reader.class,
            Conll2006Reader.PARAM_SOURCE_LOCATION, "src/test/resources/text/1.conll");
    reader.getNext(jcas.getCas());

    POS pos = new POS(jcas, 8, 9);
    pos.setPosValue("pronoun");
    pos.addToIndexes();

    Evaluator constraintsEvaluator = new ValuesGenerator();

    Lemma lemma = select(jcas, Lemma.class).iterator().next();

    List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues(lemma,
            "value", constraints);

    List<PossibleValue> expectedOutput = new ArrayList<>();
    expectedOutput.add(new PossibleValue("good", true));

    assertEquals(expectedOutput, possibleValues);
}
@Test public void thatRelationStackingBehaviorOnCreateThrowsException() throws Exception { depLayer.setAllowStacking(false); TokenBuilder<Token, Sentence> builder = new TokenBuilder<>(Token.class, Sentence.class); builder.buildTokens(jcas, "This is a test .\nThis is sentence two ."); for (Token t : select(jcas, Token.class)) { POS pos = new POS(jcas, t.getBegin(), t.getEnd()); t.setPos(pos); pos.addToIndexes(); } RelationAdapter sut = new RelationAdapter(featureSupportRegistry, null, depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE, asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors); List<POS> posAnnotations = new ArrayList<>(select(jcas, POS.class)); POS source = posAnnotations.get(0); POS target = posAnnotations.get(1); // First annotation should work sut.add(document, username, source, target, jcas, 0, jcas.getDocumentText().length()); // Second one at the same location should cause an error assertThatExceptionOfType(AnnotationException.class) .isThrownBy(() -> sut.add(document, username, source, target, jcas, 0, jcas.getDocumentText().length())) .withMessageContaining("stacking is not enabled"); }
// Create a POS annotation with the same begin/end offsets as t, set it on t, and index it.
POS pos = new POS(jcas, t.getBegin(), t.getEnd()); t.setPos(pos); pos.addToIndexes();
/**
 * Adds the tokens "Hallo", "Welt" and "!" to the builder, each with an indexed POS annotation
 * covering the token and carrying the values "I", "N" and "SENT" respectively.
 *
 * @param aBuilder
 *            the builder the tokens are added to.
 * @param aJCas
 *            the JCas in which the POS annotations are created.
 */
@SuppressWarnings("unused")
private void getCas1(JCasBuilder aBuilder, JCas aJCas)
{
    // Token text paired with its POS value, in document order.
    String[][] textAndTag = { { "Hallo", "I" }, { "Welt", "N" }, { "!", "SENT" } };

    for (String[] entry : textAndTag) {
        Token token = aBuilder.add(entry[0], Token.class);

        POS pos = new POS(aJCas, token.getBegin(), token.getEnd());
        pos.setPosValue(entry[1]);
        pos.addToIndexes();

        token.setPos(pos);
    }
}
/**
 * Adds the tokens "Hallo", "Welt" and "!" to the builder, each with an indexed POS annotation
 * covering the token and carrying the values "O", "N" and "SENT" respectively.
 *
 * @param aBuilder
 *            the builder the tokens are added to.
 * @param aJCas
 *            the JCas in which the POS annotations are created.
 */
@SuppressWarnings("unused")
private void getCas2(JCasBuilder aBuilder, JCas aJCas)
{
    // Token text paired with its POS value, in document order.
    String[][] textAndTag = { { "Hallo", "O" }, { "Welt", "N" }, { "!", "SENT" } };

    for (String[] entry : textAndTag) {
        Token token = aBuilder.add(entry[0], Token.class);

        POS pos = new POS(aJCas, token.getBegin(), token.getEnd());
        pos.setPosValue(entry[1]);
        pos.addToIndexes();

        token.setPos(pos);
    }
}
}