jcas.setDocumentText(metaText.toString() + sb.toString()); ae.process(jcas); Collection<IdentifiedAnnotation> collection = JCasUtil.select(jcas, IdentifiedAnnotation.class); Iterator<IdentifiedAnnotation> iterator = collection.iterator(); while (iterator.hasNext()) {
/**
 * Returns the average number of tokens per chunk in a view.
 *
 * <p>For every {@code Chunk} selected from the view, the {@code Token} annotations it covers
 * are counted; the total is then divided by the number of chunks.
 *
 * @param view
 *            the view of the JCas
 * @return average token count over all chunks, or {@code 0.0} when the view contains no chunks
 */
private double getAverageNounPhraseTokenLength(JCas view) {
    int chunkCount = 0;
    int tokenCount = 0;
    // Count chunks and their covered tokens in a single pass over the selection.
    for (Chunk chunk : JCasUtil.select(view, Chunk.class)) {
        chunkCount++;
        tokenCount += JCasUtil.selectCovered(view, Token.class, chunk).size();
    }
    if (chunkCount == 0) {
        // Without this guard the division below would be 0 / 0.0, i.e. NaN.
        return 0.0;
    }
    return tokenCount / (double) chunkCount;
}
}
/**
 * Fetches the single {@code TextClassificationOutcome} annotation from the given CAS.
 *
 * @param jcas
 *            the CAS to read the outcome from
 * @return the only outcome annotation present
 * @throws IllegalStateException
 *             if the number of outcome annotations is not exactly one
 */
private TextClassificationOutcome getOutcome(JCas jcas) {
    List<TextClassificationOutcome> found =
            new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));
    boolean exactlyOne = found.size() == 1;
    if (exactlyOne) {
        return found.get(0);
    }
    throw new IllegalStateException("There should be exactly one TC outcome");
}
@Test public void testProcess() throws AnalysisEngineProcessException, ResourceInitializationException { final String text = "The fox jumps over the dog."; jCas.setDocumentText(text); processJCas(); final Collection<Sentence> select = JCasUtil.select(jCas, Sentence.class); final Sentence s1 = select.iterator().next(); final List<Dependency> dependencies = JCasUtil.selectCovered(jCas, Dependency.class, s1); // We could test the output here, but its so model dependent its not // worth it, as long as annotations have been created" // 7 = 6 words + 1 punctuation, each should have a dependency assertEquals(7, dependencies.size()); } }
/**
 * Checks that four {@code Person} annotations with the expected covered texts are produced,
 * in index order, and that no {@code Organisation} annotation is created.
 */
@Test
public void testSingular() throws Exception {
    jCas.setDocumentText("I went to the market and met Sally. She told me he would be late.");
    addWordTokens(jCas);
    processJCas();

    // Expected covered text of each Person annotation, by index position
    String[] expectedPeople = {"I", "She", "me", "he"};

    assertEquals(4, JCasUtil.select(jCas, Person.class).size());
    for (int position = 0; position < expectedPeople.length; position++) {
        assertEquals(
                expectedPeople[position],
                JCasUtil.selectByIndex(jCas, Person.class, position).getCoveredText());
    }

    assertEquals(0, JCasUtil.select(jCas, Organisation.class).size());
}
@Test public void test() throws Exception { String text = "This is some text. It has three sentences. The first sentence has four words."; jCas.setDocumentText(text); SimplePipeline.runPipeline(jCas, ae); assertEquals(3, JCasUtil.select(jCas, Sentence.class).size()); // 3 sentences Sentence s1 = JCasUtil.selectByIndex(jCas, Sentence.class, 0); List<WordToken> tokens = JCasUtil.selectCovered(jCas, WordToken.class, s1); assertEquals(5, tokens.size()); // 5 tokens in the first sentence assertEquals("NN", tokens.get(3).getPartOfSpeech()); // 4th token of first sentence is a noun List<PhraseChunk> phrases = JCasUtil.selectCovered(jCas, PhraseChunk.class, s1); assertEquals(3, phrases.size()); // 3 chunks in the first sentence assertEquals( "some text", phrases.get(2).getCoveredText()); // 3rd chunk of 1st sentence is "some text" }
@Test public void testPreciseConstructRelativePronoun() throws AnalysisEngineProcessException, ResourceInitializationException { String text = "The police want to catch a man who ran away."; // man - who jCas.setDocumentText(text); processJCas(); processJCasWithSieve(4); List<ReferenceTarget> targets = new ArrayList<>(JCasUtil.select(jCas, ReferenceTarget.class)); assertEquals(1, targets.size()); }
/**
 * Checks that two {@code Organisation} annotations with the expected covered texts are
 * produced, in index order, and that no {@code Person} annotation is created.
 */
@Test
public void testPlural() throws Exception {
    jCas.setDocumentText(
            "They were last seen running towards the school, making a nuisance of themselves.");
    addWordTokens(jCas);
    processJCas();

    // Expected covered text of each Organisation annotation, by index position
    String[] expectedGroups = {"They", "themselves"};

    assertEquals(2, JCasUtil.select(jCas, Organisation.class).size());
    for (int position = 0; position < expectedGroups.length; position++) {
        assertEquals(
                expectedGroups[position],
                JCasUtil.selectByIndex(jCas, Organisation.class, position).getCoveredText());
    }

    assertEquals(0, JCasUtil.select(jCas, Person.class).size());
}
/**
 * Returns the one {@code TextClassificationOutcome} annotation stored in the CAS.
 *
 * @param jcas
 *            the CAS holding the outcome annotation
 * @return the single outcome annotation
 * @throws IllegalStateException
 *             if the CAS does not contain exactly one outcome annotation
 */
private TextClassificationOutcome getOutcome(JCas jcas) {
    List<TextClassificationOutcome> candidates = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));

    int count = candidates.size();
    if (count == 1) {
        return candidates.get(0);
    }
    throw new IllegalStateException("There should be exactly one TC outcome");
}
@Test public void testWiithText() throws Exception { String text = "This is some text. It has three sentences. The first sentence has four words."; jCas.setDocumentText(text); Text t1 = new Text(jCas, 19, 43); t1.addToIndexes(); Text t2 = new Text(jCas, 43, jCas.getDocumentText().length()); t2.addToIndexes(); SimplePipeline.runPipeline(jCas, ae); assertEquals(2, JCasUtil.select(jCas, Sentence.class).size()); // 2 sentences // note due to text the first sentence annotation is the second in the text Sentence s1 = JCasUtil.selectByIndex(jCas, Sentence.class, 0); List<WordToken> tokens = JCasUtil.selectCovered(jCas, WordToken.class, s1); assertEquals(5, tokens.size()); // 5 tokens in the first sentence assertEquals("NNS", tokens.get(3).getPartOfSpeech()); // 4th token of first sentence is a noun List<PhraseChunk> phrases = JCasUtil.selectCovered(jCas, PhraseChunk.class, s1); assertEquals(3, phrases.size()); // 3 chunks in the first sentence assertEquals( "three sentences", phrases.get(2).getCoveredText()); // 3rd chunk of 1st sentence is "some text" } }
/**
 * Produces a single feature named {@code FN_NR_OF_SENTENCES} holding a sentence count.
 *
 * <p>When no classification unit is given, all {@code Sentence} annotations in the CAS are
 * counted; otherwise only those covered by the unit.
 *
 * @param jcas
 *            the CAS to read sentences from
 * @param classificationUnit
 *            the unit restricting the count, or {@code null} to count over the whole CAS
 * @return the feature wrapped in a list
 */
@Override
public List<Feature> extract(JCas jcas, TextClassificationUnit classificationUnit)
        throws TextClassificationException {
    int sentenceCount = (classificationUnit != null)
            ? JCasUtil.selectCovered(jcas, Sentence.class, classificationUnit).size()
            : JCasUtil.select(jcas, Sentence.class).size();

    return new Feature(FN_NR_OF_SENTENCES, sentenceCount).asList();
}
}
/**
 * Processes a sentence mentioning "people" twice with sieve 8 and expects exactly one
 * {@code ReferenceTarget}.
 */
@Test
public void testProperHeadMatchSameNumbers()
        throws AnalysisEngineProcessException, ResourceInitializationException {
    jCas.setDocumentText("The 200 people visited and then the people left.");
    processJCas();
    processJCasWithSieve(8);

    int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
    assertEquals(1, targetCount);
}
/**
 * Checks that three {@code Chemical} annotations with the expected covered texts are produced,
 * in index order, from a document that mentions four CAS-number-like strings.
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
    jCas.setDocumentText(
            "The CAS Number for water is 7732-18-5, but carbon could be either CASRN:7440-44-0 or CAS Registry Number 7782-42-5. CAS Number 7440-44-5 is not valid.");
    processJCas();

    // Expected covered text of each Chemical annotation, by index position
    String[] expectedNumbers = {"7732-18-5", "7440-44-0", "7782-42-5"};

    assertEquals(3, JCasUtil.select(jCas, Chemical.class).size());
    for (int position = 0; position < expectedNumbers.length; position++) {
        assertEquals(
                expectedNumbers[position],
                JCasUtil.selectByIndex(jCas, Chemical.class, position).getCoveredText());
    }
}
}
/**
 * Appends the outcome string of every {@code TextClassificationOutcome} in the CAS to
 * {@code outcomeVector}, followed by a single space.
 *
 * @param aJCas
 *            the CAS whose outcome annotations are read
 * @throws Exception
 *             declared by the original signature
 */
private void processOutcome(JCas aJCas) throws Exception {
    for (TextClassificationOutcome annotation : JCasUtil.select(aJCas,
            TextClassificationOutcome.class)) {
        outcomeVector.append(annotation.getOutcome());
    }
    // The space is appended once, after all outcomes of this CAS
    outcomeVector.append(" ");
}
/**
 * Processes a sentence with two "south of ..." mentions with sieve 8 and expects exactly one
 * {@code ReferenceTarget}.
 */
@Test
public void testProperHeadMatchSameLocation()
        throws AnalysisEngineProcessException, ResourceInitializationException {
    jCas.setDocumentText(
            "We visited the south of Amercia and travelled to the deep south of America.");
    processJCas();
    processJCasWithSieve(8);

    int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
    assertEquals(1, targetCount);
}
/**
 * Checks that exactly one {@code Vulnerability} annotation is produced and that it covers the
 * lower-case CVE identifier in the text.
 */
@Test
public void test() throws Exception {
    jCas.setDocumentText(
            "The second CVE to be issued, cve-1999-0002, describes a buffer overflow in NFS mountd.");
    processJCas();

    assertEquals(1, JCasUtil.select(jCas, Vulnerability.class).size());

    Vulnerability onlyMatch = JCasUtil.selectByIndex(jCas, Vulnerability.class, 0);
    String coveredText = onlyMatch.getCoveredText();
    assertEquals("cve-1999-0002", coveredText);
}
}
/**
 * Writes the outcome strings of all {@code TextClassificationOutcome} annotations in the CAS
 * to {@code outcomeVector}, then terminates the entry with one space.
 *
 * @param aJCas
 *            the CAS to read outcome annotations from
 * @throws Exception
 *             declared by the original signature
 */
private void processOutcome(JCas aJCas) throws Exception {
    List<TextClassificationOutcome> annotations = new ArrayList<TextClassificationOutcome>(
            JCasUtil.select(aJCas, TextClassificationOutcome.class));

    for (TextClassificationOutcome annotation : annotations) {
        String label = annotation.getOutcome();
        outcomeVector.append(label);
    }

    outcomeVector.append(" ");
}
/**
 * Processes a sentence mentioning "the south of ..." and "the north of ..." with sieve 8 and
 * expects no {@code ReferenceTarget} at all.
 */
@Test
public void testProperHeadMatchDifferentLocations()
        throws AnalysisEngineProcessException, ResourceInitializationException {
    jCas.setDocumentText("We visited the south of Amercia and went to the north of America.");
    processJCas();
    processJCasWithSieve(8);

    int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
    assertEquals(0, targetCount);
}
/**
 * Checks that two {@code DocumentReference} annotations with the expected covered texts are
 * produced, in index order.
 */
@Test
public void test() throws Exception {
    jCas.setDocumentText(
            "Document 123 was produced, and complemented letters 56, 59 and 57, 12 is not a resolution.");
    processJCas();

    // Expected covered text of each DocumentReference annotation, by index position
    String[] expectedReferences = {"Document 123", "letters 56, 59 and 57"};

    assertEquals(2, JCasUtil.select(jCas, DocumentReference.class).size());
    for (int position = 0; position < expectedReferences.length; position++) {
        DocumentReference reference =
                JCasUtil.selectByIndex(jCas, DocumentReference.class, position);
        assertEquals(expectedReferences[position], reference.getCoveredText());
    }
}
}
private void setPredictedOutcome(JCas jcas, String aLabels) { List<TextClassificationOutcome> outcomes = new ArrayList<TextClassificationOutcome>( JCasUtil.select(jcas, TextClassificationOutcome.class)); String[] labels = aLabels.split("\n"); for (int i = 0, labelIdx = 0; i < outcomes.size(); i++) { if (labels[labelIdx].isEmpty()) { // empty lines mark end of sequence // shift label index +1 to begin of next sequence labelIdx++; } TextClassificationOutcome o = outcomes.get(i); o.setOutcome(labels[labelIdx++]); } }