Refine search
/**
 * Returns the average number of tokens per chunk in a view.
 *
 * <p>For every {@code Chunk} in the view, the {@code Token} annotations it covers are
 * counted; the sum is divided by the number of chunks.
 *
 * @param view
 *            the view of the JCas
 * @return the average token count over all chunks, or {@code 0.0} when the view
 *         contains no chunks
 */
private double getAverageNounPhraseTokenLength(JCas view) {
    int chunkCount = 0;
    int tokenCount = 0;
    // Count chunks while iterating so the chunk index is only queried once.
    for (Chunk chunk : JCasUtil.select(view, Chunk.class)) {
        chunkCount++;
        tokenCount += JCasUtil.selectCovered(view, Token.class, chunk).size();
    }
    // Without this guard an empty view evaluates 0 / 0.0, which is NaN.
    if (chunkCount == 0) {
        return 0.0;
    }
    return tokenCount / (double) chunkCount;
}
}
/**
 * Pairs every event that is covered by a sentence with the document creation time.
 *
 * @param jCas
 *            the CAS to read sentences, events and the document creation time from
 * @return one pair (event, document creation time) per sentence-covered event
 */
@Override
protected List<SourceTargetPair> getSourceTargetPairs(JCas jCas) {
    // selectSingle is expected to yield the one DocumentCreationTime annotation of the CAS.
    DocumentCreationTime creationTime = JCasUtil.selectSingle(jCas, DocumentCreationTime.class);
    List<SourceTargetPair> result = Lists.newArrayList();
    for (Sentence currentSentence : JCasUtil.select(jCas, Sentence.class)) {
        List<Event> eventsInSentence = JCasUtil.selectCovered(jCas, Event.class, currentSentence);
        for (Event currentEvent : eventsInSentence) {
            SourceTargetPair pair = new SourceTargetPair(currentEvent, creationTime);
            result.add(pair);
        }
    }
    return result;
}
}
@Test public void test() throws Exception { String text = "This is some text. It has three sentences. The first sentence has four words."; jCas.setDocumentText(text); SimplePipeline.runPipeline(jCas, ae); assertEquals(3, JCasUtil.select(jCas, Sentence.class).size()); // 3 sentences Sentence s1 = JCasUtil.selectByIndex(jCas, Sentence.class, 0); List<WordToken> tokens = JCasUtil.selectCovered(jCas, WordToken.class, s1); assertEquals(5, tokens.size()); // 5 tokens in the first sentence assertEquals("NN", tokens.get(3).getPartOfSpeech()); // 4th token of first sentence is a noun List<PhraseChunk> phrases = JCasUtil.selectCovered(jCas, PhraseChunk.class, s1); assertEquals(3, phrases.size()); // 3 chunks in the first sentence assertEquals( "some text", phrases.get(2).getCoveredText()); // 3rd chunk of 1st sentence is "some text" }
/**
 * Assigns every sentence-covered token a label of the form
 * {@code <sentence number>-<token number>} (both counted from 1) and stores it in the
 * given map under the token's address.
 *
 * @param aJCas
 *            the CAS whose sentences and tokens are visited
 * @param aTokenAddress
 *            receives the mapping from token address to label
 */
private void setTokenId(JCas aJCas, Map<Integer, String> aTokenAddress) {
    int sentenceNumber = 0;
    for (Sentence currentSentence : select(aJCas, Sentence.class)) {
        sentenceNumber++;
        // Token numbering restarts with every sentence.
        int tokenNumber = 0;
        for (Token currentToken : selectCovered(Token.class, currentSentence)) {
            tokenNumber++;
            int address = currentToken.getAddress();
            String label = sentenceNumber + "-" + tokenNumber;
            aTokenAddress.put(address, label);
        }
    }
}
protected void assignSpeakerIds(JCas jcas) { DramatisPersonae dp = JCasUtil.selectSingle(jcas, DramatisPersonae.class); int speakerId = 1; Map<String, Speaker> speakerMap = new HashMap<String, Speaker>(); for (Speaker speaker : JCasUtil.selectCovered(Speaker.class, dp)) { speaker.setId(speakerId++); speakerMap.put(speaker.getCoveredText(), speaker); } ; for (Speaker speaker : JCasUtil.select(jcas, Speaker.class)) { if (speaker.getId() == 0) { try { speaker.setId(speakerMap.get(speaker.getCoveredText()).getId()); } catch (NullPointerException e) { // no entry in speaker map } } } }
/**
 * Builds position labels for tokens: each token covered by a sentence is mapped from
 * its address to the string {@code "<sentence index>-<token index>"}, where both
 * indices are 1-based and the token index restarts per sentence.
 *
 * @param aJCas
 *            the CAS providing sentences and tokens
 * @param aTokenAddress
 *            the map that is filled with address-to-label entries
 */
private void setTokenId(JCas aJCas, Map<Integer, String> aTokenAddress) {
    int sentenceIndex = 1;
    for (Sentence sent : select(aJCas, Sentence.class)) {
        int tokenIndex = 1;
        for (Token tok : selectCovered(Token.class, sent)) {
            String positionLabel = sentenceIndex + "-" + tokenIndex;
            aTokenAddress.put(tok.getAddress(), positionLabel);
            tokenIndex += 1;
        }
        sentenceIndex += 1;
    }
}
/**
 * Prints one line per markable: the covered texts of its base tokens, separated by
 * single spaces, with newlines inside a token replaced by spaces.
 *
 * <p>Markables that cover no base tokens produce no output line.
 *
 * @param jcas
 *            the CAS whose markables are printed
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    for (Markable m : JCasUtil.select(jcas, Markable.class)) {
        // Method-local buffer: the unsynchronized StringBuilder is sufficient here.
        StringBuilder buff = new StringBuilder();
        for (BaseToken token : JCasUtil.selectCovered(BaseToken.class, m)) {
            buff.append(token.getCoveredText().replace('\n', ' '));
            buff.append(' ');
        }
        if (buff.length() > 0) {
            // Drop the separator appended after the last token.
            buff.setLength(buff.length() - 1);
            out.println(buff.toString());
        }
    }
}
/**
 * Collects the speeches of all utterances whose first speaker refers to the given
 * figure.
 *
 * @param jcas
 *            the CAS containing utterances and speeches
 * @param figure
 *            the figure to match; compared by reference against the first speaker's
 *            figure
 * @return the {@code Speech} annotations covered by the matching utterances
 */
public static Collection<Speech> getSpeeches(JCas jcas, Figure figure) {
    List<Speech> speeches = new LinkedList<Speech>();
    for (Utterance utterance : JCasUtil.select(jcas, Utterance.class)) {
        Speaker firstSpeaker = DramaUtil.getFirstSpeaker(utterance);
        // Skip utterances without a speaker or spoken by another figure.
        if (firstSpeaker == null || firstSpeaker.getFigure() != figure) {
            continue;
        }
        List<Speech> coveredSpeeches = JCasUtil.selectCovered(jcas, Speech.class, utterance);
        speeches.addAll(coveredSpeeches);
    }
    return speeches;
}
/**
 * Pairs each event inside a sentence with every event that
 * {@code getSubordinateEvents} returns for it within that sentence.
 *
 * @param jCas
 *            the CAS to read sentences and events from
 * @return all (source event, subordinate event) pairs, in sentence order
 */
@Override
protected List<SourceTargetPair> getSourceTargetPairs(JCas jCas) {
    List<SourceTargetPair> result = Lists.newArrayList();
    for (Sentence currentSentence : JCasUtil.select(jCas, Sentence.class)) {
        List<Event> sentenceEvents = JCasUtil.selectCovered(jCas, Event.class, currentSentence);
        for (Event sourceEvent : sentenceEvents) {
            for (Event subordinate : this.getSubordinateEvents(jCas, sourceEvent, currentSentence)) {
                SourceTargetPair pair = new SourceTargetPair(sourceEvent, subordinate);
                result.add(pair);
            }
        }
    }
    return result;
}
/**
 * Pairs each event inside a sentence with every time expression that
 * {@code getSubordinateTimes} returns for it within that sentence.
 *
 * @param jCas
 *            the CAS to read sentences and events from
 * @return all (event, subordinate time) pairs, in sentence order
 */
@Override
protected List<SourceTargetPair> getSourceTargetPairs(JCas jCas) {
    List<SourceTargetPair> result = Lists.newArrayList();
    for (Sentence currentSentence : JCasUtil.select(jCas, Sentence.class)) {
        List<Event> sentenceEvents = JCasUtil.selectCovered(jCas, Event.class, currentSentence);
        for (Event currentEvent : sentenceEvents) {
            for (Time subordinateTime : getSubordinateTimes(currentEvent, currentSentence, jCas)) {
                SourceTargetPair pair = new SourceTargetPair(currentEvent, subordinateTime);
                result.add(pair);
            }
        }
    }
    return result;
}
/**
 * Records, for every token covered by a sentence, which sentence it belongs to: the
 * token's address is mapped to the address of the covering sentence.
 *
 * @param aJCas
 *            the CAS whose sentences and tokens are visited
 * @param aTokenListInSentence
 *            receives the mapping from token address to sentence address
 */
private void setTokenSentenceAddress(JCas aJCas, Map<Integer, Integer> aTokenListInSentence) {
    for (Sentence currentSentence : select(aJCas, Sentence.class)) {
        for (Token currentToken : selectCovered(Token.class, currentSentence)) {
            int tokenAddress = currentToken.getAddress();
            int sentenceAddress = currentSentence.getAddress();
            aTokenListInSentence.put(tokenAddress, sentenceAddress);
        }
    }
}
/**
 * Fills the given map with one entry per sentence-covered token, keyed by the token's
 * address and valued with the address of the sentence that covers it.
 *
 * @param aJCas
 *            the CAS providing sentences and tokens
 * @param aTokenListInSentence
 *            the map to fill with token-address to sentence-address entries
 */
private void setTokenSentenceAddress(JCas aJCas, Map<Integer, Integer> aTokenListInSentence) {
    for (Sentence sent : select(aJCas, Sentence.class)) {
        for (Token tok : selectCovered(Token.class, sent)) {
            // Key: the token; value: the sentence it lies in.
            aTokenListInSentence.put(
                    tok.getAddress(),
                    sent.getAddress());
        }
    }
}
/**
 * Assigns every sentence-covered token a label of the form
 * {@code <sentence number>-<token number>} (both counted from 1) and stores it in the
 * given map under the token's low-level CAS reference.
 *
 * @param aJCas
 *            the CAS whose sentences and tokens are visited
 * @param aTokenAddress
 *            receives the mapping from token reference to label
 */
private void setTokenId(JCas aJCas, Map<Integer, String> aTokenAddress) {
    LowLevelCAS lowLevelCas = aJCas.getLowLevelCas();
    int sentenceNumber = 0;
    for (Sentence currentSentence : select(aJCas, Sentence.class)) {
        sentenceNumber++;
        // Token numbering restarts with every sentence.
        int tokenNumber = 0;
        for (Token currentToken : selectCovered(Token.class, currentSentence)) {
            tokenNumber++;
            int tokenRef = lowLevelCas.ll_getFSRef(currentToken);
            String label = sentenceNumber + "-" + tokenNumber;
            aTokenAddress.put(tokenRef, label);
        }
    }
}
/**
 * Builds position labels for tokens: each token covered by a sentence is mapped from
 * its low-level CAS reference to the string {@code "<sentence index>-<token index>"},
 * where both indices are 1-based and the token index restarts per sentence.
 *
 * @param aJCas
 *            the CAS providing sentences and tokens
 * @param aTokenAddress
 *            the map that is filled with reference-to-label entries
 */
private void setTokenId(JCas aJCas, Map<Integer, String> aTokenAddress) {
    LowLevelCAS casAccess = aJCas.getLowLevelCas();
    int sentenceIndex = 1;
    for (Sentence sent : select(aJCas, Sentence.class)) {
        int tokenIndex = 1;
        for (Token tok : selectCovered(Token.class, sent)) {
            String positionLabel = sentenceIndex + "-" + tokenIndex;
            aTokenAddress.put(casAccess.ll_getFSRef(tok), positionLabel);
            tokenIndex += 1;
        }
        sentenceIndex += 1;
    }
}
/**
 * Creates n-grams sentence by sentence and adds each of them to the CAS indexes.
 *
 * <p>The n-grams are produced by {@code NGramIterable.create} from the tokens covered
 * by the sentence and the field {@code n}.
 *
 * @param aJCas
 *            the CAS whose sentences are processed
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    for (Sentence currentSentence : select(aJCas, Sentence.class)) {
        // N-grams are built from the tokens of one sentence at a time.
        for (NGram currentNGram : NGramIterable.create(selectCovered(Token.class, currentSentence), n)) {
            currentNGram.addToIndexes();
        }
    }
}
}
/**
 * Indexes n-grams for every sentence of the CAS: the tokens covered by a sentence are
 * handed to {@code NGramIterable.create} together with the field {@code n}, and every
 * resulting {@code NGram} is added to the indexes.
 *
 * @param aJCas
 *            the CAS to annotate with n-grams
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    for (Sentence sent : select(aJCas, Sentence.class)) {
        for (NGram gram : NGramIterable.create(
                selectCovered(Token.class, sent),
                n)) {
            gram.addToIndexes();
        }
    }
}
}
/**
 * Returns the outcome strings of the {@code TextClassificationOutcome} annotations in
 * the CAS, optionally restricted to those covered by a given annotation.
 *
 * @param aJCas
 *            the CAS to read outcome annotations from
 * @param anAnnotation
 *            the covering annotation, or {@code null} to use all outcomes in the CAS
 * @return the outcome value of every selected annotation, in selection order
 * @throws TextClassificationException
 *             if no outcome annotation was selected
 */
public List<String> getOutcomes(JCas aJCas, AnnotationFS anAnnotation) throws TextClassificationException {
    Collection<TextClassificationOutcome> selected;
    if (anAnnotation != null) {
        selected = JCasUtil.selectCovered(aJCas, TextClassificationOutcome.class, anAnnotation);
    }
    else {
        selected = JCasUtil.select(aJCas, TextClassificationOutcome.class);
    }

    if (selected.isEmpty()) {
        throw new TextClassificationException("No outcome annotations present in current CAS.");
    }

    List<String> result = new ArrayList<String>();
    for (TextClassificationOutcome current : selected) {
        String value = current.getOutcome();
        result.add(value);
    }
    return result;
}
/**
 * Collects, for every sentence, the outcome strings of the
 * {@code TextClassificationOutcome} annotations lying within the sentence's span.
 *
 * @param jCas
 *            the CAS to read sentences and outcome annotations from
 * @return one list of outcome strings per sentence, in sentence order; a sentence
 *         without outcomes contributes an empty list
 */
private List<List<String>> extractTags(JCas jCas) {
    List<List<String>> tagsPerSentence = new ArrayList<List<String>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        int begin = sentence.getBegin();
        int end = sentence.getEnd();
        List<String> sentenceTags = new ArrayList<String>();
        // Outcomes are looked up by the sentence's offsets rather than by the annotation itself.
        for (TextClassificationOutcome outcome : JCasUtil.selectCovered(jCas,
                TextClassificationOutcome.class, begin, end)) {
            sentenceTags.add(outcome.getOutcome());
        }
        tagsPerSentence.add(sentenceTags);
    }
    return tagsPerSentence;
}
/**
 * Reads the outcome values stored in the CAS.
 *
 * <p>With a {@code null} annotation all {@code TextClassificationOutcome} annotations
 * of the CAS are used; otherwise only those covered by the given annotation.
 *
 * @param aJCas
 *            the CAS holding the outcome annotations
 * @param anAnnotation
 *            restricts the selection to covered outcomes; may be {@code null}
 * @return the outcome strings of the selected annotations
 * @throws TextClassificationException
 *             if the selection is empty
 */
public List<String> getOutcomes(JCas aJCas, AnnotationFS anAnnotation) throws TextClassificationException {
    Collection<TextClassificationOutcome> outcomeAnnotations;
    if (anAnnotation == null) {
        // No scope given: take every outcome in the CAS.
        outcomeAnnotations = JCasUtil.select(aJCas, TextClassificationOutcome.class);
    }
    else {
        outcomeAnnotations = JCasUtil.selectCovered(aJCas, TextClassificationOutcome.class,
                anAnnotation);
    }

    boolean nothingFound = outcomeAnnotations.size() < 1;
    if (nothingFound) {
        throw new TextClassificationException("No outcome annotations present in current CAS.");
    }

    List<String> outcomeValues = new ArrayList<String>();
    for (TextClassificationOutcome annotation : outcomeAnnotations) {
        outcomeValues.add(annotation.getOutcome());
    }
    return outcomeValues;
}
/**
 * Records, for every token covered by a sentence, which sentence it belongs to: the
 * token's low-level CAS reference is mapped to the reference of the covering sentence.
 *
 * @param aJCas
 *            the CAS whose sentences and tokens are visited
 * @param aTokenListInSentence
 *            receives the mapping from token reference to sentence reference
 */
private void setTokenSentenceAddress(JCas aJCas, Map<Integer, Integer> aTokenListInSentence) {
    LowLevelCAS lowLevelCas = aJCas.getLowLevelCas();
    for (Sentence currentSentence : select(aJCas, Sentence.class)) {
        for (Token currentToken : selectCovered(Token.class, currentSentence)) {
            int tokenRef = lowLevelCas.ll_getFSRef(currentToken);
            int sentenceRef = lowLevelCas.ll_getFSRef(currentSentence);
            aTokenListInSentence.put(tokenRef, sentenceRef);
        }
    }
}