public void buildTokenBoundaryMap() { tokenBeginEndTreeSet = new TreeSet<Integer>(); AnnotationIndex<Annotation> annotationIndex = jcas.getAnnotationIndex(BaseToken.type); for (Annotation current : annotationIndex) { BaseToken bt = (BaseToken)current; // filter out NewlineToken if (!(bt instanceof NewlineToken)) { int begin = bt.getBegin(); int end = bt.getEnd(); tokenBeginEndTreeSet.add(begin); tokenBeginEndTreeSet.add(end); } } }
public void buildTokenBoundaryMap() { tokenBeginEndTreeSet = new TreeSet<Integer>(); AnnotationIndex<Annotation> annotationIndex = jcas.getAnnotationIndex(BaseToken.type); for (Annotation current : annotationIndex) { BaseToken bt = (BaseToken)current; // filter out NewlineToken if (!(bt instanceof NewlineToken)) { int begin = bt.getBegin(); int end = bt.getEnd(); tokenBeginEndTreeSet.add(begin); tokenBeginEndTreeSet.add(end); } } }
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { List<BaseToken> tokens = new ArrayList<>(JCasUtil.select(jcas, BaseToken.class)); BaseToken lastToken = null; int parStart = 0; for(int i = 0; i < tokens.size(); i++){ BaseToken token = tokens.get(i); if(parStart == i && token instanceof NewlineToken){ // we've just created a pargraph ending but there were multiple newlines -- don't want to start the // new paragraph until we are past the newlines -- increment the parStart index and move forward parStart++; }else if(lastToken != null && token instanceof NewlineToken){ Paragraph par = new Paragraph(jcas, tokens.get(parStart).getBegin(), lastToken.getEnd()); par.addToIndexes(); parStart = i+1; } lastToken = token; } }
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { List<BaseToken> tokens = new ArrayList<>(JCasUtil.select(jcas, BaseToken.class)); BaseToken lastToken = null; int parStart = 0; for(int i = 0; i < tokens.size(); i++){ BaseToken token = tokens.get(i); if(parStart == i && token instanceof NewlineToken){ // we've just created a pargraph ending but there were multiple newlines -- don't want to start the // new paragraph until we are past the newlines -- increment the parStart index and move forward parStart++; }else if(lastToken != null && token instanceof NewlineToken){ Paragraph par = new Paragraph(jcas, tokens.get(parStart).getBegin(), lastToken.getEnd()); par.addToIndexes(); parStart = i+1; } lastToken = token; } }
public int getSemanticType(JCas systemView, BaseToken baseToken) { List<IdentifiedAnnotation> coveredIdentifiedAnnotations = JCasUtil.selectCovered( systemView, IdentifiedAnnotation.class, baseToken.getBegin(), baseToken.getEnd()); if(coveredIdentifiedAnnotations.size() < 1) { return CONST.NE_TYPE_ID_UNKNOWN; // no type } return coveredIdentifiedAnnotations.get(0).getTypeID(); } }
public int getSemanticType(JCas systemView, BaseToken baseToken) { List<IdentifiedAnnotation> coveredIdentifiedAnnotations = JCasUtil.selectCovered( systemView, IdentifiedAnnotation.class, baseToken.getBegin(), baseToken.getEnd()); if(coveredIdentifiedAnnotations.size() < 1) { return CONST.NE_TYPE_ID_UNKNOWN; // no type } return coveredIdentifiedAnnotations.get(0).getTypeID(); } }
/**
 * Scans the BaseTokens of the CAS for single-token cue words and two-token cue phrases.
 * <p>
 * Each token's covered text is lower-cased and looked up in {@code cueWords}. The
 * lower-cased text of the previous token joined to the current one with a single space
 * is looked up as well. Every hit is reported through {@code addCuePhrase} with the
 * matched key and the character span of the match.
 * <p>
 * Lower-casing uses {@code Locale.ROOT} so that lookups do not depend on the JVM default
 * locale (for example, a Turkish default locale maps "I" to a dotless "ı").
 *
 * @param jCas CAS whose BaseTokens are scanned
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    // Iterate over a snapshot of the tokens rather than the live index.
    ArrayList<BaseToken> tokens = new ArrayList<BaseToken>(JCasUtil.select(jCas, BaseToken.class));
    String lastKey = null;
    BaseToken lastToken = null;

    for (BaseToken token : tokens) {
        String key = token.getCoveredText().toLowerCase(java.util.Locale.ROOT);
        if (cueWords.containsKey(key)) {
            addCuePhrase(jCas, key, token.getBegin(), token.getEnd());
        }

        // A two-token phrase needs a predecessor; lastToken is null only for the first token.
        if (lastToken != null) {
            String twoKey = lastKey + " " + key;
            if (cueWords.containsKey(twoKey)) {
                addCuePhrase(jCas, twoKey, lastToken.getBegin(), token.getEnd());
            }
        }

        lastToken = token;
        lastKey = key;
    }
}
/**
 * Scans the BaseTokens of the CAS for single-token cue words and two-token cue phrases.
 * <p>
 * Each token's covered text is lower-cased and looked up in {@code cueWords}. The
 * lower-cased text of the previous token joined to the current one with a single space
 * is looked up as well. Every hit is reported through {@code addCuePhrase} with the
 * matched key and the character span of the match.
 * <p>
 * Lower-casing uses {@code Locale.ROOT} so that lookups do not depend on the JVM default
 * locale (for example, a Turkish default locale maps "I" to a dotless "ı").
 *
 * @param jCas CAS whose BaseTokens are scanned
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    // Iterate over a snapshot of the tokens rather than the live index.
    ArrayList<BaseToken> tokens = new ArrayList<BaseToken>(JCasUtil.select(jCas, BaseToken.class));
    String lastKey = null;
    BaseToken lastToken = null;

    for (BaseToken token : tokens) {
        String key = token.getCoveredText().toLowerCase(java.util.Locale.ROOT);
        if (cueWords.containsKey(key)) {
            addCuePhrase(jCas, key, token.getBegin(), token.getEnd());
        }

        // A two-token phrase needs a predecessor; lastToken is null only for the first token.
        if (lastToken != null) {
            String twoKey = lastKey + " " + key;
            if (cueWords.containsKey(twoKey)) {
                addCuePhrase(jCas, twoKey, lastToken.getBegin(), token.getEnd());
            }
        }

        lastToken = token;
        lastKey = key;
    }
}
/**
 * Creates a calculator for the given CAS and indexes its BaseTokens by offset.
 * <p>
 * Every BaseToken in the annotation index is stored in {@code hbs} under its begin
 * offset and in {@code hbe} under its end offset. When two tokens share an offset the
 * one visited later replaces the earlier entry.
 *
 * @param jcas CAS whose BaseToken annotations are indexed
 */
public AttributeCalculator (JCas jcas) {
    this.jcas = jcas;

    hbs = new Hashtable<Integer, BaseToken>();
    hbe = new Hashtable<Integer, BaseToken>();

    for (FSIterator tokenIterator =
                 jcas.getJFSIndexRepository().getAnnotationIndex(BaseToken.type).iterator();
         tokenIterator.hasNext(); ) {
        BaseToken baseToken = (BaseToken) tokenIterator.next();
        hbs.put(baseToken.getBegin(), baseToken);
        hbe.put(baseToken.getEnd(), baseToken);
    }
}
/**
 * Creates a calculator for the given CAS and indexes its BaseTokens by offset.
 * <p>
 * Every BaseToken in the annotation index is stored in {@code hbs} under its begin
 * offset and in {@code hbe} under its end offset. When two tokens share an offset the
 * one visited later replaces the earlier entry.
 *
 * @param jcas CAS whose BaseToken annotations are indexed
 */
public AttributeCalculator (JCas jcas) {
    this.jcas = jcas;

    hbs = new Hashtable<Integer, BaseToken>();
    hbe = new Hashtable<Integer, BaseToken>();

    for (FSIterator tokenIterator =
                 jcas.getJFSIndexRepository().getAnnotationIndex(BaseToken.type).iterator();
         tokenIterator.hasNext(); ) {
        BaseToken baseToken = (BaseToken) tokenIterator.next();
        hbs.put(baseToken.getBegin(), baseToken);
        hbe.put(baseToken.getEnd(), baseToken);
    }
}
/** * A utility method that annotates a given range. */ protected void annotateRange(JCas jcas, String text, int rangeBegin, int rangeEnd) throws AnalysisEngineProcessException { JFSIndexRepository indexes = jcas.getJFSIndexRepository(); Iterator tokenItr = indexes.getAnnotationIndex(BaseToken.type) .iterator(); while (tokenItr.hasNext()) { BaseToken tokenAnnotation = (BaseToken) tokenItr.next(); if (tokenAnnotation.getBegin() >= rangeBegin && tokenAnnotation.getEnd() <= rangeEnd) { String token = text.substring(tokenAnnotation.getBegin(), tokenAnnotation.getEnd()); // skip past words that are part of the exclusion set if (exclusionSet.contains(token)) continue; setNormalizedForm(tokenAnnotation, token); if (postLemmas) setLemma(tokenAnnotation, token, jcas); } } }
/**
 * Counts the base tokens that lie strictly between two annotations.
 * <p>
 * The gap runs from the smaller of the two end offsets to the larger of the two begin
 * offsets. A token is counted only when its begin is greater than the gap start and its
 * end is less than the gap end; tokens touching either boundary are not counted.
 *
 * @param annotation1 one of the two annotations
 * @param annotation2 the other annotation
 * @param baseTokens  candidate base tokens (within the window)
 * @return number of base tokens that lie between annotation1 and annotation2
 */
static private int countBetween( final Annotation annotation1,
                                 final Annotation annotation2,
                                 final Collection<BaseToken> baseTokens ) {
   final int gapStart = Math.min( annotation1.getEnd(), annotation2.getEnd() );
   final int gapEnd = Math.max( annotation1.getBegin(), annotation2.getBegin() );

   int count = 0;
   for ( final BaseToken candidate : baseTokens ) {
      if ( candidate.getBegin() <= gapStart ) {
         continue;
      }
      if ( candidate.getEnd() >= gapEnd ) {
         continue;
      }
      count++;
   }
   return count;
}
/**
 * Creates a Predicate over the span of a token and adds it to the indexes.
 *
 * @param jCas      CAS in which the Predicate is created
 * @param rolesetId value stored as the Predicate's frame set
 * @param token     token whose begin and end offsets become the Predicate's span
 * @return the newly created Predicate, already added to the indexes
 */
private Predicate createPredicate( JCas jCas, String rolesetId, BaseToken token ) {
   final int begin = token.getBegin();
   final int end = token.getEnd();

   final Predicate predicate = new Predicate( jCas, begin, end );
   predicate.setFrameSet( rolesetId );
   predicate.addToIndexes();
   return predicate;
}
/**
 * Creates a Predicate over the span of a token and adds it to the indexes.
 *
 * @param jCas      CAS in which the Predicate is created
 * @param rolesetId value stored as the Predicate's frame set
 * @param token     token whose begin and end offsets become the Predicate's span
 * @return the newly created Predicate, already added to the indexes
 */
private Predicate createPredicate( JCas jCas, String rolesetId, BaseToken token ) {
   final int begin = token.getBegin();
   final int end = token.getEnd();

   final Predicate predicate = new Predicate( jCas, begin, end );
   predicate.setFrameSet( rolesetId );
   predicate.addToIndexes();
   return predicate;
}
/**
 * Counts the base tokens that lie strictly between two annotations.
 * <p>
 * The gap runs from the smaller of the two end offsets to the larger of the two begin
 * offsets. A token is counted only when its begin is greater than the gap start and its
 * end is less than the gap end; tokens touching either boundary are not counted.
 *
 * @param annotation1 one of the two annotations
 * @param annotation2 the other annotation
 * @param baseTokens  candidate base tokens (within the window)
 * @return number of base tokens that lie between annotation1 and annotation2
 */
static private int countBetween( final Annotation annotation1,
                                 final Annotation annotation2,
                                 final Collection<BaseToken> baseTokens ) {
   final int gapStart = Math.min( annotation1.getEnd(), annotation2.getEnd() );
   final int gapEnd = Math.max( annotation1.getBegin(), annotation2.getBegin() );

   int count = 0;
   for ( final BaseToken candidate : baseTokens ) {
      if ( candidate.getBegin() <= gapStart ) {
         continue;
      }
      if ( candidate.getEnd() >= gapEnd ) {
         continue;
      }
      count++;
   }
   return count;
}
/**
 * Collects the offsets of all BaseTokens in the CAS and hands them to {@code writeToFile}.
 * <p>
 * Each token contributes one string of the form {@code begin|end}. The strings are
 * passed to {@code writeToFile} together with the document id obtained from
 * {@code DocumentIDAnnotationUtil.getDocumentID}.
 *
 * @param cas CAS to read the tokens from
 * @throws ResourceProcessException wrapping any exception raised while processing
 */
@Override
public void processCas( CAS cas ) throws ResourceProcessException {
   try {
      final JCas jcas = cas.getJCas();
      final List<String> offsets = new ArrayList<String>();

      for ( Iterator<?> tokenIterator =
                  jcas.getJFSIndexRepository().getAnnotationIndex( BaseToken.type ).iterator();
            tokenIterator.hasNext(); ) {
         final BaseToken baseToken = (BaseToken) tokenIterator.next();
         offsets.add( baseToken.getBegin() + "|" + baseToken.getEnd() );
      }

      final String documentID = DocumentIDAnnotationUtil.getDocumentID( jcas );
      writeToFile( documentID, offsets );
   } catch ( Exception e ) {
      // surface every failure to the caller as a ResourceProcessException, keeping the cause
      throw new ResourceProcessException( e );
   }
}
/**
 * Collects the offsets of all BaseTokens in the CAS and hands them to {@code writeToFile}.
 * <p>
 * Each token contributes one string of the form {@code begin|end}. The strings are
 * passed to {@code writeToFile} together with the document id obtained from
 * {@code DocumentIDAnnotationUtil.getDocumentID}.
 *
 * @param cas CAS to read the tokens from
 * @throws ResourceProcessException wrapping any exception raised while processing
 */
@Override
public void processCas( CAS cas ) throws ResourceProcessException {
   try {
      final JCas jcas = cas.getJCas();
      final List<String> offsets = new ArrayList<String>();

      for ( Iterator<?> tokenIterator =
                  jcas.getJFSIndexRepository().getAnnotationIndex( BaseToken.type ).iterator();
            tokenIterator.hasNext(); ) {
         final BaseToken baseToken = (BaseToken) tokenIterator.next();
         offsets.add( baseToken.getBegin() + "|" + baseToken.getEnd() );
      }

      final String documentID = DocumentIDAnnotationUtil.getDocumentID( jcas );
      writeToFile( documentID, offsets );
   } catch ( Exception e ) {
      // surface every failure to the caller as a ResourceProcessException, keeping the cause
      throw new ResourceProcessException( e );
   }
}
/**
 * Creates a SemanticArgument over the span of a token and adds it to the indexes.
 *
 * @param jCas  CAS in which the argument is created
 * @param head  dependency arc whose label is copied onto the argument
 * @param token token whose begin and end offsets become the argument's span
 * @return the newly created SemanticArgument, already added to the indexes
 */
private SemanticArgument createArgument( JCas jCas, DEPArc head, BaseToken token ) {
   final int begin = token.getBegin();
   final int end = token.getEnd();

   final SemanticArgument semanticArgument = new SemanticArgument( jCas, begin, end );
   semanticArgument.setLabel( head.getLabel() );
   semanticArgument.addToIndexes();
   return semanticArgument;
}
/**
 * Creates a SemanticArgument over the span of a token and adds it to the indexes.
 *
 * @param jCas  CAS in which the argument is created
 * @param head  dependency arc whose label is copied onto the argument
 * @param token token whose begin and end offsets become the argument's span
 * @return the newly created SemanticArgument, already added to the indexes
 */
private SemanticArgument createArgument( JCas jCas, DEPArc head, BaseToken token ) {
   final int begin = token.getBegin();
   final int end = token.getEnd();

   final SemanticArgument semanticArgument = new SemanticArgument( jCas, begin, end );
   semanticArgument.setLabel( head.getLabel() );
   semanticArgument.addToIndexes();
   return semanticArgument;
}
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { // Create a dummy IdentifiedAnnotation in the type system // If the BaseToken Part Of Speech is a Noun Collection<BaseToken> tokens = JCasUtil.select(jcas, BaseToken.class); for (BaseToken token : tokens) { if (saveAnnotation && token.getPartOfSpeech() != null && token.getPartOfSpeech().startsWith("N")) { IdentifiedAnnotation ann = new IdentifiedAnnotation(jcas); ann.setBegin(token.getBegin()); ann.setEnd(token.getEnd()); ann.addToIndexes(); if (printAnnotation) { LOG.info("Token:" + token.getCoveredText() + " POS:" + token.getPartOfSpeech()); } } } }