// Fragment: builds a tagging Unit for the current token (the enclosing method and the
// closing braces are cut off by the chunk boundary).
final String tokenRepresentation = token.getCoveredText();
// NOTE(review): this condition is tautologically true — tokenRepresentation was just
// assigned from token.getCoveredText(). Presumably it was meant to compare against a
// differently derived string (e.g. a fullform/expanded token); verify in the full file.
if (tokenRepresentation.equals(token.getCoveredText())) {
    // One Unit spanning the token's offsets, carrying its surface text.
    final Unit unit = new Unit(token.getBegin(), token.getEnd(), tokenRepresentation);
    unitSentence.add(unit);
/**
 * Creates a new {@link Token} annotation over the span [begin, end) in the given CAS,
 * assigns it the next sequential token id plus this component's id, and adds it to the
 * CAS indexes.
 *
 * @param jcas  the JCas the new token annotation belongs to
 * @param begin offset of the first character of the token in the document text
 * @param end   offset directly after the last character of the token
 */
private void createToken(final JCas jcas, final int begin, final int end) {
    final Token annotation = new Token(jcas);
    annotation.setBegin(begin);
    annotation.setEnd(end);
    // Token ids are sequential per document; tokenNumber is a field incremented below.
    annotation.setId("" + tokenNumber);
    annotation.setComponentId(COMPONENT_ID);
    annotation.addToIndexes();
    // Guard the debug statement: previously the substring and string concatenation
    // were computed eagerly even when debug logging was disabled.
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("createToken() - created token: "
                + jcas.getDocumentText().substring(begin, end) + " " + begin + " - " + end);
    }
    tokenNumber++;
}
/**
 * JCasGen-generated constructor: creates a Token annotation over the given span.
 *
 * @generated
 * @param jcas  JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end   offset to the end spot in the SofA
 */
public Token(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // Generated JCasGen initialization hook; runs after the offsets are set.
    readObject();
}
// Fragment: lemmatizes every token of the CAS (the loop's closing braces and the
// catch clause of the try below are cut off by the chunk boundary).
// NOTE(review): the loop reads via tokenIterator.get() (no advance visible here);
// presumably the iterator is advanced later in the cut-off part — verify, otherwise
// this would loop forever.
while (tokenIterator.hasNext()) {
    Token token = (Token) tokenIterator.get();
    tokenString = token.getCoveredText();
    LemmataEntry lemmaEntry = null;
    try {
        if (token.getPosTag() != null) {
            // Use the token's first POS tag to drive lexicon/rule-based lemmatization.
            POSTag posTag = token.getPosTag(0);
            tag = posTag.getValue();
            lemmaEntry = bioLemm.lemmatizeByLexiconAndRules(tokenString, tag);
            // NOTE(review): lemmaEntry is assigned above but the next line reads
            // lemmaCollection — looks like a stale/wrong variable; confirm intent.
            LemmataEntry.Lemma lemma = lemmaCollection.iterator().next();
            String lem = lemma.getLemma();
            // Attach the computed lemma to the token as a jcore Lemma annotation.
            Lemma jcoreLemma = new Lemma(aJCas, token.getBegin(), token.getEnd());
            jcoreLemma.setValue(lem);
            token.setLemma(jcoreLemma);
        } else {
            // No POS tag: fall back to using the surface form as its own lemma.
            Lemma jcoreLemma = new Lemma(aJCas, token.getBegin(), token.getEnd());
            jcoreLemma.setValue(tokenString);
            token.setLemma(jcoreLemma);
// Fragment: derives a meta-information feature for the current token from the i-th
// activated meta annotation (enclosing loop and closing braces are cut off).
Interval annotationInterval = metaAnnotationValues[i];
// Feature name for this meta is read from the feature configuration.
String metaName = featureConfig.getProperty(activatedMetas.get(i) + "_feat_unit");
if (annotationInterval != null && annotationInterval.isIn(token.getBegin(), token.getEnd())) {
    // Token starts the annotation and begin-flagging is enabled: prefix the value
    // with "B_" (presumably a BIO-style begin marker — confirm against the tagger).
    if (featureConfig.getProperty(activatedMetas.get(i) + "_begin_flag").equals("true")
            && annotationInterval.getBegin() == token.getBegin()) {
        metaInfos.put(metaName, "B_" + metaAnnotationValues[i].getAnnotation());
    } else {
        metaInfos.put(metaName, metaAnnotationValues[i].getAnnotation());
        if (annotationInterval.getEnd() == token.getEnd()) {
/**
 * Tokenizes and POS-tags the given text, in that order.
 *
 * @param text the raw text to process
 * @return one String[2] per token: index 0 the covered text, index 1 the value of the
 *         token's first POS tag
 * @throws AnalysisEngineProcessException if the tokenizer or the POS tagger fails
 */
private List<String[]> tokenize(String text) throws AnalysisEngineProcessException {
    jCas.reset();
    jCas.setDocumentText(text);
    // No sentence splitter is run; the whole text is treated as a single sentence.
    new Sentence(jCas, 0, text.length()).addToIndexes();
    jtbd.process(jCas.getCas());
    pennbioIEPosTagger.process(jCas.getCas());
    return JCasUtil.select(jCas, Token.class).stream()
            .map(token -> {
                final String[] textAndTag = new String[2];
                textAndTag[0] = token.getCoveredText();
                textAndTag[1] = token.getPosTag(0).getValue();
                return textAndTag;
            })
            .collect(Collectors.toList());
}
/** * Returns the first POSTag annotation associated with the given token that has the * required type (i.e. that belongs to the requested posTagSet). If no such POSTag * is found, returns null. (In general tokens may be provided with POSTags from * different POSTagSets.) * * @param token * @return */ private POSTag getPrefPOSTag(Token token) { FSArray posTags = token.getPosTag(); for (int i = 0; i < posTags.size(); i++) { POSTag posTag = (POSTag) posTags.get(i); if (posTag != null) { // compare to the desired type of POS Tag Set if (posTag.getType().getName().equals(posTagSetPreference)) { return posTag; } } } return null; }
// Fragment: trailing argument of a call whose beginning is cut off by the chunk
// boundary — presumably an assertion comparing an expected class against the runtime
// class of a freshly created zero-length Token.
new Token(aJCas, 0, 0).getClass());
// Fragment: collects tokens and their covered texts into parallel arrays (the
// enclosing loop is cut off by the chunk boundary).
Token token = (Token) tokenIterator.next();
tokenArray[i] = token;
tokenTextArray[i] = token.getCoveredText();
POSTag postag = null;
postag = token.getPosTag(0);
// NOTE(review): the error log and throw below appear unconditional here — presumably
// an "if (postag == null)" guard was cut off with the chunk; verify in the full file.
LOGGER.error("Token has no POS tag annotation: " + token.getCoveredText());
throw new AnalysisEngineProcessException();
// Fragment: builds JNET tagger Units for a token, its fullform sub-tokens, and an
// abbreviation token (enclosing loop and branch braces are cut off — the duplicate
// "Unit unit" declarations below prove intervening structure is missing).
HashMap<String, String> metas = metaList.get(i);
Abbreviation abbreviation = abbreviationList.get(i);
String tokenRepresentation = token.getCoveredText();
// NOTE(review): this condition is tautologically true — tokenRepresentation was just
// assigned from token.getCoveredText(); presumably it should compare against another
// representation (e.g. the abbreviation expansion). Verify against the full file.
if (tokenRepresentation.equals(token.getCoveredText())) {
    Unit unit = new de.julielab.jnet.tagger.Unit(token.getBegin(), token.getEnd(), tokenRepresentation, "", metas);
    unitSentence.add(unit);
    // One Unit per tokenizer-split fullform token, all sharing the covering token's offsets.
    while (st.hasMoreTokens()) {
        String fullformToken = st.nextToken();
        Unit unit = new de.julielab.jnet.tagger.Unit(token.getBegin(), token.getEnd(), fullformToken, "", metas);
        unitSentence.add(unit);
        // NOTE(review): the Unit below uses abbrevToken — an else/branch boundary was
        // likely cut off by the chunk between these two creations.
        Unit unit = new de.julielab.jnet.tagger.Unit(token.getBegin(), token.getEnd(), abbrevToken.getCoveredText(), "", metas);
        unitSentence.add(unit);
/**
 * Marks the token whose offsets exactly match the (offset-mapped) head span as the
 * head of the given constituent. Only tokens covered by the constituent are examined.
 *
 * @param cons     the constituent whose head should be set
 * @param headSpan the head span in original (un-mapped) offsets
 * @throws CASRuntimeException if CAS access fails
 * @throws CASException        if the JCas cannot be obtained from the CAS
 */
private void addHeadAnnotation(Constituent cons, Span headSpan) throws CASRuntimeException, CASException {
    final FSIterator tokens =
            cons.getCAS().getJCas().getAnnotationIndex(Token.type).subiterator(cons);
    // Translate the head span's boundaries into this CAS' offset space.
    final int headStart = offsetMap.getMapping(headSpan.getStart());
    final int headEnd = offsetMap.getMapping(headSpan.getEnd());
    while (tokens.hasNext()) {
        final Token token = (Token) tokens.next();
        if (token.getBegin() == headStart && token.getEnd() == headEnd) {
            cons.setHead(token);
            // At most one token matches the exact offsets; stop scanning. (Replaces
            // the former moveToLast()/next() iterator-exhaustion hack.)
            break;
        }
    }
}
// Fragment: records a token and maps character positions of the escaped text back to
// original document offsets (the enclosing loop and closing braces are cut off).
Token token = (Token) tokenIterator.next();
tokenList.add(token);
// escapeToken may change the token's length; the mapping below keeps escaped
// positions aligned with original offsets.
String tokenText = escapeToken(token.getCoveredText());
int origId = token.getBegin();
for (mapId = start; mapId <= end; mapId++) {
    offsetMap.putMapping(mapId, origId);
    // Advance the original offset only while still inside the token's original span;
    // any extra (escaped) positions clamp at the token's end offset.
    if (origId < token.getEnd()) {
        origId++;
// Fragment: looks up tokens whose covered text is a prefix of entityName and tries to
// extend the match to the full entity length (the enclosing method and the stream's
// terminal operation are cut off by the chunk boundary).
// NOTE(review): the bare "return;" presumably closes an early-exit guard whose
// condition was cut off — otherwise the stream below would be unreachable.
return;
tokenPrefixIndex.search(entity).filter(token -> {
    // Keep tokens no longer than the entity name, whose text starts the entity name,
    // and that are not already recorded in the specific index.
    return token.getEnd() - token.getBegin() <= entityName.length()
            && entityName.startsWith(token.getCoveredText())
            && !specificIndex.contains(token);
}).map(token -> {
    int begin = token.getBegin();
    int end = -1; // -1 = no matching end offset found (yet)
    if (token.getEnd() == begin + entityName.length()) {
        // The token itself spans exactly the full entity name.
        end = token.getEnd();
    } else {
        // Otherwise look for a token ending exactly where the entity name would end.
        Token lastToken = tokenEndIndex.get(begin + entityName.length());
        if (lastToken != null)
            end = lastToken.getEnd();