/**
 * Splits the document text at every match of {@code splitPattern} and indexes one
 * {@link Paragraph} annotation per segment between consecutive matches.
 * <p>
 * A segment runs from the end of the previous match (or offset 0) to the start of the next
 * match. Text remaining after the last match is indexed as a final paragraph; if the text ends
 * exactly on a match, no trailing paragraph is created.
 *
 * @param aJCas the JCas whose document text is split and to which the paragraphs are added
 * @throws AnalysisEngineProcessException if the document text is {@code null} or empty
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    String input = aJCas.getDocumentText();
    if (input == null || input.length() < 1) {
        throw new AnalysisEngineProcessException(
                new IllegalArgumentException("Document text is empty."));
    }

    Matcher matcher = splitPattern.matcher(input);
    int nextBeginning = 0;
    // find() without an explicit index resumes after the previous match and steps over
    // zero-length matches, so the loop terminates even if the pattern can match the empty
    // string (find(int) restarted at the same index forever in that case).
    while (matcher.find()) {
        Paragraph paragraph = new Paragraph(aJCas, nextBeginning, matcher.start());
        paragraph.addToIndexes();
        nextBeginning = matcher.end();
    }

    // Whatever follows the last separator becomes the final paragraph.
    if (nextBeginning < input.length()) {
        Paragraph paragraph = new Paragraph(aJCas, nextBeginning, input.length());
        paragraph.addToIndexes();
    }
}
}
/**
 * Iterates over the {@code ELEMENT_PARAGRAPH} children of the given element, appends the text
 * returned by {@code processSentences} for each of them to a single buffer, and indexes a
 * {@link Paragraph} annotation spanning the portion of the buffer contributed by that child.
 *
 * @param jCas the JCas in which the paragraph annotations are created
 * @param element the parent element whose paragraph children are processed
 * @param idPrefix prefix for the id passed to {@code processSentences}; {@code ".p"} and the
 *        paragraph's {@code ATTR_PNUM} attribute value are appended to it
 * @return the buffer holding the accumulated text of all paragraphs
 * @throws CollectionException declared for the call to {@code processSentences}
 */
@SuppressWarnings("unchecked")
private StringBuffer processParagraphs(JCas jCas, Element element, String idPrefix)
        throws CollectionException {
    StringBuffer text = new StringBuffer();
    Iterator<Element> children = element.elementIterator(ELEMENT_PARAGRAPH);

    while (children.hasNext()) {
        Element child = children.next();
        String sentenceIdPrefix = idPrefix + ".p" + child.attributeValue(ATTR_PNUM);

        // The annotation is created before the sentences are processed and closed afterwards,
        // so its span is exactly the text appended for this child.
        Paragraph annotation = new Paragraph(jCas);
        int begin = text.length();
        annotation.setBegin(begin);
        text.append(processSentences(jCas, child, begin, sentenceIdPrefix));
        annotation.setEnd(text.length());
        annotation.addToIndexes();
    }

    return text;
}
/**
 * Creates a Paragraph in the given JCas and sets its begin and end offsets.
 * After the offsets are set, the constructor calls {@code readObject()}, which is defined
 * outside this view.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Paragraph(JCas jcas, int begin, int end) { super(jcas); setBegin(begin); setEnd(end); readObject(); }
/**
 * Re-creates every {@link Paragraph} and {@link Token} annotation of one JCas in another JCas,
 * copying only the begin and end offsets. All paragraphs are copied first, then all tokens.
 *
 * @param source the JCas the annotations are read from
 * @param target the JCas the new annotations are created in and indexed into
 * @throws IllegalArgumentException if the document texts of the two JCas objects differ
 */
private static void copyParagraphAndTokenAnnotations(JCas source, JCas target) {
    String sourceText = source.getDocumentText();
    String targetText = target.getDocumentText();
    // Offsets are only meaningful in the target if both views hold the same text.
    boolean sameContent = sourceText.equals(targetText);
    if (!sameContent) {
        throw new IllegalArgumentException("Source and target have different content");
    }

    for (Paragraph original : JCasUtil.select(source, Paragraph.class)) {
        Paragraph copy = new Paragraph(target);
        copy.setBegin(original.getBegin());
        copy.setEnd(original.getEnd());
        copy.addToIndexes();
    }

    for (Token original : JCasUtil.select(source, Token.class)) {
        Token copy = new Token(target);
        copy.setBegin(original.getBegin());
        copy.setEnd(original.getEnd());
        copy.addToIndexes();
    }
}
/** * Returns true if the token has a preceding whitespace in the original document * * @param token token * @param jCas jcas * @return boolen */ public static boolean hasSpaceBefore(Token token, JCas jCas) { // select previous token(s) List<Token> prevTokens = JCasUtil.selectPreceding(jCas, Token.class, token, 1); Paragraph paragraph = JCasUtil.selectCovering(jCas, Paragraph.class, token).iterator() .next(); return !prevTokens.isEmpty() && (prevTokens.iterator().next().getEnd() != token.getBegin()) && (token.getBegin() != paragraph.getBegin()); }
/**
 * Creates a Paragraph in the given JCas and sets its begin and end offsets.
 * After the offsets are set, the constructor calls {@code readObject()}, which is defined
 * outside this view.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Paragraph(JCas jcas, int begin, int end) { super(jcas); setBegin(begin); setEnd(end); readObject(); }
// Index two Paragraph annotations in jcas: one with begin/end offsets 0 and 19, one with 40 and 59.
new Paragraph(jcas, 0, 19).addToIndexes(); new Paragraph(jcas, 40, 59).addToIndexes();
// Index two Paragraph annotations in jcas: one with begin/end offsets 0 and 19, one with 40 and 59.
new Paragraph(jcas, 0, 19).addToIndexes(); new Paragraph(jcas, 40, 59).addToIndexes();
// Index two Paragraph annotations in jcas: one with begin/end offsets 0 and 19, one with 40 and 59.
new Paragraph(jcas, 0, 19).addToIndexes(); new Paragraph(jcas, 40, 59).addToIndexes();
// Index two Paragraph annotations in jcas: one with begin/end offsets 0 and 19, one with 40 and 59.
new Paragraph(jcas, 0, 19).addToIndexes(); new Paragraph(jcas, 40, 59).addToIndexes();
// Set the document language and text on jcas, then index two Paragraph annotations with
// begin/end offsets 0 and 19, and 40 and 65.
// NOTE(review): the same statement sequence appears twice on this line; confirm whether both
// occurrences are meant to run against the same jcas.
jcas.setDocumentLanguage(aLanguage); jcas.setDocumentText(text); new Paragraph(jcas, 0, 19).addToIndexes(); new Paragraph(jcas, 40, 65).addToIndexes(); jcas.setDocumentLanguage(aLanguage); jcas.setDocumentText(text); new Paragraph(jcas, 0, 19).addToIndexes(); new Paragraph(jcas, 40, 65).addToIndexes();
// Set the document language and text on jcas, then index two Paragraph annotations with
// begin/end offsets 0 and 19, and 40 and 65.
// NOTE(review): the same statement sequence appears twice on this line; confirm whether both
// occurrences are meant to run against the same jcas.
jcas.setDocumentLanguage(aLanguage); jcas.setDocumentText(text); new Paragraph(jcas, 0, 19).addToIndexes(); new Paragraph(jcas, 40, 65).addToIndexes(); jcas.setDocumentLanguage(aLanguage); jcas.setDocumentText(text); new Paragraph(jcas, 0, 19).addToIndexes(); new Paragraph(jcas, 40, 65).addToIndexes();
// Index a Paragraph that begins at paragraphStart and ends at the current length of the buffer.
new Paragraph(getJCas(), paragraphStart, getBuffer().length()).addToIndexes();