de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph java code examples

  /**
   * Splits the document text at every match of {@code splitPattern} and indexes one
   * {@link Paragraph} annotation per resulting segment.
   * <p>
   * Each paragraph spans from the end of the previous separator match (or offset 0) to the
   * start of the next one; text remaining after the last separator becomes a final paragraph.
   * Zero-length separator matches are ignored, so a pattern that can match the empty string
   * cannot stall the scan.
   *
   * @param aJCas the CAS whose document text is segmented and which receives the annotations
   * @throws AnalysisEngineProcessException if the document text is missing or empty
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException
  {
    String input = aJCas.getDocumentText();

    // Treat a missing document text like an empty one instead of failing with an NPE.
    if (input == null || input.isEmpty()) {
      throw new AnalysisEngineProcessException(new Throwable("Document text is empty."));
    }

    Matcher matcher = splitPattern.matcher(input);
    int nextBeginning = 0;
    int searchFrom = 0;
    // find(int) rejects indices beyond the input length, hence the explicit bound.
    while (searchFrom <= input.length() && matcher.find(searchFrom)) {
      if (matcher.end() == matcher.start()) {
        // An empty match separates nothing; step past it so the search keeps advancing.
        searchFrom = matcher.end() + 1;
        continue;
      }
      new Paragraph(aJCas, nextBeginning, matcher.start()).addToIndexes();
      nextBeginning = matcher.end();
      searchFrom = nextBeginning;
    }
    // Whatever follows the last separator forms the trailing paragraph.
    if (nextBeginning < input.length()) {
      new Paragraph(aJCas, nextBeginning, input.length()).addToIndexes();
    }
  }
}

/**
 * Concatenates the text of every {@code ELEMENT_PARAGRAPH} child of the given element and
 * indexes a {@link Paragraph} annotation over each child's portion of that text.
 *
 * @param jCas     the CAS receiving the paragraph annotations
 * @param element  the element whose paragraph children are processed
 * @param idPrefix prefix for the ids handed to {@code processSentences}; {@code ".p"} plus the
 *                 paragraph's {@code ATTR_PNUM} value is appended per paragraph
 * @return the accumulated text of all processed paragraphs
 * @throws CollectionException declared by this method; presumably raised by
 *                             {@code processSentences} (not visible here)
 */
@SuppressWarnings("unchecked")
private StringBuffer processParagraphs(JCas jCas, Element element,
    String idPrefix)
  throws CollectionException
{
  StringBuffer text = new StringBuffer();
  Iterator<Element> children = element.elementIterator(ELEMENT_PARAGRAPH);
  while (children.hasNext()) {
    Element child = children.next();
    String sentenceIdPrefix = idPrefix + ".p" + child.attributeValue(ATTR_PNUM);

    // The annotation starts where the text collected so far ends.
    Paragraph annotation = new Paragraph(jCas);
    int begin = text.length();
    annotation.setBegin(begin);

    text.append(processSentences(jCas, child, begin, sentenceIdPrefix));

    // After appending, the buffer length marks the end of this paragraph.
    annotation.setEnd(text.length());
    annotation.addToIndexes();
  }
  return text;
}

/**
 * Creates a Paragraph in the given JCas, sets its begin and end offsets, and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Paragraph(JCas jcas, int begin, int end) {
 super(jcas);
 setBegin(begin);
 setEnd(end);
 // NOTE(review): presumably the generated hook for custom initialization - confirm.
 readObject();
}

/**
 * Re-creates every {@link Paragraph} and {@link Token} annotation of {@code source} in
 * {@code target}, copying only the begin and end offsets.
 *
 * @param source the CAS to read annotations from
 * @param target the CAS to add the copied annotations to
 * @throws IllegalArgumentException if the two CASes do not hold the same document text
 */
private static void copyParagraphAndTokenAnnotations(JCas source, JCas target)
{
  String sourceText = source.getDocumentText();
  String targetText = target.getDocumentText();
  boolean sameContent = sourceText.equals(targetText);
  if (!sameContent) {
    // Offsets are only meaningful when both CASes carry identical text.
    throw new IllegalArgumentException("Source and target have different content");
  }

  for (Paragraph original : JCasUtil.select(source, Paragraph.class)) {
    Paragraph copy = new Paragraph(target);
    copy.setBegin(original.getBegin());
    copy.setEnd(original.getEnd());
    copy.addToIndexes();
  }

  for (Token original : JCasUtil.select(source, Token.class)) {
    Token copy = new Token(target);
    copy.setBegin(original.getBegin());
    copy.setEnd(original.getEnd());
    copy.addToIndexes();
  }
}

/**
 * Returns true if the token has a preceding whitespace in the original document.
 * <p>
 * That is the case when a previous token exists, it does not end exactly where this token
 * begins, and this token does not start at the beginning of its covering paragraph. If no
 * {@link Paragraph} covers the token, the paragraph condition is skipped instead of failing.
 *
 * @param token token
 * @param jCas  jcas
 * @return {@code true} if there is a gap before the token, {@code false} otherwise
 */
public static boolean hasSpaceBefore(Token token, JCas jCas)
{
  // select the single token directly preceding this one
  List<Token> prevTokens = JCasUtil.selectPreceding(jCas, Token.class, token, 1);
  if (prevTokens.isEmpty() || prevTokens.get(0).getEnd() == token.getBegin()) {
    // No previous token, or it touches this token: nothing lies between them.
    return false;
  }

  // Only look up the paragraph when the result still depends on it.
  List<Paragraph> covering = JCasUtil.selectCovering(jCas, Paragraph.class, token);
  return covering.isEmpty() || token.getBegin() != covering.get(0).getBegin();
}

/**
 * Creates a Paragraph in the given JCas, sets its begin and end offsets, and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Paragraph(JCas jcas, int begin, int end) {
 super(jcas);
 setBegin(begin);
 setEnd(end);
 // NOTE(review): presumably the generated hook for custom initialization - confirm.
 readObject();
}

// Each fragment below creates two Paragraph annotations with begin/end offsets 0-19 and
// 40-59 and adds them to the indexes of jcas.
new Paragraph(jcas, 0, 19).addToIndexes();
new Paragraph(jcas, 40, 59).addToIndexes();

new Paragraph(jcas, 0, 19).addToIndexes();
new Paragraph(jcas, 40, 59).addToIndexes();

new Paragraph(jcas, 0, 19).addToIndexes();
new Paragraph(jcas, 40, 59).addToIndexes();

new Paragraph(jcas, 0, 19).addToIndexes();
new Paragraph(jcas, 40, 59).addToIndexes();

// Each fragment below sets the document language and text on jcas, then creates two
// Paragraph annotations with begin/end offsets 0-19 and 40-65 and adds them to the indexes.
jcas.setDocumentLanguage(aLanguage);
jcas.setDocumentText(text);
new Paragraph(jcas, 0, 19).addToIndexes();
new Paragraph(jcas, 40, 65).addToIndexes();
jcas.setDocumentLanguage(aLanguage);
jcas.setDocumentText(text);
new Paragraph(jcas, 0, 19).addToIndexes();
new Paragraph(jcas, 40, 65).addToIndexes();

jcas.setDocumentLanguage(aLanguage);
jcas.setDocumentText(text);
new Paragraph(jcas, 0, 19).addToIndexes();
new Paragraph(jcas, 40, 65).addToIndexes();
jcas.setDocumentLanguage(aLanguage);
jcas.setDocumentText(text);
new Paragraph(jcas, 0, 19).addToIndexes();
new Paragraph(jcas, 40, 65).addToIndexes();

// Index a paragraph that begins at paragraphStart and ends at the current buffer length.
new Paragraph(getJCas(), paragraphStart, getBuffer().length()).addToIndexes();

Javadoc

Updated by JCasGen Thu Feb 28 07:53:35 UTC 2019 XML source: /usr/src/mvn_workdir/dkpro-core-api-segmentation-asl/target/jcasgen/typesystem.xml

Most used methods

<init>
addToIndexes
setBegin
setEnd
getBegin
getEnd
readObject
Write your own initialization here

Popular in Java

Creating JSON documents from java classes using gson
setContentView (Activity)
getSupportFragmentManager (FragmentActivity)
getContentResolver (Context)
FileInputStream (java.io)
An input stream that reads bytes from a file. File file = ...finally if (in != null) in.clos
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
ArrayList (java.util)
ArrayList is an implementation of List, backed by an array. All optional operations including adding
SortedSet (java.util)
SortedSet is a Set which iterates over its elements in a sorted order. The order is determined eithe
Semaphore (java.util.concurrent)
A counting semaphore. Conceptually, a semaphore maintains a set of permits. Each #acquire blocks if
GridLayout (java.awt)
The GridLayout class is a layout manager that lays out a container's components in a rectangular gri
From CI to AI: The AI layer in your organization

How to use Paragraph in de.tudarmstadt.ukp.dkpro.core.api.segmentation.type

Best Java code snippets using de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph (Showing top 13 results out of 315)

How to use
Paragraph
in
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type