org.apache.uima.jcas.JCas.getDocumentText java code examples

/**
 * Verifies that the document text of {@code actual} contains the given string and
 * fails the assertion otherwise.
 *
 * <p>A {@code null} document text is reported as an assertion failure rather than
 * surfacing as a {@link NullPointerException}.
 *
 * @param string the text expected to occur in the document text
 * @return this assertion object
 */
public CasAssert containsText(String string) {
  // Read the text once so the check and the failure message see the same value.
  String documentText = actual.getDocumentText();
  if (documentText == null || !documentText.contains(string)) {
    failWithMessage("Expect document text to contain <%s>. Actual text is: <%s>", string, documentText);
  }
  return this;
}

  /**
   * Returns the document text that directly follows the given unit, limited to
   * {@code CONTEXT_WIDTH} characters and to the end of the document, with every
   * newline replaced by a space.
   */
  private static String getRightContext(JCas jcas, TextClassificationTarget unit)
  {
    final int contextStart = unit.getEnd();
    final String documentText = jcas.getDocumentText();

    // Never read past the end of the document.
    final int contextEnd = Math.min(contextStart + CONTEXT_WIDTH, documentText.length());

    return documentText.substring(contextStart, contextEnd).replace('\n', ' ');
  }
}

  /**
   * Writes the document text of the given JCas to the stream returned by
   * {@code getOutputStream(aJCas, filenameSuffix)}, using {@code targetEncoding}.
   *
   * @param aJCas the JCas whose document text is written
   * @throws AnalysisEngineProcessException wrapping any exception raised while
   *         opening, writing to, or closing the stream
   */
  @Override
  public void process(JCas aJCas)
    throws AnalysisEngineProcessException
  {
    try (OutputStream target = getOutputStream(aJCas, filenameSuffix)) {
      final String documentText = aJCas.getDocumentText();
      IOUtils.write(documentText, target, targetEncoding);
    }
    catch (Exception failure) {
      throw new AnalysisEngineProcessException(failure);
    }
  }
}

  /**
   * Creates a {@link TextClassificationTarget} spanning the whole document text of
   * the given JCas.
   *
   * @param aJCas
   *            the JCas whose document text length determines the end offset.
   * @return a new {@link TextClassificationTarget} from offset 0 to the length of
   *         the document text.
   */
  public static TextClassificationTarget get(final JCas aJCas)
  {
    final int documentLength = aJCas.getDocumentText().length();
    return new TextClassificationTarget(aJCas, 0, documentLength);
  }
}

/**
 * Marks a document as corrupt by setting its text to {@code CORRUPT_FILE_TEXT}.
 *
 * <p>The text is only set when the JCas currently has no document text (null or
 * empty); a JCas that already has text is left untouched.
 *
 * @param jCas the jCas
 */
private void setCorrupt(JCas jCas) {
 final String currentText = jCas.getDocumentText();
 if (!Strings.isNullOrEmpty(currentText)) {
  return;
 }
 jCas.setDocumentText(CORRUPT_FILE_TEXT);
}

/**
 * Prints a debug line showing up to 50 characters of the document text starting at
 * {@code begin}.
 *
 * <p>A negative {@code begin} prints a "-1" marker line instead of an excerpt. A
 * {@code begin} at or beyond the end of the text prints nothing.
 *
 * @param desc label included in the printed line
 * @param cas the JCas supplying the document text and the header doc id
 * @param begin start offset of the excerpt within the document text
 */
private void printSectionStart(String desc, JCas cas, int begin) {
  String str = cas.getDocumentText();
  if (begin < 0) {
    System.out.println("AAAA - " + desc + " - -1 - " + BlueCasUtil.getHeaderDocId(cas));
  } else if (begin < str.length()) {
    // substring's end index is exclusive, so clamp to length(); clamping to
    // length() - 1 would silently drop the last character of the document.
    int end = Math.min(begin + 50, str.length());
    System.out.println("AAAA - "+desc+" ("+ BlueCasUtil.getHeaderDocId(cas)+":"+begin+"): "+str.substring(begin, end));
  }
}

/**
 * Saves the document text of the given JCas to a {@code .txt} file in
 * {@code outputDirectory}, named after the file name of the JCas view URI.
 *
 * @param jCas the JCas whose document text is saved
 * @throws AnalysisEngineProcessException if saving the file fails with an IOException
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
 final String sourceName = new File(ViewUriUtil.getURI(jCas)).getName();
 final File target = new File(this.outputDirectory, sourceName + ".txt");
 try {
  final String documentText = jCas.getDocumentText();
  FileUtils.saveString2File(documentText, target);
 } catch (IOException ioFailure) {
  throw new AnalysisEngineProcessException(ioFailure);
 }
}

  /**
   * Adds a {@code Token} covering {@code [begin, end)} to the indexes, unless the
   * covered text is empty after trimming.
   */
  private void addToken(JCas aJCas, int begin, int end)
  {
    final String coveredText = aJCas.getDocumentText().substring(begin, end);
    if (coveredText.trim().isEmpty()) {
      return;
    }
    new Token(aJCas, begin, end).addToIndexes();
  }
}

/**
 * Runs {@code processForEntityType} for entity mentions and then for event
 * mentions, logging the start and end of the call.
 *
 * @param jcas the JCas to process
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException
{
 logger.info("beginning of ConceptConverterAnalysisEngine.process()");
 // The document text was previously fetched into a local that was never read;
 // the unused local has been removed.
 processForEntityType(jcas, EntityMention.type, EntityMention.class);
 processForEntityType(jcas, EventMention.type, EventMention.class);
 logger.info("end of ConceptConverterAnalysisEngine.process()");
}

/**
 * Writes one line to {@code bw} consisting of the document id, a tab, the
 * document text and a trailing newline.
 *
 * @param aJCas the JCas supplying the {@code DocumentMetaData} and the document text
 * @throws AnalysisEngineProcessException if the write fails with an IOException
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
  final DocumentMetaData metaData = JCasUtil.selectSingle(aJCas, DocumentMetaData.class);
  try {
    final String line = metaData.getDocumentId() + "\t" + aJCas.getDocumentText() + "\n";
    bw.write(line);
  }
  catch (IOException ioFailure) {
    throw new AnalysisEngineProcessException(ioFailure);
  }
}

/**
 * Reports whether this object refers to the whole document rather than to a part
 * of it.
 *
 * <p>This is the case when no text annotation is set, and also when the text
 * annotation starts at offset 0 and ends at the length of the document text.
 *
 * @return true, if is whole document
 */
public boolean isWholeDocument() {
 if (text == null) {
  return true;
 }
 if (text.getBegin() != 0) {
  return false;
 }
 return text.getEnd() == jCas.getDocumentText().length();
}

 /**
  * Copies the document text of the given JCas into its view named
  * {@code GOLD_VIEW_NAME}.
  *
  * @param jCas the JCas whose document text is copied
  * @throws AnalysisEngineProcessException wrapping a CASException raised inside the try block
  */
 @Override
 public void process( JCas jCas ) throws AnalysisEngineProcessException {
   try {
    final JCas goldView = jCas.getView( GOLD_VIEW_NAME );
    final String documentText = jCas.getDocumentText();
    goldView.setDocumentText( documentText );
   } catch ( CASException casFailure ) {
    throw new AnalysisEngineProcessException( casFailure );
   }
 }
}

/**
 * Returns the frequency distribution produced by
 * {@code NGramUtils.getDocumentNgrams} for a target spanning the whole document
 * text, with both n-gram size arguments set to 1.
 *
 * @param jcas the JCas to extract n-grams from
 * @return the frequency distribution returned by {@code NGramUtils.getDocumentNgrams}
 * @throws TextClassificationException declared by the overridden method
 */
@Override
protected FrequencyDistribution<String> getNgramsFD(JCas jcas)
  throws TextClassificationException
{
  final int documentLength = jcas.getDocumentText().length();
  final TextClassificationTarget wholeDocument =
      new TextClassificationTarget(jcas, 0, documentLength);
  return NGramUtils.getDocumentNgrams(jcas, wholeDocument, ngramLowerCase,
      filterPartialStopwordMatches, 1, 1, stopwords);
}

/**
 * Stores the document text of the given JCas in {@code text} and rebuilds
 * {@code tokens} with one {@code Span} per {@code Token}, using each token's
 * begin and end offsets.
 *
 * @param aJCas the JCas to read the text and tokens from
 */
@Override
public void init(JCas aJCas)
{
  text = aJCas.getDocumentText();
  tokens = new ArrayList<>();
  for (Token current : select(aJCas, Token.class)) {
    final int spanBegin = current.getBegin();
    final int spanEnd = current.getEnd();
    tokens.add(new Span(spanBegin, spanEnd));
  }
}

/**
 * Calls {@code doAnnotations} on the base annotations.
 *
 * <p>Without a segment annotation class, all base annotations are handled in one
 * call bounded by the document text length. Otherwise each segment annotation
 * gets its own call, with the base annotations it covers and its end offset.
 *
 * @param jcas the JCas to process
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  if (segmentAnnotationClass == null) {
    doAnnotations(jcas, JCasUtil.select(jcas, baseAnnotation), jcas.getDocumentText().length());
    return;
  }

  for (final Annotation segment : JCasUtil.select(jcas, segmentAnnotationClass)) {
    doAnnotations(jcas, JCasUtil.selectCovered(jcas, baseAnnotation, segment), segment.getEnd());
  }
}

  /**
   * Finds all named entities in the document text and creates an ontology
   * reference annotation for each of them.
   *
   * @param jcas the JCas whose document text is searched
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    // Create the NER instance and find all named entities in the text
    final AbstractDatabase db = DatabaseSingleton.getInstance();
    final NamedEntityRecognition ner = new NamedEntityRecognition(db);
    for (NamedEntity entity : ner.findEntities(jcas.getDocumentText())) {
      createOntologyReferenceAnnotation(jcas, entity);
    }
    // A trailing System.nanoTime() reading stored in an unused local was removed.
  }
}

/**
 * Logs the document length at CONFIG level, then adds a single {@code Segment}
 * from offset 0 to the length of the document text to the indexes.
 *
 * @param jCas the JCas to add the segment to
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  final int documentLength = jCas.getDocumentText().length();
  final String message = "Entering " + getClass().getSimpleName()
      + ". Adding segment of length " + documentLength + ".";
  getContext().getLogger().log(Level.CONFIG, message);

  final Segment wholeDocument = new Segment(jCas);
  wholeDocument.setBegin(0);
  wholeDocument.setEnd(documentLength);
  wholeDocument.addToIndexes();
}

  /**
   * Tokenizes the document text of the input JCas with a {@code PTBTokenizer}
   * (option string "invertible") and calls {@code replace} for every token with
   * its begin position, end position and word.
   *
   * @param aInput the JCas whose document text is tokenized
   * @param aOutput not used directly in this method
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(JCas aInput, JCas aOutput)
    throws AnalysisEngineProcessException
  {
    final StringReader source = new StringReader(aInput.getDocumentText());
    final Tokenizer<CoreLabel> ptbTokenizer =
        new PTBTokenizer<CoreLabel>(source, new CoreLabelTokenFactory(), "invertible");

    for (final CoreLabel token : ptbTokenizer.tokenize()) {
      replace(token.beginPosition(), token.endPosition(), token.word());
    }
  }
}

/**
 * Parses the document text as Java source code and runs a {@code JavaDocVisitor}
 * followed by a {@code CodeVisitor} over the resulting compilation unit.
 *
 * @param aJCas the JCas whose document text is parsed and which is handed to both visitors
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
  // Parse the document text into an abstract syntax tree.
  final ASTParser astParser = ASTParser.newParser(AST.JLS3);
  final char[] sourceCode = aJCas.getDocumentText().toCharArray();
  astParser.setSource(sourceCode);
  final CompilationUnit compilationUnit = (CompilationUnit) astParser.createAST(null);

  // First pass: JavaDoc related annotations.
  compilationUnit.accept(new JavaDocVisitor(aJCas));
  // Second pass: Java code related annotations.
  compilationUnit.accept(new CodeVisitor(aJCas));
}

/**
 * Computes the character gap between two annotations, divided by the length of
 * the document text.
 *
 * <p>The gap is the distance from the end of the earlier annotation to the begin
 * of the later one; it is 0 when neither annotation ends strictly before the
 * other begins.
 *
 * @param a the first annotation
 * @param b the second annotation
 * @param jcas the JCas whose document text length is the divisor
 * @return the gap divided by the document text length
 */
public static double getRelativeDist(Annotation a, Annotation b, JCas jcas) {
  final int gap = a.getEnd() < b.getBegin()
      ? b.getBegin() - a.getEnd()
      : (b.getEnd() < a.getBegin() ? a.getBegin() - b.getEnd() : 0);
  final int documentLength = jcas.getDocumentText().length();
  return gap / (double) documentLength;
}

How to use the getDocumentText method in org.apache.uima.jcas.JCas

Best Java code snippets using org.apache.uima.jcas.JCas.getDocumentText (Showing top 20 results out of 1,206)

How to use the getDocumentText method in org.apache.uima.jcas.JCas