org.apache.uima.cas.CAS.setDocumentText java code examples

/**
 * Completes the destination view: the text accumulated in {@code sofaBuilder} becomes the
 * view's document text, and afterwards every completed annotation is added to the view's
 * indexes.
 */
@Override
public void finishDestination() {
 final String documentText = sofaBuilder.toString();
 destinationView.setDocumentText(documentText);
 // The document text is set before any annotation is indexed.
 completedAnnotations.forEach(annotation -> destinationView.addFsToIndexes(annotation));
}

/**
 * Called at the end of a document: stores the collected text in the CAS as its document
 * text and, when trace logging is enabled, logs the closing document marker.
 *
 * @param aPdf the document that has ended (not used by this implementation)
 * @throws IOException declared by the overridden method
 */
@Override
protected void endDocument(final PDDocument aPdf)
  throws IOException
{
  final String documentText = text.toString();
  cas.setDocumentText(documentText);

  if (!log.isTraceEnabled()) {
    return;
  }
  log.trace("</document>");
}

/**
 * Prepares the CAS: applies the configured language first, then uses the current content
 * of the text area as the document text.
 */
private final void initCas() {
 cas.setDocumentLanguage(language);
 final String currentText = textArea.getText();
 cas.setDocumentText(currentText);
}

/**
 * {@inheritDoc}
 * <p>
 * The value returned by {@code consumeLine()} becomes the document text of the given CAS.
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
  cas.setDocumentText(consumeLine());
}

/**
 * Adds a document by creating a CAS view named {@code name} and setting {@code text} as
 * that view's document text.
 *
 * @param name name of the view to create
 * @param text document text for the new view
 * @return a {@code CASDocument} built from the new view and this object's label adapters
 */
@Nonnull
@Override
public Document addDocument(@Nonnull String name, @Nonnull String text) {
 final CAS documentView = cas.createView(name);
 documentView.setDocumentText(text);
 final CASDocument document = new CASDocument(documentView, labelAdapters);
 return document;
}

  /**
   * Fills the CAS carried by {@code valueTo} with the document text for one record.
   * The CAS is reset first. The text is {@code valueFrom} itself, or the result of
   * {@code textExtractor.extractDocumentText(keyFrom, valueFrom)} when an extractor is set.
   *
   * @param keyFrom   record key, handed to the text extractor
   * @param valueFrom record value, used directly when no extractor is configured
   * @param valueTo   holder of the CAS to populate
   */
  @Override
  public void convertValue(Text keyFrom, Text valueFrom, CASWritable valueTo)
  {
    final CAS target = valueTo.getCAS();
    target.reset();

    final Text source = (textExtractor == null)
        ? valueFrom
        : textExtractor.extractDocumentText(keyFrom, valueFrom);
    target.setDocumentText(source.toString());
  }
}

/**
 * Runs the analysis engine over the given text and returns the processed CAS.
 * <p>
 * You must release the returned CAS yourself.
 * <p>
 * A failed engine run is retried for non-empty text, but only a bounded number of times.
 * Unbounded self-recursion on failure would end in a {@link StackOverflowError} whenever
 * the engine fails deterministically, and would obtain a fresh CAS on every retry while
 * dropping the previous one. Retries therefore reset and reuse the same CAS.
 *
 * @param text the text to process
 * @return the processed cas
 * @throws RuntimeException wrapping the {@link AnalysisEngineProcessException} when
 *         {@code text} is null or empty, or when every attempt has failed
 */
public CAS process(String text) {
  // Upper bound on engine runs for one text: the first try plus retries.
  final int maxAttempts = 3;
  final boolean retryable = text != null && !text.isEmpty();

  CAS cas = retrieve();
  cas.setDocumentText(text);
  for (int attempt = 1; ; attempt++) {
    try {
      analysisEngine.process(cas);
      return cas;
    } catch (AnalysisEngineProcessException e) {
      if (!retryable || attempt >= maxAttempts) {
        // NOTE(review): the CAS from retrieve() is not handed back on this path;
        // confirm whether it has to be released here before rethrowing.
        throw new RuntimeException(e);
      }
      // Drop whatever the failed run left behind; the document text can only be
      // set again after a reset.
      cas.reset();
      cas.setDocumentText(text);
    }
  }
}

/**
 * Runs the analysis engine over the given text and returns the processed CAS.
 * <p>
 * You must release the returned CAS yourself.
 * <p>
 * A failed engine run is retried for non-empty text, but only a bounded number of times.
 * Unbounded self-recursion on failure would end in a {@link StackOverflowError} whenever
 * the engine fails deterministically, and would obtain a fresh CAS on every retry while
 * dropping the previous one. Retries therefore reset and reuse the same CAS.
 *
 * @param text the text to process
 * @return the processed cas
 * @throws RuntimeException wrapping the {@link AnalysisEngineProcessException} when
 *         {@code text} is null or empty, or when every attempt has failed
 */
public CAS process(String text) {
  // Upper bound on engine runs for one text: the first try plus retries.
  final int maxAttempts = 3;
  final boolean retryable = text != null && !text.isEmpty();

  CAS cas = retrieve();
  cas.setDocumentText(text);
  for (int attempt = 1; ; attempt++) {
    try {
      analysisEngine.process(cas);
      return cas;
    } catch (AnalysisEngineProcessException e) {
      if (!retryable || attempt >= maxAttempts) {
        // NOTE(review): the CAS from retrieve() is not handed back on this path;
        // confirm whether it has to be released here before rethrowing.
        throw new RuntimeException(e);
      }
      // Drop whatever the failed run left behind; the document text can only be
      // set again after a reset.
      cas.reset();
      cas.setDocumentText(text);
    }
  }
}

/**
 * Builds a serialized CAS for one document and returns it as an in-memory stream.
 * <p>
 * An empty CAS is created, given the text (after {@code removeNonXmlChars}) and the
 * language, and saved in the requested format.
 *
 * @param fileName name used in the error message when saving fails
 * @param text     document text
 * @param language document language
 * @param format   serialization format passed to {@code CasIOUtils.save}
 * @return a stream over the serialized bytes
 * @throws TaeError if saving the CAS raises an {@link IOException}
 */
private InputStream getDocument(String fileName, String text, String language,
    SerialFormat format) {
 final CAS documentCas = createEmtpyCAS();
 documentCas.setDocumentText(removeNonXmlChars(text));
 documentCas.setDocumentLanguage(language);

 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(40000);
 try {
  CasIOUtils.save(documentCas, buffer, format);
 } catch (IOException e) {
  final String prefix = "Failed to import: " + fileName + "\n\n";
  throw new TaeError(prefix + e.getMessage(), e);
 }
 final byte[] serialized = buffer.toByteArray();
 return new ByteArrayInputStream(serialized);
}

/**
 * Runs the analysis engine over the given text and returns the processed CAS.
 * <p>
 * You must release the returned CAS yourself.
 * <p>
 * A failed engine run is retried for non-empty text, but only a bounded number of times.
 * Unbounded self-recursion on failure would end in a {@link StackOverflowError} whenever
 * the engine fails deterministically, and would obtain a fresh CAS on every retry while
 * dropping the previous one. Retries therefore reset and reuse the same CAS.
 *
 * @param text the text to process
 * @return the processed cas
 * @throws RuntimeException wrapping the {@link AnalysisEngineProcessException} when
 *         {@code text} is null or empty, or when every attempt has failed
 */
public CAS process(String text) {
  // Upper bound on engine runs for one text: the first try plus retries.
  final int maxAttempts = 3;
  final boolean retryable = text != null && !text.isEmpty();

  CAS cas = retrieve();
  cas.setDocumentText(text);
  for (int attempt = 1; ; attempt++) {
    try {
      analysisEngine.process(cas);
      return cas;
    } catch (AnalysisEngineProcessException e) {
      if (!retryable || attempt >= maxAttempts) {
        // NOTE(review): the CAS from retrieve() is not handed back on this path;
        // confirm whether it has to be released here before rethrowing.
        throw new RuntimeException(e);
      }
      // Drop whatever the failed run left behind; the document text can only be
      // set again after a reset.
      cas.reset();
      cas.setDocumentText(text);
    }
  }
}

/**
 * Fills the CAS from the next queued node.
 * <p>
 * The CAS is initialized from the current file resource, the language is applied unless
 * {@code StringUtils.isWhitespace(language)} holds, and the next node (if any) is processed
 * into a text buffer. When the buffer is whitespace-only according to
 * {@code StringUtils.isWhitespace}, the placeholder {@code "[Parse error]"} is used as
 * document text instead.
 *
 * @param cas the CAS to populate
 * @throws IOException declared by the overridden method
 */
@Override
public void getNext(CAS cas)
  throws IOException
{
  // Document meta data first.
  initCas(cas, currentFileResource, null);
  final boolean languageGiven = !StringUtils.isWhitespace(language);
  if (languageGiven) {
    cas.setDocumentLanguage(language);
  }

  // Collect the document text from the next node, if the queue has one.
  final StringBuilder collected = new StringBuilder();
  final Node next = nodes.poll();
  if (next != null) {
    processNode(cas, next, collected);
  }

  // Nothing usable parsed out: store the error placeholder instead.
  final String parsed = collected.toString();
  cas.setDocumentText(StringUtils.isWhitespace(parsed) ? "[Parse error]" : parsed);
}

/**
 * Runs the analysis engine on one Behemoth document and converts the CAS results back
 * into it.
 * <p>
 * Documents whose text is {@code null} are skipped with a debug log entry. The document
 * language is read from the {@code "uima.language"} configuration entry, defaulting to
 * {@code "en"}.
 *
 * @param behemoth the document to process
 * @param reporter passed through to {@code convertCASToBehemoth}
 * @throws AnalysisEngineProcessException if the engine fails on the document
 */
protected void doProcess(BehemothDocument behemoth, Reporter reporter) throws AnalysisEngineProcessException {
  // No text at all: nothing to analyse, skip the document.
  if (behemoth.getText() == null) {
    LOG.debug(behemoth.getUrl().toString() + " has null text");
    return;
  }

  // Language comes from the configuration, falling back to "en".
  final String language = this.config.get("uima.language", "en");
  cas.setDocumentLanguage(language);
  cas.setDocumentText(behemoth.getText());

  tae.process(cas);
  convertCASToBehemoth(cas, behemoth, reporter);
}

/**
 * {@inheritDoc}
 * <p>
 * Takes the next tuple from the database iterator, uses its text as document text, and
 * indexes a {@code DocumentMetadata} annotation carrying the tuple's URL as source URI.
 *
 * @throws NoSuchElementException if the iterator has no further tuple
 * @throws CollectionException if the JCas cannot be obtained from the CAS
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
  if (!dbIterator.hasNext()) {
    throw new NoSuchElementException();
  }

  final DbTuple current = dbIterator.next();
  consumedCount += 1;
  cas.setDocumentText(current.text);

  try {
    final DocumentMetadata metadata = new DocumentMetadata(cas.getJCas());
    metadata.setSourceUri(current.url);
    metadata.addToIndexes();
  } catch (CASException casFailure) {
    throw new CollectionException(casFailure);
  }
}

/**
 * {@inheritDoc}
 * <p>
 * Reads the file following the last one read, uses its content (decoded with
 * {@code encoding}) as document text, and indexes a {@code DocumentMetadata} annotation
 * whose source URI comes from {@code getURIForMetadata(file)}.
 *
 * @throws CollectionException wrapping a {@link NoSuchElementException} when no file is left
 * @throws IllegalStateException if the JCas cannot be obtained from the CAS
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
  if (!hasNext()) {
    throw new CollectionException(new NoSuchElementException());
  }

  // Advance the cursor only after the file lookup has succeeded.
  final int nextIdx = lastReadFileIdx + 1;
  final File current = files.get(nextIdx);
  lastReadFileIdx = nextIdx;

  aCAS.setDocumentText(FileUtils.readFileToString(current, encoding));

  try {
    final DocumentMetadata metadata = new DocumentMetadata(aCAS.getJCas());
    metadata.setSourceUri(getURIForMetadata(current).toString());
    metadata.addToIndexes();
  } catch (CASException casFailure) {
    throw new IllegalStateException(casFailure);
  }
}

  /**
   * Reads the next resource into the CAS as document text.
   * <p>
   * The resource stream is obtained through {@code CompressionUtils.getInputStream} and
   * buffered. With {@code ENCODING_AUTO} as source encoding the text is read through a
   * {@code CharsetDetector} reader; otherwise it is decoded with {@code sourceEncoding}.
   *
   * @param aJCas the CAS to populate
   * @throws IOException if reading the resource fails
   * @throws CollectionException declared by the overridden method
   */
  @Override
  public void getNext(CAS aJCas)
    throws IOException, CollectionException
  {
    final Resource resource = nextFile();
    initCas(aJCas, resource);

    try (InputStream in = new BufferedInputStream(
        CompressionUtils.getInputStream(resource.getLocation(), resource.getInputStream()))) {
      final String content = ENCODING_AUTO.equals(sourceEncoding)
          ? IOUtils.toString(new CharsetDetector().getReader(in, null))
          : IOUtils.toString(in, sourceEncoding);
      aJCas.setDocumentText(content);
    }
  }
}

/**
 * Runs the PlainTextAnnotator on a text file and prints every annotation as
 * {@code <type short name> :  <covered text>}.
 * <p>
 * The descriptor is looked up on the classpath under two names. If neither resolves, the
 * run stops with an explicit error instead of failing later on a null URL.
 *
 * @param args optional; {@code args[0]} is the path of the UTF-8 input file. Without
 *             arguments the original hard-coded default path is used.
 * @throws Exception if the descriptor cannot be found or parsed, the engine cannot be
 *                   created, or reading or processing the input fails
 */
public static void main(String[] args) throws Exception {
 URL url = TextMarkerEngine.class.getClassLoader().getResource("PlainTextAnnotator.xml");
 if (url == null) {
  url = PlainTextAnnotator.class.getClassLoader().getResource(
      "org/apache/uima/textmarker/engine/PlainTextAnnotator.xml");
 }
 if (url == null) {
  throw new IllegalStateException(
      "PlainTextAnnotator.xml descriptor not found on the classpath");
 }
 // Allow the input file to be chosen on the command line; keep the old path as default.
 String inputPath = (args != null && args.length > 0)
     ? args[0]
     : "D:/work/workspace-textmarker/Test/input/list1.txt";

 XMLInputSource in = new XMLInputSource(url);
 ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
 AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
 CAS cas = ae.newCAS();
 cas.setDocumentText(FileUtils.file2String(new File(inputPath), "UTF-8"));
 ae.process(cas);
 AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex();
 for (AnnotationFS annotationFS : annotationIndex) {
  System.out.println(annotationFS.getType().getShortName() + " :  "
      + annotationFS.getCoveredText());
 }
}

/**
 * Processes one text with the analysis engine and hands the CAS to {@code processCAS}.
 * <p>
 * The CAS is reset in a {@code finally} block so that it is cleared on every path. This
 * keeps a failure in this call from leaving document text behind that would make the
 * next call's {@code setDocumentText} fail.
 *
 * @param text the document text to process
 */
public void runCPM(String text) {
 try {
  cas.setDocumentText(text);
  cas.setDocumentLanguage(langID);
  try {
   ae.process(cas);
  } catch (AnalysisEngineProcessException e) {
   // Best effort: report the failure and still pass the CAS on.
   e.printStackTrace();
  }
  processCAS(cas);
 } finally {
  cas.reset();
 }
}

/**
 * Detags the XML document held in the {@code "xmlDocument"} view and publishes the result
 * as a new {@code "plainTextDocument"} view.
 * <p>
 * The new view receives the detagged text and the document language of the
 * {@code "_InitialView"} view. If the XML view has an annotation of
 * {@code sourceDocInfoType}, the first one is also indexed in the new view.
 *
 * @param aCAS CAS containing the {@code "xmlDocument"} view
 * @throws AnalysisEngineProcessException if creating the parser or parsing the XML fails
 */
public void process(CAS aCAS) throws AnalysisEngineProcessException {
 // get handle to CAS view containing XML document
 CAS xmlCas = aCAS.getView("xmlDocument");
 InputStream xmlStream = xmlCas.getSofa().getSofaDataStream();
 // parse with detag handler
 DetagHandler handler = new DetagHandler();
 try {
  SAXParser parser = parserFactory.newSAXParser();
  parser.parse(xmlStream, handler);
 } catch (Exception e) {
  throw new AnalysisEngineProcessException(e);
 } finally {
  // Release the sofa data stream whether or not parsing succeeded.
  if (xmlStream != null) {
   try {
    xmlStream.close();
   } catch (java.io.IOException ignored) {
    // Best effort: a failed close must not mask the parse result or its exception.
   }
  }
 }
 // create the plain text view and set its document text
 CAS plainTextView = aCAS.createView("plainTextDocument");
 plainTextView.setDocumentText(handler.getDetaggedText());
 plainTextView.setDocumentLanguage(aCAS.getView("_InitialView").getDocumentLanguage());
 // Index the SourceDocumentInformation object, if there is one, in the new sofa.
 // This is needed by the SemanticSearchCasIndexer
 Iterator<?> iter = xmlCas.getAnnotationIndex(sourceDocInfoType).iterator();
 if (iter.hasNext()) {
  FeatureStructure sourceDocInfoFs = (FeatureStructure) iter.next();
  plainTextView.getIndexRepository().addFS(sourceDocInfoFs);
 }
}

/**
 * Analyzes the tokenizer input with the analysis engine.
 * <p>
 * The engine is obtained from the {@code AEProviderFactory} on first use. {@link #cas} is
 * created on first use and reset on later calls, then filled with the input text and
 * processed, so that it holds the extracted metadata (UIMA annotations, feature structures).
 *
 * @throws ResourceInitializationException declared for obtaining the analysis engine or CAS
 * @throws AnalysisEngineProcessException if the engine fails while processing
 * @throws IOException If there is a low-level I/O error.
 */
protected void analyzeInput() throws ResourceInitializationException, AnalysisEngineProcessException, IOException {
 if (ae == null) {
  ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
 }

 // Reuse the existing CAS when there is one; otherwise ask the engine for a new one.
 if (cas != null) {
  cas.reset();
 } else {
  cas = ae.newCAS();
 }

 final String inputText = toString(input);
 cas.setDocumentText(inputText);
 ae.process(cas);
}

/**
 * Creates an artifact on top of the given CAS.
 * <p>
 * Looks up the {@code "ArtifactMetadata"} type with its {@code key} and {@code value}
 * features, creates a {@code "metadata"} view with empty document text, stores the
 * artifact id in that view as an indexed {@code "ArtifactID"} feature structure, and
 * fetches the {@code "metadata"} index of the view.
 *
 * @param labelAdapters label adapters kept by this artifact, may be null
 * @param cas           the CAS backing this artifact
 * @param artifactID    identifier stored in the metadata view
 */
CASArtifact(
  @Nullable LabelAdapters labelAdapters,
  CAS cas,
  String artifactID
) {
 this.labelAdapters = labelAdapters;
 this.cas = cas;
 this.artifactID = artifactID;

 // Metadata type and its two features.
 final TypeSystem types = cas.getTypeSystem();
 metadataType = types.getType("ArtifactMetadata");
 keyFeature = metadataType.getFeatureByBaseName("key");
 valueFeature = metadataType.getFeatureByBaseName("value");

 // Separate view that holds the metadata; its document text is empty.
 metadataCas = cas.createView("metadata");
 metadataCas.setDocumentText("");

 // Record the artifact id as an indexed feature structure in the metadata view.
 final Type artifactIdType = types.getType("ArtifactID");
 final Feature artifactIdFeature = artifactIdType.getFeatureByBaseName("artifactID");
 final FeatureStructure artifactIdFs = metadataCas.createFS(artifactIdType);
 artifactIdFs.setStringValue(artifactIdFeature, artifactID);
 metadataCas.addFsToIndexes(artifactIdFs);

 metadataIndex = metadataCas.getIndexRepository().getIndex("metadata", metadataType);
 casMetadata = new CASMetadata();
}

How to use the setDocumentText method in org.apache.uima.cas.CAS

Best Java code snippets using org.apache.uima.cas.CAS.setDocumentText (Showing top 20 results out of 315)

How to use the setDocumentText method in org.apache.uima.cas.CAS