edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation java code examples

/**
 * Collects the {@link TextAnnotation}s of every corpus named in {@code scope}.
 *
 * <p>Two entries are recognized: {@code "ACE"}, served by an {@code ACEReader} constructed over
 * {@code data/all}, and {@code "ERE"}, served by an {@code EREMentionRelationReader} constructed
 * over {@code data/ere/data} for corpus {@code ENR3}. When a reader cannot be constructed the
 * stack trace is printed and that corpus is skipped, rather than failing with a
 * {@link NullPointerException} while iterating the unset reader.
 *
 * @param scope names of the corpora to load
 * @return the annotations gathered from every requested corpus whose reader could be created
 */
public static List<TextAnnotation> getTAs(List<String> scope){
  List<TextAnnotation> tas = new ArrayList<>();
  if (scope.contains("ACE")) {
    ACEReader aceReader = null;
    try {
      aceReader = new ACEReader("data/all", false);
    } catch (Exception e) {
      e.printStackTrace();
    }
    // Construction is best-effort: only iterate when the reader actually exists.
    if (aceReader != null) {
      for (TextAnnotation ta : aceReader) {
        tas.add(ta);
      }
    }
  }
  if (scope.contains("ERE")) {
    EREMentionRelationReader ereMentionRelationReader = null;
    try {
      ereMentionRelationReader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
    } catch (Exception e) {
      e.printStackTrace();
    }
    // Same guard as above: a failed constructor leaves the reader null.
    if (ereMentionRelationReader != null) {
      for (XmlTextAnnotation xta : ereMentionRelationReader) {
        // The ERE reader yields xml-aware wrappers; keep only the cleaned-text annotation.
        tas.add(xta.getTextAnnotation());
      }
    }
  }
  return tas;
}
public Object next(){

  this.offsetToSpanInfo.clear();
this.offsetToSpanInfo =
    XmlDocumentProcessor.compileOffsetSpanMapping(sourceTa.getXmlMarkup());
TextAnnotation ta = sourceTa.getTextAnnotation();
SpanLabelView tokens = (SpanLabelView) ta.getView(ViewNames.TOKENS);
compileOffsets(tokens);
sourceTa.getTextAnnotation().addView(getMentionViewName(), nerView);
  AnnotationFixer.rationalizeBoundaryAnnotations(sourceTa.getTextAnnotation(), getCorefViewName());
} else
  AnnotationFixer.rationalizeBoundaryAnnotations(sourceTa.getTextAnnotation(), getCorefViewName());

TextAnnotation ta = xta.getTextAnnotation();
List<SpanInfo> fudge = xta.getXmlMarkup();
    Pair<String, IntPair> neLabelPair = si.attributes.get("type");
    String neLabel = neLabelPair.getFirst();
    int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst());
    int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond());
    int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart);
    int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd-1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index

StringTransformation st = xmlTa.getXmlSt();
TextAnnotation ta = xmlTa.getTextAnnotation();
String rawText = ta.getText();
String rawStr = rawText.substring(adjStart, adjEnd);

/**
 * Builds an {@link XmlTextAnnotation} from raw xml source text.
 *
 * <p>The xml is passed to {@code xmlProcessor}, which yields a {@link StringTransformation}
 * together with the list of xml spans it extracted. The transformed text of that
 * transformation is then handed to {@code taBuilder} to create the {@link TextAnnotation}.
 *
 * @param xmlText Raw xml text from corpus document
 * @param corpusId corpus identifier
 * @param docId text identifier
 * @return an XmlTextAnnotation bundling the StringTransformation, the TextAnnotation built from
 *         its transformed text, and the xml markup extracted from the source
 */
public XmlTextAnnotation createTextAnnotation(String xmlText, String corpusId, String docId)  {
  logger.debug("processing text from document {}", docId);

  Pair<StringTransformation, List<XmlDocumentProcessor.SpanInfo>> processed =
      xmlProcessor.processXml(xmlText);
  StringTransformation transformation = processed.getFirst();

  // The TextAnnotation is built over the transformed text, not the original xml.
  String transformedText = transformation.getTransformedText();
  TextAnnotation annotation = taBuilder.createTextAnnotation(corpusId, docId, transformedText);

  List<XmlDocumentProcessor.SpanInfo> markup = processed.getSecond();
  return new XmlTextAnnotation(transformation, annotation, markup);
}

String xmlStr = xmlTa.getXmlSt().getOrigText();
int fillerWindowMin = Math.max(offset - 100, 0);
int fillerWindowMax = Math.min(offset + 100, xmlStr.length());

TextAnnotation ta = xta.getTextAnnotation();
List<SpanInfo> fudge = xta.getXmlMarkup();
    Pair<String, IntPair> neLabelPair = si.attributes.get("type");
    String neLabel = neLabelPair.getFirst();
    int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst());
    int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond());
    int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart);
    int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd-1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index

StringTransformation st = xmlTa.getXmlSt();
TextAnnotation ta = xmlTa.getTextAnnotation();
String rawText = ta.getText();
String rawStr = rawText.substring(adjStart, adjEnd);

/**
 * Builds an {@link XmlTextAnnotation} from raw xml source text.
 *
 * <p>The xml is passed to {@code xmlProcessor}, which yields a {@link StringTransformation}
 * together with the list of xml spans it extracted. The transformed text of that
 * transformation is then handed to {@code taBuilder} to create the {@link TextAnnotation}.
 *
 * @param xmlText Raw xml text from corpus document
 * @param corpusId corpus identifier
 * @param docId text identifier
 * @return an XmlTextAnnotation bundling the StringTransformation, the TextAnnotation built from
 *         its transformed text, and the xml markup extracted from the source
 */
public XmlTextAnnotation createTextAnnotation(String xmlText, String corpusId, String docId)  {
  logger.debug("processing text from document {}", docId);

  Pair<StringTransformation, List<XmlDocumentProcessor.SpanInfo>> processed =
      xmlProcessor.processXml(xmlText);
  StringTransformation transformation = processed.getFirst();

  // The TextAnnotation is built over the transformed text, not the original xml.
  String transformedText = transformation.getTransformedText();
  TextAnnotation annotation = taBuilder.createTextAnnotation(corpusId, docId, transformedText);

  List<XmlDocumentProcessor.SpanInfo> markup = processed.getSecond();
  return new XmlTextAnnotation(transformation, annotation, markup);
}

String xmlStr = xmlTa.getXmlSt().getOrigText();
int fillerWindowMin = Math.max(offset - 100, 0);
int fillerWindowMax = Math.min(offset + 100, xmlStr.length());

/**
 * Collects the {@link TextAnnotation}s of every corpus named in {@code scope}.
 *
 * <p>Two entries are recognized: {@code "ACE"}, served by an {@code ACEReader} constructed over
 * {@code data/all}, and {@code "ERE"}, served by an {@code EREMentionRelationReader} constructed
 * over {@code data/ere/data} for corpus {@code ENR3}. When a reader cannot be constructed the
 * stack trace is printed and that corpus is skipped, rather than failing with a
 * {@link NullPointerException} while iterating the unset reader.
 *
 * @param scope names of the corpora to load
 * @return the annotations gathered from every requested corpus whose reader could be created
 */
public static List<TextAnnotation> getTAs(List<String> scope){
  List<TextAnnotation> tas = new ArrayList<>();
  if (scope.contains("ACE")) {
    ACEReader aceReader = null;
    try {
      aceReader = new ACEReader("data/all", false);
    } catch (Exception e) {
      e.printStackTrace();
    }
    // Construction is best-effort: only iterate when the reader actually exists.
    if (aceReader != null) {
      for (TextAnnotation ta : aceReader) {
        tas.add(ta);
      }
    }
  }
  if (scope.contains("ERE")) {
    EREMentionRelationReader ereMentionRelationReader = null;
    try {
      ereMentionRelationReader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
    } catch (Exception e) {
      e.printStackTrace();
    }
    // Same guard as above: a failed constructor leaves the reader null.
    if (ereMentionRelationReader != null) {
      for (XmlTextAnnotation xta : ereMentionRelationReader) {
        // The ERE reader yields xml-aware wrappers; keep only the cleaned-text annotation.
        tas.add(xta.getTextAnnotation());
      }
    }
  }
  return tas;
}
public Object next(){

TextAnnotation ta = xta.getTextAnnotation();
List<SpanInfo> fudge = xta.getXmlMarkup();
    int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst());
    int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond());
    int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart);
    int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd - 1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index

  this.offsetToSpanInfo.clear();
this.offsetToSpanInfo =
    XmlDocumentProcessor.compileOffsetSpanMapping(sourceTa.getXmlMarkup());
TextAnnotation ta = sourceTa.getTextAnnotation();
SpanLabelView tokens = (SpanLabelView) ta.getView(ViewNames.TOKENS);
compileOffsets(tokens);
sourceTa.getTextAnnotation().addView(getMentionViewName(), nerView);
  AnnotationFixer.rationalizeBoundaryAnnotations(sourceTa.getTextAnnotation(), getCorefViewName());
} else
  AnnotationFixer.rationalizeBoundaryAnnotations(sourceTa.getTextAnnotation(), getCorefViewName());

/**
 * Collects the {@link TextAnnotation}s of every corpus named in {@code scope}.
 *
 * <p>Two entries are recognized: {@code "ACE"}, served by an {@code ACEReader} constructed over
 * {@code data/all}, and {@code "ERE"}, served by an {@code EREMentionRelationReader} constructed
 * over {@code data/ere/data} for corpus {@code ENR3}. When a reader cannot be constructed the
 * stack trace is printed and that corpus is skipped, rather than failing with a
 * {@link NullPointerException} while iterating the unset reader.
 *
 * @param scope names of the corpora to load
 * @return the annotations gathered from every requested corpus whose reader could be created
 */
public static List<TextAnnotation> getTAs(List<String> scope){
  List<TextAnnotation> tas = new ArrayList<>();
  if (scope.contains("ACE")) {
    ACEReader aceReader = null;
    try {
      aceReader = new ACEReader("data/all", false);
    } catch (Exception e) {
      e.printStackTrace();
    }
    // Construction is best-effort: only iterate when the reader actually exists.
    if (aceReader != null) {
      for (TextAnnotation ta : aceReader) {
        tas.add(ta);
      }
    }
  }
  if (scope.contains("ERE")) {
    EREMentionRelationReader ereMentionRelationReader = null;
    try {
      ereMentionRelationReader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
    } catch (Exception e) {
      e.printStackTrace();
    }
    // Same guard as above: a failed constructor leaves the reader null.
    if (ereMentionRelationReader != null) {
      for (XmlTextAnnotation xta : ereMentionRelationReader) {
        // The ERE reader yields xml-aware wrappers; keep only the cleaned-text annotation.
        tas.add(xta.getTextAnnotation());
      }
    }
  }
  return tas;
}
public Object next(){

TextAnnotation ta = xta.getTextAnnotation();
List<SpanInfo> fudge = xta.getXmlMarkup();
    int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst());
    int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond());
    int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart);
    int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd - 1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index

ret.add(xta.getTextAnnotation());

List<XmlDocumentProcessor.SpanInfo> markup = xmlTa.getXmlMarkup();
TextAnnotation ta = xmlTa.getTextAnnotation();
View postView = new View(getPostViewName(), NAME, ta, 1.0);
        new IntPair(xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(spanInfo.spanOffsets.getFirst()),
            xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(spanInfo.spanOffsets.getSecond()));
    if (-1 == cleanTextOffsets.getFirst() || -1 == cleanTextOffsets.getSecond())
      throw new IllegalStateException("could not compute cleanText offsets for " + label + " span with offsets " +

ret.add(xta.getTextAnnotation());

List<XmlDocumentProcessor.SpanInfo> markup = xmlTa.getXmlMarkup();
TextAnnotation ta = xmlTa.getTextAnnotation();
View postView = new View(getPostViewName(), NAME, ta, 1.0);
        new IntPair(xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(spanInfo.spanOffsets.getFirst()),
            xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(spanInfo.spanOffsets.getSecond()));
    if (-1 == cleanTextOffsets.getFirst() || -1 == cleanTextOffsets.getSecond())
      throw new IllegalStateException("could not compute cleanText offsets for " + label + " span with offsets " +

ret.add(xta.getTextAnnotation());

Javadoc

Represents a document that contains xml markup. Another class, XmlTextAnnotationMaker, extracts a subset of the xml source text that will be processed by NLP components and creates a TextAnnotation from it. It also extracts xml markup that contains relevant information for use by applications. Examples of possible text fields are "<headline>" and "<post>"; possible supplementary info could be the "author" attribute in a tag such as "<post author="...">". Finally, it provides a StringTransformation that maps between the xml source and the cleaned NLP-processable text. These elements comprise the information for an XmlTextAnnotation object.

Most used methods

Popular in Java

Making http post requests using okhttp
getResourceAsStream (ClassLoader)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
getSupportFragmentManager (FragmentActivity)
OutputStream (java.io)
A writable sink for bytes. Most clients will use output streams that write data to the file system (
Path (java.nio.file)
Calendar (java.util)
Calendar is an abstract base class for converting between a Date object and a set of integer fields
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
DateTimeFormat (org.joda.time.format)
Factory that creates instances of DateTimeFormatter from patterns and styles. Datetime formatting i
Rectangle (java.awt)
A Rectangle specifies an area in a coordinate space that is enclosed by the Rectangle object's top-
Top PhpStorm plugins

How to use XmlTextAnnotation in edu.illinois.cs.cogcomp.core.datastructures.textannotation

Best Java code snippets using edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation (Showing top 20 results out of 315)

How to use
XmlTextAnnotation
in
edu.illinois.cs.cogcomp.core.datastructures.textannotation