edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation.getTokenIdFromCharacterOffset java code examples

/**
 * Probes the token lookup of a {@link TextAnnotation} at character offset 0 and discards
 * both the result and any exception the call raises.
 *
 * <p>Per the original note, this exists to ignore a bug in pre-computing token offsets.
 *
 * @param ta the annotation whose character-offset lookup is exercised
 */
private static void validateTextAnnotationOffset(final TextAnnotation ta) {
  try {
    ta.getTokenIdFromCharacterOffset(0);
  } catch (Exception ignored) {
    // Deliberately swallowed: a failure of this probe call is not reported to the caller.
  }
}

/**
 * Calls {@link TextAnnotation#getTokenIdFromCharacterOffset(int)} once with offset 0,
 * throwing away the returned token id and suppressing every {@link Exception}.
 *
 * <p>The original documentation describes this as ignoring a bug in pre-computing
 * token offsets.
 *
 * @param ta the annotation to probe
 */
private static void validateTextAnnotationOffset(final TextAnnotation ta) {
  try {
    ta.getTokenIdFromCharacterOffset(0);
  } catch (Exception ignored) {
    // Intentionally empty: the lookup failure is tolerated here.
  }
}

/**
 * Builds a {@link Constituent} covering the tokens that correspond to a character span.
 *
 * @param label label given to the new constituent
 * @param viewName name of the view the constituent is created for
 * @param ta annotation used to translate character offsets into token ids
 * @param span character span supplying the start offset, ending offset and attributes
 * @return the new constituent, with the span's attributes copied over when they are set
 */
protected static Constituent getNewConstituentForSpan(String label, String viewName,
    TextAnnotation ta, Span span) {
  final int firstToken = ta.getTokenIdFromCharacterOffset(span.getStart());
  // Look up the character just before the span's ending offset, then step one token past it
  // so the token range handed to the constituent is end-exclusive.
  final int lastCharOffset = span.getEnding() - 1;
  final int tokenEndExclusive = ta.getTokenIdFromCharacterOffset(lastCharOffset) + 1;

  final Constituent result = new Constituent(label, viewName, ta, firstToken, tokenEndExclusive);
  if (span.isSetAttributes()) {
    copyAttributesToConstituent(span, result);
  }
  return result;
}

/**
 * Creates a constituent for the token range matching the given character span.
 *
 * @param label constituent label
 * @param viewName view name passed to the constituent
 * @param ta annotation that maps character offsets to token ids
 * @param span source span; its start/ending offsets and optional attributes are read
 * @return the created constituent (attributes are copied only if set on the span)
 */
protected static Constituent getNewConstituentForSpan(String label, String viewName,
    TextAnnotation ta, Span span) {
  final int tokenBegin = ta.getTokenIdFromCharacterOffset(span.getStart());
  // Token holding the character before the ending offset, plus one: an exclusive end index.
  final int tokenPastEnd = 1 + ta.getTokenIdFromCharacterOffset(span.getEnding() - 1);

  final Constituent built = new Constituent(label, viewName, ta, tokenBegin, tokenPastEnd);
  if (!span.isSetAttributes()) {
    return built;
  }
  copyAttributesToConstituent(span, built);
  return built;
}

/**
 * Converts a character-offset span into a token-offset {@link Constituent}.
 *
 * @param label label for the constituent
 * @param viewName name of the owning view
 * @param ta annotation providing the character-offset to token-id mapping
 * @param span span whose offsets (and attributes, when set) are transferred
 * @return the constituent spanning the mapped tokens
 */
protected static Constituent getNewConstituentForSpan(String label, String viewName, TextAnnotation ta, Span span) {
  final int startTokenId = ta.getTokenIdFromCharacterOffset(span.getStart());
  // The lookup uses the offset just before span.getEnding(); adding one yields the
  // end-exclusive token index given to the Constituent constructor.
  final int finalCharOffset = span.getEnding() - 1;
  final int endTokenIdExclusive = ta.getTokenIdFromCharacterOffset(finalCharOffset) + 1;

  final Constituent created =
      new Constituent(label, viewName, ta, startTokenId, endTokenIdExclusive);
  if (span.isSetAttributes()) {
    copyAttributesToConstituent(span, created);
  }
  return created;
}

/**
 * Adds a {@code ViewNames.QUANTITIES} span-label view to the given annotation.
 *
 * <p>Each {@code QuantSpan} returned by {@code getSpans} has its character offsets mapped to
 * token ids and is added to the view, labelled with the string form of its {@code object}
 * and a score of 1.
 *
 * @param ta annotation to extend; asserted (when assertions are enabled) to already have a
 *     {@code ViewNames.SENTENCE} view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
  assert ta.hasView(ViewNames.SENTENCE);

  final SpanLabelView view =
      new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);
  for (QuantSpan quantity : getSpans(ta.getTokenizedText(), true, ta)) {
    final int firstToken = ta.getTokenIdFromCharacterOffset(quantity.start);
    // NOTE(review): quantity.end is looked up unadjusted - confirm whether QuantSpan.end is
    // an inclusive or a one-past-the-end character offset.
    final int lastToken = ta.getTokenIdFromCharacterOffset(quantity.end);
    view.addSpanLabel(firstToken, lastToken, quantity.object.toString(), 1d);
  }
  ta.addView(ViewNames.QUANTITIES, view);
}

/**
 * Builds the quantities view for an annotation and attaches it under
 * {@code ViewNames.QUANTITIES}.
 *
 * <p>The spans come from {@code getSpans} applied to the annotation's tokenized text; the
 * start and end character offsets of each one are translated to token ids before the span
 * label is added.
 *
 * @param ta annotation receiving the new view; a {@code ViewNames.SENTENCE} view is
 *     asserted to be present
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
  assert ta.hasView(ViewNames.SENTENCE);

  final SpanLabelView quantities =
      new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);
  final List<QuantSpan> detected = getSpans(ta.getTokenizedText(), true, ta);
  for (final QuantSpan q : detected) {
    // NOTE(review): q.end is used as-is for the end lookup; verify its offset convention.
    quantities.addSpanLabel(
        ta.getTokenIdFromCharacterOffset(q.start),
        ta.getTokenIdFromCharacterOffset(q.end),
        q.object.toString(),
        1d);
  }
  ta.addView(ViewNames.QUANTITIES, quantities);
}

/**
 * Computes the sentence-relative token index of a Stanford dependency node.
 *
 * <p>The node's begin character position is mapped to a token id, from which the start
 * token of the {@code sentId}-th sentence constituent is subtracted.
 *
 * @param ta The TextAnnotation containing the sentences
 * @param node The Stanford Dependency node
 * @param sentId The sentence number (index into the sentence view's constituents)
 * @return The token index relative to the sentence
 */
private int getNodePosition(TextAnnotation ta, IndexedWord node, int sentId) {
  final int firstTokenOfSentence =
      ta.getView(ViewNames.SENTENCE).getConstituents().get(sentId).getStartSpan();
  final int absoluteTokenIndex = ta.getTokenIdFromCharacterOffset(node.beginPosition());
  return absoluteTokenIndex - firstTokenOfSentence;
}

/**
 * Returns where a Stanford dependency node sits inside its sentence, counted in tokens.
 *
 * @param ta The TextAnnotation containing the sentences
 * @param node The Stanford Dependency node; its begin character position is looked up
 * @param sentId The sentence number, used to index the sentence view's constituents
 * @return The node's token id minus the start token of the selected sentence
 */
private int getNodePosition(TextAnnotation ta, IndexedWord node, int sentId) {
  final int sentenceOffset =
      ta.getView(ViewNames.SENTENCE).getConstituents().get(sentId).getStartSpan();
  final int beginChar = node.beginPosition();
  return ta.getTokenIdFromCharacterOffset(beginChar) - sentenceOffset;
}

/**
 * Helper function to create a head constituent from an extent constituent.
 *
 * <p>The head's character offsets are read from the extent's
 * {@code ACEReader.EntityHeadStartCharOffset} and {@code ACEReader.EntityHeadEndCharOffset}
 * attributes (the end offset is decremented by one before lookup) and mapped to token ids.
 * The new constituent carries the extent's label, a score of 1.0, and a copy of every
 * attribute of the extent.
 *
 * @param extentConstituent constituent holding the head offset attributes
 * @param textAnnotation annotation used for the character-to-token mapping
 * @param viewName view name for the new constituent
 * @return the head constituent, or {@code null} when either token id is negative or the
 *     end token precedes the start token
 */
public static Constituent getEntityHeadForConstituent(Constituent extentConstituent,
                            TextAnnotation textAnnotation,
                            String viewName) {
  final int headStartChar = Integer.parseInt(
      extentConstituent.getAttribute(ACEReader.EntityHeadStartCharOffset));
  final int headLastChar = Integer.parseInt(
      extentConstituent.getAttribute(ACEReader.EntityHeadEndCharOffset)) - 1;

  final int firstToken = textAnnotation.getTokenIdFromCharacterOffset(headStartChar);
  final int lastToken = textAnnotation.getTokenIdFromCharacterOffset(headLastChar);

  // Reject unmapped offsets (negative token ids) and inverted ranges.
  if (firstToken < 0 || lastToken < 0 || lastToken < firstToken) {
    return null;
  }

  // lastToken is inclusive; the constructor is given lastToken + 1 as its end index.
  final Constituent head = new Constituent(extentConstituent.getLabel(), 1.0, viewName,
      textAnnotation, firstToken, lastToken + 1);
  for (String key : extentConstituent.getAttributeKeys()) {
    head.addAttribute(key, extentConstituent.getAttribute(key));
  }
  return head;
}

/**
 * Helper function to create a head constituent from an extent constituent.
 *
 * <p>Reads the head start/end character offsets from the extent's attributes
 * ({@code ACEReader.EntityHeadStartCharOffset}, {@code ACEReader.EntityHeadEndCharOffset}),
 * subtracts one from the end offset, converts both to token ids and, if they form a valid
 * range, builds a constituent with the extent's label, score 1.0 and all of its attributes.
 *
 * @param extentConstituent the extent whose attributes describe the head
 * @param textAnnotation annotation that resolves character offsets to token ids
 * @param viewName name of the view the head constituent is created for
 * @return the head constituent, or {@code null} if a token id is negative or the range is
 *     inverted
 */
public static Constituent getEntityHeadForConstituent(Constituent extentConstituent,
                            TextAnnotation textAnnotation,
                            String viewName) {
  final String rawStart = extentConstituent.getAttribute(ACEReader.EntityHeadStartCharOffset);
  final int startChar = Integer.parseInt(rawStart);
  final String rawEnd = extentConstituent.getAttribute(ACEReader.EntityHeadEndCharOffset);
  final int endCharInclusive = Integer.parseInt(rawEnd) - 1;

  final int tokenFrom = textAnnotation.getTokenIdFromCharacterOffset(startChar);
  final int tokenTo = textAnnotation.getTokenIdFromCharacterOffset(endCharInclusive);

  final boolean usable = tokenFrom >= 0 && tokenTo >= 0 && tokenTo >= tokenFrom;
  if (!usable) {
    return null;
  }

  final Constituent headConstituent = new Constituent(
      extentConstituent.getLabel(), 1.0, viewName, textAnnotation, tokenFrom, tokenTo + 1);
  for (String attributeName : extentConstituent.getAttributeKeys()) {
    headConstituent.addAttribute(attributeName, extentConstituent.getAttribute(attributeName));
  }
  return headConstituent;
}

// Map the start character offsets of the top node's span and the child node's span to
// their token ids in the TextAnnotation.
int topTokenId = ta.getTokenIdFromCharacterOffset(topNode.getSpan().getStart());
    int childTokenId = ta.getTokenIdFromCharacterOffset(childNode.getSpan().getStart());

    Integer.parseInt(extentConstituent.getAttribute(ACEReader.EntityHeadEndCharOffset)) - 1;
int startToken = textAnnotation.getTokenIdFromCharacterOffset(startCharOffset);
int endToken = textAnnotation.getTokenIdFromCharacterOffset(endCharOffset);

/**
 * Aligns a {@link Labeling} to a {@link TokenLabelView}.
 *
 * <p>Every span of the labeling is converted from character offsets to a token range, and
 * each token in that range receives the span's label and score. When the span has a
 * non-empty attribute map, the attributes are copied to the constituent at each labelled
 * token.
 *
 * <p><b>NOTE:</b> the span's ending offset is treated as one-past-the-end, so the lookup in
 * {@link TextAnnotation#getTokenIdFromCharacterOffset(int)} uses {@code getEnding() - 1}
 * (the last character inside the span). Looking up the ending offset itself can hit
 * whitespace or the first character of the following token.
 *
 * @param viewName name of the view to create
 * @param ta annotation the view is built over
 * @param labeling labeling supplying the spans, the view score and the generator name
 * @return A TokenLabelView
 */
public static TokenLabelView alignLabelingToTokenLabelView(String viewName, TextAnnotation ta, Labeling labeling) {
  List<Span> labels = labeling.getLabels();
  double score = labeling.getScore();
  String generator = labeling.getSource();
  TokenLabelView view = new TokenLabelView(viewName, generator, ta, score);
  for (Span span : labels) {
    int tokenId = ta.getTokenIdFromCharacterOffset(span.getStart());
    // Token containing the last character of the span, plus one => exclusive end token id.
    int endTokenId = ta.getTokenIdFromCharacterOffset(span.getEnding() - 1) + 1;
    // Loop-invariant: whether there is anything to copy for this span.
    boolean hasAttributes = span.isSetAttributes() && span.getAttributes().size() > 0;
    for (int i = tokenId; i < endTokenId; i++) {
      view.addTokenLabel(i, span.getLabel(), span.getScore());
      if (hasAttributes) {
        Constituent c = view.getConstituentAtToken(i);
        copyAttributesToConstituent(span, c);
      }
    }
  }
  return view;
}

/**
 * Converts a {@code Forest} of dependency trees into a {@link TreeView}.
 *
 * <p>For each tree, the start character offset of its top node is mapped to a token id, the
 * sentence containing that token is determined, and the tree built by
 * {@code makeDependencyTree} is registered for that sentence with the tree's score.
 *
 * @param viewName name of the view to create
 * @param ta annotation the view is built over
 * @param dep forest supplying the trees and the view's generator name
 * @return the populated view (created with a view score of 0.0)
 */
public static TreeView alignForestToDependencyView(String viewName, TextAnnotation ta, Forest dep) {
  final TreeView dependencyView = new TreeView(viewName, dep.getSource(), ta, 0.0d);
  for (edu.illinois.cs.cogcomp.thrift.base.Tree thriftTree : dep.getTrees()) {
    final int rootIndex = thriftTree.getTop();
    final Node root = thriftTree.getNodes().get(rootIndex);

    // Locate the sentence through the token at the root node's start offset.
    final int rootToken = ta.getTokenIdFromCharacterOffset(root.getSpan().getStart());
    final int sentence = ta.getSentenceId(rootToken);

    final Tree<Pair<String, Integer>> converted = makeDependencyTree(ta, thriftTree);
    dependencyView.setDependencyTree(sentence, converted, thriftTree.getScore());
  }
  return dependencyView;
}

/**
 * Aligns a {@link edu.illinois.cs.cogcomp.thrift.base.Labeling} to a
 * {@link edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView}.
 *
 * <p>Each span's character offsets are mapped to a token range and every token in that
 * range is given the span's label and score; non-empty span attributes are copied to the
 * constituent at each labelled token.
 *
 * <b>NOTE:</b> must correct for one-past-the-end labeling when calling
 * {@link TextAnnotation#getTokenIdFromCharacterOffset(int)}: the lookup uses
 * {@code getEnding() - 1}, which yields the id of the <em>last</em> token of the span, and
 * one is added to obtain the exclusive loop bound. Without the {@code + 1}, spans covering
 * more than one token would lose their final token.
 *
 * @param viewName name of the view to create
 * @param ta annotation the view is built over
 * @param labeling labeling supplying the spans, the view score and the generator name
 * @return A TokenLabelView
 */
public static TokenLabelView alignLabelingToTokenLabelView(String viewName, TextAnnotation ta,
    Labeling labeling) {
  List<Span> labels = labeling.getLabels();
  double score = labeling.getScore();
  String generator = labeling.getSource();
  TokenLabelView view = new TokenLabelView(viewName, generator, ta, score);
  for (Span span : labels) {
    int tokenId = ta.getTokenIdFromCharacterOffset(span.getStart());
    // Last token inside the span, plus one => exclusive end token id.
    int endTokenId = ta.getTokenIdFromCharacterOffset(span.getEnding() - 1) + 1;
    // Loop-invariant: whether there is anything to copy for this span.
    boolean hasAttributes = span.isSetAttributes() && span.getAttributes().size() > 0;
    for (int i = tokenId; i < endTokenId; i++) {
      view.addTokenLabel(i, span.getLabel(), span.getScore());
      if (hasAttributes) {
        Constituent c = view.getConstituentAtToken(i);
        copyAttributesToConstituent(span, c);
      }
    }
  }
  return view;
}

/**
 * Aligns a {@link edu.illinois.cs.cogcomp.thrift.base.Labeling} to a
 * {@link edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView}.
 *
 * <p>For every span in the labeling, the covered tokens are determined from the span's
 * character offsets and each of them is labelled with the span's label and score. If the
 * span carries a non-empty attribute map, it is copied to the constituent at each token.
 *
 * <b>NOTE:</b> must correct for one-past-the-end labeling when calling
 * {@link TextAnnotation#getTokenIdFromCharacterOffset(int)}. The end lookup therefore uses
 * {@code getEnding() - 1} (giving the span's last token) and adds one to form an exclusive
 * bound, so that multi-token spans include their final token.
 *
 * @param viewName name of the view to create
 * @param ta annotation the view is built over
 * @param labeling labeling supplying the spans, the view score and the generator name
 * @return A TokenLabelView
 */
public static TokenLabelView alignLabelingToTokenLabelView(String viewName, TextAnnotation ta,
    Labeling labeling) {
  List<Span> labels = labeling.getLabels();
  double score = labeling.getScore();
  String generator = labeling.getSource();
  TokenLabelView view = new TokenLabelView(viewName, generator, ta, score);
  for (Span span : labels) {
    int firstTokenId = ta.getTokenIdFromCharacterOffset(span.getStart());
    // Id of the token holding the span's last character, converted to an exclusive bound.
    int endTokenIdExclusive = ta.getTokenIdFromCharacterOffset(span.getEnding() - 1) + 1;
    // Evaluated once per span rather than once per token.
    boolean copyAttributes = span.isSetAttributes() && span.getAttributes().size() > 0;
    for (int i = firstTokenId; i < endTokenIdExclusive; i++) {
      view.addTokenLabel(i, span.getLabel(), span.getScore());
      if (copyAttributes) {
        copyAttributesToConstituent(span, view.getConstituentAtToken(i));
      }
    }
  }
  return view;
}

/**
 * Builds a dependency {@link TreeView} from a {@code Forest}.
 *
 * <p>Each tree in the forest is attached to the sentence that contains the token found at
 * the start character offset of the tree's top node.
 *
 * @param viewName name of the view to create
 * @param ta annotation used for offset-to-token and token-to-sentence lookups
 * @param dep forest whose source becomes the view generator and whose trees are converted
 * @return the view holding one dependency tree per processed forest tree
 */
public static TreeView alignForestToDependencyView(String viewName, TextAnnotation ta,
    Forest dep) {
  final TreeView result = new TreeView(viewName, dep.getSource(), ta, 0.0d);
  for (edu.illinois.cs.cogcomp.thrift.base.Tree source : dep.getTrees()) {
    final int topIndex = source.getTop();
    final List<Node> allNodes = source.getNodes();
    final int topCharStart = allNodes.get(topIndex).getSpan().getStart();

    final int sentenceIndex = ta.getSentenceId(ta.getTokenIdFromCharacterOffset(topCharStart));
    final Tree<Pair<String, Integer>> built = makeDependencyTree(ta, source);
    result.setDependencyTree(sentenceIndex, built, source.getScore());
  }
  return result;
}

/**
 * Translates the trees of a {@code Forest} into a {@link TreeView} over the annotation.
 *
 * <p>The sentence for each tree is found by mapping the top node's start character offset
 * to a token id and asking the annotation for that token's sentence id; the tree produced
 * by {@code makeDependencyTree} is then stored for that sentence together with its score.
 *
 * @param viewName name of the view to create
 * @param ta annotation the view belongs to
 * @param dep forest providing the generator name ({@code getSource()}) and the trees
 * @return the resulting view, constructed with a score of 0.0
 */
public static TreeView alignForestToDependencyView(String viewName, TextAnnotation ta,
    Forest dep) {
  final TreeView treeView = new TreeView(viewName, dep.getSource(), ta, 0.0d);
  for (edu.illinois.cs.cogcomp.thrift.base.Tree current : dep.getTrees()) {
    final int top = current.getTop();
    final Node topNode = current.getNodes().get(top);
    final int anchorToken = ta.getTokenIdFromCharacterOffset(topNode.getSpan().getStart());
    final int sentenceOfTree = ta.getSentenceId(anchorToken);

    final Tree<Pair<String, Integer>> dependencies = makeDependencyTree(ta, current);
    final double treeScore = current.getScore();
    treeView.setDependencyTree(sentenceOfTree, dependencies, treeScore);
  }
  return treeView;
}

// Convert a pair of character offsets (charOffsets) via the XML string transformation,
// map the results to token ids, and add the resulting named-entity constituent to nerView.
// NOTE(review): presumably charOffsets refer to the original (pre-cleaning) text and the
// results to the cleaned text, judging by the method name - confirm against the caller.
int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst());
int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond());
int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart);
// StringTransformation returns a one-past-the-end index; TextAnnotation maps the at-the-end
// index, hence the "- 1" on the end offset.
int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd - 1);
// Constituent token indexing uses one-past-the-end, hence the "+ 1" on the end token.
Constituent neCon = new Constituent(neLabel, nerView.getViewName(), ta, cleanTextNeTokStart, cleanTextNeTokEnd + 1);
nerView.addConstituent(neCon);

Javadoc

Get the position of the token that corresponds to the character offset that is passed as a parameter. This function could be useful when dealing with corpora that specify annotation in terms of character offsets. In particular, the CuratorClient uses this function to convert views from the Curator representation. NOTE: one-past-the-end indexing can make this problematic. Currently, constituents are processed so that only characters within tokens are mapped to token ids (avoiding ambiguity at the cost of introducing complexity for users thinking of one-past-the-end indexing), i.e. you MUST modify the end offset in the call if you are using one-past-the-end offsets (example: Curator data structures use one-past-the-end, as do TextAnnotation Views/Constituents). This behavior was chosen to handle the case where there is arbitrary whitespace, and to avoid confusion when two tokens are contiguous (the first character of the second token would conflict with the last (one-past-the-end) character of the first). UPDATED to allow a non-zero first token character offset (i.e. in the case where the source text has a markup preamble that you want to ignore). The current implementation maps char offsets not representing tokens to the index '-1'.

Popular methods of TextAnnotation

Popular in Java

Creating JSON documents from java classes using gson
getSystemService (Context)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
getSupportFragmentManager (FragmentActivity)
BigDecimal (java.math)
An immutable arbitrary-precision signed decimal. A value is represented by an arbitrary-precision "un
Arrays (java.util)
This class contains various methods for manipulating arrays (such as sorting and searching). This cl
Comparator (java.util)
A Comparator is used to compare two objects to determine their ordering with respect to each other.
Pattern (java.util.regex)
Patterns are compiled regular expressions. In many cases, convenience methods such as String#matches
Graphics2D (java.awt)
This Graphics2D class extends the Graphics class to provide more sophisticated control over graphics
JFrame (javax.swing)
Top PhpStorm plugins

How to use the getTokenIdFromCharacterOffset method in edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation

Best Java code snippets using edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation.getTokenIdFromCharacterOffset (Showing top 20 results out of 315)

How to use
getTokenIdFromCharacterOffset
method
in
edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation