/**
 * Performs a single token lookup at character offset 0 on the given annotation and
 * deliberately discards any exception that lookup raises.
 * <p>
 * NOTE(review): the original comment says this "ignores the bug in pre-computing token
 * offsets"; presumably the first lookup triggers that pre-computation -- confirm against
 * {@code TextAnnotation}.
 *
 * @param ta the text annotation to probe
 */
private static void validateTextAnnotationOffset(TextAnnotation ta) {
    final int firstCharacterOffset = 0;
    try {
        ta.getTokenIdFromCharacterOffset(firstCharacterOffset);
    } catch (Exception ignored) {
        // Intentionally swallowed: a failure of this probe lookup must not reach the caller.
    }
}
/**
 * Performs a single token lookup at character offset 0 on the given annotation and
 * deliberately discards any exception that lookup raises.
 * <p>
 * NOTE(review): the original comment says this "ignores the bug in pre-computing token
 * offsets"; presumably the first lookup triggers that pre-computation -- confirm against
 * {@code TextAnnotation}.
 *
 * @param ta the text annotation to probe
 */
private static void validateTextAnnotationOffset(TextAnnotation ta) {
    final int firstCharacterOffset = 0;
    try {
        ta.getTokenIdFromCharacterOffset(firstCharacterOffset);
    } catch (Exception ignored) {
        // Intentionally swallowed: a failure of this probe lookup must not reach the caller.
    }
}
/**
 * Builds a {@code Constituent} for a character-level {@code Span}.
 * <p>
 * The start token is the token at {@code span.getStart()}. The end token index handed to the
 * {@code Constituent} constructor is the token at {@code span.getEnding() - 1}, plus one.
 * When the span has attributes set, they are copied onto the new constituent via
 * {@code copyAttributesToConstituent}.
 *
 * @param label the constituent label
 * @param viewName the name of the view the constituent belongs to
 * @param ta the text annotation used to map character offsets to token ids
 * @param span the character-level span to convert
 * @return the newly created constituent
 */
protected static Constituent getNewConstituentForSpan(String label, String viewName,
        TextAnnotation ta, Span span) {
    int firstToken = ta.getTokenIdFromCharacterOffset(span.getStart());
    int lastCharOffset = span.getEnding() - 1;
    int endToken = ta.getTokenIdFromCharacterOffset(lastCharOffset) + 1;

    Constituent result = new Constituent(label, viewName, ta, firstToken, endToken);
    if (!span.isSetAttributes()) {
        return result;
    }
    copyAttributesToConstituent(span, result);
    return result;
}
/**
 * Builds a {@code Constituent} for a character-level {@code Span}.
 * <p>
 * The start token is the token at {@code span.getStart()}. The end token index handed to the
 * {@code Constituent} constructor is the token at {@code span.getEnding() - 1}, plus one.
 * When the span has attributes set, they are copied onto the new constituent via
 * {@code copyAttributesToConstituent}.
 *
 * @param label the constituent label
 * @param viewName the name of the view the constituent belongs to
 * @param ta the text annotation used to map character offsets to token ids
 * @param span the character-level span to convert
 * @return the newly created constituent
 */
protected static Constituent getNewConstituentForSpan(String label, String viewName,
        TextAnnotation ta, Span span) {
    int firstToken = ta.getTokenIdFromCharacterOffset(span.getStart());
    int lastCharOffset = span.getEnding() - 1;
    int endToken = ta.getTokenIdFromCharacterOffset(lastCharOffset) + 1;

    Constituent result = new Constituent(label, viewName, ta, firstToken, endToken);
    if (!span.isSetAttributes()) {
        return result;
    }
    copyAttributesToConstituent(span, result);
    return result;
}
/**
 * Builds a {@code Constituent} for a character-level {@code Span}.
 * <p>
 * The start token is the token at {@code span.getStart()}. The end token index handed to the
 * {@code Constituent} constructor is the token at {@code span.getEnding() - 1}, plus one.
 * When the span has attributes set, they are copied onto the new constituent via
 * {@code copyAttributesToConstituent}.
 *
 * @param label the constituent label
 * @param viewName the name of the view the constituent belongs to
 * @param ta the text annotation used to map character offsets to token ids
 * @param span the character-level span to convert
 * @return the newly created constituent
 */
protected static Constituent getNewConstituentForSpan(String label, String viewName,
        TextAnnotation ta, Span span) {
    int firstToken = ta.getTokenIdFromCharacterOffset(span.getStart());
    int lastCharOffset = span.getEnding() - 1;
    int endToken = ta.getTokenIdFromCharacterOffset(lastCharOffset) + 1;

    Constituent result = new Constituent(label, viewName, ta, firstToken, endToken);
    if (!span.isSetAttributes()) {
        return result;
    }
    copyAttributesToConstituent(span, result);
    return result;
}
/**
 * Creates the {@code ViewNames.QUANTITIES} view and attaches it to the given annotation.
 * <p>
 * Spans are obtained from {@code getSpans} on the annotation's tokenized text. Each span's
 * {@code start} and {@code end} character offsets are mapped to token ids, and a span label
 * carrying {@code span.object.toString()} with score 1 is added for that token range.
 *
 * @param ta the text annotation to extend; asserted to already contain the SENTENCE view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    assert (ta.hasView(ViewNames.SENTENCE));

    SpanLabelView quantities =
            new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);

    for (QuantSpan quantSpan : getSpans(ta.getTokenizedText(), true, ta)) {
        int firstToken = ta.getTokenIdFromCharacterOffset(quantSpan.start);
        int lastToken = ta.getTokenIdFromCharacterOffset(quantSpan.end);
        quantities.addSpanLabel(firstToken, lastToken, quantSpan.object.toString(), 1d);
    }

    ta.addView(ViewNames.QUANTITIES, quantities);
}
/**
 * Creates the {@code ViewNames.QUANTITIES} view and attaches it to the given annotation.
 * <p>
 * Spans are obtained from {@code getSpans} on the annotation's tokenized text. Each span's
 * {@code start} and {@code end} character offsets are mapped to token ids, and a span label
 * carrying {@code span.object.toString()} with score 1 is added for that token range.
 *
 * @param ta the text annotation to extend; asserted to already contain the SENTENCE view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    assert (ta.hasView(ViewNames.SENTENCE));

    SpanLabelView quantities =
            new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);

    for (QuantSpan quantSpan : getSpans(ta.getTokenizedText(), true, ta)) {
        int firstToken = ta.getTokenIdFromCharacterOffset(quantSpan.start);
        int lastToken = ta.getTokenIdFromCharacterOffset(quantSpan.end);
        quantities.addSpanLabel(firstToken, lastToken, quantSpan.object.toString(), 1d);
    }

    ta.addView(ViewNames.QUANTITIES, quantities);
}
/**
 * Computes where a Stanford dependency node sits inside its sentence, counted in tokens.
 * <p>
 * The node's begin character position is mapped to a token id, and the start span of the
 * {@code sentId}-th constituent of the SENTENCE view is subtracted from it.
 *
 * @param ta The TextAnnotation containing the sentences
 * @param node The Stanford Dependency node
 * @param sentId The sentence number
 * @return The token index relative to sentence
 */
private int getNodePosition(TextAnnotation ta, IndexedWord node, int sentId) {
    int firstTokenOfSentence = ta.getView(ViewNames.SENTENCE)
            .getConstituents()
            .get(sentId)
            .getStartSpan();
    int absoluteTokenIndex = ta.getTokenIdFromCharacterOffset(node.beginPosition());
    return absoluteTokenIndex - firstTokenOfSentence;
}
/**
 * Computes where a Stanford dependency node sits inside its sentence, counted in tokens.
 * <p>
 * The node's begin character position is mapped to a token id, and the start span of the
 * {@code sentId}-th constituent of the SENTENCE view is subtracted from it.
 *
 * @param ta The TextAnnotation containing the sentences
 * @param node The Stanford Dependency node
 * @param sentId The sentence number
 * @return The token index relative to sentence
 */
private int getNodePosition(TextAnnotation ta, IndexedWord node, int sentId) {
    int firstTokenOfSentence = ta.getView(ViewNames.SENTENCE)
            .getConstituents()
            .get(sentId)
            .getStartSpan();
    int absoluteTokenIndex = ta.getTokenIdFromCharacterOffset(node.beginPosition());
    return absoluteTokenIndex - firstTokenOfSentence;
}
/**
 * Derives the head constituent of an entity from its extent constituent.
 * <p>
 * The head's character range is read from the extent's
 * {@code ACEReader.EntityHeadStartCharOffset} and {@code ACEReader.EntityHeadEndCharOffset}
 * attributes; one is subtracted from the end offset before it is mapped to a token. The
 * resulting constituent reuses the extent's label, has score 1.0, and receives a copy of
 * every attribute on the extent.
 *
 * @param extentConstituent the extent constituent carrying the head offset attributes
 * @param textAnnotation the annotation used to map character offsets to token ids
 * @param viewName the view name assigned to the head constituent
 * @return the head constituent, or {@code null} when either offset maps to a negative token
 *         id or the end token precedes the start token
 * @throws NumberFormatException if an offset attribute cannot be parsed as an integer
 */
public static Constituent getEntityHeadForConstituent(Constituent extentConstituent,
        TextAnnotation textAnnotation, String viewName) {
    int headStartChar = Integer.parseInt(
            extentConstituent.getAttribute(ACEReader.EntityHeadStartCharOffset));
    int headLastChar = Integer.parseInt(
            extentConstituent.getAttribute(ACEReader.EntityHeadEndCharOffset)) - 1;

    int headStartToken = textAnnotation.getTokenIdFromCharacterOffset(headStartChar);
    int headLastToken = textAnnotation.getTokenIdFromCharacterOffset(headLastChar);

    // Guard clause: bail out when the head cannot be mapped onto a valid token range.
    if (headStartToken < 0 || headLastToken < 0 || headLastToken < headStartToken) {
        return null;
    }

    Constituent head = new Constituent(extentConstituent.getLabel(), 1.0, viewName,
            textAnnotation, headStartToken, headLastToken + 1);
    for (String key : extentConstituent.getAttributeKeys()) {
        head.addAttribute(key, extentConstituent.getAttribute(key));
    }
    return head;
}
/**
 * Derives the head constituent of an entity from its extent constituent.
 * <p>
 * The head's character range is read from the extent's
 * {@code ACEReader.EntityHeadStartCharOffset} and {@code ACEReader.EntityHeadEndCharOffset}
 * attributes; one is subtracted from the end offset before it is mapped to a token. The
 * resulting constituent reuses the extent's label, has score 1.0, and receives a copy of
 * every attribute on the extent.
 *
 * @param extentConstituent the extent constituent carrying the head offset attributes
 * @param textAnnotation the annotation used to map character offsets to token ids
 * @param viewName the view name assigned to the head constituent
 * @return the head constituent, or {@code null} when either offset maps to a negative token
 *         id or the end token precedes the start token
 * @throws NumberFormatException if an offset attribute cannot be parsed as an integer
 */
public static Constituent getEntityHeadForConstituent(Constituent extentConstituent,
        TextAnnotation textAnnotation, String viewName) {
    int headStartChar = Integer.parseInt(
            extentConstituent.getAttribute(ACEReader.EntityHeadStartCharOffset));
    int headLastChar = Integer.parseInt(
            extentConstituent.getAttribute(ACEReader.EntityHeadEndCharOffset)) - 1;

    int headStartToken = textAnnotation.getTokenIdFromCharacterOffset(headStartChar);
    int headLastToken = textAnnotation.getTokenIdFromCharacterOffset(headLastChar);

    // Guard clause: bail out when the head cannot be mapped onto a valid token range.
    if (headStartToken < 0 || headLastToken < 0 || headLastToken < headStartToken) {
        return null;
    }

    Constituent head = new Constituent(extentConstituent.getLabel(), 1.0, viewName,
            textAnnotation, headStartToken, headLastToken + 1);
    for (String key : extentConstituent.getAttributeKeys()) {
        head.addAttribute(key, extentConstituent.getAttribute(key));
    }
    return head;
}
int topTokenId = ta.getTokenIdFromCharacterOffset(topNode.getSpan().getStart()); int childTokenId = ta.getTokenIdFromCharacterOffset(childNode.getSpan().getStart());
Integer.parseInt(extentConstituent.getAttribute(ACEReader.EntityHeadEndCharOffset)) - 1; int startToken = textAnnotation.getTokenIdFromCharacterOffset(startCharOffset); int endToken = textAnnotation.getTokenIdFromCharacterOffset(endCharOffset);
/**
 * Aligns a {@link Labeling} to a {@link TokenLabelView}.
 * <p>
 * Every token covered by a span receives the span's label and score; when the span carries
 * attributes they are copied onto the constituent created for each such token.
 * <p>
 * <b>NOTE:</b> {@code span.getEnding()} is treated as a one-past-the-end character offset, so
 * the last covered token is looked up at {@code getEnding() - 1} rather than at
 * {@code getEnding()} itself.
 *
 * @param viewName the name of the view to create
 * @param ta the text annotation used to map character offsets to token ids
 * @param labeling the labeling whose spans are aligned to tokens
 * @return A TokenLabelView
 */
public static TokenLabelView alignLabelingToTokenLabelView(String viewName, TextAnnotation ta,
        Labeling labeling) {
    List<Span> labels = labeling.getLabels();
    double score = labeling.getScore();
    String generator = labeling.getSource();
    TokenLabelView view = new TokenLabelView(viewName, generator, ta, score);

    for (Span span : labels) {
        int tokenId = ta.getTokenIdFromCharacterOffset(span.getStart());

        // The ending offset points one past the span's last character. Look up the token
        // holding the last character (falling back to the start offset for a zero-length
        // span) and add one to obtain the exclusive loop bound.
        int lastCharOffset = Math.max(span.getStart(), span.getEnding() - 1);
        int endTokenId = ta.getTokenIdFromCharacterOffset(lastCharOffset) + 1;

        for (int i = tokenId; i < endTokenId; i++) {
            view.addTokenLabel(i, span.getLabel(), span.getScore());

            if (span.isSetAttributes() && span.getAttributes().size() > 0) {
                Constituent c = view.getConstituentAtToken(i);
                copyAttributesToConstituent(span, c);
            }
        }
    }
    return view;
}
/**
 * Converts a {@code Forest} of dependency trees into a {@code TreeView}.
 * <p>
 * For each tree, the sentence it belongs to is located through the start character offset of
 * the tree's top node. The dependency tree built by {@code makeDependencyTree} is then stored
 * in the view for that sentence, together with the tree's score.
 *
 * @param viewName the name of the view to create
 * @param ta the text annotation used to map character offsets to tokens and sentences
 * @param dep the forest holding one tree per dependency parse
 * @return the populated tree view
 */
public static TreeView alignForestToDependencyView(String viewName, TextAnnotation ta,
        Forest dep) {
    TreeView view = new TreeView(viewName, dep.getSource(), ta, 0.0d);

    for (edu.illinois.cs.cogcomp.thrift.base.Tree tree : dep.getTrees()) {
        int rootIndex = tree.getTop();
        int rootCharStart = tree.getNodes().get(rootIndex).getSpan().getStart();
        int sentenceId = ta.getSentenceId(ta.getTokenIdFromCharacterOffset(rootCharStart));

        view.setDependencyTree(sentenceId, makeDependencyTree(ta, tree), tree.getScore());
    }

    return view;
}
/**
 * Aligns a {@link edu.illinois.cs.cogcomp.thrift.base.Labeling} to a
 * {@link edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView}.
 * <p>
 * Every token covered by a span receives the span's label and score; when the span carries
 * attributes they are copied onto the constituent created for each such token.
 * <p>
 * <b>NOTE:</b> must correct for one-past-the-end labeling when calling
 * {@link TextAnnotation#getTokenIdFromCharacterOffset(int)}: the last covered token is looked
 * up at {@code getEnding() - 1}, and the loop bound is that token id plus one so that the
 * final token of a multi-token span is labeled as well.
 *
 * @param viewName the name of the view to create
 * @param ta the text annotation used to map character offsets to token ids
 * @param labeling the labeling whose spans are aligned to tokens
 * @return A TokenLabelView
 */
public static TokenLabelView alignLabelingToTokenLabelView(String viewName, TextAnnotation ta,
        Labeling labeling) {
    List<Span> labels = labeling.getLabels();
    double score = labeling.getScore();
    String generator = labeling.getSource();
    TokenLabelView view = new TokenLabelView(viewName, generator, ta, score);

    for (Span span : labels) {
        int tokenId = ta.getTokenIdFromCharacterOffset(span.getStart());

        // The ending offset points one past the span's last character. Look up the token
        // holding the last character (falling back to the start offset for a zero-length
        // span) and add one to obtain the exclusive loop bound. Bumping the bound only when
        // start and end tokens coincide would drop the last token of a multi-token span.
        int lastCharOffset = Math.max(span.getStart(), span.getEnding() - 1);
        int endTokenId = ta.getTokenIdFromCharacterOffset(lastCharOffset) + 1;

        for (int i = tokenId; i < endTokenId; i++) {
            view.addTokenLabel(i, span.getLabel(), span.getScore());

            if (span.isSetAttributes() && span.getAttributes().size() > 0) {
                Constituent c = view.getConstituentAtToken(i);
                copyAttributesToConstituent(span, c);
            }
        }
    }
    return view;
}
/**
 * Aligns a {@link edu.illinois.cs.cogcomp.thrift.base.Labeling} to a
 * {@link edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView}.
 * <p>
 * Every token covered by a span receives the span's label and score; when the span carries
 * attributes they are copied onto the constituent created for each such token.
 * <p>
 * <b>NOTE:</b> must correct for one-past-the-end labeling when calling
 * {@link TextAnnotation#getTokenIdFromCharacterOffset(int)}: the last covered token is looked
 * up at {@code getEnding() - 1}, and the loop bound is that token id plus one so that the
 * final token of a multi-token span is labeled as well.
 *
 * @param viewName the name of the view to create
 * @param ta the text annotation used to map character offsets to token ids
 * @param labeling the labeling whose spans are aligned to tokens
 * @return A TokenLabelView
 */
public static TokenLabelView alignLabelingToTokenLabelView(String viewName, TextAnnotation ta,
        Labeling labeling) {
    List<Span> labels = labeling.getLabels();
    double score = labeling.getScore();
    String generator = labeling.getSource();
    TokenLabelView view = new TokenLabelView(viewName, generator, ta, score);

    for (Span span : labels) {
        int tokenId = ta.getTokenIdFromCharacterOffset(span.getStart());

        // The ending offset points one past the span's last character. Look up the token
        // holding the last character (falling back to the start offset for a zero-length
        // span) and add one to obtain the exclusive loop bound. Bumping the bound only when
        // start and end tokens coincide would drop the last token of a multi-token span.
        int lastCharOffset = Math.max(span.getStart(), span.getEnding() - 1);
        int endTokenId = ta.getTokenIdFromCharacterOffset(lastCharOffset) + 1;

        for (int i = tokenId; i < endTokenId; i++) {
            view.addTokenLabel(i, span.getLabel(), span.getScore());

            if (span.isSetAttributes() && span.getAttributes().size() > 0) {
                Constituent c = view.getConstituentAtToken(i);
                copyAttributesToConstituent(span, c);
            }
        }
    }
    return view;
}
/**
 * Converts a {@code Forest} of dependency trees into a {@code TreeView}.
 * <p>
 * For each tree, the sentence it belongs to is located through the start character offset of
 * the tree's top node. The dependency tree built by {@code makeDependencyTree} is then stored
 * in the view for that sentence, together with the tree's score.
 *
 * @param viewName the name of the view to create
 * @param ta the text annotation used to map character offsets to tokens and sentences
 * @param dep the forest holding one tree per dependency parse
 * @return the populated tree view
 */
public static TreeView alignForestToDependencyView(String viewName, TextAnnotation ta,
        Forest dep) {
    TreeView view = new TreeView(viewName, dep.getSource(), ta, 0.0d);

    for (edu.illinois.cs.cogcomp.thrift.base.Tree tree : dep.getTrees()) {
        int rootIndex = tree.getTop();
        int rootCharStart = tree.getNodes().get(rootIndex).getSpan().getStart();
        int sentenceId = ta.getSentenceId(ta.getTokenIdFromCharacterOffset(rootCharStart));

        view.setDependencyTree(sentenceId, makeDependencyTree(ta, tree), tree.getScore());
    }

    return view;
}
/**
 * Converts a {@code Forest} of dependency trees into a {@code TreeView}.
 * <p>
 * For each tree, the sentence it belongs to is located through the start character offset of
 * the tree's top node. The dependency tree built by {@code makeDependencyTree} is then stored
 * in the view for that sentence, together with the tree's score.
 *
 * @param viewName the name of the view to create
 * @param ta the text annotation used to map character offsets to tokens and sentences
 * @param dep the forest holding one tree per dependency parse
 * @return the populated tree view
 */
public static TreeView alignForestToDependencyView(String viewName, TextAnnotation ta,
        Forest dep) {
    TreeView view = new TreeView(viewName, dep.getSource(), ta, 0.0d);

    for (edu.illinois.cs.cogcomp.thrift.base.Tree tree : dep.getTrees()) {
        int rootIndex = tree.getTop();
        int rootCharStart = tree.getNodes().get(rootIndex).getSpan().getStart();
        int sentenceId = ta.getSentenceId(ta.getTokenIdFromCharacterOffset(rootCharStart));

        view.setDependencyTree(sentenceId, makeDependencyTree(ta, tree), tree.getScore());
    }

    return view;
}
int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst()); int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond()); int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart); int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd - 1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index Constituent neCon = new Constituent(neLabel, nerView.getViewName(), ta, cleanTextNeTokStart, cleanTextNeTokEnd + 1); //constituent token indexing uses one-past-the-end nerView.addConstituent(neCon);