/**
 * Converts an {@link IntPair} into a {@code Span}.
 *
 * <p>The pair's source becomes the first argument and its target the second
 * argument handed to {@code fromValues}.
 *
 * @param span the pair to translate
 * @return the result of {@code fromValues(span.getSource(), span.getTarget())}
 */
public static Span fromPair(IntPair span) {
  final int first = span.getSource();
  final int second = span.getTarget();
  return fromValues(first, second);
}
private SimpleMatrix concatenateContextWords(SimpleMatrix childVec, IntPair span, List<String> words) { // TODO: factor out getting the words SimpleMatrix left = (span.getSource() < 0) ? dvModel.getStartWordVector() : dvModel.getWordVector(words.get(span.getSource())); SimpleMatrix right = (span.getTarget() >= words.size()) ? dvModel.getEndWordVector() : dvModel.getWordVector(words.get(span.getTarget())); return NeuralUtils.concatenate(childVec, left, right); }
/**
 * Reports whether the pair {@code p} overlaps the constituent {@code c}.
 *
 * <p>Three situations count as an overlap, all with inclusive comparisons:
 * the pair straddles the constituent's start, the pair lies within the
 * constituent, or the pair straddles the constituent's end.
 *
 * @param c the constituent to test against
 * @param p the pair whose source/target are compared with {@code c}'s start/end
 * @return {@code true} if any of the three conditions holds, else {@code false}
 */
private static boolean doesOverlap(Constituent c, IntPair p) {
  final int pairStart = p.getSource();
  final int pairEnd = p.getTarget();
  final int constituentStart = c.start();
  final int constituentEnd = c.end();

  final boolean straddlesStart = pairStart <= constituentStart && pairEnd >= constituentStart;
  final boolean liesWithin = pairStart >= constituentStart && pairEnd <= constituentEnd;
  final boolean straddlesEnd = pairStart <= constituentEnd && pairEnd >= constituentEnd;

  return straddlesStart || liesWithin || straddlesEnd;
}
for (CorefChain.CorefMention mention: mentions) { pw.print(mentionIndices.getSource() + " " + mentionIndices.getTarget()); if(mention == cluster.getRepresentativeMention()) pw.print(" " + 1); else pw.print(" " + 0);
for (IntPair p:charOffsets) { int beginRelCharOffset = charOffsetIsRelative? p.getSource():p.getSource()-annoCharBegin; int endRelCharOffset = charOffsetIsRelative? p.getTarget():p.getTarget()-annoCharBegin; int beginCharOffset = beginRelCharOffset + annoCharBegin; int endCharOffset = endRelCharOffset + annoCharBegin;
} else { if (tokenSpan != null && tokenSpan.getTarget() >= word.index()) { word.setOriginalText(originalToken); word.set(CoreAnnotations.CoNLLUTokenSpanAnnotation.class, tokenSpan);
/**
 * Segments {@code line} and returns one {@code CoreLabel} per token span.
 *
 * <p>The line is first labelled via {@code segmentStringToIOB}; each span
 * reported by {@code IOBUtils.TokenSpansForIOB} then becomes a token whose
 * word, value and text annotation are the string produced by
 * {@code IOBUtils.IOBToString}, whose original text is the matching substring
 * of {@code line}, and whose character offsets are taken from the first and
 * last labelled element of the span.
 *
 * @param line the raw text to segment
 * @return the tokens, in the order the spans were reported
 */
public List<CoreLabel> segmentStringToTokenList(String line) {
  List<CoreLabel> tokens = CollectionUtils.makeList();
  List<CoreLabel> iobSequence = segmentStringToIOB(line);

  for (IntPair bounds : IOBUtils.TokenSpansForIOB(iobSequence)) {
    final int first = bounds.getSource();
    final int limit = bounds.getTarget();

    CoreLabel segment = new CoreLabel();
    String surface = IOBUtils.IOBToString(iobSequence, prefixMarker, suffixMarker, first, limit);
    segment.setWord(surface);
    segment.setValue(surface);
    segment.set(CoreAnnotations.TextAnnotation.class, surface);
    segment.set(CoreAnnotations.ArabicSegAnnotation.class, "1");

    // Character range runs from the first element's begin to the end of the
    // element just before the span's target index.
    int charBegin = iobSequence.get(first).beginPosition();
    int charEnd = iobSequence.get(limit - 1).endPosition();
    segment.setOriginalText(line.substring(charBegin, charEnd));
    segment.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin);
    segment.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd);

    tokens.add(segment);
  }

  return tokens;
}
for (IntPair offsets:chunkCharOffsets) { int offsetBegin = offsets.getSource(); int offsetEnd = offsets.getTarget();
assert(token.endPosition() >= 0); int offsetBegin = offsets.getSource(); int offsetEnd = offsets.getTarget();
IntPair tokenSpan = token.get(CoreAnnotations.CoNLLUTokenSpanAnnotation.class); if (tokenSpan.getSource() == token.index()) { String range = String.format("%d-%d", tokenSpan.getSource(), tokenSpan.getTarget()); sb.append(String.format("%s\t%s\t_\t_\t_\t_\t_\t_\t_\t_%n", range, token.originalText()));
if(subTree.isPhrasal()) { IntPair span = subTree.getSpan(); Constituent c = cf.newConstituent(span.getSource(), span.getTarget(), subTree.label(), 0.0); if(t1Labels.contains(c)) { t1Labels.remove(c);
if (keySet.contains(SpanAnnotation.class)) { IntPair span = getAndRegister(coreLabel, keysToSerialize, SpanAnnotation.class); builder.setSpan(CoreNLPProtos.Span.newBuilder().setBegin(span.getSource()).setEnd(span.getTarget()).build()); if (keySet.contains(CoNLLUTokenSpanAnnotation.class)) { IntPair span = getAndRegister(coreLabel, keysToSerialize, CoNLLUTokenSpanAnnotation.class); builder.setConllUTokenSpan(CoreNLPProtos.Span.newBuilder().setBegin(span.getSource()).setEnd(span.getTarget()).build());
nerChunks.add(nerChunk); Tree t = getTreeNonTerminal(tree, startToken, endToken, true); if (t.getSpan().getSource() == startToken && t.getSpan().getTarget() == endToken) { nerChunk.set(TreeCoreAnnotations.TreeAnnotation.class, t); if (options.annotateTreeNer) {
nerChunks.add(nerChunk); Tree t = getTreeNonTerminal(tree, startToken, endToken, true); if (t.getSpan().getSource() == startToken && t.getSpan().getTarget() == endToken) { nerChunk.set(TreeCoreAnnotations.TreeAnnotation.class, t); if (options.annotateTreeNer) {
/** * Create a ParseTree proto from a Tree. If the Tree is a scored tree, the scores will * be preserved. * @param parseTree The parse tree to convert. * @return A protocol buffer message corresponding to this tree. */ public CoreNLPProtos.ParseTree toProto(Tree parseTree) { CoreNLPProtos.ParseTree.Builder builder = CoreNLPProtos.ParseTree.newBuilder(); // Required fields for (Tree child : parseTree.children()) { builder.addChild(toProto(child)); } // Optional fields IntPair span = parseTree.getSpan(); if (span != null) { builder.setYieldBeginIndex(span.getSource()); builder.setYieldEndIndex(span.getTarget()); } if (parseTree.label() != null) { builder.setValue(parseTree.label().value()); } if (!Double.isNaN(parseTree.score())) { builder.setScore(parseTree.score()); } Integer sentiment; if (parseTree.label() instanceof CoreMap && (sentiment = ((CoreMap) parseTree.label()).get(RNNCoreAnnotations.PredictedClass.class)) != null) { builder.setSentiment(CoreNLPProtos.Sentiment.forNumber(sentiment)); } // Return return builder.build(); }
/**
 * Reports whether the pair {@code p} overlaps the constituent {@code c}.
 *
 * <p>Three situations count as an overlap, all with inclusive comparisons:
 * the pair straddles the constituent's start, the pair lies within the
 * constituent, or the pair straddles the constituent's end.
 *
 * @param c the constituent to test against
 * @param p the pair whose source/target are compared with {@code c}'s start/end
 * @return {@code true} if any of the three conditions holds, else {@code false}
 */
private static boolean doesOverlap(Constituent c, IntPair p) {
  final int pairStart = p.getSource();
  final int pairEnd = p.getTarget();
  final int constituentStart = c.start();
  final int constituentEnd = c.end();

  final boolean straddlesStart = pairStart <= constituentStart && pairEnd >= constituentStart;
  final boolean liesWithin = pairStart >= constituentStart && pairEnd <= constituentEnd;
  final boolean straddlesEnd = pairStart <= constituentEnd && pairEnd >= constituentEnd;

  return straddlesStart || liesWithin || straddlesEnd;
}