Refine search
/**
 * Tests whether the specified span lies entirely within this span. Spans with
 * identical bounds are considered to contain each other.
 *
 * @param s The span to compare with this span.
 *
 * @return true if the specified span is contained by this span; false otherwise.
 */
public boolean contains(Span s) {
  // The other span must not begin before this one ...
  if (s.getStart() < start) {
    return false;
  }
  // ... and must not run past this one's end.
  return s.getEnd() <= end;
}
/** * Returns true if the specified span intersects with this span. * * @param s The span to compare with this span. * * @return true is the spans overlap; false otherwise. */ public boolean intersects(Span s) { int sstart = s.getStart(); //either s's start is in this or this' start is in s return this.contains(s) || s.contains(this) || getStart() <= sstart && sstart < getEnd() || sstart <= getStart() && getStart() < s.getEnd(); }
/**
 * Converts each span into the string formed by joining the tokens it covers
 * with a single space.
 *
 * @param spans The spans to convert; each span's start (inclusive) and end
 *     (exclusive) are used as indices into {@code tokens}.
 * @param tokens The tokens the span indices refer to.
 *
 * @return an array with one string per span, in the same order as
 *     {@code spans}; a span that covers no tokens yields the empty string.
 */
public static String[] spansToStrings(Span[] spans, String[] tokens) {
  String[] chunks = new String[spans.length];
  StringBuilder cb = new StringBuilder();
  for (int si = 0; si < spans.length; si++) {
    cb.setLength(0);
    int first = spans[si].getStart();
    int limit = spans[si].getEnd();
    for (int ti = first; ti < limit; ti++) {
      // Put the separator in front of every token but the first, so there is
      // never a trailing space to strip. Stripping it failed on zero-length
      // spans, where the buffer is empty and substring(0, -1) throws.
      if (ti > first) {
        cb.append(' ');
      }
      cb.append(tokens[ti]);
    }
    chunks[si] = cb.toString();
  }
  return chunks;
}
/** * Returns true is the specified span crosses this span. * * @param s The span to compare with this span. * * @return true is the specified span overlaps this span and contains a * non-overlapping section; false otherwise. */ public boolean crosses(Span s) { int sstart = s.getStart(); //either s's start is in this or this' start is in s return !this.contains(s) && !s.contains(this) && (getStart() <= sstart && sstart < getEnd() || sstart <= getStart() && getStart() < s.getEnd()); }
/**
 * Retrieves the part of the specified text that is covered by this span.
 *
 * @param text the text to take the covered sub-sequence from
 *
 * @return the sub-sequence of {@code text} from this span's start to its end
 *
 * @throws IllegalArgumentException if this span's end lies beyond the length
 *     of {@code text}
 */
public CharSequence getCoveredText(CharSequence text) {
  if (getEnd() <= text.length()) {
    return text.subSequence(getStart(), getEnd());
  }

  throw new IllegalArgumentException("The span " + toString()
      + " is outside the given text which has length " + text.length() + "!");
}
/**
 * Compares this span with another object. Two spans are equal when their
 * start, end and type all match.
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof Span)) {
    return false;
  }

  Span other = (Span) o;
  if (getStart() != other.getStart()) {
    return false;
  }
  if (getEnd() != other.getEnd()) {
    return false;
  }
  // The type may be null, so compare it null-safely.
  return Objects.equals(getType(), other.getType());
}
/** * Initializes the current instance. * * @param text the text which contains the tokens. * @param tokenSpans the spans which mark the begin and end of the tokens. */ public TokenSample(String text, Span[] tokenSpans) { Objects.requireNonNull(tokenSpans, "tokenSpans must not be null"); this.text = Objects.requireNonNull(text, "text must not be null"); this.tokenSpans = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(tokenSpans))); for (Span tokenSpan : tokenSpans) { if (tokenSpan.getStart() < 0 || tokenSpan.getStart() > text.length() || tokenSpan.getEnd() > text.length() || tokenSpan.getEnd() < 0) { throw new IllegalArgumentException("Span " + tokenSpan.toString() + " is out of bounds, text length: " + text.length() + "!"); } } }
/**
 * Generates a human readable string of the form {@code [start..end)},
 * followed by a space and the type when the type is not null.
 */
@Override
public String toString() {
  StringBuilder out = new StringBuilder(15)
      .append("[")
      .append(getStart())
      .append("..")
      .append(getEnd())
      .append(")");

  if (getType() != null) {
    out.append(" ").append(getType());
  }

  return out.toString();
}
@Override public String toString() { StringBuilder sentence = new StringBuilder(); int lastEndIndex = -1; for (Span token : tokenSpans) { if (lastEndIndex != -1) { // If there are no chars between last token // and this token insert the separator chars // otherwise insert a space String separator; if (lastEndIndex == token.getStart()) separator = separatorChars; else separator = " "; sentence.append(separator); } sentence.append(token.getCoveredText(text)); lastEndIndex = token.getEnd(); } return sentence.toString(); }
/**
 * Returns a span with leading and trailing white spaces removed, using
 * {@code StringUtil.isWhitespace} to classify the characters of the text.
 *
 * @param text the text whose characters this span's offsets refer to
 *
 * @return the trimmed span, or this same object if nothing had to be trimmed;
 *     a span consisting only of white space becomes a zero-length span at the
 *     original start
 */
public Span trim(CharSequence text) {
  final int begin = getStart();
  final int finish = getEnd();

  // Advance past leading white space.
  int trimmedStart = begin;
  while (trimmedStart < finish && StringUtil.isWhitespace(text.charAt(trimmedStart))) {
    trimmedStart++;
  }

  // Back up over trailing white space; this scan may run all the way to begin.
  int trimmedEnd = finish;
  while (trimmedEnd > begin && StringUtil.isWhitespace(text.charAt(trimmedEnd - 1))) {
    trimmedEnd--;
  }

  if (trimmedStart == begin && trimmedEnd == finish) {
    return this;
  }
  if (trimmedStart > trimmedEnd) {
    // The two scans crossed: the span held nothing but white space.
    return new Span(begin, begin, getType());
  }
  return new Span(trimmedStart, trimmedEnd, getType());
}
public String[] encode(Span[] names, int length) { String[] outcomes = new String[length]; for (int i = 0; i < outcomes.length; i++) { outcomes[i] = BioCodec.OTHER; } for (Span name : names) { if (name.getType() == null) { outcomes[name.getStart()] = "default" + "-" + BioCodec.START; } else { outcomes[name.getStart()] = name.getType() + "-" + BioCodec.START; } // now iterate from begin + 1 till end for (int i = name.getStart() + 1; i < name.getEnd(); i++) { if (name.getType() == null) { outcomes[i] = "default" + "-" + BioCodec.CONTINUE; } else { outcomes[i] = name.getType() + "-" + BioCodec.CONTINUE; } } } return outcomes; }
/**
 * Builds an {@code AnnotatorNoteAnnotation} from a tokenized line. The first
 * two constructor arguments are the covered text of the tokens at
 * {@code ID_OFFSET} and {@code ATTACH_TO_OFFSET}. The third is the text from
 * the start of the token at {@code START_VALUE_OFFSET} through the end of the
 * last token.
 *
 * @param tokens the spans of the tokens within {@code line}
 * @param line the line the token spans refer to
 *
 * @return the parsed annotation
 */
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  // The note value may consist of several tokens, so take everything from
  // the first value token to the end of the last token.
  int noteBegin = tokens[START_VALUE_OFFSET].getStart();
  int noteEnd = tokens[tokens.length - 1].getEnd();
  Span noteSpan = new Span(noteBegin, noteEnd);

  String id = tokens[ID_OFFSET].getCoveredText(line).toString();
  String attachedTo = tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString();
  String note = noteSpan.getCoveredText(line).toString();

  return new AnnotatorNoteAnnotation(id, attachedTo, note);
}
} // closes an enclosing scope opened outside this excerpt
/**
 * Computes one probability per span: the arithmetic mean of the best
 * sequence's outcome probabilities over the positions the span covers.
 *
 * @param spans The spans of the names for which probabilities are desired.
 *
 * @return an array of probabilities for each of the specified spans.
 */
public double[] probs(Span[] spans) {
  double[] sprobs = new double[spans.length];
  double[] outcomeProbs = bestSequence.getProbs();

  int slot = 0;
  for (Span span : spans) {
    double sum = 0;
    for (int oi = span.getStart(); oi < span.getEnd(); oi++) {
      sum += outcomeProbs[oi];
    }
    // Mean over the span: the summed probability divided by the span length.
    sprobs[slot++] = sum / span.length();
  }

  return sprobs;
}
/**
 * Reads the next sample from the underlying stream and rebuilds its document.
 * Each sentence is whitespace-tokenized and passed through the detokenizer,
 * and a single space is appended after every sentence. The sentence spans are
 * recomputed against the rebuilt document.
 *
 * @return the rebuilt sample, or null if the underlying stream returned null
 */
@Override
public SentenceSample read() throws IOException {
  SentenceSample sample = samples.read();
  if (sample == null) {
    return null;
  }

  // First pass: cut the text of every sentence out of the original document.
  List<String> sentenceTexts = new ArrayList<>();
  for (Span sentenceSpan : sample.getSentences()) {
    sentenceTexts.add(sample.getDocument().substring(sentenceSpan.getStart(),
        sentenceSpan.getEnd()));
  }

  // Second pass: detokenize each sentence and record where it lands.
  StringBuilder rebuiltDocument = new StringBuilder();
  List<Span> rebuiltSpans = new ArrayList<>();
  for (String sentenceText : sentenceTexts) {
    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(sentenceText);

    int begin = rebuiltDocument.length();
    rebuiltDocument.append(detokenizer.detokenize(tokens, null));
    rebuiltSpans.add(new Span(begin, rebuiltDocument.length()));

    // Sentence separator; it sits outside the span recorded above.
    rebuiltDocument.append(' ');
  }

  Span[] spanArray = rebuiltSpans.toArray(new Span[rebuiltSpans.size()]);
  return new SentenceSample(rebuiltDocument, spanArray);
}
} // closes an enclosing scope opened outside this excerpt