/**
 * Writes the character span of every constituent in {@code view} to {@code file}, one
 * {@code start,end} pair per line.
 *
 * @param view the view whose constituents are written
 * @param file name of the output file, opened via {@link PrintStream#PrintStream(String)}
 * @throws FileNotFoundException if the file cannot be opened for writing
 */
private static void printCharSpans(View view, String file) throws FileNotFoundException {
    // try-with-resources guarantees the stream is closed even if reading a constituent throws.
    try (PrintStream out = new PrintStream(file)) {
        for (Constituent c : view.getConstituents()) {
            out.println(c.getStartCharOffset() + "," + c.getEndCharOffset());
        }
    }
}
}
/**
 * Writes the character span of every constituent in {@code view} to {@code file}, one
 * {@code start,end} pair per line.
 *
 * @param view the view whose constituents are written
 * @param file name of the output file, opened via {@link PrintStream#PrintStream(String)}
 * @throws FileNotFoundException if the file cannot be opened for writing
 */
private static void printCharSpans(View view, String file) throws FileNotFoundException {
    // try-with-resources guarantees the stream is closed even if reading a constituent throws.
    try (PrintStream out = new PrintStream(file)) {
        for (Constituent c : view.getConstituents()) {
            out.println(c.getStartCharOffset() + "," + c.getEndCharOffset());
        }
    }
}
}
/**
 * Returns the portion of the annotated text spanned by {@code sentenceConstituent}.
 *
 * @return the substring of {@code textAnnotation.getText()} between the constituent's start
 *         and end character offsets
 */
@Override
public String getText() {
    final int from = sentenceConstituent.getStartCharOffset();
    final int to = sentenceConstituent.getEndCharOffset();
    final String fullText = textAnnotation.getText();
    return fullText.substring(from, to);
}
/**
 * Returns the portion of the annotated text spanned by {@code sentenceConstituent}.
 *
 * @return the substring of {@code textAnnotation.getText()} between the constituent's start
 *         and end character offsets
 */
@Override
public String getText() {
    final int from = sentenceConstituent.getStartCharOffset();
    final int to = sentenceConstituent.getEndCharOffset();
    final String fullText = textAnnotation.getText();
    return fullText.substring(from, to);
}
/**
 * Renders the text of {@code textAnnotation} with the labels of {@code view} embedded in it.
 *
 * <p>Each constituent is written as {@code [LABEL tokenized-surface-form ] }; the text between
 * constituents, and the text after the last one, is copied through unchanged. Constituents are
 * processed in the order defined by {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @return the text with bracketed labels embedded
 */
protected String getNERString() {
    List<Constituent> constituents = new ArrayList<>(view.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    String text = textAnnotation.getText();
    StringBuilder sb = new StringBuilder();
    // Offset in text up to which characters have already been emitted or covered.
    int where = 0;
    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();
        // A constituent starting before the previous one ended has no gap text in front of it;
        // calling substring(where, start) in that case would throw.
        if (start > where) {
            sb.append(text, where, start);
        }
        sb.append("[").append(c.getLabel()).append(" ")
                .append(c.getTokenizedSurfaceForm()).append(" ] ");
        // Never move backwards, so a nested constituent cannot cause text to be repeated.
        where = Math.max(where, c.getEndCharOffset());
    }
    // Keep the text that follows the last constituent instead of silently dropping it.
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
}
/**
 * Renders the text of {@code textAnnotation} with the labels of {@code view} embedded in it.
 *
 * <p>Each constituent is written as {@code [LABEL tokenized-surface-form ] }; the text between
 * constituents, and the text after the last one, is copied through unchanged. Constituents are
 * processed in the order defined by {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @return the text with bracketed labels embedded
 */
protected String getNERString() {
    List<Constituent> constituents = new ArrayList<>(view.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    String text = textAnnotation.getText();
    StringBuilder sb = new StringBuilder();
    // Offset in text up to which characters have already been emitted or covered.
    int where = 0;
    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();
        // A constituent starting before the previous one ended has no gap text in front of it;
        // calling substring(where, start) in that case would throw.
        if (start > where) {
            sb.append(text, where, start);
        }
        sb.append("[").append(c.getLabel()).append(" ")
                .append(c.getTokenizedSurfaceForm()).append(" ] ");
        // Never move backwards, so a nested constituent cannot cause text to be repeated.
        where = Math.max(where, c.getEndCharOffset());
    }
    // Keep the text that follows the last constituent instead of silently dropping it.
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
}
/**
 * Renders the text of {@code textAnnotation} with the labels of {@code view} embedded in it.
 *
 * <p>Each constituent is written as {@code [LABEL tokenized-surface-form ] }; the text between
 * constituents, and the text after the last one, is copied through unchanged. Constituents are
 * processed in the order defined by {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @return the text with bracketed labels embedded
 */
protected String getNERString() {
    List<Constituent> constituents = new ArrayList<>(view.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    String text = textAnnotation.getText();
    StringBuilder sb = new StringBuilder();
    // Offset in text up to which characters have already been emitted or covered.
    int where = 0;
    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();
        // A constituent starting before the previous one ended has no gap text in front of it;
        // calling substring(where, start) in that case would throw.
        if (start > where) {
            sb.append(text, where, start);
        }
        sb.append("[").append(c.getLabel()).append(" ")
                .append(c.getTokenizedSurfaceForm()).append(" ] ");
        // Never move backwards, so a nested constituent cannot cause text to be repeated.
        where = Math.max(where, c.getEndCharOffset());
    }
    // Keep the text that follows the last constituent instead of silently dropping it.
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
}
/**
 * Render a string representing the original data with embedded labels in the text.
 *
 * <p>Each constituent of {@code labels} is written as {@code [LABEL tokenized-surface-form ] };
 * the text between constituents, and the text after the last one, is copied through unchanged.
 * Constituents are processed in the order defined by
 * {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @param labels the view whose constituents supply the labels
 * @param ta the text annotation supplying the text
 * @return the text of {@code ta} with bracketed labels embedded
 */
private String renderString(View labels, TextAnnotation ta) {
    List<Constituent> constituents = new ArrayList<>(labels.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    String text = ta.getText();
    StringBuilder sb = new StringBuilder();
    // Offset in text up to which characters have already been emitted or covered.
    int where = 0;
    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();
        // A constituent starting before the previous one ended has no gap text in front of it;
        // calling substring(where, start) in that case would throw.
        if (start > where) {
            sb.append(text, where, start);
        }
        sb.append("[").append(c.getLabel()).append(" ")
                .append(c.getTokenizedSurfaceForm()).append(" ] ");
        // Never move backwards, so a nested constituent cannot cause text to be repeated.
        where = Math.max(where, c.getEndCharOffset());
    }
    // Keep the text that follows the last constituent instead of silently dropping it.
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
/**
 * Render a string representing the original data with embedded labels in the text.
 *
 * <p>Each constituent of {@code labels} is written as {@code [LABEL tokenized-surface-form ] };
 * the text between constituents, and the text after the last one, is copied through unchanged.
 * Constituents are processed in the order defined by
 * {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @param labels the view whose constituents supply the labels
 * @param ta the text annotation supplying the text
 * @return the text of {@code ta} with bracketed labels embedded
 */
private String renderString(View labels, TextAnnotation ta) {
    List<Constituent> constituents = new ArrayList<>(labels.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    String text = ta.getText();
    StringBuilder sb = new StringBuilder();
    // Offset in text up to which characters have already been emitted or covered.
    int where = 0;
    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();
        // A constituent starting before the previous one ended has no gap text in front of it;
        // calling substring(where, start) in that case would throw.
        if (start > where) {
            sb.append(text, where, start);
        }
        sb.append("[").append(c.getLabel()).append(" ")
                .append(c.getTokenizedSurfaceForm()).append(" ] ");
        // Never move backwards, so a nested constituent cannot cause text to be repeated.
        where = Math.max(where, c.getEndCharOffset());
    }
    // Keep the text that follows the last constituent instead of silently dropping it.
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
/**
 * Render a string representing the original data with embedded labels in the text.
 *
 * <p>Each constituent of {@code labels} is written as {@code [LABEL tokenized-surface-form ] };
 * the text between constituents, and the text after the last one, is copied through unchanged.
 * Constituents are processed in the order defined by
 * {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @param labels the view whose constituents supply the labels
 * @param ta the text annotation supplying the text
 * @return the text of {@code ta} with bracketed labels embedded
 */
private String renderString(View labels, TextAnnotation ta) {
    List<Constituent> constituents = new ArrayList<>(labels.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    String text = ta.getText();
    StringBuilder sb = new StringBuilder();
    // Offset in text up to which characters have already been emitted or covered.
    int where = 0;
    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();
        // A constituent starting before the previous one ended has no gap text in front of it;
        // calling substring(where, start) in that case would throw.
        if (start > where) {
            sb.append(text, where, start);
        }
        sb.append("[").append(c.getLabel()).append(" ")
                .append(c.getTokenizedSurfaceForm()).append(" ] ");
        // Never move backwards, so a nested constituent cannot cause text to be repeated.
        where = Math.max(where, c.getEndCharOffset());
    }
    // Keep the text that follows the last constituent instead of silently dropping it.
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
/**
 * Collects the character span of every constituent in a view.
 *
 * @param view the view whose constituents are read
 * @return one {@code IntPair(start, end)} per constituent, in the order
 *         {@code view.getConstituents()} lists them
 */
private static IntPair[] getCharacterOffsets(View view) {
    final List<Constituent> all = view.getConstituents();
    final int count = all.size();
    final IntPair[] spans = new IntPair[count];
    for (int i = 0; i < count; i++) {
        final Constituent current = all.get(i);
        spans[i] = new IntPair(current.getStartCharOffset(), current.getEndCharOffset());
    }
    return spans;
}
}
/**
 * Counts how many gold spans also occur among the character spans of the view's constituents.
 *
 * <p>Assumption: each constituent has unique character offsets. Membership is decided by a
 * {@code HashSet} lookup on {@code IntPair}.
 *
 * @param view the view whose constituent spans are treated as predictions
 * @param goldCharOffsets the gold character spans to look up
 * @return the number of entries of {@code goldCharOffsets} found among the predicted spans
 */
private static int countCorrectSpans(View view, IntPair[] goldCharOffsets) {
    final Set<IntPair> predicted = new HashSet<>();
    for (Constituent constituent : view.getConstituents()) {
        predicted.add(
                new IntPair(constituent.getStartCharOffset(), constituent.getEndCharOffset()));
    }

    int matches = 0;
    for (int i = 0; i < goldCharOffsets.length; i++) {
        if (predicted.contains(goldCharOffsets[i])) {
            matches += 1;
        }
    }
    return matches;
}
/**
 * Collects the character span of every constituent in a view.
 *
 * @param view the view whose constituents are read
 * @return one {@code IntPair(start, end)} per constituent, in the order
 *         {@code view.getConstituents()} lists them
 */
private static IntPair[] getCharacterOffsets(View view) {
    final List<Constituent> all = view.getConstituents();
    final int count = all.size();
    final IntPair[] spans = new IntPair[count];
    for (int i = 0; i < count; i++) {
        final Constituent current = all.get(i);
        spans[i] = new IntPair(current.getStartCharOffset(), current.getEndCharOffset());
    }
    return spans;
}
}
/**
 * Counts how many gold spans also occur among the character spans of the view's constituents.
 *
 * <p>Assumption: each constituent has unique character offsets. Membership is decided by a
 * {@code HashSet} lookup on {@code IntPair}.
 *
 * @param view the view whose constituent spans are treated as predictions
 * @param goldCharOffsets the gold character spans to look up
 * @return the number of entries of {@code goldCharOffsets} found among the predicted spans
 */
private static int countCorrectSpans(View view, IntPair[] goldCharOffsets) {
    final Set<IntPair> predicted = new HashSet<>();
    for (Constituent constituent : view.getConstituents()) {
        predicted.add(
                new IntPair(constituent.getStartCharOffset(), constituent.getEndCharOffset()));
    }

    int matches = 0;
    for (int i = 0; i < goldCharOffsets.length; i++) {
        if (predicted.contains(goldCharOffsets[i])) {
            matches += 1;
        }
    }
    return matches;
}
/**
 * Records the start and end character offsets of every constituent in {@code tokens} into the
 * {@code starts} and {@code ends} arrays, which are re-allocated on each call.
 *
 * <p>Note that these offsets are based on the cleaned-up text, so they need to be mapped back
 * to the original text.
 *
 * @param tokens SpanLabelView containing Token info (from TextAnnotation)
 */
protected void compileOffsets(SpanLabelView tokens) {
    final int count = tokens.getConstituents().size();
    starts = new int[count];
    ends = new int[count];

    int position = 0;
    for (Constituent token : tokens.getConstituents()) {
        starts[position] = token.getStartCharOffset();
        ends[position] = token.getEndCharOffset();
        position += 1;
    }
}
private void buildIndexFromRealCoref () { CoreferenceView corefView = (CoreferenceView)globalTA.getTa().getView(ViewNames.COREF); for(Constituent source : corefView.getConstituents()) { Constituent target = corefView.getCanonicalEntity(source); // String mention = source.getSurfaceString(); int start = source.getStartCharOffset(); int end = source.getEndCharOffset(); // String coreMention = target.getSurfaceString(); int corefStart = target.getStartCharOffset(); int corefEnd = target.getEndCharOffset(); predCorefStartMap.put(start, new Pair<Integer, Integer>(corefStart, corefEnd)); predCorefEndMap.put(end, new Pair<Integer, Integer>(corefStart, corefEnd)); if (isDebug) { String checkCoreference = contentRemovingTags.substring(corefStart, corefEnd); String checkMention = contentRemovingTags.substring(start, end); System.out.println ("[Mention] " + checkMention + " [Coref org] " + checkCoreference); } } }
/**
 * Records the start and end character offsets of every constituent in {@code tokens} into the
 * {@code starts} and {@code ends} arrays, which are re-allocated on each call.
 *
 * <p>Note that these offsets are based on the cleaned-up text, so they need to be mapped back
 * to the original text.
 *
 * @param tokens SpanLabelView containing Token info (from TextAnnotation)
 */
protected void compileOffsets(SpanLabelView tokens) {
    final int count = tokens.getConstituents().size();
    starts = new int[count];
    ends = new int[count];

    int position = 0;
    for (Constituent token : tokens.getConstituents()) {
        starts[position] = token.getStartCharOffset();
        ends[position] = token.getEndCharOffset();
        position += 1;
    }
}
private void buildIndexFromRealCoref () { CoreferenceView corefView = (CoreferenceView)globalTA.getTa().getView(ViewNames.COREF); for(Constituent source : corefView.getConstituents()) { Constituent target = corefView.getCanonicalEntity(source); // String mention = source.getSurfaceString(); int start = source.getStartCharOffset(); int end = source.getEndCharOffset(); // String coreMention = target.getSurfaceString(); int corefStart = target.getStartCharOffset(); int corefEnd = target.getEndCharOffset(); predCorefStartMap.put(start, new Pair<Integer, Integer>(corefStart, corefEnd)); predCorefEndMap.put(end, new Pair<Integer, Integer>(corefStart, corefEnd)); if (isDebug) { String checkCoreference = contentRemovingTags.substring(corefStart, corefEnd); String checkMention = contentRemovingTags.substring(start, end); System.out.println ("[Mention] " + checkMention + " [Coref org] " + checkCoreference); } } }
/**
 * Protobuf helper: adds one {@code TokenOffsetsProto} entry to {@code taBuilder} for every
 * constituent of the TOKENS view of {@code ta}, carrying the constituent's surface form and its
 * start and end character offsets.
 *
 * @param ta the text annotation whose TOKENS view is read
 * @param taBuilder the builder that receives the token-offset entries
 */
private static void writeTokenOffsets(TextAnnotation ta, TextAnnotationProto.Builder taBuilder) {
    for (Constituent token : ta.getView(ViewNames.TOKENS).getConstituents()) {
        TokenOffsetsProto.Builder offsets = TokenOffsetsProto.newBuilder()
                .setForm(token.getSurfaceForm())
                .setStartCharOffset(token.getStartCharOffset())
                .setEndCharOffset(token.getEndCharOffset());
        taBuilder.addTokenOffsets(offsets);
    }
}
/**
 * Protobuf helper: adds one {@code TokenOffsetsProto} entry to {@code taBuilder} for every
 * constituent of the TOKENS view of {@code ta}, carrying the constituent's surface form and its
 * start and end character offsets.
 *
 * @param ta the text annotation whose TOKENS view is read
 * @param taBuilder the builder that receives the token-offset entries
 */
private static void writeTokenOffsets(TextAnnotation ta, TextAnnotationProto.Builder taBuilder) {
    for (Constituent token : ta.getView(ViewNames.TOKENS).getConstituents()) {
        TokenOffsetsProto.Builder offsets = TokenOffsetsProto.newBuilder()
                .setForm(token.getSurfaceForm())
                .setStartCharOffset(token.getStartCharOffset())
                .setEndCharOffset(token.getEndCharOffset());
        taBuilder.addTokenOffsets(offsets);
    }
}