/**
 * Runs {@code annotator} on each sentence of {@code textAnnotation} separately and copies the
 * view it produces back into {@code textAnnotation}.
 * <p>
 * Processing sentence by sentence allows annotation to fail for an individual sentence without
 * failing for the whole text. THIS REQUIRES THAT ALL RELATIONS ARE INTRA-SENTENCE. Any that are
 * *not* will be omitted for the sentence-level processing.
 * <p>
 * An {@link AnnotatorException} raised while handling a sentence is logged, that sentence is
 * skipped, and processing continues with the next one; the exception is not propagated.
 *
 * @param annotator Annotator to apply
 * @param textAnnotation TextAnnotation to augment
 */
public void processBySentence(Annotator annotator, TextAnnotation textAnnotation) {
    logger.debug("in processBySentence()...");
    for (int sentenceId = 0; sentenceId < textAnnotation.sentences().size(); ++sentenceId) {
        TextAnnotation sentTa =
                TextAnnotationUtilities.getSubTextAnnotation(textAnnotation, sentenceId);
        try {
            annotator.getView(sentTa);
            int start = textAnnotation.getSentence(sentenceId).getStartSpan();
            int end = textAnnotation.getSentence(sentenceId).getEndSpan();
            // The last argument is the token offset applied while copying: the sentence's
            // start token index in the full text is passed so the copied view lines up with
            // textAnnotation's token indexes.
            TextAnnotationUtilities.copyViewFromTo(annotator.getViewName(), sentTa,
                    textAnnotation, start, end, start);
        } catch (AnnotatorException e) {
            // Best-effort by design: record the failure (with its stack trace) through the
            // logger instead of stderr, then move on to the next sentence.
            logger.error("processBySentence(): annotator for view '" + annotator.getViewName()
                    + "' failed on sentence " + sentenceId, e);
        }
    }
}
/**
 * Creates a copy of a relation whose endpoints are the counterparts, looked up in
 * {@code consMap}, of the original relation's source and target constituents.
 * <p>
 * Precondition: {@code consMap} *must* contain the source and target constituents of {@code r}
 * as keys, and their values must be non-null. No check is made here; whatever the lookup
 * returns is handed to the {@code Relation} constructor.
 *
 * @param r relation to copy
 * @param consMap map from original constituents to new counterparts
 * @return new relation with all info copied from original, but with new source and target
 *         constituents
 */
public static Relation copyRelation(Relation r, Map<Constituent, Constituent> consMap) {
    final Constituent mappedSource = consMap.get(r.getSource());
    final Constituent mappedTarget = consMap.get(r.getTarget());

    final Relation copy;
    if (r.getLabelsToScores() != null) {
        // The original carries a label-to-score distribution: keep it.
        copy = new Relation(r.getLabelsToScores(), mappedSource, mappedTarget);
    } else {
        // Otherwise fall back to the single relation name and score.
        copy = new Relation(r.getRelationName(), mappedSource, mappedTarget, r.getScore());
    }

    copyAttributesFromTo(r, copy);
    return copy;
}
tokensPairs.toArray(new IntPair[tokenSize]), tokens.toArray(new String[tokenSize]), new int[]{tokenSize}); copyViewsFromTo(ta, newTA, start, end, -start); return newTA;
Constituent newC = copyConstituentWithNewTokenOffsets(newTA, c, offset); consMap.put(c, newC); newVu.addConstituent(newC); continue; Relation newR = copyRelation(r, consMap); newVu.addRelation(newR);
edu.illinois.cs.cogcomp.core.datastructures.textannotation.View view; Record record = addRecordViewFromCurator(ta.getText(), TextAnnotationUtilities.getSentenceList(ta), viewName); ViewTypes viewType = ViewNames.getViewType(viewName); Forest depForest = record.getParseViews().get(convertCuratorViewName(viewName)); if (depForest.trees.size() > TextAnnotationUtilities.getSentenceList(ta).size()) throw new AnnotationFailedException("mismatched number of trees and sentences."); if (parseForest.trees.size() > TextAnnotationUtilities.getSentenceList(ta).size()) throw new AnnotationFailedException("mismatched number of trees and sentences.");
/**
 * Copies every available view of {@code ta}, except the TOKENS and SENTENCE views, to
 * {@code newTA} by delegating to {@code copyViewFromTo} once per view name.
 * <p>
 * Intended contract: if ta is smaller than newTA, all constituents are mapped, with their
 * offsets changed according to {@code offset}; otherwise only the constituents within the span
 * sourceStartTokenIndex, sourceEndTokenIndex are mapped to newTA. That selection is performed
 * by {@code copyViewFromTo}; the span and offset arguments are passed through unchanged.
 *
 * @param ta TextAnnotation to copy views from
 * @param newTA TextAnnotation to copy views to
 * @param sourceStartTokenIndex start of the token span in {@code ta}
 * @param sourceEndTokenIndex end of the token span in {@code ta}
 * @param offset token offset forwarded to {@code copyViewFromTo}
 */
public static void copyViewsFromTo(TextAnnotation ta, TextAnnotation newTA,
        int sourceStartTokenIndex, int sourceEndTokenIndex, int offset) {
    for (String viewName : ta.getAvailableViews()) {
        boolean isTokenOrSentenceView =
                ViewNames.TOKENS.equals(viewName) || ViewNames.SENTENCE.equals(viewName);
        if (!isTokenOrSentenceView) {
            copyViewFromTo(viewName, ta, newTA, sourceStartTokenIndex, sourceEndTokenIndex,
                    offset);
        }
    }
}
/**
 * Writes a human-readable dump of a TextAnnotation to the given stream: its id, its raw text,
 * and then, for each available view, the view name followed by the view's string form. A
 * filler line from {@code getLineFill()} follows the raw text and each view.
 *
 * @param out stream to print to
 * @param ta TextAnnotation to print
 */
static public void printTextAnnotation(PrintStream out, TextAnnotation ta) {
    // Header: id and raw text.
    out.println("TextAnnotation with id: " + ta.getId());
    out.println("Raw Text: " + ta.getText());
    out.println(getLineFill());

    // One section per view.
    out.println("TextAnnotation Views:");
    for (String viewName : ta.getAvailableViews()) {
        out.println("View Name: " + viewName);
        String renderedView = ta.getView(viewName).toString();
        out.println(renderedView);
        out.println(getLineFill());
    }
}
sb.append(TextAnnotationUtilities.getTokenSequence(ta, 0, end)); for (Constituent c : mentions) { sb.append(TextAnnotationUtilities.getTokenSequence(ta, end, c.getStartSpan())); sb.append(TextAnnotationUtilities.getTokenSequence(ta, end, ta.size())); return sb.toString();
RecordUtils.printRecord(new PrintStream(outFile), outputRecord); else TextAnnotationUtilities.printTextAnnotation(new PrintStream(outFile), outputTextAnnotation);
Constituent newC = copyConstituentWithNewTokenOffsets(newTA, c, 0); consMap.put(c, newC); newVu.addConstituent(newC); continue; Relation newR = copyRelation(r, consMap); newVu.addRelation(newR);
edu.illinois.cs.cogcomp.core.datastructures.textannotation.View view; Record record = addRecordViewFromCurator(ta.getText(), TextAnnotationUtilities.getSentenceList(ta), viewName); ViewTypes viewType = ViewNames.getViewType(viewName); Forest depForest = record.getParseViews().get(convertCuratorViewName(viewName)); if (depForest.trees.size() > TextAnnotationUtilities.getSentenceList(ta).size()) throw new AnnotationFailedException("mismatched number of trees and sentences."); if (parseForest.trees.size() > TextAnnotationUtilities.getSentenceList(ta).size()) throw new AnnotationFailedException("mismatched number of trees and sentences.");
/**
 * Copies every available view of {@code ta}, except the TOKENS and SENTENCE views, to
 * {@code newTA} by delegating to {@code copyViewFromTo} once per view name.
 * <p>
 * Intended contract: if ta is smaller than newTA, all constituents are mapped, with their
 * offsets changed according to {@code offset}; otherwise only the constituents within the span
 * sourceStartTokenIndex, sourceEndTokenIndex are mapped to newTA. That selection is performed
 * by {@code copyViewFromTo}; the span and offset arguments are passed through unchanged.
 *
 * @param ta TextAnnotation to copy views from
 * @param newTA TextAnnotation to copy views to
 * @param sourceStartTokenIndex start of the token span in {@code ta}
 * @param sourceEndTokenIndex end of the token span in {@code ta}
 * @param offset token offset forwarded to {@code copyViewFromTo}
 */
public static void copyViewsFromTo(TextAnnotation ta, TextAnnotation newTA,
        int sourceStartTokenIndex, int sourceEndTokenIndex, int offset) {
    for (String viewName : ta.getAvailableViews()) {
        boolean isTokenOrSentenceView =
                ViewNames.TOKENS.equals(viewName) || ViewNames.SENTENCE.equals(viewName);
        if (!isTokenOrSentenceView) {
            copyViewFromTo(viewName, ta, newTA, sourceStartTokenIndex, sourceEndTokenIndex,
                    offset);
        }
    }
}
/**
 * Writes a human-readable dump of a TextAnnotation to the given stream: its id, its raw text,
 * and then, for each available view, the view name followed by the view's string form. A
 * filler line from {@code getLineFill()} follows the raw text and each view.
 *
 * @param out stream to print to
 * @param ta TextAnnotation to print
 */
static public void printTextAnnotation(PrintStream out, TextAnnotation ta) {
    // Header: id and raw text.
    out.println("TextAnnotation with id: " + ta.getId());
    out.println("Raw Text: " + ta.getText());
    out.println(getLineFill());

    // One section per view.
    out.println("TextAnnotation Views:");
    for (String viewName : ta.getAvailableViews()) {
        out.println("View Name: " + viewName);
        String renderedView = ta.getView(viewName).toString();
        out.println(renderedView);
        out.println(getLineFill());
    }
}
/**
 * Runs {@code annotator} on each sentence of {@code textAnnotation} separately and copies the
 * view it produces back into {@code textAnnotation}.
 * <p>
 * Processing sentence by sentence allows annotation to fail for an individual sentence without
 * failing for the whole text. THIS REQUIRES THAT ALL RELATIONS ARE INTRA-SENTENCE. Any that are
 * *not* will be omitted for the sentence-level processing.
 * <p>
 * An {@link AnnotatorException} raised while handling a sentence is logged, that sentence is
 * skipped, and processing continues with the next one; the exception is not propagated.
 *
 * @param annotator Annotator to apply
 * @param textAnnotation TextAnnotation to augment
 */
public void processBySentence(Annotator annotator, TextAnnotation textAnnotation) {
    logger.debug("in processBySentence()...");
    for (int sentenceId = 0; sentenceId < textAnnotation.sentences().size(); ++sentenceId) {
        TextAnnotation sentTa =
                TextAnnotationUtilities.getSubTextAnnotation(textAnnotation, sentenceId);
        try {
            annotator.getView(sentTa);
            int start = textAnnotation.getSentence(sentenceId).getStartSpan();
            int end = textAnnotation.getSentence(sentenceId).getEndSpan();
            // The last argument is the token offset applied while copying: the sentence's
            // start token index in the full text is passed so the copied view lines up with
            // textAnnotation's token indexes.
            TextAnnotationUtilities.copyViewFromTo(annotator.getViewName(), sentTa,
                    textAnnotation, start, end, start);
        } catch (AnnotatorException e) {
            // Best-effort by design: record the failure (with its stack trace) through the
            // logger instead of stderr, then move on to the next sentence.
            logger.error("processBySentence(): annotator for view '" + annotator.getViewName()
                    + "' failed on sentence " + sentenceId, e);
        }
    }
}
Constituent newC = copyConstituentWithNewTokenOffsets(newTA, c, offset); consMap.add(new Pair<>(c, newC)); newVu.addConstituent(newC, true); Relation newR = copyRelation(r, consMap); newVu.addRelation(newR);
public edu.illinois.cs.cogcomp.core.datastructures.textannotation.View getTextAnnotationView(TextAnnotation ta, String viewName) throws TException, AnnotationFailedException, ServiceUnavailableException, SocketException { edu.illinois.cs.cogcomp.core.datastructures.textannotation.View view; Record record = addRecordViewFromCurator(ta.getText(), TextAnnotationUtilities.getSentenceList(ta), viewName); ViewTypes viewType = ViewNames.getViewType(viewName); if (viewType == ViewTypes.TOKEN_LABEL_VIEW) {
/**
 * Creates a copy of a relation whose endpoints are the counterparts, looked up in
 * {@code consMap}, of the original relation's source and target constituents.
 * <p>
 * Precondition: {@code consMap} *must* contain the source and target constituents of {@code r}
 * as keys, and their values must be non-null. No check is made here; whatever the lookup
 * returns is handed to the {@code Relation} constructor.
 *
 * @param r relation to copy
 * @param consMap map from original constituents to new counterparts
 * @return new relation with all info copied from original, but with new source and target
 *         constituents
 */
public static Relation copyRelation(Relation r, Map<Constituent, Constituent> consMap) {
    final Constituent mappedSource = consMap.get(r.getSource());
    final Constituent mappedTarget = consMap.get(r.getTarget());

    final Relation copy;
    if (r.getLabelsToScores() != null) {
        // The original carries a label-to-score distribution: keep it.
        copy = new Relation(r.getLabelsToScores(), mappedSource, mappedTarget);
    } else {
        // Otherwise fall back to the single relation name and score.
        copy = new Relation(r.getRelationName(), mappedSource, mappedTarget, r.getScore());
    }

    copyAttributesFromTo(r, copy);
    return copy;
}
tokensPairs.toArray(new IntPair[tokenSize]), tokens.toArray(new String[tokenSize]), new int[]{tokenSize}); copyViewsFromTo(ta, newTA, start, end, -start); return newTA;
Constituent newC = copyConstituentWithNewTokenOffsets(newTA, c, 0); consMap.add(new Pair<>(c, newC)); newVu.addConstituent(newC, true); Relation newR = copyRelation(r, consMap); newVu.addRelation(newR);
/**
 * Builds a copy of a constituent, attached to {@code newTA}, whose start and end token indexes
 * are those of the original shifted by {@code offset}.
 * <p>
 * When assertions are enabled, both shifted indexes are checked to lie in the range
 * {@code [0, newTA.size()]}. The copy keeps the original's view name and either its
 * label-to-score map (when present) or its single label and score; attributes are then
 * transferred via {@code copyAttributesFromTo}.
 *
 * @param newTA TextAnnotation which will contain the new Constituent
 * @param c original Constituent to copy
 * @param offset the offset to shift token indexes of new Constituent. Can be negative.
 * @return the new Constituent
 */
public static Constituent copyConstituentWithNewTokenOffsets(TextAnnotation newTA,
        Constituent c, int offset) {
    final int shiftedStart = c.getStartSpan() + offset;
    final int shiftedEnd = c.getEndSpan() + offset;

    // Sanity checks (active only with -ea): shifted span must fall inside newTA.
    assert shiftedStart >= 0;
    assert shiftedStart <= newTA.size();
    assert shiftedEnd >= 0;
    assert shiftedEnd <= newTA.size();

    final Constituent copy;
    if (c.getLabelsToScores() == null) {
        // Single-label constituent: carry over label and score.
        copy = new Constituent(c.getLabel(), c.getConstituentScore(), c.viewName, newTA,
                shiftedStart, shiftedEnd);
    } else {
        // Constituent with a label-to-score map: carry the map over.
        copy = new Constituent(c.getLabelsToScores(), c.viewName, newTA, shiftedStart,
                shiftedEnd);
    }

    copyAttributesFromTo(c, copy);
    return copy;
}