/**
 * Collects the documents of every corpus named in {@code scope} into a single list.
 *
 * <p>Two corpus names are recognised: {@code "ACE"}, read with an {@code ACEReader} over
 * {@code data/all}, and {@code "ERE"}, read with an {@code EREMentionRelationReader} for
 * {@code EREDocumentReader.EreCorpus.ENR3} over {@code data/ere/data}. Any other entry in
 * {@code scope} is ignored. ACE documents are appended before ERE documents.
 *
 * <p>Loading is best-effort per corpus: when a reader cannot be constructed the exception is
 * printed and that corpus is skipped, so the remaining corpora are still loaded. Without the
 * null checks below, the failed (null) reader would be iterated and the caller would only see
 * a NullPointerException instead of the real cause.
 *
 * @param scope names of the corpora to load
 * @return the documents that could be loaded; possibly empty, never {@code null}
 */
public static List<TextAnnotation> getTAs(List<String> scope) {
    List<TextAnnotation> tas = new ArrayList<>();

    if (scope.contains("ACE")) {
        ACEReader aceReader = null;
        try {
            aceReader = new ACEReader("data/all", false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // aceReader is still null when construction failed; skip the corpus rather than dereference it.
        if (aceReader != null) {
            for (TextAnnotation ta : aceReader) {
                tas.add(ta);
            }
        }
    }

    if (scope.contains("ERE")) {
        EREMentionRelationReader ereMentionRelationReader = null;
        try {
            ereMentionRelationReader = new EREMentionRelationReader(
                    EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Same guard as above: a reader that failed to open contributes no documents.
        if (ereMentionRelationReader != null) {
            for (XmlTextAnnotation xta : ereMentionRelationReader) {
                // Only the TextAnnotation part of each ERE document is collected.
                tas.add(xta.getTextAnnotation());
            }
        }
    }

    return tas;
}

public Object next(){
this.offsetToSpanInfo.clear(); this.offsetToSpanInfo = XmlDocumentProcessor.compileOffsetSpanMapping(sourceTa.getXmlMarkup()); TextAnnotation ta = sourceTa.getTextAnnotation(); SpanLabelView tokens = (SpanLabelView) ta.getView(ViewNames.TOKENS); compileOffsets(tokens); sourceTa.getTextAnnotation().addView(getMentionViewName(), nerView); AnnotationFixer.rationalizeBoundaryAnnotations(sourceTa.getTextAnnotation(), getCorefViewName()); } else AnnotationFixer.rationalizeBoundaryAnnotations(sourceTa.getTextAnnotation(), getCorefViewName());
TextAnnotation ta = xta.getTextAnnotation(); List<SpanInfo> fudge = xta.getXmlMarkup(); Pair<String, IntPair> neLabelPair = si.attributes.get("type"); String neLabel = neLabelPair.getFirst(); int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst()); int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond()); int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart); int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd-1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index
StringTransformation st = xmlTa.getXmlSt(); TextAnnotation ta = xmlTa.getTextAnnotation(); String rawText = ta.getText(); String rawStr = rawText.substring(adjStart, adjEnd);
/**
 * Builds an {@link XmlTextAnnotation} from the raw xml text of a corpus document.
 * The xml is passed through the xml processor, and the transformed text it yields is
 * tokenized into a {@link TextAnnotation}.
 *
 * @param xmlText  Raw xml text from corpus document
 * @param corpusId corpus identifier
 * @param docId    text identifier
 * @return an XmlTextAnnotation holding the StringTransformation for the cleaned text, the
 *         TextAnnotation built over that cleaned text, and the xml markup extracted from source
 */
public XmlTextAnnotation createTextAnnotation(String xmlText, String corpusId, String docId) {
    logger.debug("processing text from document {}", docId);

    Pair<StringTransformation, List<XmlDocumentProcessor.SpanInfo>> processed =
            xmlProcessor.processXml(xmlText);

    // First element: the text transformation; its transformed text is what gets tokenized.
    StringTransformation transformation = processed.getFirst();
    String cleanText = transformation.getTransformedText();
    TextAnnotation annotation = taBuilder.createTextAnnotation(corpusId, docId, cleanText);

    // Second element: the markup spans pulled out of the xml source.
    List<XmlDocumentProcessor.SpanInfo> markup = processed.getSecond();
    return new XmlTextAnnotation(transformation, annotation, markup);
}
String xmlStr = xmlTa.getXmlSt().getOrigText(); int fillerWindowMin = Math.max(offset - 100, 0); int fillerWindowMax = Math.min(offset + 100, xmlStr.length());
TextAnnotation ta = xta.getTextAnnotation(); List<SpanInfo> fudge = xta.getXmlMarkup(); Pair<String, IntPair> neLabelPair = si.attributes.get("type"); String neLabel = neLabelPair.getFirst(); int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst()); int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond()); int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart); int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd-1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index
StringTransformation st = xmlTa.getXmlSt(); TextAnnotation ta = xmlTa.getTextAnnotation(); String rawText = ta.getText(); String rawStr = rawText.substring(adjStart, adjEnd);
/**
 * Builds an {@link XmlTextAnnotation} from the raw xml text of a corpus document.
 * The xml is passed through the xml processor, and the transformed text it yields is
 * tokenized into a {@link TextAnnotation}.
 *
 * @param xmlText  Raw xml text from corpus document
 * @param corpusId corpus identifier
 * @param docId    text identifier
 * @return an XmlTextAnnotation holding the StringTransformation for the cleaned text, the
 *         TextAnnotation built over that cleaned text, and the xml markup extracted from source
 */
public XmlTextAnnotation createTextAnnotation(String xmlText, String corpusId, String docId) {
    logger.debug("processing text from document {}", docId);

    Pair<StringTransformation, List<XmlDocumentProcessor.SpanInfo>> processed =
            xmlProcessor.processXml(xmlText);

    // First element: the text transformation; its transformed text is what gets tokenized.
    StringTransformation transformation = processed.getFirst();
    String cleanText = transformation.getTransformedText();
    TextAnnotation annotation = taBuilder.createTextAnnotation(corpusId, docId, cleanText);

    // Second element: the markup spans pulled out of the xml source.
    List<XmlDocumentProcessor.SpanInfo> markup = processed.getSecond();
    return new XmlTextAnnotation(transformation, annotation, markup);
}
String xmlStr = xmlTa.getXmlSt().getOrigText(); int fillerWindowMin = Math.max(offset - 100, 0); int fillerWindowMax = Math.min(offset + 100, xmlStr.length());
/**
 * Collects the documents of every corpus named in {@code scope} into a single list.
 *
 * <p>Two corpus names are recognised: {@code "ACE"}, read with an {@code ACEReader} over
 * {@code data/all}, and {@code "ERE"}, read with an {@code EREMentionRelationReader} for
 * {@code EREDocumentReader.EreCorpus.ENR3} over {@code data/ere/data}. Any other entry in
 * {@code scope} is ignored. ACE documents are appended before ERE documents.
 *
 * <p>Loading is best-effort per corpus: when a reader cannot be constructed the exception is
 * printed and that corpus is skipped, so the remaining corpora are still loaded. Without the
 * null checks below, the failed (null) reader would be iterated and the caller would only see
 * a NullPointerException instead of the real cause.
 *
 * @param scope names of the corpora to load
 * @return the documents that could be loaded; possibly empty, never {@code null}
 */
public static List<TextAnnotation> getTAs(List<String> scope) {
    List<TextAnnotation> tas = new ArrayList<>();

    if (scope.contains("ACE")) {
        ACEReader aceReader = null;
        try {
            aceReader = new ACEReader("data/all", false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // aceReader is still null when construction failed; skip the corpus rather than dereference it.
        if (aceReader != null) {
            for (TextAnnotation ta : aceReader) {
                tas.add(ta);
            }
        }
    }

    if (scope.contains("ERE")) {
        EREMentionRelationReader ereMentionRelationReader = null;
        try {
            ereMentionRelationReader = new EREMentionRelationReader(
                    EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Same guard as above: a reader that failed to open contributes no documents.
        if (ereMentionRelationReader != null) {
            for (XmlTextAnnotation xta : ereMentionRelationReader) {
                // Only the TextAnnotation part of each ERE document is collected.
                tas.add(xta.getTextAnnotation());
            }
        }
    }

    return tas;
}

public Object next(){
TextAnnotation ta = xta.getTextAnnotation(); List<SpanInfo> fudge = xta.getXmlMarkup(); int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst()); int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond()); int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart); int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd - 1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index
this.offsetToSpanInfo.clear(); this.offsetToSpanInfo = XmlDocumentProcessor.compileOffsetSpanMapping(sourceTa.getXmlMarkup()); TextAnnotation ta = sourceTa.getTextAnnotation(); SpanLabelView tokens = (SpanLabelView) ta.getView(ViewNames.TOKENS); compileOffsets(tokens); sourceTa.getTextAnnotation().addView(getMentionViewName(), nerView); AnnotationFixer.rationalizeBoundaryAnnotations(sourceTa.getTextAnnotation(), getCorefViewName()); } else AnnotationFixer.rationalizeBoundaryAnnotations(sourceTa.getTextAnnotation(), getCorefViewName());
/**
 * Collects the documents of every corpus named in {@code scope} into a single list.
 *
 * <p>Two corpus names are recognised: {@code "ACE"}, read with an {@code ACEReader} over
 * {@code data/all}, and {@code "ERE"}, read with an {@code EREMentionRelationReader} for
 * {@code EREDocumentReader.EreCorpus.ENR3} over {@code data/ere/data}. Any other entry in
 * {@code scope} is ignored. ACE documents are appended before ERE documents.
 *
 * <p>Loading is best-effort per corpus: when a reader cannot be constructed the exception is
 * printed and that corpus is skipped, so the remaining corpora are still loaded. Without the
 * null checks below, the failed (null) reader would be iterated and the caller would only see
 * a NullPointerException instead of the real cause.
 *
 * @param scope names of the corpora to load
 * @return the documents that could be loaded; possibly empty, never {@code null}
 */
public static List<TextAnnotation> getTAs(List<String> scope) {
    List<TextAnnotation> tas = new ArrayList<>();

    if (scope.contains("ACE")) {
        ACEReader aceReader = null;
        try {
            aceReader = new ACEReader("data/all", false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // aceReader is still null when construction failed; skip the corpus rather than dereference it.
        if (aceReader != null) {
            for (TextAnnotation ta : aceReader) {
                tas.add(ta);
            }
        }
    }

    if (scope.contains("ERE")) {
        EREMentionRelationReader ereMentionRelationReader = null;
        try {
            ereMentionRelationReader = new EREMentionRelationReader(
                    EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Same guard as above: a reader that failed to open contributes no documents.
        if (ereMentionRelationReader != null) {
            for (XmlTextAnnotation xta : ereMentionRelationReader) {
                // Only the TextAnnotation part of each ERE document is collected.
                tas.add(xta.getTextAnnotation());
            }
        }
    }

    return tas;
}

public Object next(){
TextAnnotation ta = xta.getTextAnnotation(); List<SpanInfo> fudge = xta.getXmlMarkup(); int cleanTextCharStart = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getFirst()); int cleanTextCharEnd = xta.getXmlSt().computeModifiedOffsetFromOriginal(charOffsets.getSecond()); int cleanTextNeTokStart = ta.getTokenIdFromCharacterOffset(cleanTextCharStart); int cleanTextNeTokEnd = ta.getTokenIdFromCharacterOffset(cleanTextCharEnd - 1); // StringTransformation returns one-past-the-end index; TextAnnotation maps at-the-end index
ret.add(xta.getTextAnnotation());
List<XmlDocumentProcessor.SpanInfo> markup = xmlTa.getXmlMarkup(); TextAnnotation ta = xmlTa.getTextAnnotation(); View postView = new View(getPostViewName(), NAME, ta, 1.0); new IntPair(xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(spanInfo.spanOffsets.getFirst()), xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(spanInfo.spanOffsets.getSecond())); if (-1 == cleanTextOffsets.getFirst() || -1 == cleanTextOffsets.getSecond()) throw new IllegalStateException("could not compute cleanText offsets for " + label + " span with offsets " +
ret.add(xta.getTextAnnotation());
List<XmlDocumentProcessor.SpanInfo> markup = xmlTa.getXmlMarkup(); TextAnnotation ta = xmlTa.getTextAnnotation(); View postView = new View(getPostViewName(), NAME, ta, 1.0); new IntPair(xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(spanInfo.spanOffsets.getFirst()), xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(spanInfo.spanOffsets.getSecond())); if (-1 == cleanTextOffsets.getFirst() || -1 == cleanTextOffsets.getSecond()) throw new IllegalStateException("could not compute cleanText offsets for " + label + " span with offsets " +
ret.add(xta.getTextAnnotation());