public static boolean numberSingular(JCas jcas, Annotation arg, String s1, ConllDependencyNode head){ // List<BaseToken> tokens = new ArrayList<>(JCasUtil.selectCovered(BaseToken.class, arg)); // for (int i = tokens.size()-1; i >=0; i--){ // BaseToken t = tokens.get(i); // String pos = t.getPartOfSpeech(); if(head != null && head.getPostag() != null){ String pos = head.getPostag(); if ("NN".equals(pos) || "NNP".equals(pos)){ return true; }else if ("NNS".equals(pos) || "NNPS".equals(pos)){ return false; }else if(s1.equals("we") || s1.equals("they")){ return false; } } // } return true; }
public static boolean numberSingular(JCas jcas, Annotation arg, String s1){ ConllDependencyNode head = MapFactory.get(getKey(jcas), arg); // List<BaseToken> tokens = new ArrayList<>(JCasUtil.selectCovered(BaseToken.class, arg)); // for (int i = tokens.size()-1; i >=0; i--){ // BaseToken t = tokens.get(i); // String pos = t.getPartOfSpeech(); if(head != null && head.getPostag() != null){ String pos = head.getPostag(); if ("NN".equals(pos) || "NNP".equals(pos)){ return true; }else if ("NNS".equals(pos) || "NNPS".equals(pos)){ return false; }else if(s1.equals("we") || s1.equals("they")){ return false; } } // } return true; }
/**
 * Renders this path using only POS tags and dependency relation labels.
 *
 * <p>Nodes visited before the common node are written as their POS segment
 * followed by an "up" arc carrying the node's relation label; the common node
 * is written as its POS segment alone; nodes visited after it are written as a
 * "down" arc carrying the relation label followed by the POS segment.
 *
 * @return the concatenated representation of every node in iteration order
 */
public String toSyntaxString() {
  final StringBuilder out = new StringBuilder();
  boolean pastCommon = false;
  for (ConllDependencyNode current : this) {
    final boolean common = isCommonNode(current);
    final String posSegment = L_POS_DELIM + current.getPostag() + R_POS_DELIM;
    if (common) {
      pastCommon = true;
      out.append(posSegment);
    } else if (pastCommon) {
      // Descending side of the path: arc first, then the POS segment.
      out.append(DN_ARC_A + L_REL_DELIM + current.getDeprel() + R_REL_DELIM + DN_ARC_B + posSegment);
    } else {
      // Ascending side of the path: POS segment first, then the arc.
      out.append(posSegment + UP_ARC_B + L_REL_DELIM + current.getDeprel() + R_REL_DELIM + UP_ARC_A);
    }
  }
  return out.toString();
}
/**
 * Converts a path into a string that is lexicalized at its end points, with
 * POS tags for the interior nodes and an arc label after every node.
 *
 * <p>Each node contributes {@code text-deprel}, where {@code text} is the
 * covered text for the first and last node and the POS tag otherwise. Entries
 * are separated by "/"; an empty path yields an empty string.
 *
 * @param path the nodes to render, in order
 * @return the rendered path
 */
public static String pathToString(LinkedList<ConllDependencyNode> path) {
  // peek* return null on an empty list instead of throwing; the loop then never runs.
  final ConllDependencyNode firstNode = path.peekFirst();
  final ConllDependencyNode lastNode = path.peekLast();
  final StringBuilder out = new StringBuilder();
  for (ConllDependencyNode step : path) {
    final boolean atEnd = (step == lastNode);
    final String label = (step == firstNode || atEnd) ? step.getCoveredText() : step.getPostag();
    out.append(label).append("-").append(step.getDeprel());
    if (!atEnd) {
      out.append("/");
    }
  }
  return out.toString();
}
/**
 * Converts a path into a string that is lexicalized at its end points, with
 * POS tags for the interior nodes and an arc label after every node.
 *
 * <p>Each node contributes {@code text-deprel}, where {@code text} is the
 * covered text for the first and last node and the POS tag otherwise. Entries
 * are separated by "/"; an empty path yields an empty string.
 *
 * @param path the nodes to render, in order
 * @return the rendered path
 */
public static String pathToString(LinkedList<ConllDependencyNode> path) {
  // peek* return null on an empty list instead of throwing; the loop then never runs.
  final ConllDependencyNode firstNode = path.peekFirst();
  final ConllDependencyNode lastNode = path.peekLast();
  final StringBuilder out = new StringBuilder();
  for (ConllDependencyNode step : path) {
    final boolean atEnd = (step == lastNode);
    final String label = (step == firstNode || atEnd) ? step.getCoveredText() : step.getPostag();
    out.append(label).append("-").append(step.getDeprel());
    if (!atEnd) {
      out.append("/");
    }
  }
  return out.toString();
}
/**
 * Renders this path using only POS tags and dependency relation labels.
 *
 * <p>Nodes visited before the common node are written as their POS segment
 * followed by an "up" arc carrying the node's relation label; the common node
 * is written as its POS segment alone; nodes visited after it are written as a
 * "down" arc carrying the relation label followed by the POS segment.
 *
 * @return the concatenated representation of every node in iteration order
 */
public String toSyntaxString() {
  final StringBuilder out = new StringBuilder();
  boolean pastCommon = false;
  for (ConllDependencyNode current : this) {
    final boolean common = isCommonNode(current);
    final String posSegment = L_POS_DELIM + current.getPostag() + R_POS_DELIM;
    if (common) {
      pastCommon = true;
      out.append(posSegment);
    } else if (pastCommon) {
      // Descending side of the path: arc first, then the POS segment.
      out.append(DN_ARC_A + L_REL_DELIM + current.getDeprel() + R_REL_DELIM + DN_ARC_B + posSegment);
    } else {
      // Ascending side of the path: POS segment first, then the arc.
      out.append(posSegment + UP_ARC_B + L_REL_DELIM + current.getDeprel() + R_REL_DELIM + UP_ARC_A);
    }
  }
  return out.toString();
}
/**
 * Matches covered text consisting of a capital letter followed by '.', ':' or ')',
 * or '#' followed by digits, or digits followed by '.', ':' or ')'.
 * Compiled once instead of on every path element.
 */
private static final java.util.regex.Pattern BLOCKING_TEXT_PATTERN =
    java.util.regex.Pattern.compile("(([A-Z][\\.\\:\\)])|(#\\d+)|(\\d+[\\.\\:\\)]))");

/**
 * Filters {@code progeny} down to the nodes whose path to {@code originalNode}
 * is not blocked.
 *
 * <p>{@code originalNode} itself is always kept (exactly once). Any other node
 * is kept only if no element on the path returned by
 * {@code DependencyUtility.getPath(progeny, node, originalNode)} (other than
 * {@code originalNode}) is a blocking element, see {@link #isBlockingNode}.
 *
 * @param originalNode the node the paths lead to
 * @param progeny      candidate nodes; also passed to the path lookup
 * @return a new list with the retained nodes in their original order
 */
private static List<ConllDependencyNode> removeUnannotatedNodes(ConllDependencyNode originalNode, List<ConllDependencyNode> progeny) {
  List<ConllDependencyNode> filtered = new ArrayList<>();
  for(ConllDependencyNode node : progeny){
    if(node == originalNode){
      // Previously this fell through to the path check below and was added a second time.
      filtered.add(node);
      continue;
    }
    if(!isPathBlocked(originalNode, progeny, node)){
      filtered.add(node);
    }
  }
  return filtered;
}

/** Returns true if any element on the path from {@code node} to {@code originalNode}, other than {@code originalNode}, is blocking. */
private static boolean isPathBlocked(ConllDependencyNode originalNode, List<ConllDependencyNode> progeny, ConllDependencyNode node) {
  for(ConllDependencyNode pathEl : DependencyUtility.getPath(progeny, node, originalNode)){
    if(pathEl != originalNode && isBlockingNode(pathEl)){
      return true;
    }
  }
  return false;
}

/** Returns true for conj/cc/punct/meta relations, "." or "," POS tags, or covered text matching {@link #BLOCKING_TEXT_PATTERN}. */
private static boolean isBlockingNode(ConllDependencyNode pathEl) {
  // Constant-first equals: a missing relation or tag is treated as "not blocking" rather than throwing.
  String deprel = pathEl.getDeprel();
  String postag = pathEl.getPostag();
  return "conj".equals(deprel)
      || "cc".equals(deprel)
      || ".".equals(postag)
      || ",".equals(postag)
      || "punct".equals(deprel)
      || "meta".equals(deprel)
      || BLOCKING_TEXT_PATTERN.matcher(pathEl.getCoveredText()).matches();
}
/**
 * Matches covered text consisting of a capital letter followed by '.', ':' or ')',
 * or '#' followed by digits, or digits followed by '.', ':' or ')'.
 * Compiled once instead of on every path element.
 */
private static final java.util.regex.Pattern BLOCKING_TEXT_PATTERN =
    java.util.regex.Pattern.compile("(([A-Z][\\.\\:\\)])|(#\\d+)|(\\d+[\\.\\:\\)]))");

/**
 * Filters {@code progeny} down to the nodes whose path to {@code originalNode}
 * is not blocked.
 *
 * <p>{@code originalNode} itself is always kept (exactly once). Any other node
 * is kept only if no element on the path returned by
 * {@code DependencyUtility.getPath(progeny, node, originalNode)} (other than
 * {@code originalNode}) is a blocking element, see {@link #isBlockingNode}.
 *
 * @param originalNode the node the paths lead to
 * @param progeny      candidate nodes; also passed to the path lookup
 * @return a new list with the retained nodes in their original order
 */
private static List<ConllDependencyNode> removeUnannotatedNodes(ConllDependencyNode originalNode, List<ConllDependencyNode> progeny) {
  List<ConllDependencyNode> filtered = new ArrayList<>();
  for(ConllDependencyNode node : progeny){
    if(node == originalNode){
      // Previously this fell through to the path check below and was added a second time.
      filtered.add(node);
      continue;
    }
    if(!isPathBlocked(originalNode, progeny, node)){
      filtered.add(node);
    }
  }
  return filtered;
}

/** Returns true if any element on the path from {@code node} to {@code originalNode}, other than {@code originalNode}, is blocking. */
private static boolean isPathBlocked(ConllDependencyNode originalNode, List<ConllDependencyNode> progeny, ConllDependencyNode node) {
  for(ConllDependencyNode pathEl : DependencyUtility.getPath(progeny, node, originalNode)){
    if(pathEl != originalNode && isBlockingNode(pathEl)){
      return true;
    }
  }
  return false;
}

/** Returns true for conj/cc/punct/meta relations, "." or "," POS tags, or covered text matching {@link #BLOCKING_TEXT_PATTERN}. */
private static boolean isBlockingNode(ConllDependencyNode pathEl) {
  // Constant-first equals: a missing relation or tag is treated as "not blocking" rather than throwing.
  String deprel = pathEl.getDeprel();
  String postag = pathEl.getPostag();
  return "conj".equals(deprel)
      || "cc".equals(deprel)
      || ".".equals(postag)
      || ",".equals(postag)
      || "punct".equals(deprel)
      || "meta".equals(deprel)
      || BLOCKING_TEXT_PATTERN.matcher(pathEl.getCoveredText()).matches();
}
/**
 * Reports whether a node tagged {@code MD} is attached to the verb governing {@code head}.
 *
 * <p>Starting from the head of {@code head}, ancestors are climbed until a node
 * whose POS tag starts with "VB" is reached or a node without a head is hit.
 * The result is {@code true} if any node in {@code sentNodes} has that node as
 * its head and carries the POS tag "MD".
 *
 * @param head      node whose governor chain is examined
 * @param sentNodes nodes searched for an {@code MD} dependent
 * @return {@code true} if such a dependent exists; {@code false} otherwise,
 *         including when {@code head} has no head of its own
 */
private static final boolean presenceOfModality(ConllDependencyNode head, List<ConllDependencyNode> sentNodes) {
  ConllDependencyNode governor = head.getHead();
  if (governor == null) {
    return false;
  }
  // Climb until a VB* node or the top of the chain.
  while (governor.getHead() != null && !governor.getPostag().startsWith("VB")) {
    governor = governor.getHead();
  }
  for (ConllDependencyNode candidate : sentNodes) {
    if (candidate.getHead() == governor && candidate.getPostag().equals("MD")) {
      return true;
    }
  }
  return false;
}
/**
 * Renders this path with token forms, POS tags and dependency relation labels.
 *
 * <p>Nodes visited before the common node are written as token and POS
 * segments followed by an "up" arc carrying the node's relation label; the
 * common node is written as its token and POS segments alone; nodes visited
 * after it are written as a "down" arc carrying the relation label followed by
 * the token and POS segments.
 *
 * @return the concatenated representation of every node in iteration order
 */
public String toString() {
  final StringBuilder out = new StringBuilder();
  boolean pastCommon = false;
  for (ConllDependencyNode current : this) {
    final boolean common = isCommonNode(current);
    final String tokenAndPos = L_TOK_DELIM + current.getForm() + R_TOK_DELIM
        + L_POS_DELIM + current.getPostag() + R_POS_DELIM;
    if (common) {
      pastCommon = true;
      out.append(tokenAndPos);
    } else if (pastCommon) {
      // Descending side of the path: arc first, then token and POS.
      out.append(DN_ARC_A + L_REL_DELIM + current.getDeprel() + R_REL_DELIM + DN_ARC_B + tokenAndPos);
    } else {
      // Ascending side of the path: token and POS first, then the arc.
      out.append(tokenAndPos + UP_ARC_B + L_REL_DELIM + current.getDeprel() + R_REL_DELIM + UP_ARC_A);
    }
  }
  return out.toString();
}
/**
 * Renders this path with token forms, POS tags and dependency relation labels.
 *
 * <p>Nodes visited before the common node are written as token and POS
 * segments followed by an "up" arc carrying the node's relation label; the
 * common node is written as its token and POS segments alone; nodes visited
 * after it are written as a "down" arc carrying the relation label followed by
 * the token and POS segments.
 *
 * @return the concatenated representation of every node in iteration order
 */
public String toString() {
  final StringBuilder out = new StringBuilder();
  boolean pastCommon = false;
  for (ConllDependencyNode current : this) {
    final boolean common = isCommonNode(current);
    final String tokenAndPos = L_TOK_DELIM + current.getForm() + R_TOK_DELIM
        + L_POS_DELIM + current.getPostag() + R_POS_DELIM;
    if (common) {
      pastCommon = true;
      out.append(tokenAndPos);
    } else if (pastCommon) {
      // Descending side of the path: arc first, then token and POS.
      out.append(DN_ARC_A + L_REL_DELIM + current.getDeprel() + R_REL_DELIM + DN_ARC_B + tokenAndPos);
    } else {
      // Ascending side of the path: token and POS first, then the arc.
      out.append(tokenAndPos + UP_ARC_B + L_REL_DELIM + current.getDeprel() + R_REL_DELIM + UP_ARC_A);
    }
  }
  return out.toString();
}
/**
 * Reports whether a node tagged {@code MD} is attached to the verb governing {@code head}.
 *
 * <p>Starting from the head of {@code head}, ancestors are climbed until a node
 * whose POS tag starts with "VB" is reached or a node without a head is hit.
 * The result is {@code true} if any node in {@code sentNodes} has that node as
 * its head and carries the POS tag "MD".
 *
 * @param head      node whose governor chain is examined
 * @param sentNodes nodes searched for an {@code MD} dependent
 * @return {@code true} if such a dependent exists; {@code false} otherwise,
 *         including when {@code head} has no head of its own
 */
private static final boolean presenceOfModality(ConllDependencyNode head, List<ConllDependencyNode> sentNodes) {
  ConllDependencyNode governor = head.getHead();
  if (governor == null) {
    return false;
  }
  // Climb until a VB* node or the top of the chain.
  while (governor.getHead() != null && !governor.getPostag().startsWith("VB")) {
    governor = governor.getHead();
  }
  for (ConllDependencyNode candidate : sentNodes) {
    if (candidate.getHead() == governor && candidate.getPostag().equals("MD")) {
      return true;
    }
  }
  return false;
}
public static List<Feature> extractForNode(JCas jCas, IdentifiedAnnotation mention, String ftrPrefix) { List<Feature> features = new ArrayList<Feature>(); ConllDependencyNode mentionHeadNode = DependencyParseUtils.findAnnotationHead(jCas, mention); if (mentionHeadNode != null) { ConllDependencyNode dependsOn = mentionHeadNode.getHead(); if (dependsOn != null) { features.add(new Feature(ftrPrefix + "_DEPENDS_ON_WORD", dependsOn.getCoveredText())); features.add(new Feature(ftrPrefix + "_DEPENDS_ON_POS", dependsOn.getPostag())); // Following features come from Zhou et al. 2005 // ET1DW1: combination of the entity type and the dependent word for M1 features.add(new Feature(ftrPrefix + "_TYPE-GOVERNING_WORD", String.format("%d-%s", mention.getTypeID(), dependsOn.getCoveredText()))); // H1DW1: combination of the head word and the dependent word for M1 features.add(new Feature(ftrPrefix + "_HEAD_WORD-GOVERNING_WORD", String.format("%s-%s", mentionHeadNode.getCoveredText(), dependsOn.getCoveredText()))); features.add(new Feature(ftrPrefix + "_TYPE-GOVERNING_POS", String.format("%d-%s", mention.getTypeID(), dependsOn.getPostag()))); features.add(new Feature(ftrPrefix + "_HEAD_POS-GOVERNING_POS", String.format("%s-%s", mentionHeadNode.getPostag(), dependsOn.getPostag()))); } } return features; }
public static List<Feature> extractForNode(JCas jCas, IdentifiedAnnotation mention, String ftrPrefix) { List<Feature> features = new ArrayList<Feature>(); ConllDependencyNode mentionHeadNode = DependencyParseUtils.findAnnotationHead(jCas, mention); if (mentionHeadNode != null) { ConllDependencyNode dependsOn = mentionHeadNode.getHead(); if (dependsOn != null) { features.add(new Feature(ftrPrefix + "_DEPENDS_ON_WORD", dependsOn.getCoveredText())); features.add(new Feature(ftrPrefix + "_DEPENDS_ON_POS", dependsOn.getPostag())); // Following features come from Zhou et al. 2005 // ET1DW1: combination of the entity type and the dependent word for M1 features.add(new Feature(ftrPrefix + "_TYPE-GOVERNING_WORD", String.format("%d-%s", mention.getTypeID(), dependsOn.getCoveredText()))); // H1DW1: combination of the head word and the dependent word for M1 features.add(new Feature(ftrPrefix + "_HEAD_WORD-GOVERNING_WORD", String.format("%s-%s", mentionHeadNode.getCoveredText(), dependsOn.getCoveredText()))); features.add(new Feature(ftrPrefix + "_TYPE-GOVERNING_POS", String.format("%d-%s", mention.getTypeID(), dependsOn.getPostag()))); features.add(new Feature(ftrPrefix + "_HEAD_POS-GOVERNING_POS", String.format("%s-%s", mentionHeadNode.getPostag(), dependsOn.getPostag()))); } } return features; }
if (regnodes[i].getPostag()==null) regnodes[i].setPostag(ANY_POS); if (regnodes[i].getDeprel()==null) regnodes[i].setDeprel(ANY_DEPREL); if (i==commonNodeIndex-1) { str.append( L_TOK_DELIM + regnodes[i].getForm() + R_TOK_DELIM ); str.append( L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM ); continue; + DN_ARC_B + L_TOK_DELIM + regnodes[i].getForm() + R_TOK_DELIM + L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM + L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM + UP_ARC_B + L_REL_DELIM + regnodes[i].getDeprel() + R_REL_DELIM
if (regnodes[i].getPostag()==null) regnodes[i].setPostag(ANY_POS); if (regnodes[i].getDeprel()==null) regnodes[i].setDeprel(ANY_DEPREL); if (i==commonNodeIndex-1) { str.append( L_TOK_DELIM + regnodes[i].getForm() + R_TOK_DELIM ); str.append( L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM ); continue; + DN_ARC_B + L_TOK_DELIM + regnodes[i].getForm() + R_TOK_DELIM + L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM + L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM + UP_ARC_B + L_REL_DELIM + regnodes[i].getDeprel() + R_REL_DELIM
+ L_POS_DELIM + node.getHead().getPostag() + R_POS_DELIM + DN_ARC_A + L_REL_DELIM + node.getDeprel() + R_REL_DELIM + DN_ARC_B + L_TOK_DELIM + node.getForm() + R_TOK_DELIM + L_POS_DELIM + node.getPostag() + R_POS_DELIM + L_POS_DELIM + node.getPostag() + R_POS_DELIM + UP_ARC_B + L_REL_DELIM + node.getDeprel() + R_REL_DELIM + UP_ARC_A + L_TOK_DELIM + node.getHead().getForm() + R_TOK_DELIM + L_POS_DELIM + node.getHead().getPostag() + R_POS_DELIM
+ L_POS_DELIM + node.getHead().getPostag() + R_POS_DELIM + DN_ARC_A + L_REL_DELIM + node.getDeprel() + R_REL_DELIM + DN_ARC_B + L_TOK_DELIM + node.getForm() + R_TOK_DELIM + L_POS_DELIM + node.getPostag() + R_POS_DELIM + L_POS_DELIM + node.getPostag() + R_POS_DELIM + UP_ARC_B + L_REL_DELIM + node.getDeprel() + R_REL_DELIM + UP_ARC_A + L_TOK_DELIM + node.getHead().getForm() + R_TOK_DELIM + L_POS_DELIM + node.getHead().getPostag() + R_POS_DELIM
/**
 * Creates a markable that starts at {@code drToken} and extends to the end of
 * the highest ancestor reached by following heads tagged {@code NNP}.
 *
 * <p>Starting from the dependency node returned for {@code drToken}, the head
 * chain is climbed while the next head exists, has a non-zero id and is tagged
 * "NNP". The markable spans from the token's begin offset to the end offset of
 * the node where climbing stopped. If no dependency node was found, or the
 * resulting end would precede the start, the token's own end offset is used.
 *
 * @param jcas    CAS used for the dependency lookup and the new markable
 * @param drToken token at which the markable begins
 * @return a new markable covering the span described above
 */
private static Markable getDoctorMarkable(JCas jcas, WordToken drToken){
  ConllDependencyNode nnpHead = DependencyUtility.getDependencyNode(jcas, drToken);
  // Constant-first equals stops the climb on a missing POS tag instead of relying on a caught NullPointerException.
  while(nnpHead != null
      && nnpHead.getHead() != null
      && nnpHead.getHead().getId() != 0
      && "NNP".equals(nnpHead.getHead().getPostag())){
    nnpHead = nnpHead.getHead();
  }

  int start = drToken.getBegin();
  // Without a dependency node there is nothing to extend to; fall back to the token itself.
  int end = (nnpHead == null) ? drToken.getEnd() : nnpHead.getEnd();
  if(end < start){
    end = drToken.getEnd();
  }
  return new Markable(jcas, start, end);
}
}
/**
 * Creates a markable that starts at {@code drToken} and extends to the end of
 * the highest ancestor reached by following heads tagged {@code NNP}.
 *
 * <p>Starting from the dependency node returned for {@code drToken}, the head
 * chain is climbed while the next head exists, has a non-zero id and is tagged
 * "NNP". The markable spans from the token's begin offset to the end offset of
 * the node where climbing stopped. If no dependency node was found, or the
 * resulting end would precede the start, the token's own end offset is used.
 *
 * @param jcas    CAS used for the dependency lookup and the new markable
 * @param drToken token at which the markable begins
 * @return a new markable covering the span described above
 */
private static Markable getDoctorMarkable(JCas jcas, WordToken drToken){
  ConllDependencyNode nnpHead = DependencyUtility.getDependencyNode(jcas, drToken);
  // Constant-first equals stops the climb on a missing POS tag instead of relying on a caught NullPointerException.
  while(nnpHead != null
      && nnpHead.getHead() != null
      && nnpHead.getHead().getId() != 0
      && "NNP".equals(nnpHead.getHead().getPostag())){
    nnpHead = nnpHead.getHead();
  }

  int start = drToken.getBegin();
  // Without a dependency node there is nothing to extend to; fall back to the token itself.
  int end = (nnpHead == null) ? drToken.getEnd() : nnpHead.getEnd();
  if(end < start){
    end = drToken.getEnd();
  }
  return new Markable(jcas, start, end);
}
}