/**
 * Produces a textual dump of the dependency relations found inside an annotation.
 *
 * <p>Every {@code ConllDependencyNode} covered by {@code annotation} that has a
 * non-null head contributes one line of the form
 * {@code deprel(nodeText,headText)} terminated by a newline character.
 * Nodes without a head are omitted.
 *
 * @param jcas the CAS holding the dependency nodes
 * @param annotation the span whose covered dependency nodes are dumped
 * @return the concatenated relation lines; empty when no covered node has a head
 */
public static String dumpDependencyRelations(JCas jcas, Annotation annotation) {
  StringBuilder out = new StringBuilder();
  for (ConllDependencyNode current : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
    ConllDependencyNode governor = current.getHead();
    if (governor == null) {
      // No governor: nothing to print for this node.
      continue;
    }
    String relationLine = String.format("%s(%s,%s)\n",
        current.getDeprel(),
        current.getCoveredText(),
        governor.getCoveredText());
    out.append(relationLine);
  }
  return out.toString();
}
/**
 * Builds a {@code Markable} that starts at the given token and extends to the end
 * of the highest ancestor reachable through a chain of heads tagged {@code NNP}.
 *
 * <p>Starting from the dependency node of {@code drToken}, the head chain is
 * followed while the head exists, is not the node with id 0, and has the part of
 * speech {@code "NNP"}. The markable spans from the token's begin offset to the
 * end offset of the last node reached. If that end offset lies before the token's
 * begin offset, or if no dependency node is available for the token, the token's
 * own end offset is used instead.
 *
 * @param jcas the CAS in which the markable is created
 * @param drToken the token at which the markable begins
 * @return a new markable (not added to the indexes by this method)
 */
private static Markable getDoctorMarkable(JCas jcas, WordToken drToken){
  ConllDependencyNode nnpHead = DependencyUtility.getDependencyNode(jcas, drToken);

  // Explicit null checks replace the former catch of NullPointerException:
  // a missing head or a missing POS tag simply ends the climb.
  while(nnpHead != null){
    ConllDependencyNode parent = nnpHead.getHead();
    if(parent == null || parent.getId() == 0 || !"NNP".equals(parent.getPostag())){
      break;
    }
    nnpHead = parent;
  }

  int start = drToken.getBegin();
  // Fall back to the token itself when there is no dependency node at all.
  int end = (nnpHead == null) ? drToken.getEnd() : nnpHead.getEnd();
  if(end < start){
    end = drToken.getEnd();
  }
  return new Markable(jcas, start, end);
}
}
/**
 * Creates a node covering the given character span: passes the JCas to the
 * superclass constructor, stores the begin and end offsets, and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public ConllDependencyNode(JCas jcas, int begin, int end) { super(jcas); setBegin(begin); setEnd(end); readObject(); }
/**
 * Tells whether the verb governing {@code head} has a modal dependent.
 *
 * <p>The search starts at the head of {@code head} and climbs the head chain until
 * it reaches a node whose part of speech starts with {@code "VB"} or a node that
 * has no head. The sentence nodes are then scanned for one whose head is that
 * very node (identity comparison) and whose part of speech is {@code "MD"}.
 *
 * @param head the node whose governing verb is inspected
 * @param sentNodes the dependency nodes to scan for a modal
 * @return {@code true} if such a modal node exists; {@code false} otherwise,
 *         including when {@code head} has no head of its own
 */
private static final boolean presenceOfModality(ConllDependencyNode head, List<ConllDependencyNode> sentNodes) {
  ConllDependencyNode governor = head.getHead();
  if (governor == null) {
    return false;
  }

  // Climb until a verb-tagged node is found or the top of the chain is reached.
  while (governor.getHead() != null && !governor.getPostag().startsWith("VB")) {
    governor = governor.getHead();
  }

  for (ConllDependencyNode candidate : sentNodes) {
    if (candidate.getHead() == governor && candidate.getPostag().equals("MD")) {
      return true;
    }
  }
  return false;
}
public String calcDepPath(){ if(depPath == null){ if(c1 == null || c2 == null || c2.getBegin() <= c1.getEnd()){ depPath = ""; }else{ buf.append(c2.getDeprel()); ConllDependencyNode cur = c2.getHead(); while(cur != depLca && cur != null){ String rel = cur.getDeprel(); if(rel == null){ cur = null; buf.append(cur.getDeprel()); cur = cur.getHead(); bwd.append(c1.getDeprel()); bwd.insert(0, ">"); cur = c1.getHead(); while(cur != depLca && cur != null){ String rel = cur.getDeprel(); if(rel == null){ cur = null; break; bwd.insert(0,cur.getDeprel()); bwd.insert(0,">"); cur = cur.getHead();
private static void createMarkablesUsingDependencyTrees(JCas jCas) { for(Segment seg : JCasUtil.select(jCas, Segment.class)){ for(ConllDependencyNode node : JCasUtil.selectCovered(jCas, ConllDependencyNode.class, seg)){ String nodeText = node.getCoveredText().toLowerCase(); List<TerminalTreebankNode> terms = JCasUtil.selectCovered(TerminalTreebankNode.class, node); TerminalTreebankNode term = null; if(node.getId() == 0){ continue; if(node.getPostag().startsWith("NN") && term != null && term.getNodeType().startsWith("N")){ if(node.getForm().matches("\\s+")) continue; int begin = node.getBegin(); int end = node.getEnd(); if(progeny.size() > 0){ for(ConllDependencyNode child : progeny){ if(child.getBegin() < begin){ begin = child.getBegin(); if(child.getEnd() > end){ end = child.getEnd(); ConllDependencyNode parent = node.getHead(); if(parent != null && parent.getId() != 0){ if(parent.getBegin() < node.getBegin() && parent.getBegin() > begin){ if(parent.getEnd() > node.getEnd() && parent.getEnd() < end){
if ( node.getId() != 0 ) { // && node.getID() !=pID) { bw.write( node.getId() + "\t" ); bw.write( node.getForm() + "\t" ); bw.write( (node.getHead() == null ? "_" : node.getHead().getId()) + "\t" ); bw.write( node.getDeprel() + "\n" ); } else if ( iv_outputFormat.toLowerCase().contains( "mpos" ) ) { bw.write( node.getId() + "\t" ); bw.write( node.getForm() + "\t" ); bw.write( node.getPostag() + "\t" ); bw.write( (node.getHead() == null ? "_" : node.getHead().getId()) + "\t" ); bw.write( node.getDeprel() + "\n" ); } else if ( iv_outputFormat.toLowerCase().contains( "mlem" ) ) { bw.write( node.getId() + "\t" ); bw.write( node.getForm() + "\t" ); bw.write( node.getLemma() + "\t" ); bw.write( (node.getHead() == null ? "_" : node.getHead().getId()) + "\t" ); bw.write( node.getDeprel() + "\n" ); } else if ( iv_outputFormat.toLowerCase().contains( "dep" ) ) { bw.write( node.getId() + "\t" ); bw.write( node.getForm() + "\t" ); bw.write( node.getLemma() + "\t" ); bw.write( node.getPostag() + "\t" ); bw.write( (node.getHead() == null ? "_" : node.getHead().getId()) + "\t" ); bw.write( node.getDeprel() + "\n" ); } else { //if (iv_outputFormat.toLowerCase().contains("conll")) { bw.write( node.getId() + "\t" ); bw.write( node.getForm() + "\t" ); bw.write( node.getLemma() + "\t" );
private static void createMarkablesUsingDependencyTrees(JCas jCas) { for(Segment seg : JCasUtil.select(jCas, Segment.class)){ for(ConllDependencyNode node : JCasUtil.selectCovered(jCas, ConllDependencyNode.class, seg)){ String nodeText = node.getCoveredText().toLowerCase(); List<TerminalTreebankNode> terms = JCasUtil.selectCovered(TerminalTreebankNode.class, node); TerminalTreebankNode term = null; if(node.getId() == 0){ continue; if(node.getPostag().startsWith("NN") && term != null && term.getNodeType().startsWith("N")){ Markable markable = expandNodeToMarkable(jCas, node); if(markable == null) continue; markable.addToIndexes(); }else if(node.getPostag().equals("DT") && !node.getDeprel().equals("det")){ Markable markable = new Markable(jCas, node.getBegin(), node.getEnd()); markable.addToIndexes(); }else if(node.getCoveredText().toLowerCase().equals("it") && node.getDeprel().contains("bj")){ Markable markable = new Markable(jCas, node.getBegin(), node.getEnd()); markable.addToIndexes();
public static Markable expandNodeToMarkable(JCas jCas, ConllDependencyNode node){ Markable markable = null; String nodeText = node.getCoveredText(); if(node.getForm().matches("\\s+")) return null; int begin = node.getBegin(); int end = node.getEnd(); if(progeny.size() > 0){ for(ConllDependencyNode child : progeny){ if(child.getBegin() < begin){ begin = child.getBegin(); if(child.getEnd() > end){ end = child.getEnd(); ConllDependencyNode parent = node.getHead(); if(parent != null && parent.getId() != 0){ if(parent.getBegin() < node.getBegin() && parent.getBegin() > begin){ if(parent.getEnd() > node.getEnd() && parent.getEnd() < end){ BaseToken prevToken = JCasUtil.selectPreceding(BaseToken.class, parent, 1).get(0); end = prevToken.getEnd();
for ( ConllDependencyNode depNode : JCasUtil.selectCovered( jCas, ConllDependencyNode.class, sentence ) ) { if ( depNode.getHead() == null ) { if ( casDepNode.getId() == 0 ) { casDepNode = casDepNodes.get( 1 ); deprels[ i ] = casDepNode.getDeprel(); ConllDependencyNode head = casDepNode.getHead(); String form = casDepNode.getForm(); String pos = casDepNode.getPostag(); String lemma = casDepNode.getLemma();
if (anodes.get(i).getId()==0) { anodes.remove(i); for (int id2=0; id2<anodes.size(); id2++) { if (id1==id2 || anodes.get(id1).getId()!=anodes.get(id2).getHead().getId()) { matrixofheads[id2][id1]=false; if (Pattern.matches("N..?", anodes.get(i).getPostag())) { return anodes.get(i); if (Pattern.matches("N..?", outnodes.get(i).getPostag())) { return outnodes.get(i);
if(head.getId() != 0 && (head.getPostag().startsWith("PRP") || (head.getPostag().equals("DT") && !head.getDeprel().equals("det")))){ feats.add(new Feature("MorphoIsProper", (head != null && head.getPostag() != null && head.getPostag().equals("NNP")))); feats.add(new Feature("MorphoPlural", head.getPostag().equals("NNS"))); if(node.getPostag().equals("DT") && (node.getLemma().equals("a") || node.getLemma().equals("an"))){ indefinite = true; if(node.getPostag().equals("CD")){ containsNum = true;
/**
 * Filters a node's progeny down to those nodes whose path to the node is not
 * interrupted by a coordination, punctuation, or list-marker element.
 *
 * <p>{@code originalNode} itself is always kept, exactly once. Any other node is
 * dropped when some element on the path returned by
 * {@code DependencyUtility.getPath(progeny, node, originalNode)} (other than
 * {@code originalNode}) has dependency relation {@code conj}, {@code cc},
 * {@code punct} or {@code meta}, has part of speech {@code "."} or {@code ","},
 * or whose covered text looks like a list marker such as {@code "A."},
 * {@code "#3"} or {@code "2)"}.
 *
 * @param originalNode the node the progeny belongs to
 * @param progeny candidate nodes, in the order they should appear in the result
 * @return a new list holding the retained nodes in their original order
 */
private static List<ConllDependencyNode> removeUnannotatedNodes(ConllDependencyNode originalNode, List<ConllDependencyNode> progeny) {
  List<ConllDependencyNode> filtered = new ArrayList<>();
  for(ConllDependencyNode node : progeny){
    if(node == originalNode){
      // Keep the node itself and move on; without this early exit the node fell
      // through to the path check below and was appended a second time.
      filtered.add(node);
      continue;
    }

    boolean blockedByConj = false;
    for(ConllDependencyNode pathEl : DependencyUtility.getPath(progeny, node, originalNode)){
      if(pathEl == originalNode) continue;
      if(pathEl.getDeprel().equals("conj") ||
          pathEl.getDeprel().equals("cc") ||
          pathEl.getPostag().equals(".") ||
          pathEl.getPostag().equals(",") ||
          pathEl.getDeprel().equals("punct") ||
          pathEl.getDeprel().equals("meta") ||
          pathEl.getCoveredText().matches("(([A-Z][\\.\\:\\)])|(#\\d+)|(\\d+[\\.\\:\\)]))")){
        blockedByConj = true;
        break;
      }
    }
    if(!blockedByConj){
      filtered.add(node);
    }
  }
  return filtered;
}
@Override public List<Feature> extract( JCas jCas, IdentifiedAnnotation arg ) { List<Feature> features = new ArrayList<>(); // Pull in general dependency-based features -- externalize to another extractor? ConllDependencyNode node = DependencyUtility.getNominalHeadNode( jCas, arg ); if ( node != null ) { features.add( new Feature( "DEPENDENCY_HEAD", node.getCoveredText() ) ); features.add( new Feature( "DEPENDENCY_HEAD_deprel", node.getDeprel() ) ); } HashMap<String, Boolean> featsMap = WindowedGenericAttributeClassifier.extract( jCas, _sentence, arg ); // Pull in all the features that were used for the rule-based module features.addAll( hashToFeatureList( featsMap ) ); // Pull in the result of the rule-based module as well features.add( new Feature( "GENERIC_CLASSIFIER_LOGIC", WindowedGenericAttributeClassifier.classifyWithLogic( featsMap ) ) ); return features; }
L_TOK_DELIM + node.getHead().getForm() + R_TOK_DELIM + L_POS_DELIM + node.getHead().getPostag() + R_POS_DELIM + DN_ARC_A + L_REL_DELIM + node.getDeprel() + R_REL_DELIM + DN_ARC_B + L_TOK_DELIM + node.getForm() + R_TOK_DELIM + L_POS_DELIM + node.getPostag() + R_POS_DELIM L_TOK_DELIM + node.getForm() + R_TOK_DELIM + L_POS_DELIM + node.getPostag() + R_POS_DELIM + UP_ARC_B + L_REL_DELIM + node.getDeprel() + R_REL_DELIM + UP_ARC_A + L_TOK_DELIM + node.getHead().getForm() + R_TOK_DELIM + L_POS_DELIM + node.getHead().getPostag() + R_POS_DELIM
/** * Finds the head word within a given annotation span */ public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) { for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) { ConllDependencyNode head = depNode.getHead(); if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) { // The head is outside the bounds of the annotation, so this node must be the annotation's head return depNode; } } // Can this happen? return null; }
public static List<Feature> extractForNode(JCas jCas, IdentifiedAnnotation mention, String ftrPrefix) { List<Feature> features = new ArrayList<Feature>(); ConllDependencyNode mentionHeadNode = DependencyParseUtils.findAnnotationHead(jCas, mention); if (mentionHeadNode != null) { ConllDependencyNode dependsOn = mentionHeadNode.getHead(); if (dependsOn != null) { features.add(new Feature(ftrPrefix + "_DEPENDS_ON_WORD", dependsOn.getCoveredText())); features.add(new Feature(ftrPrefix + "_DEPENDS_ON_POS", dependsOn.getPostag())); // Following features come from Zhou et al. 2005 // ET1DW1: combination of the entity type and the dependent word for M1 features.add(new Feature(ftrPrefix + "_TYPE-GOVERNING_WORD", String.format("%d-%s", mention.getTypeID(), dependsOn.getCoveredText()))); // H1DW1: combination of the head word and the dependent word for M1 features.add(new Feature(ftrPrefix + "_HEAD_WORD-GOVERNING_WORD", String.format("%s-%s", mentionHeadNode.getCoveredText(), dependsOn.getCoveredText()))); features.add(new Feature(ftrPrefix + "_TYPE-GOVERNING_POS", String.format("%d-%s", mention.getTypeID(), dependsOn.getPostag()))); features.add(new Feature(ftrPrefix + "_HEAD_POS-GOVERNING_POS", String.format("%s-%s", mentionHeadNode.getPostag(), dependsOn.getPostag()))); } } return features; }
if (regnodes[i].getForm()==null) regnodes[i].setForm(ANY_TOKEN); if (regnodes[i].getPostag()==null) regnodes[i].setPostag(ANY_POS); if (regnodes[i].getDeprel()==null) regnodes[i].setDeprel(ANY_DEPREL); if (i==commonNodeIndex-1) { str.append( L_TOK_DELIM + regnodes[i].getForm() + R_TOK_DELIM ); str.append( L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM ); continue; + L_REL_DELIM + regnodes[i].getDeprel() + R_REL_DELIM + DN_ARC_B + L_TOK_DELIM + regnodes[i].getForm() + R_TOK_DELIM + L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM L_TOK_DELIM + regnodes[i].getForm() + R_TOK_DELIM + L_POS_DELIM + regnodes[i].getPostag() + R_POS_DELIM + UP_ARC_B + L_REL_DELIM + regnodes[i].getDeprel() + R_REL_DELIM + UP_ARC_A );