private IndexedWord makeVertex(String word) { Integer index; // initialized below Pair<String, Integer> wordAndIndex = readWordAndIndex(word); if (wordAndIndex != null) { word = wordAndIndex.first(); index = wordAndIndex.second(); } else { index = getNextFreeIndex(); } indexesUsed.add(index); // Note that, despite the use of indexesUsed and getNextFreeIndex(), // nothing is actually enforcing that no indexes are used twice. This // could occur if some words in the string representation being parsed // come with index markers and some do not. IndexedWord ifl = new IndexedWord(null, 0, index); // log.info("SemanticGraphParsingTask>>> word = " + word); // log.info("SemanticGraphParsingTask>>> index = " + index); // log.info("SemanticGraphParsingTask>>> indexesUsed = " + // indexesUsed); String[] wordAndTag = word.split("/"); ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]); ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); return ifl; }
private void readDep(IndexedWord gov, String reln) { readWhiteSpace(); if (!isLeftBracket(peek())) { // it's a leaf String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); sg.addEdge(gov, dep, GrammaticalRelation.valueOf(this.language, reln), Double.NEGATIVE_INFINITY, false); } else { readLeftBracket(); String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); if (gov != null && reln != null) { sg.addEdge(gov, dep, GrammaticalRelation.valueOf(this.language, reln), Double.NEGATIVE_INFINITY, false); } readWhiteSpace(); while (!isRightBracket(peek()) && !isEOF) { reln = readName(); readRelnSeparator(); readDep(dep, reln); readWhiteSpace(); } readRightBracket(); } }
private void readDep(IndexedWord gov, String reln) { readWhiteSpace(); if (!isLeftBracket(peek())) { // it's a leaf String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); sg.addEdge(gov, dep, GrammaticalRelation.valueOf(this.language, reln), Double.NEGATIVE_INFINITY, false); } else { readLeftBracket(); String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); if (gov != null && reln != null) { sg.addEdge(gov, dep, GrammaticalRelation.valueOf(this.language, reln), Double.NEGATIVE_INFINITY, false); } readWhiteSpace(); while (!isRightBracket(peek()) && !isEOF) { reln = readName(); readRelnSeparator(); readDep(dep, reln); readWhiteSpace(); } readRightBracket(); } }
private void readDep(IndexedWord gov, String reln) { readWhiteSpace(); if (!isLeftBracket(peek())) { // it's a leaf String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); sg.addEdge(gov, dep, GrammaticalRelation.valueOf(this.language, reln), Double.NEGATIVE_INFINITY, false); } else { readLeftBracket(); String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); if (gov != null && reln != null) { sg.addEdge(gov, dep, GrammaticalRelation.valueOf(this.language, reln), Double.NEGATIVE_INFINITY, false); } readWhiteSpace(); while (!isRightBracket(peek()) && !isEOF) { reln = readName(); readRelnSeparator(); readDep(dep, reln); readWhiteSpace(); } readRightBracket(); } }
private IndexedWord makeVertex(String word) { Integer index; // initialized below Pair<String, Integer> wordAndIndex = readWordAndIndex(word); if (wordAndIndex != null) { word = wordAndIndex.first(); index = wordAndIndex.second(); } else { index = getNextFreeIndex(); } indexesUsed.add(index); // Note that, despite the use of indexesUsed and getNextFreeIndex(), // nothing is actually enforcing that no indexes are used twice. This // could occur if some words in the string representation being parsed // come with index markers and some do not. IndexedWord ifl = new IndexedWord(null, 0, index); // System.err.println("SemanticGraphParsingTask>>> word = " + word); // System.err.println("SemanticGraphParsingTask>>> index = " + index); // System.err.println("SemanticGraphParsingTask>>> indexesUsed = " + // indexesUsed); String[] wordAndTag = word.split("/"); ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); return ifl; }
private IndexedWord makeVertex(String word) { Integer index; // initialized below Pair<String, Integer> wordAndIndex = readWordAndIndex(word); if (wordAndIndex != null) { word = wordAndIndex.first(); index = wordAndIndex.second(); } else { index = getNextFreeIndex(); } indexesUsed.add(index); // Note that, despite the use of indexesUsed and getNextFreeIndex(), // nothing is actually enforcing that no indexes are used twice. This // could occur if some words in the string representation being parsed // come with index markers and some do not. IndexedWord ifl = new IndexedWord(null, 0, index); // log.info("SemanticGraphParsingTask>>> word = " + word); // log.info("SemanticGraphParsingTask>>> index = " + index); // log.info("SemanticGraphParsingTask>>> indexesUsed = " + // indexesUsed); String[] wordAndTag = word.split("/"); ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]); ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); return ifl; }
private void readDep(IndexedWord gov, String reln) { readWhiteSpace(); if (!isLeftBracket(peek())) { // it's a leaf String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); sg.addEdge(gov, dep, GrammaticalRelation.valueOf(reln), Double.NEGATIVE_INFINITY, false); } else { readLeftBracket(); String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); if (gov != null && reln != null) { sg.addEdge(gov, dep, GrammaticalRelation.valueOf(reln), Double.NEGATIVE_INFINITY, false); } readWhiteSpace(); while (!isRightBracket(peek()) && !isEOF) { reln = readName(); readColon(); readDep(dep, reln); readWhiteSpace(); } readRightBracket(); } }
private IndexedWord makeVertex(String word) { Integer index; // initialized below Pair<String, Integer> wordAndIndex = readWordAndIndex(word); if (wordAndIndex != null) { word = wordAndIndex.first(); index = wordAndIndex.second(); } else { index = getNextFreeIndex(); } indexesUsed.add(index); // Note that, despite the use of indexesUsed and getNextFreeIndex(), // nothing is actually enforcing that no indexes are used twice. This // could occur if some words in the string representation being parsed // come with index markers and some do not. IndexedWord ifl = new IndexedWord(null, 0, index); // log.info("SemanticGraphParsingTask>>> word = " + word); // log.info("SemanticGraphParsingTask>>> index = " + index); // log.info("SemanticGraphParsingTask>>> indexesUsed = " + // indexesUsed); String[] wordAndTag = word.split("/"); ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]); ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); return ifl; }
/**
 * Parses the input into a fresh {@link SemanticGraph}.
 *
 * @return the populated graph, or {@code null} when the input does not begin
 *     with a left bracket or a {@code ParserException} is raised while
 *     reading (the exception message is logged)
 */
@Override
public SemanticGraph parse() {
  sg = new SemanticGraph();
  SemanticGraph parsed = null;
  try {
    readWhiteSpace();
    if (isLeftBracket(peek())) {
      // The outermost node has neither a governor nor an incoming relation.
      readDep(null, null);
      parsed = sg;
    }
  } catch (ParserException e) {
    log.info("SemanticGraphParser warning: " + e.getMessage());
  }
  return parsed;
}
/**
 * Parses the input into a fresh {@link SemanticGraph}.
 *
 * @return the populated graph, or {@code null} when the input does not begin
 *     with a left bracket or a {@code ParserException} is raised while
 *     reading (the exception message is logged)
 */
@Override
public SemanticGraph parse() {
  sg = new SemanticGraph();
  SemanticGraph parsed = null;
  try {
    readWhiteSpace();
    if (isLeftBracket(peek())) {
      // The outermost node has neither a governor nor an incoming relation.
      readDep(null, null);
      parsed = sg;
    }
  } catch (ParserException e) {
    log.info("SemanticGraphParser warning: " + e.getMessage());
  }
  return parsed;
}
/**
 * Attempts to build a {@link SemanticGraph} from its compact string form.
 * The parser is deliberately simple and could be made more sophisticated.
 *
 * <p>Example: {@code "[ate subj>Bill dobj>[muffins compound>blueberry]]"}
 *
 * <p>This is the same format generated by toCompactString().
 *
 * @param s the string representation to parse
 * @param language the language handed to the parsing task
 * @return whatever the parsing task's {@code parse()} returns for {@code s}
 */
public static SemanticGraph valueOf(String s, Language language) {
  SemanticGraphParsingTask task = new SemanticGraphParsingTask(s, language);
  return task.parse();
}
/**
 * Parses the input into a fresh {@link SemanticGraph}.
 *
 * @return the populated graph, or {@code null} when the input does not begin
 *     with a left bracket or a {@code ParserException} is raised while
 *     reading (the exception message is printed to standard error)
 */
@Override
public SemanticGraph parse() {
  sg = new SemanticGraph();
  SemanticGraph parsed = null;
  try {
    readWhiteSpace();
    if (isLeftBracket(peek())) {
      // The outermost node has neither a governor nor an incoming relation.
      readDep(null, null);
      parsed = sg;
    }
  } catch (ParserException e) {
    System.err.println("SemanticGraphParser warning: " + e.getMessage());
  }
  return parsed;
}
/**
 * Calls {@code readWhiteSpace()} and then consumes the next character if
 * {@code isColon} accepts it. The colon is optional: when the next character
 * is something else, nothing further is read and no error is raised.
 */
private void readColon() {
  readWhiteSpace();
  if (!isColon(peek())) {
    return;
  }
  read();
}
private void readRightBracket() { // System.out.println("Read right."); readWhiteSpace(); char ch = read(); if (!isRightBracket(ch)) throw new ParserException("Expected right paren!"); }
/**
 * Attempts to build a {@link SemanticGraph} from its compact string form.
 * The parser is deliberately simple and could be made more sophisticated.
 *
 * <p>Example: {@code "[ate subj:Bill dobj:[muffins nn:blueberry]]"}
 *
 * <p>This is the same format generated by toCompactString().
 *
 * @param s the string representation to parse
 * @return whatever the parsing task's {@code parse()} returns for {@code s}
 */
public static SemanticGraph valueOf(String s) {
  SemanticGraphParsingTask task = new SemanticGraphParsingTask(s);
  return task.parse();
}
/**
 * Reports whether {@code ch} is one of this parser's punctuation
 * characters: a left bracket, a right bracket, or a colon.
 *
 * @param ch the character to classify
 * @return {@code true} if any of the three predicates accepts {@code ch}
 */
@Override
protected boolean isPunct(char ch) {
  if (isLeftBracket(ch)) {
    return true;
  }
  if (isRightBracket(ch)) {
    return true;
  }
  return isColon(ch);
}
/**
 * Calls {@code readWhiteSpace()} and then consumes the next character if
 * {@code isRelnSeparator} accepts it. The separator is optional: when the
 * next character is something else, nothing further is read and no error is
 * raised.
 */
private void readRelnSeparator() {
  readWhiteSpace();
  if (!isRelnSeparator(peek())) {
    return;
  }
  read();
}
private void readLeftBracket() { // System.out.println("Read left."); readWhiteSpace(); char ch = read(); if (!isLeftBracket(ch)) throw new ParserException("Expected left paren!"); }
/**
 * Calls {@code readWhiteSpace()} and then consumes the next character if
 * {@code isRelnSeparator} accepts it. The separator is optional: when the
 * next character is something else, nothing further is read and no error is
 * raised.
 */
private void readRelnSeparator() {
  readWhiteSpace();
  if (!isRelnSeparator(peek())) {
    return;
  }
  read();
}
private void readLeftBracket() { // System.out.println("Read left."); readWhiteSpace(); char ch = read(); if (!isLeftBracket(ch)) throw new ParserException("Expected left paren!"); }