/**
 * Prepares the per-word annotation structures for the current sentence:
 * every word is initially paired with a {@code null} named-entity tag, and
 * the phrase map starts out empty.
 *
 * @throws NamedEntityRecognizerException if no sentence has been set
 */
public void recognize() throws NamedEntityRecognizerException {
    if (sentence == null) {
        throw new NamedEntityRecognizerException("null sentence or sentence not set.");
    }
    this.listOfEntities = new ArrayList<NamedEntityWord>(sentence.size());
    this.mapOfEntities = new HashMap<Integer, NamedEntityPhrase>();
    for (String token : sentence) {
        this.listOfEntities.add(new NamedEntityWord(token, null));
    }
}
/**
 * Constructs this NamedEntityRecognizer.
 * <P>
 *
 * @param classifierPath a classifier for Stanford NER (e.g.
 *        ner-eng-ie.crf-3-all2008-distsim.ser.gz, which is part of the
 *        downloadable Stanford NER package)
 * @throws NamedEntityRecognizerException if the classifier file does not exist
 */
public StanfordNamedEntityRecognizer(File classifierPath) throws NamedEntityRecognizerException {
    // Guard clause: fail fast when the classifier file is missing.
    if (!classifierPath.exists()) {
        throw new NamedEntityRecognizerException("File " + classifierPath + " does not exist");
    }
    this.classifierPath = classifierPath;
}
/**
 * Runs the NER over the tokenized sentence and copies each resulting
 * named-entity tag onto the corresponding {@link TokenInfo}.
 *
 * @param tokenizedSent the sentence as a list of token strings
 * @param processedSent the per-token info objects to be annotated, parallel to
 *                      {@code tokenizedSent}
 * @throws InstrumentCombinationException if the NER fails, returns a different
 *         number of tokens, or a tagged word does not match its sentence word
 */
private void addNerInfo(List<String> tokenizedSent, List<TokenInfo> processedSent)
        throws InstrumentCombinationException {
    List<NamedEntityWord> nerSent;
    try {
        m_ner.setSentence(tokenizedSent);
        m_ner.recognize();
        nerSent = m_ner.getAnnotatedSentence();
    } catch (NamedEntityRecognizerException e) {
        throw new InstrumentCombinationException("see nested exception from NamedEntityRecognizer:", e);
    }
    if (nerSent.size() != processedSent.size()) {
        throw new InstrumentCombinationException("the number of ner-tagged tokens is not equal"
                + " to the number of sentence-tokens in input sentence " + tokenizedSent);
    }
    // Walk both sequences in lock-step; the size check above guarantees the
    // NER iterator never runs out before the sentence does.
    Iterator<NamedEntityWord> nerIter = nerSent.iterator();
    for (TokenInfo tokenInfo : processedSent) {
        NamedEntityWord tagged = nerIter.next();
        if (!tokenInfo.getOrigStr().equals(tagged.getWord())) {
            throw new InstrumentCombinationException("a mismatch was found between a ner-tagged word"
                    + " and the sentence word " + tokenInfo.getOrigStr()
                    + " of input sentence " + tokenizedSent);
        }
        tokenInfo.setNamedEntity(tagged.getNamedEntity());
    }
}
// NOTE(review): non-contiguous fragment — the enclosing method, the condition
// guarding the throw, and the token-iteration loop are not visible here, so the
// code is kept byte-identical and only annotated.
innerTool.setSentence(tokenStrings); innerTool.recognize(); taggedTokens = innerTool.getAnnotatedSentence();
// Presumably thrown only when the tagged-token count differs from the
// sentence token count — TODO confirm against the missing guard condition.
throw new NamedEntityRecognizerException("Got NER tagging for " + taggedTokens.size() + " tokens, should have gotten according to the total number of tokens in the sentence: " + tokens.size());
// Advances the annotation iterator in step with the tagged tokens; the tag is
// only read when the NER produced a non-null entity for this token.
Token tokenAnno = tokenIter.next(); if (taggedToken.getNamedEntity() != null) { String tagString = taggedToken.getNamedEntity().toString();
// NOTE(review): fragment of a larger method (start not visible). Runs the NER
// over the collected words, then merges the resulting annotations into the
// construction nodes via the project's Matcher utility.
words.add(word); neRecognizer.setSentence(words); neRecognizer.recognize(); List<NamedEntityWord> neWords = neRecognizer.getAnnotatedSentence();
// Pairs each NamedEntityWord with a BasicConstructionNode using the merge
// services' match-finder, then applies the merge operator to each match.
Matcher<NamedEntityWord, BasicConstructionNode> matcher = new Matcher<NamedEntityWord, BasicConstructionNode>(neWords.iterator(), nodes.iterator(),NamedEntityMergeServices.getMatchFinder(),NamedEntityMergeServices.getOperator()); matcher.makeMatchOperation();
// NOTE(review): heavily elided fragment of StanfordNamedEntityRecognizer.recognize().
// The guard conditions, the classification call, and the loop that walks the
// classifier output are missing from this view; kept byte-identical.
public void recognize() throws NamedEntityRecognizerException
// Presumably guarded by an initialization check — TODO confirm.
throw new NamedEntityRecognizerException("StanfordNamedEntityRecognizer was not initialized.");
if (this.sentence==null) throw new NamedEntityRecognizerException("Wrong input to StanfordNamedEntityRecognizer. A null sentence was supplied.");
// Presumably inside a catch around the classifier call — TODO confirm.
throw new NamedEntityRecognizerException("classifying (i.e. retrieving the Named-Entity for the sentence words) sentence failed.",e);
throw new NamedEntityRecognizerException("classification returned a null list");
// Maps the Stanford answer string to the project's NamedEntityType and records
// the per-word annotation.
newNamedEntityType = StanfordAnswerToNamedEntityMapper.convert(strNamedEntity); listOfEntities.add(new NamedEntityWord(strWord, newNamedEntityType));
// Flushes the accumulated multi-word entity phrase into the map, keyed by the
// index where the phrase started, then resets the accumulator state.
mapOfEntities.put(startOfCurrentNamedEntity, new NamedEntityPhrase(canonicalNamedEntity, currentNamedEntityType)); currentNamedEntityType = null; currentNamedEntity.setLength(0);
/** * A demo program for Stanford NER. * @param args a single argument which is the path of a classifier from the Stanford NER package, e.g. * ${env_var:JARS}/stanford-ner-2009-01-16/classifiers/ner-eng-ie.crf-3-all2008-distsim.ser.gz * @throws Exception */ public static void main(String[] args) throws Exception { if (args.length==0) throw new IllegalArgumentException("First argument must be the path to a Stanford NER classifier (with .ser.gz extension)"); String pathToNER = args[0]; StanfordNamedEntityRecognizer ner = new StanfordNamedEntityRecognizer( new File(pathToNER)); ner.init(); String testString = "The Israeli PM Binyamin Netanyahu said that Israel will accept the Eitan W. Shishinsky recommendations"; //String testString = "In December 2004 the state sold 18.4% of its equity in Air France-KLM. The state's shareholding in Air France-KLM subsequently fell to just under 20%."; LinkedList<String> testStringList = new LinkedList<String>(); for (String word : testString.split(" ")) testStringList.add(word); ner.setSentence(testStringList); ner.recognize(); System.out.println("\nNEs detected:"); System.out.println(ner.getAnnotatedEntities()); System.out.println("\nNE tag for each word: "); List<NamedEntityWord> list = ner.getAnnotatedSentence(); for (NamedEntityWord neWord: list) System.out.println(neWord.getWord()+" ["+neWord.getNamedEntity()+"]"); ner.cleanUp(); }
/** * Create an initialized NamedEntityRecognizer using the given configuration params. */ public static NamedEntityRecognizer createNamedEntityRecognizer(ConfigurationParams params) throws InstrumentCombinationException{ boolean doNer; try { doNer = (params.containsKey("do_named_entity_recognition")? params.getBoolean("do_named_entity_recognition"): false); if (doNer) { NamedEntityRecognizer ner = new StanfordNamedEntityRecognizer(new File(params.getFile("ner-classifier-path").getAbsolutePath())); ner.init(); return ner; } else return null; } catch (ConfigurationException e) { throw new InstrumentCombinationException("Nested exception with configuration file while initializing the NER", e); } catch (NamedEntityRecognizerException e) { throw new InstrumentCombinationException("Nested exception while initializing the NER", e); } } }
/**
 * Checks whether the tree node and the NER-tagged word carry the same surface
 * word, compared case-insensitively.
 *
 * @return {@code true} when both sides expose a non-null word and the words
 *         match ignoring case; {@code false} otherwise, including when any
 *         link in the node's info chain is null
 */
public boolean areMatch() {
    // Explicit null-guards replace the former catch-all try/catch: an empty
    // catch(Exception) silently masked genuine programming errors, not just
    // the expected missing-info nulls.
    if (node == null || neWord == null) {
        return false;
    }
    Info info = node.getInfo();
    if (info == null || info.getNodeInfo() == null) {
        return false;
    }
    String nodeWord = info.getNodeInfo().getWord();
    // String.equalsIgnoreCase(null) is false, matching the original behavior
    // when the NER word is null.
    return nodeWord != null && nodeWord.equalsIgnoreCase(neWord.getWord());
}
/**
 * Releases every underlying linguistic tool (tokenizer, POS tagger, NER,
 * lemmatizer) and drops the references so they can be garbage-collected.
 * Safe to call repeatedly: already-released tools are skipped.
 */
public void close() {
    if (m_tokenizer != null) {
        m_tokenizer.cleanUp();
    }
    m_tokenizer = null;
    if (m_postagger != null) {
        m_postagger.cleanUp();
    }
    m_postagger = null;
    if (m_ner != null) {
        m_ner.cleanUp();
    }
    m_ner = null;
    if (m_lemmatizer != null) {
        m_lemmatizer.cleanUp();
    }
    m_lemmatizer = null;
}
/**
 * Copies the named entity from the matched NER word onto the tree node,
 * rebuilding the node's info with the original word, lemma, serial and
 * syntactic info preserved (all default to null/0 when the node carries no
 * NodeInfo).
 */
public void makeOperation() {
    // Null-guard instead of the former empty catch(Exception), which could
    // hide genuine failures in addition to the expected missing-info case.
    Info oldInfo = node.getInfo();
    NodeInfo nodeInfo = (oldInfo != null) ? oldInfo.getNodeInfo() : null;
    String word = null;
    String lemma = null;
    int serial = 0;
    SyntacticInfo syntacticInfo = null;
    if (nodeInfo != null) {
        word = nodeInfo.getWord();
        lemma = nodeInfo.getWordLemma();
        serial = nodeInfo.getSerial();
        syntacticInfo = nodeInfo.getSyntacticInfo();
    }
    NamedEntity namedEntity = neWord.getNamedEntity();
    node.setInfo(new DefaultInfo(oldInfo.getId(),
            new DefaultNodeInfo(word, lemma, serial, namedEntity, syntacticInfo),
            oldInfo.getEdgeInfo()));
}
// NOTE(review): broken/elided fragment of an interactive demo loop — the
// line-reading code and the loop braces are missing from this view, so the
// code is kept byte-identical. As written, the while statement only governs
// the single setSentence call — TODO confirm intended loop body.
tokenizer.init(); NamedEntityRecognizer ner = new StanfordNamedEntityRecognizer(new File(args[0])); ner.init();
// Loops until the user types "exit" (reading code not visible here).
while (!line.equals("exit")) ner.setSentence(line, tokenizer); ner.recognize();
// Prints each word followed by "/" and, when present, its NE tag name.
for (NamedEntityWord neWord : ner.getAnnotatedSentence()) System.out.print(neWord.getWord()+"/"); if (neWord.getNamedEntity()!=null) System.out.print(neWord.getNamedEntity().name());
// Fragment (enclosing loop not visible): prints one token followed by its
// named-entity tag in brackets, e.g. "Israel [LOCATION]".
System.out.println(neWord.getWord()+" ["+neWord.getNamedEntity()+"]");
/**
 * Initializes the configured instruments — the named-entity recognizer (only
 * when NER is enabled), the parser, and the coreference resolver — then marks
 * the instruments as initialized.
 *
 * @throws NamedEntityRecognizerException if NER initialization fails
 * @throws ParserRunException if parser initialization fails
 * @throws CoreferenceResolutionException if coreference-resolver initialization fails
 */
private void initializeInstruments()
        throws NamedEntityRecognizerException, ParserRunException, CoreferenceResolutionException {
    if (doNer) {
        instruments.getNamedEntityRecognizer().init();
    }
    instruments.getParser().init();
    instruments.getCoreferenceResolver().init();
    areInstrumentsInitialized = true;
}
/**
 * Loads the CRF classifier from the configured path and marks this recognizer
 * as initialized. Must be called exactly once before recognize().
 *
 * @throws NamedEntityRecognizerException if init() was already called, or if
 *         loading the classifier fails (the cause is attached)
 */
public void init() throws NamedEntityRecognizerException {
    if (initialized) {
        throw new NamedEntityRecognizerException(
                "init() was called though the StanfordNamedEntityRecognizer was already initialized.");
    }
    try {
        crfClassifier = CRFClassifier.getClassifier(classifierPath.getPath());
        initialized = true;
    } catch (Exception e) {
        // Broad catch at this boundary: the Stanford loader can fail in many
        // ways; the cause is preserved in the wrapping exception.
        throw new NamedEntityRecognizerException("Classifier load failed.", e);
    }
}
/**
 * Builds the full BIU preprocessing pipeline: sentence splitter, tokenizer,
 * POS tagger, Stanford NER, and the EasyFirst parser, then initializes the
 * NER and the parser. Tool locations come from {@code BiuTestParams}.
 *
 * @throws TokenizerException if the tokenizer cannot be constructed
 * @throws PosTaggerException if the POS tagger cannot be constructed
 * @throws NamedEntityRecognizerException if the NER cannot be constructed or initialized
 * @throws ParserRunException if the parser cannot be constructed or initialized
 */
public BiuTreeBuilder() throws TokenizerException, PosTaggerException, NamedEntityRecognizerException, ParserRunException {
    this.splitter = new LingPipeSentenceSplitter();
    this.tokenizer = new MaxentTokenizer();
    this.tagger = new MaxentPosTagger(BiuTestParams.MAXENT_POS_TAGGER_MODEL_FILE);
    this.ner = new StanfordNamedEntityRecognizer(new File(BiuTestParams.STANFORD_NER_CLASSIFIER_PATH));
    // The parser reuses the tokenizer and tagger constructed above.
    this.parser = new EasyFirstParser(BiuTestParams.EASYFIRST_HOST, BiuTestParams.EASYFIRST_PORT,
            this.tokenizer, this.tagger);
    this.ner.init();
    this.parser.init();
}