/**
 * Creates chunk-level evaluation statistics from a classifier and a dataset.
 *
 * <p>The three arguments are handed unchanged to the superclass constructor; afterwards a
 * fresh {@link LabeledChunkIdentifier} is configured with the same negative label and
 * installed as this object's chunker.
 *
 * @param classifier the classifier forwarded to the superclass constructor
 * @param data the dataset forwarded to the superclass constructor
 * @param negLabel the negative label, given both to the superclass and to the chunker
 * @param <F> the feature type shared by the classifier and the dataset
 */
public <F> MultiClassChunkEvalStats(Classifier<String,F> classifier,
                                    GeneralDataset<String,F> data,
                                    String negLabel) {
  super(classifier, data, negLabel);
  this.chunker = new LabeledChunkIdentifier();
  this.chunker.setNegLabel(negLabel);
}
/**
 * Convenience overload that forwards to the seven-argument {@code getAnnotatedChunks},
 * supplying {@code null} for its fifth and sixth arguments.
 *
 * @param tokens the tokens to scan, forwarded unchanged
 * @param totalTokensOffset the token offset, forwarded unchanged
 * @param textKey the annotation key for token text, forwarded unchanged
 * @param labelKey the annotation key for token labels, forwarded unchanged
 * @param checkTokensCompatible predicate over a pair of tokens, forwarded unchanged
 *        (may presumably be null; confirm against the seven-argument overload)
 * @return whatever the seven-argument overload returns for these arguments
 */
@SuppressWarnings("unchecked")
public List<CoreMap> getAnnotatedChunks(List<CoreLabel> tokens,
                                        int totalTokensOffset,
                                        Class textKey,
                                        Class labelKey,
                                        Predicate<Pair<CoreLabel, CoreLabel>> checkTokensCompatible) {
  List<CoreMap> annotatedChunks =
      getAnnotatedChunks(tokens, totalTokensOffset, textKey, labelKey, null, null, checkTokensCompatible);
  return annotatedChunks;
}
CoreLabel token = tokens.get(i); String label = (String) token.get(labelKey); LabelTagType curTagType = getTagType(label); boolean isCompatible = true; if (checkTokensCompatible != null) { isCompatible = checkTokensCompatible.test(p); if (isEndOfChunk(prevTagType, curTagType) || !isCompatible) { int tokenEnd = i; if (tokenBegin >= 0 && tokenEnd > tokenBegin) { if (isStartOfChunk(prevTagType, curTagType) || (!isCompatible && isChunk(curTagType))) { if (tokenBegin >= 0) { throw new RuntimeException("New chunk started, prev chunk not ended yet!");
} else { MultiClassChunkEvalStats mstats = new MultiClassChunkEvalStats(backgroundLabel); mstats.getChunker().setDefaultPosTag(defaultPosTag); mstats.getChunker().setIgnoreProvidedTag(ignoreProvidedTag); stats = mstats;
boolean prevCorrectEnded = chunker.isEndOfChunk(prevCorrect, correct); boolean prevGuessEnded = chunker.isEndOfChunk(prevGuess, guess); if (prevCorrectEnded && prevGuessEnded && prevGuess.typeMatches(prevCorrect)) { inCorrect=false; boolean correctStarted = LabeledChunkIdentifier.isStartOfChunk(prevCorrect, correct); boolean guessStarted = LabeledChunkIdentifier.isStartOfChunk(prevGuess, guess); if ( correctStarted && guessStarted && guess.typeMatches(correct)) { inCorrect = true; if (chunker.isIgnoreProvidedTag()) { if (guess.typeMatches(correct)) { tokensCorrect++;
CoreLabel token = tokens.get(i); String label = (String) token.get(labelKey); LabelTagType curTagType = getTagType(label); if (isEndOfChunk(prevTagType, curTagType)) { int tokenEnd = i; CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset, tokenBegin = -1; if (isStartOfChunk(prevTagType, curTagType)) { if (tokenBegin >= 0) { throw new RuntimeException("New chunk started, prev chunk not ended yet!");
/**
 * Tells whether a chunk was closed between two adjacent tokens.
 *
 * @param prev - the label/tag/type of the previous token; {@code null} means there is no
 *               previous token
 * @param cur - the label/tag/type of the current token
 * @return true if the previous token was the last token of a chunk; always false when
 *         {@code prev} is {@code null}
 */
public static boolean isEndOfChunk(LabelTagType prev, LabelTagType cur) {
  // With no previous token nothing can have ended, so short-circuit before touching prev.
  return prev != null && isEndOfChunk(prev.tag, prev.type, cur.tag, cur.type);
}
/**
 * Records one guess given as raw label strings.
 *
 * <p>Both labels are converted by the chunker's {@code getTagType} (guess first, then
 * the true label) and the results are passed on to the tag-type based {@code addGuess}.
 *
 * @param guess the predicted label string
 * @param trueLabel the gold label string
 * @param addUnknownLabels forwarded unchanged to the tag-type based {@code addGuess}
 */
@Override
protected void addGuess(String guess, String trueLabel, boolean addUnknownLabels) {
  final LabeledChunkIdentifier.LabelTagType predicted = chunker.getTagType(guess);
  final LabeledChunkIdentifier.LabelTagType gold = chunker.getTagType(trueLabel);
  addGuess(predicted, gold, addUnknownLabels);
}
/**
 * Builds an annotator with default settings: a new {@link LabeledChunkIdentifier} and
 * acronym handling switched off.
 */
public EntityMentionsAnnotator() {
  // defaults
  this.chunkIdentifier = new LabeledChunkIdentifier();
  this.doAcronyms = false;
}
/**
 * Tells whether a chunk was opened between two adjacent tokens.
 *
 * @param prev - the label/tag/type of the previous token; {@code null} means there is no
 *               previous token
 * @param cur - the label/tag/type of the current token
 * @return true if the current token was the first token of a chunk
 */
public static boolean isStartOfChunk(LabelTagType prev, LabelTagType cur) {
  if (prev != null) {
    return isStartOfChunk(prev.tag, prev.type, cur.tag, cur.type);
  }
  // No previous token: compare against a stand-in whose tag and type are both "O".
  return isStartOfChunk("O", "O", cur.tag, cur.type);
}
boolean prevCorrectEnded = chunker.isEndOfChunk(prevCorrect, correct); boolean prevGuessEnded = chunker.isEndOfChunk(prevGuess, guess); if (prevCorrectEnded && prevGuessEnded && prevGuess.typeMatches(prevCorrect)) { inCorrect=false; boolean correctStarted = LabeledChunkIdentifier.isStartOfChunk(prevCorrect, correct); boolean guessStarted = LabeledChunkIdentifier.isStartOfChunk(prevGuess, guess); if ( correctStarted && guessStarted && guess.typeMatches(correct)) { inCorrect = true; if (chunker.isIgnoreProvidedTag()) { if (guess.typeMatches(correct)) { tokensCorrect++;
CoreLabel token = tokens.get(i); String label = (String) token.get(labelKey); LabelTagType curTagType = getTagType(label); if (isEndOfChunk(prevTagType, curTagType)) { int tokenEnd = i; CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset, tokenBegin = -1; if (isStartOfChunk(prevTagType, curTagType)) { if (tokenBegin >= 0) { throw new RuntimeException("New chunk started, prev chunk not ended yet!");
} else { MultiClassChunkEvalStats mstats = new MultiClassChunkEvalStats(backgroundLabel); mstats.getChunker().setDefaultPosTag(defaultPosTag); mstats.getChunker().setIgnoreProvidedTag(ignoreProvidedTag); stats = mstats;
/**
 * Decides whether the previous token closed a chunk, judged against the current token.
 *
 * @param prev - the label/tag/type of the previous token, or {@code null} if none exists
 * @param cur - the label/tag/type of the current token
 * @return true if the previous token was the last token of a chunk; false whenever
 *         {@code prev} is {@code null}
 */
public static boolean isEndOfChunk(LabelTagType prev, LabelTagType cur) {
  final boolean hasPrevious = (prev != null);
  // The delegate is only consulted when a previous token exists.
  return hasPrevious && isEndOfChunk(prev.tag, prev.type, cur.tag, cur.type);
}
/**
 * String-label entry point for recording a guess.
 *
 * <p>Each label is turned into a {@code LabelTagType} through the chunker (the guess is
 * converted before the true label), then the tag-type based {@code addGuess} is invoked.
 *
 * @param guess the predicted label string
 * @param trueLabel the gold label string
 * @param addUnknownLabels passed through to the tag-type based {@code addGuess}
 */
@Override
protected void addGuess(String guess, String trueLabel, boolean addUnknownLabels) {
  LabeledChunkIdentifier.LabelTagType guessed = chunker.getTagType(guess);
  LabeledChunkIdentifier.LabelTagType expected = chunker.getTagType(trueLabel);
  addGuess(guessed, expected, addUnknownLabels);
}
@SuppressWarnings({"UnusedDeclaration", "unchecked"}) public EntityMentionsAnnotator(String name, Properties props) { // if the user has supplied custom CoreAnnotations for the ner tags and entity mentions override the default keys try { if (props.containsKey(name + ".nerCoreAnnotation")) { nerCoreAnnotationClass = (Class<? extends CoreAnnotation<String>>) Class.forName(props.getProperty(name + ".nerCoreAnnotation")); } if (props.containsKey(name + ".nerNormalizedCoreAnnotation")) { nerNormalizedCoreAnnotationClass = (Class<? extends CoreAnnotation<String>>) Class.forName(props.getProperty(name + ".nerNormalizedCoreAnnotation")); } if (props.containsKey(name + ".mentionsCoreAnnotation")) { mentionsCoreAnnotationClass = (Class<? extends CoreAnnotation<List<CoreMap>>>) Class.forName(props.getProperty(name + ".mentionsCoreAnnotation")); } } catch (ClassNotFoundException e) { log.error(e.getMessage()); } chunkIdentifier = new LabeledChunkIdentifier(); doAcronyms = Boolean.parseBoolean(props.getProperty(name + ".acronyms", props.getProperty("acronyms", "false"))); // set up language info, this is needed for handling creating pronominal mentions entityMentionsLanguage = LanguageInfo.getLanguageFromString(props.getProperty(name+".language", "en")); }
/**
 * Decides whether the current token opens a chunk, judged against the previous token.
 *
 * @param prev - the label/tag/type of the previous token, or {@code null} if none exists
 * @param cur - the label/tag/type of the current token
 * @return true if the current token was the first token of a chunk
 */
public static boolean isStartOfChunk(LabelTagType prev, LabelTagType cur) {
  // A missing previous token is treated as one whose tag and type are both "O".
  return (prev == null)
      ? isStartOfChunk("O", "O", cur.tag, cur.type)
      : isStartOfChunk(prev.tag, prev.type, cur.tag, cur.type);
}
/**
 * Creates empty chunk-level evaluation statistics.
 *
 * <p>The negative label is passed to the superclass constructor, then a fresh
 * {@link LabeledChunkIdentifier} is configured with the same label and installed as
 * this object's chunker.
 *
 * @param negLabel the negative label, given both to the superclass and to the chunker
 */
public MultiClassChunkEvalStats(String negLabel) {
  super(negLabel);
  this.chunker = new LabeledChunkIdentifier();
  this.chunker.setNegLabel(negLabel);
}
boolean prevCorrectEnded = chunker.isEndOfChunk(prevCorrect, correct); boolean prevGuessEnded = chunker.isEndOfChunk(prevGuess, guess); if (prevCorrectEnded && prevGuessEnded && prevGuess.typeMatches(prevCorrect)) { inCorrect=false; boolean correctStarted = chunker.isStartOfChunk(prevCorrect, correct); boolean guessStarted = chunker.isStartOfChunk(prevGuess, guess); if ( correctStarted && guessStarted && guess.typeMatches(correct)) { inCorrect = true; if (chunker.isIgnoreProvidedTag()) { if (guess.typeMatches(correct)) { tokensCorrect++;
CoreLabel token = tokens.get(i); String label = (String) token.get(labelKey); LabelTagType curTagType = getTagType(label); boolean isCompatible = true; if (checkTokensCompatible != null) { isCompatible = checkTokensCompatible.test(p); if (isEndOfChunk(prevTagType, curTagType) || !isCompatible) { int tokenEnd = i; if (tokenBegin >= 0 && tokenEnd > tokenBegin) { if (isStartOfChunk(prevTagType, curTagType) || (!isCompatible && isChunk(curTagType))) { if (tokenBegin >= 0) { throw new RuntimeException("New chunk started, prev chunk not ended yet!");