@Override
protected Map<SUB_CHUNK_TYPE, String> getSubChunkToOutcomeMap(
    JCas jCas, List<SUB_CHUNK_TYPE> chunkComponents, List<CHUNK_TYPE> chunks) {
  // IO encoding: every sub-chunk annotation covered by a chunk is mapped to
  // "I" plus the chunk's outcome suffix; sub-chunks covered by no chunk are
  // simply absent from the map (callers treat missing entries as "O").
  Feature feature = this.getFeature(jCas);
  Map<SUB_CHUNK_TYPE, String> outcomeBySubChunk = new HashMap<SUB_CHUNK_TYPE, String>();
  for (CHUNK_TYPE chunk : chunks) {
    // the inside-label is identical for all sub-chunks of this chunk
    String insideLabel = "I" + this.getOutcomeSuffix(chunk, feature);
    for (SUB_CHUNK_TYPE component : JCasUtil.selectCovered(this.subChunkClass, chunk)) {
      outcomeBySubChunk.put(component, insideLabel);
    }
  }
  return outcomeBySubChunk;
}
@Override public List<String> createOutcomes( JCas jCas, List<SUB_CHUNK_TYPE> subChunks, List<CHUNK_TYPE> chunks) throws AnalysisEngineProcessException { // get the mapping from sub-chunks to their outcomes Map<SUB_CHUNK_TYPE, String> subChunkToOutcome; subChunkToOutcome = this.getSubChunkToOutcomeMap(jCas, subChunks, chunks); // create one outcome for each sub-chunk by combining the prefix and feature value List<String> outcomes = new ArrayList<String>(); for (SUB_CHUNK_TYPE subChunk : subChunks) { String outcome = subChunkToOutcome.get(subChunk); if (outcome == null) { outcome = "O"; } outcomes.add(outcome); } return outcomes; }
@Override
protected Map<SUB_CHUNK_TYPE, String> getSubChunkToOutcomeMap(
    JCas jCas, List<SUB_CHUNK_TYPE> chunkComponents, List<CHUNK_TYPE> chunks) {
  // BIO encoding: within each chunk, the first covered sub-chunk is labeled
  // "B" (begin) and every subsequent one "I" (inside), each followed by the
  // chunk's outcome suffix. Uncovered sub-chunks are omitted from the map.
  Feature feature = this.getFeature(jCas);
  Map<SUB_CHUNK_TYPE, String> subChunkToOutcome = new HashMap<SUB_CHUNK_TYPE, String>();
  for (CHUNK_TYPE chunk : chunks) {
    String suffix = this.getOutcomeSuffix(chunk, feature);
    List<? extends SUB_CHUNK_TYPE> covered = JCasUtil.selectCovered(this.subChunkClass, chunk);
    for (int i = 0; i < covered.size(); ++i) {
      // index 0 marks the beginning of the chunk; everything after is inside
      subChunkToOutcome.put(covered.get(i), (i == 0 ? "B" : "I") + suffix);
    }
  }
  return subChunkToOutcome;
}
// Initializes this annotator: the static hook is invoked before super.initialize
// (presumably so a classifier model packaged on the classpath is visible when the
// superclass loads it — TODO confirm against RelationExtractorAnnotator), then a
// BIO chunking is configured that groups BaseToken annotations into Modifier
// annotations, taking outcome labels from the Modifier's "typeID" feature.
@Override public void initialize(UimaContext context) throws ResourceInitializationException { RelationExtractorAnnotator.allowClassifierModelOnClasspath(context); super.initialize(context); this.chunking = new BioChunking<BaseToken, Modifier>(BaseToken.class, Modifier.class, "typeID"); }
// NOTE(review): fragment — encodes the namedEntityMentions over the sentence's
// tokens as chunking outcomes, then immediately re-creates chunk annotations from
// those same outcomes (presumably to snap mention spans to token boundaries —
// TODO confirm against the enclosing method).
List<String> outcomes = this.chunking.createOutcomes(jCas, tokens, namedEntityMentions); this.chunking.createChunks(jCas, tokens, outcomes);
// NOTE(review): fragment (the `if` body closes outside this view). During
// training, gold Modifier annotations covered by the sentence are encoded as
// outcomes and re-materialized as chunks — presumably to align modifier spans
// with token boundaries; TODO confirm against the full method.
if (this.isTraining()) { List<Modifier> modifiers = JCasUtil.selectCovered(jCas, Modifier.class, sentence); outcomes = this.chunking.createOutcomes(jCas, tokens, modifiers); this.chunking.createChunks(jCas, tokens, outcomes);
@Override
protected Map<SUB_CHUNK_TYPE, String> getSubChunkToOutcomeMap(
    JCas jCas, List<SUB_CHUNK_TYPE> chunkComponents, List<CHUNK_TYPE> chunks) {
  // BIESO-style encoding: a chunk covering exactly one sub-chunk yields "S"
  // (single); larger chunks yield "B" (begin), "E" (end) and "I" (interior).
  // Sub-chunks covered by no chunk are omitted from the map.
  Feature feature = this.getFeature(jCas);
  Map<SUB_CHUNK_TYPE, String> subChunkToOutcome = new HashMap<SUB_CHUNK_TYPE, String>();
  for (CHUNK_TYPE chunk : chunks) {
    String suffix = this.getOutcomeSuffix(chunk, feature);
    List<? extends SUB_CHUNK_TYPE> subChunks = JCasUtil.selectCovered(this.subChunkClass, chunk);
    int last = subChunks.size() - 1;
    for (int i = 0; i <= last; ++i) {
      String prefix;
      if (last == 0) {
        prefix = "S"; // chunk consists of a single sub-chunk
      } else if (i == 0) {
        prefix = "B";
      } else if (i == last) {
        prefix = "E";
      } else {
        prefix = "I";
      }
      subChunkToOutcome.put(subChunks.get(i), prefix + suffix);
    }
  }
  return subChunkToOutcome;
}
// NOTE(review): fragment (the enclosing loop and surrounding statements are
// outside this view). Appends the parsed outcome plus a trailing "O" sentinel so
// that reading `end + 1` is always safe, then stops scanning once the outcome at
// `end` terminates a chunk relative to its successor — presumably delimiting one
// chunk's extent; TODO confirm against the full method.
chunkOutcomes.add(new ChunkOutcome(outcome)); chunkOutcomes.add(new ChunkOutcome("O")); ChunkOutcome curr = chunkOutcomes.get(end); ChunkOutcome next = chunkOutcomes.get(end + 1); if (this.isEndOfChunk(curr.prefix, curr.label, next.prefix, next.label)) { break;
// Initializes this annotator: the static hook runs before super.initialize
// (presumably so a classifier model bundled on the classpath can be located by
// the superclass — TODO confirm against RelationExtractorAnnotator), then a BIO
// chunking is set up that combines BaseToken annotations into Modifier
// annotations whose labels come from the "typeID" feature.
@Override public void initialize(UimaContext context) throws ResourceInitializationException { RelationExtractorAnnotator.allowClassifierModelOnClasspath(context); super.initialize(context); this.chunking = new BioChunking<BaseToken, Modifier>(BaseToken.class, Modifier.class, "typeID"); }
// NOTE(review): fragment — encodes the `times` annotations over the tokens as
// chunking outcomes, writes those outcomes paired with the extracted feature
// lists as training instances, and re-creates chunk annotations from the same
// outcomes (presumably aligning time spans to token boundaries — TODO confirm).
List<String> outcomes = this.chunking.createOutcomes(jCas, tokens, times); this.dataWriter.write(Instances.toInstances(outcomes, featureLists)); this.chunking.createChunks(jCas, tokens, outcomes);
// NOTE(review): fragment (the `if` body closes outside this view). When
// training, gold Modifier annotations within the sentence are converted to
// outcomes and back into chunks — presumably normalizing modifier spans to
// token boundaries; TODO confirm against the full method.
if (this.isTraining()) { List<Modifier> modifiers = JCasUtil.selectCovered(jCas, Modifier.class, sentence); outcomes = this.chunking.createOutcomes(jCas, tokens, modifiers); this.chunking.createChunks(jCas, tokens, outcomes);
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // define chunking type this.chunking = new BioChunking<Token, Time>(Token.class, Time.class); // add features: word, character pattern, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); NamedFeatureExtractor1<Token> ex = CharacterCategoryPatternFunction.createExtractor(); this.tokenFeatureExtractors.add(ex); this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); // add window of features before and after this.contextFeatureExtractors = Lists.newArrayList(); for (FeatureExtractor1<Token> extractor : this.tokenFeatureExtractors) { this.contextFeatureExtractors.add(new CleartkExtractor<Token, Token>(Token.class, extractor, new Preceding( 3), new Following(3))); } }
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // the token feature extractor: text, char pattern (uppercase, digits, etc.), and part-of-speech this.extractor = new CombinedExtractor1<Token>( new FeatureFunctionExtractor<Token>( new CoveredTextExtractor<Token>(), new CharacterCategoryPatternFunction<Token>(PatternType.REPEATS_MERGED)), new TypePathExtractor<Token>(Token.class, "pos")); // the context feature extractor: the features above for the 3 preceding and 3 following tokens this.contextExtractor = new CleartkExtractor<Token, Token>( Token.class, this.extractor, new Preceding(3), new Following(3)); // the chunking definition: Tokens will be combined to form NamedEntityMentions, with labels // from the "mentionType" attribute so that we get B-location, I-person, etc. this.chunking = new BioChunking<Token, NamedEntityMention>( Token.class, NamedEntityMention.class, "mentionType"); }