@Override
protected Map<SUB_CHUNK_TYPE, String> getSubChunkToOutcomeMap(
    JCas jCas, List<SUB_CHUNK_TYPE> chunkComponents, List<CHUNK_TYPE> chunks) {
  // IO encoding: every sub-chunk annotation covered by a chunk is mapped to
  // "I" plus the chunk's outcome suffix; sub-chunks covered by no chunk are
  // simply absent from the map (callers treat missing entries as "O").
  Feature feature = this.getFeature(jCas);
  Map<SUB_CHUNK_TYPE, String> outcomeBySubChunk = new HashMap<SUB_CHUNK_TYPE, String>();
  for (CHUNK_TYPE chunk : chunks) {
    // the inside-label is identical for all sub-chunks of this chunk
    String insideLabel = "I" + this.getOutcomeSuffix(chunk, feature);
    for (SUB_CHUNK_TYPE component : JCasUtil.selectCovered(this.subChunkClass, chunk)) {
      outcomeBySubChunk.put(component, insideLabel);
    }
  }
  return outcomeBySubChunk;
}
@Override public List<String> createOutcomes( JCas jCas, List<SUB_CHUNK_TYPE> subChunks, List<CHUNK_TYPE> chunks) throws AnalysisEngineProcessException { // get the mapping from sub-chunks to their outcomes Map<SUB_CHUNK_TYPE, String> subChunkToOutcome; subChunkToOutcome = this.getSubChunkToOutcomeMap(jCas, subChunks, chunks); // create one outcome for each sub-chunk by combining the prefix and feature value List<String> outcomes = new ArrayList<String>(); for (SUB_CHUNK_TYPE subChunk : subChunks) { String outcome = subChunkToOutcome.get(subChunk); if (outcome == null) { outcome = "O"; } outcomes.add(outcome); } return outcomes; }
@Override
protected Map<SUB_CHUNK_TYPE, String> getSubChunkToOutcomeMap(
    JCas jCas, List<SUB_CHUNK_TYPE> chunkComponents, List<CHUNK_TYPE> chunks) {
  // BIO encoding: within each chunk, the first covered sub-chunk is labeled
  // "B" (begin) and every subsequent one "I" (inside), each followed by the
  // chunk's outcome suffix. Uncovered sub-chunks are omitted from the map.
  Feature feature = this.getFeature(jCas);
  Map<SUB_CHUNK_TYPE, String> subChunkToOutcome = new HashMap<SUB_CHUNK_TYPE, String>();
  for (CHUNK_TYPE chunk : chunks) {
    String suffix = this.getOutcomeSuffix(chunk, feature);
    List<? extends SUB_CHUNK_TYPE> covered = JCasUtil.selectCovered(this.subChunkClass, chunk);
    for (int i = 0; i < covered.size(); ++i) {
      // index 0 marks the beginning of the chunk; everything after is inside
      subChunkToOutcome.put(covered.get(i), (i == 0 ? "B" : "I") + suffix);
    }
  }
  return subChunkToOutcome;
}
// Initializes this annotator: the static hook is invoked before super.initialize
// (presumably so a classifier model packaged on the classpath is visible when the
// superclass loads it — TODO confirm against RelationExtractorAnnotator), then a
// BIO chunking is configured that groups BaseToken annotations into Modifier
// annotations, taking outcome labels from the Modifier's "typeID" feature.
@Override public void initialize(UimaContext context) throws ResourceInitializationException { RelationExtractorAnnotator.allowClassifierModelOnClasspath(context); super.initialize(context); this.chunking = new BioChunking<BaseToken, Modifier>(BaseToken.class, Modifier.class, "typeID"); }
// NOTE(review): fragment — encodes the namedEntityMentions over the sentence's
// tokens as chunking outcomes, then immediately re-creates chunk annotations from
// those same outcomes (presumably to snap mention spans to token boundaries —
// TODO confirm against the enclosing method).
List<String> outcomes = this.chunking.createOutcomes(jCas, tokens, namedEntityMentions); this.chunking.createChunks(jCas, tokens, outcomes);
// NOTE(review): fragment (the `if` body closes outside this view). During
// training, gold Modifier annotations covered by the sentence are encoded as
// outcomes and re-materialized as chunks — presumably to align modifier spans
// with token boundaries; TODO confirm against the full method.
if (this.isTraining()) { List<Modifier> modifiers = JCasUtil.selectCovered(jCas, Modifier.class, sentence); outcomes = this.chunking.createOutcomes(jCas, tokens, modifiers); this.chunking.createChunks(jCas, tokens, outcomes);
@Override
protected Map<SUB_CHUNK_TYPE, String> getSubChunkToOutcomeMap(
    JCas jCas, List<SUB_CHUNK_TYPE> chunkComponents, List<CHUNK_TYPE> chunks) {
  // BIESO-style encoding: a chunk covering exactly one sub-chunk yields "S"
  // (single); larger chunks yield "B" (begin), "E" (end) and "I" (interior).
  // Sub-chunks covered by no chunk are omitted from the map.
  Feature feature = this.getFeature(jCas);
  Map<SUB_CHUNK_TYPE, String> subChunkToOutcome = new HashMap<SUB_CHUNK_TYPE, String>();
  for (CHUNK_TYPE chunk : chunks) {
    String suffix = this.getOutcomeSuffix(chunk, feature);
    List<? extends SUB_CHUNK_TYPE> subChunks = JCasUtil.selectCovered(this.subChunkClass, chunk);
    int last = subChunks.size() - 1;
    for (int i = 0; i <= last; ++i) {
      String prefix;
      if (last == 0) {
        prefix = "S"; // chunk consists of a single sub-chunk
      } else if (i == 0) {
        prefix = "B";
      } else if (i == last) {
        prefix = "E";
      } else {
        prefix = "I";
      }
      subChunkToOutcome.put(subChunks.get(i), prefix + suffix);
    }
  }
  return subChunkToOutcome;
}
// NOTE(review): fragment (the enclosing loop and surrounding statements are
// outside this view). Appends the parsed outcome plus a trailing "O" sentinel so
// that reading `end + 1` is always safe, then stops scanning once the outcome at
// `end` terminates a chunk relative to its successor — presumably delimiting one
// chunk's extent; TODO confirm against the full method.
chunkOutcomes.add(new ChunkOutcome(outcome)); chunkOutcomes.add(new ChunkOutcome("O")); ChunkOutcome curr = chunkOutcomes.get(end); ChunkOutcome next = chunkOutcomes.get(end + 1); if (this.isEndOfChunk(curr.prefix, curr.label, next.prefix, next.label)) { break;
// Initializes this annotator: the static hook runs before super.initialize
// (presumably so a classifier model bundled on the classpath can be located by
// the superclass — TODO confirm against RelationExtractorAnnotator), then a BIO
// chunking is set up that combines BaseToken annotations into Modifier
// annotations whose labels come from the "typeID" feature.
@Override public void initialize(UimaContext context) throws ResourceInitializationException { RelationExtractorAnnotator.allowClassifierModelOnClasspath(context); super.initialize(context); this.chunking = new BioChunking<BaseToken, Modifier>(BaseToken.class, Modifier.class, "typeID"); }
// NOTE(review): fragment — encodes the `times` annotations over the tokens as
// chunking outcomes, writes those outcomes paired with the extracted feature
// lists as training instances, and re-creates chunk annotations from the same
// outcomes (presumably aligning time spans to token boundaries — TODO confirm).
List<String> outcomes = this.chunking.createOutcomes(jCas, tokens, times); this.dataWriter.write(Instances.toInstances(outcomes, featureLists)); this.chunking.createChunks(jCas, tokens, outcomes);
// NOTE(review): fragment (the `if` body closes outside this view). When
// training, gold Modifier annotations within the sentence are converted to
// outcomes and back into chunks — presumably normalizing modifier spans to
// token boundaries; TODO confirm against the full method.
if (this.isTraining()) { List<Modifier> modifiers = JCasUtil.selectCovered(jCas, Modifier.class, sentence); outcomes = this.chunking.createOutcomes(jCas, tokens, modifiers); this.chunking.createChunks(jCas, tokens, outcomes);
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // define chunking type this.chunking = new BioChunking<Token, Time>(Token.class, Time.class); // add features: word, character pattern, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); NamedFeatureExtractor1<Token> ex = CharacterCategoryPatternFunction.createExtractor(); this.tokenFeatureExtractors.add(ex); this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); // add window of features before and after this.contextFeatureExtractors = Lists.newArrayList(); for (FeatureExtractor1<Token> extractor : this.tokenFeatureExtractors) { this.contextFeatureExtractors.add(new CleartkExtractor<Token, Token>(Token.class, extractor, new Preceding( 3), new Following(3))); } }
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // the token feature extractor: text, char pattern (uppercase, digits, etc.), and part-of-speech this.extractor = new CombinedExtractor1<Token>( new FeatureFunctionExtractor<Token>( new CoveredTextExtractor<Token>(), new CharacterCategoryPatternFunction<Token>(PatternType.REPEATS_MERGED)), new TypePathExtractor<Token>(Token.class, "pos")); // the context feature extractor: the features above for the 3 preceding and 3 following tokens this.contextExtractor = new CleartkExtractor<Token, Token>( Token.class, this.extractor, new Preceding(3), new Following(3)); // the chunking definition: Tokens will be combined to form NamedEntityMentions, with labels // from the "mentionType" attribute so that we get B-location, I-person, etc. this.chunking = new BioChunking<Token, NamedEntityMention>( Token.class, NamedEntityMention.class, "mentionType"); }