TokenAnnotator

Best code snippets using org.cleartk.token.tokenizer.TokenAnnotator (showing top 15 results out of 315)

origin: ClearTK/cleartk

 public static void main(String[] args) throws Exception {

  File filesDirectory = new File(args[0]);

  SimplePipeline.runPipeline(
    UriCollectionReader.getCollectionReaderFromDirectory(filesDirectory),
    UriToDocumentTextAnnotator.getDescription(),
    SentenceAnnotator.getDescription(),
    TokenAnnotator.getDescription(),
    PosTaggerAnnotator.getDescription(),
    ParserAnnotator.getDescription());
 }
}
origin: org.cleartk/cleartk-token

public void process(JCas jCas) throws AnalysisEngineProcessException {
 try {
  // Resolve the configured window type from the CAS type system on first use.
  if (!typesInitialized)
   initializeTypes(jCas);
  if (windowType != null) {
   // Tokenize each window annotation (e.g. a sentence) separately, offsetting
   // the resulting tokens by the window's begin position.
   FSIterator<Annotation> windows = jCas.getAnnotationIndex(windowType).iterator();
   while (windows.hasNext()) {
    Annotation window = windows.next();
    List<Token> pojoTokens = tokenizer.getTokens(window.getCoveredText());
    createTokens(pojoTokens, window.getBegin(), jCas);
   }
  } else {
   // No window type configured: tokenize the entire document text at offset 0.
   String text = jCas.getDocumentText();
   List<Token> pojoTokens = tokenizer.getTokens(text);
   createTokens(pojoTokens, 0, jCas);
  }
 } catch (Exception e) {
  throw new AnalysisEngineProcessException(e);
 }
}
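
Every pipeline on this page runs SentenceAnnotator before TokenAnnotator, since the default token description tokenizes within sentence windows. A minimal, self-contained sketch of the same idea over an in-memory JCas (the import paths for SentenceAnnotator and Token are assumptions based on recent ClearTK releases; verify them against your version):

import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.cleartk.opennlp.tools.SentenceAnnotator; // assumed package; may differ by version
import org.cleartk.token.tokenizer.TokenAnnotator;
import org.cleartk.token.type.Token;                // assumed package; may differ by version

public class TokenAnnotatorDemo {

  public static void main(String[] args) throws Exception {
    // Create an empty CAS and give it some raw text to process.
    JCas jCas = JCasFactory.createJCas();
    jCas.setDocumentText("ClearTK wraps UIMA. TokenAnnotator splits sentences into tokens.");

    // Detect sentences first, then tokenize within each sentence window.
    SimplePipeline.runPipeline(
        jCas,
        SentenceAnnotator.getDescription(),
        TokenAnnotator.getDescription());

    // Print every Token annotation that TokenAnnotator created.
    for (Token token : JCasUtil.select(jCas, Token.class)) {
      System.out.println(token.getBegin() + "-" + token.getEnd() + "\t" + token.getCoveredText());
    }
  }
}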
origin: ClearTK/cleartk

public AggregateBuilder buildTrainingAggregate() throws ResourceInitializationException {
 AggregateBuilder builder = new AggregateBuilder();
 builder.add(UriToDocumentTextAnnotator.getDescription());
 // NLP pre-processing components
 builder.add(SentenceAnnotator.getDescription());
 builder.add(TokenAnnotator.getDescription());
 builder.add(PosTaggerAnnotator.getDescription());
 builder.add(DefaultSnowballStemmer.getDescription("English"));
 // This will extract the features for summarization
 builder.add(AnalysisEngineFactory.createEngineDescription(
   SumBasicAnnotator.class,
   DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
   SumBasicDataWriter.class.getName(),
   DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
   this.modelDirectory.getPath(),
   SumBasicAnnotator.PARAM_TOKEN_FIELD,
   this.tokenField.name(),
   SumBasicAnnotator.PARAM_STOPWORDS_URI,
   stopwordsFile.toURI()));
 // Save off xmis for re-reading
 builder.add(AnalysisEngineFactory.createEngineDescription(
   XmiWriter.class,
   XmiWriter.PARAM_OUTPUT_DIRECTORY,
   xmiDirectory.getPath()));
 return builder;
}
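
A sketch of how the aggregate built above might then be driven over the training documents, following the URI-reader pattern used elsewhere on this page (collectTrainingData and documentDirectory are hypothetical names added for illustration):

// Fragment intended to live in the same class as buildTrainingAggregate().
public void collectTrainingData(File documentDirectory) throws Exception {
  // Read the training texts by URI, exactly as the other snippets on this page do.
  CollectionReader reader =
      UriCollectionReader.getCollectionReaderFromDirectory(documentDirectory);
  // Run pre-processing, feature extraction and XMI writing in one pass; the data
  // writer configured above writes its training instances under modelDirectory.
  SimplePipeline.runPipeline(reader, buildTrainingAggregate().createAggregateDescription());
}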
origin: ClearTK/cleartk

public static void main(String[] args) throws Exception {
 Options options = CliFactory.parseArguments(Options.class, args);
 CollectionReader reader = UriCollectionReader.getCollectionReaderFromDirectory(options.getInputDirectory());
 AnalysisEngineDescription uriToText = UriToDocumentTextAnnotator.getDescription();
 AnalysisEngineDescription sentences = SentenceAnnotator.getDescription();
 AnalysisEngineDescription tokenizer = TokenAnnotator.getDescription();
 AnalysisEngineDescription posTagger = PosTaggerAnnotator.getDescription();
 AnalysisEngineDescription lineWriter = AnalysisEngineFactory.createEngineDescription(
   LineWriter.class,
   LineWriter.PARAM_OUTPUT_FILE_NAME,
   options.getOutputFile(),
   LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
   Token.class.getName(),
   LineWriter.PARAM_ANNOTATION_WRITER_CLASS_NAME,
   TokenAnnotationWriter.class.getName());
 SimplePipeline.runPipeline(reader, uriToText, sentences, tokenizer, posTagger, lineWriter);
 System.out.println("results written to " + options.getOutputFile());
}
origin: ClearTK/cleartk

FilesCollectionReader.getCollectionReader(inputFileOrDir),
SentenceAnnotator.getDescription(),
TokenAnnotator.getDescription(),
PosTaggerAnnotator.getDescription(),
DefaultSnowballStemmer.getDescription("English"),
origin: ClearTK/cleartk

public static void main(String[] args) throws Exception {
 Options options = CliFactory.parseArguments(Options.class, args);
 // a reader that loads the URIs of the text file
 CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(options.getTextFile()));
 // assemble the classification pipeline
 AggregateBuilder aggregate = new AggregateBuilder();
 // an annotator that loads the text from the training file URIs
 aggregate.add(UriToDocumentTextAnnotator.getDescription());
 // annotators that identify sentences, tokens and part-of-speech tags in the text
 aggregate.add(SentenceAnnotator.getDescription());
 aggregate.add(TokenAnnotator.getDescription());
 aggregate.add(PosTaggerAnnotator.getDescription());
 // our NamedEntityChunker annotator, configured to classify on the new texts
 aggregate.add(AnalysisEngineFactory.createEngineDescription(
   NamedEntityChunker.class,
   CleartkSequenceAnnotator.PARAM_IS_TRAINING,
   false,
   GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
   JarClassifierBuilder.getModelJarFile(options.getModelDirectory())));
 // a very simple annotator that just prints out any named entities we found
 aggregate.add(AnalysisEngineFactory.createEngineDescription(PrintNamedEntityMentions.class));
 // run the classification pipeline on the new texts
 SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription());
}
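
The PrintNamedEntityMentions consumer added above is not shown on this page; a plausible minimal version, assuming ClearTK's org.cleartk.ne.type.NamedEntityMention type and its getMentionType() accessor (the actual example class may differ):

import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.cleartk.ne.type.NamedEntityMention;

// Sketch of a print-only consumer; the real PrintNamedEntityMentions in the
// ClearTK examples may differ in detail.
public class PrintNamedEntityMentions extends JCasAnnotator_ImplBase {

  @Override
  public void process(JCas jCas) {
    // Dump each named-entity mention found by the chunker to standard output.
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
      System.out.println(mention.getMentionType() + ": " + mention.getCoveredText());
    }
  }
}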
origin: ClearTK/cleartk

UriToDocumentTextAnnotator.getDescription(),
SentenceAnnotator.getDescription(),
TokenAnnotator.getDescription(),
PosTaggerAnnotator.getDescription(),
DefaultSnowballStemmer.getDescription("English"),
origin: ClearTK/cleartk

  SentenceAnnotator.PARAM_WINDOW_CLASS_NAMES,
  new Class<?>[] { Text.class }));
builder.add(TokenAnnotator.getDescription());
builder.add(PosTaggerAnnotator.getDescription());
builder.add(DefaultSnowballStemmer.getDescription("English"));
origin: ClearTK/cleartk

  SentenceAnnotator.PARAM_WINDOW_CLASS_NAMES,
  new Class<?>[] { Text.class }));
preprocess.add(TokenAnnotator.getDescription());
preprocess.add(PosTaggerAnnotator.getDescription());
preprocess.add(DefaultSnowballStemmer.getDescription("English"));
origin: ClearTK/cleartk

builder.add(TokenAnnotator.getDescription()); // Tokenization
builder.add(DefaultSnowballStemmer.getDescription("English")); // Stemming
origin: ClearTK/cleartk

aggregate.add(TokenAnnotator.getDescription());
aggregate.add(PosTaggerAnnotator.getDescription());
aggregate.add(AnalysisEngineFactory.createEngineDescription(
origin: ClearTK/cleartk

builder.add(TokenAnnotator.getDescription());
origin: ClearTK/cleartk

builder.add(TokenAnnotator.getDescription()); // Tokenization
builder.add(DefaultSnowballStemmer.getDescription("English")); // Stemming
origin: ClearTK/cleartk

builder.add(TokenAnnotator.getDescription());
builder.add(DefaultSnowballStemmer.getDescription("English"));
org.cleartk.token.tokenizer.TokenAnnotator

Javadoc

Copyright (c) 2007-2008, Regents of the University of Colorado. All rights reserved.

Most used methods

  • getDescription
  • createTokens
  • initializeTypes
