/**
 * Entry point: runs a full NLP pre-processing pipeline over every file in a directory.
 *
 * <p>args[0] is the path of the directory containing the input files. The pipeline reads
 * each file's URI, loads its text, then applies sentence detection, tokenization,
 * part-of-speech tagging, and constituency parsing. Results are left in the CAS only
 * (no writer component is added here).
 *
 * @throws Exception any failure while building or running the pipeline is propagated
 */
public static void main(String[] args) throws Exception {
  File filesDirectory = new File(args[0]); // directory of input documents
  SimplePipeline.runPipeline(
      UriCollectionReader.getCollectionReaderFromDirectory(filesDirectory), // one CAS per file URI
      UriToDocumentTextAnnotator.getDescription(), // read each URI's contents into the document text
      SentenceAnnotator.getDescription(),
      TokenAnnotator.getDescription(),
      PosTaggerAnnotator.getDescription(),
      ParserAnnotator.getDescription());
} }
/**
 * Tokenizes the CAS, either one configured window annotation at a time or, when no
 * window type is set, over the whole document text. Token offsets are shifted by the
 * window's begin so they are absolute within the document.
 *
 * @param jCas the CAS to annotate
 * @throws AnalysisEngineProcessException wrapping any failure during tokenization
 */
public void process(JCas jCas) throws AnalysisEngineProcessException {
  try {
    if (!typesInitialized) initializeTypes(jCas);
    if (windowType == null) {
      // No window type configured: tokenize the entire document text at offset 0.
      createTokens(tokenizer.getTokens(jCas.getDocumentText()), 0, jCas);
    } else {
      // Tokenize each window annotation independently, offsetting by its begin position.
      FSIterator<Annotation> windowIterator = jCas.getAnnotationIndex(windowType).iterator();
      while (windowIterator.hasNext()) {
        Annotation window = windowIterator.next();
        List<Token> windowTokens = tokenizer.getTokens(window.getCoveredText());
        createTokens(windowTokens, window.getBegin(), jCas);
      }
    }
  } catch (Exception e) {
    // Re-throw as the UIMA-standard processing exception, preserving the original cause.
    throw new AnalysisEngineProcessException(e);
  }
}
UriToDocumentTextAnnotator.getDescription(),      // read each URI's contents into the document text
SentenceAnnotator.getDescription(),
TokenAnnotator.getDescription(),
PosTaggerAnnotator.getDescription(),
DefaultSnowballStemmer.getDescription("English"), // Snowball stemmer configured for English
/**
 * Entry point: tags every document under the configured input directory with
 * part-of-speech labels and writes one annotated token per line to the output file.
 *
 * @throws UIMAException if a pipeline component fails to initialize or run
 * @throws IOException if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws UIMAException, IOException {
  Options options = new Options();
  options.parseOptions(args);

  // One CAS per file found in the input directory.
  CollectionReader collectionReader =
      UriCollectionReader.getCollectionReaderFromDirectory(options.inputDirectoryName);

  // Standard pre-processing chain: load text, then sentences, tokens, POS tags.
  AnalysisEngineDescription textLoader = UriToDocumentTextAnnotator.getDescription();
  AnalysisEngineDescription sentenceDetector = SentenceAnnotator.getDescription();
  AnalysisEngineDescription tokenSplitter = TokenAnnotator.getDescription();
  AnalysisEngineDescription tagger = PosTaggerAnnotator.getDescription();

  // Writes each Token annotation as one line, formatted by TokenAnnotationWriter.
  AnalysisEngineDescription writer = AnalysisEngineFactory.createPrimitiveDescription(
      LineWriter.class,
      LineWriter.PARAM_OUTPUT_FILE_NAME,
      options.outputFileName,
      LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
      Token.class.getName(),
      LineWriter.PARAM_ANNOTATION_WRITER_CLASS_NAME,
      TokenAnnotationWriter.class.getName());

  SimplePipeline.runPipeline(
      collectionReader, textLoader, sentenceDetector, tokenSplitter, tagger, writer);
  System.out.println("results written to " + options.outputFileName);
}
/**
 * Entry point: parses every file in a directory and serializes each annotated CAS
 * as XMI into the output directory.
 *
 * <p>args[0] is the input directory of documents; args[1] is the output directory
 * for the XMI files. Output files are named from each CAS's view URI.
 *
 * @throws Exception any failure while building or running the pipeline is propagated
 */
public static void main(String[] args) throws Exception {
  File filesDirectory = new File(args[0]); // directory of input documents
  String outputDirectory = args[1];        // destination for the serialized XMI files
  SimplePipeline.runPipeline(
      UriCollectionReader.getCollectionReaderFromDirectory(filesDirectory), // one CAS per file URI
      UriToDocumentTextAnnotator.getDescription(), // read each URI's contents into the document text
      SentenceAnnotator.getDescription(),
      TokenAnnotator.getDescription(),
      PosTaggerAnnotator.getDescription(),
      ParserAnnotator.getDescription(),
      AnalysisEngineFactory.createPrimitiveDescription(
          XWriter.class,
          XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
          outputDirectory,
          XWriter.PARAM_FILE_NAMER_CLASS_NAME,
          ViewURIFileNamer.class.getName())); // name output files from each CAS's view URI
} }
/**
 * Entry point: tags every document under the input directory with part-of-speech
 * labels and writes one annotated token per line to the output file.
 *
 * @throws Exception if option parsing or any pipeline stage fails
 */
public static void main(String[] args) throws Exception {
  Options options = CliFactory.parseArguments(Options.class, args);

  // One CAS per file found in the input directory.
  CollectionReader collectionReader =
      UriCollectionReader.getCollectionReaderFromDirectory(options.getInputDirectory());

  // Standard pre-processing chain: load text, then sentences, tokens, POS tags.
  AnalysisEngineDescription textLoader = UriToDocumentTextAnnotator.getDescription();
  AnalysisEngineDescription sentenceDetector = SentenceAnnotator.getDescription();
  AnalysisEngineDescription tokenSplitter = TokenAnnotator.getDescription();
  AnalysisEngineDescription tagger = PosTaggerAnnotator.getDescription();

  // Writes each Token annotation as one line, formatted by TokenAnnotationWriter.
  AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(
      LineWriter.class,
      LineWriter.PARAM_OUTPUT_FILE_NAME,
      options.getOutputFile(),
      LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
      Token.class.getName(),
      LineWriter.PARAM_ANNOTATION_WRITER_CLASS_NAME,
      TokenAnnotationWriter.class.getName());

  SimplePipeline.runPipeline(
      collectionReader, textLoader, sentenceDetector, tokenSplitter, tagger, writer);
  System.out.println("results written to " + options.getOutputFile());
}
builder.add(TokenAnnotator.getDescription()); // tokenization stage
public AggregateBuilder buildTrainingAggregate() throws ResourceInitializationException { AggregateBuilder builder = new AggregateBuilder(); builder.add(UriToDocumentTextAnnotator.getDescription()); // NLP pre-processing components builder.add(SentenceAnnotator.getDescription()); builder.add(TokenAnnotator.getDescription()); builder.add(PosTaggerAnnotator.getDescription()); builder.add(DefaultSnowballStemmer.getDescription("English")); // This will extract the features for summarization builder.add(AnalysisEngineFactory.createEngineDescription( SumBasicAnnotator.class, DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, SumBasicDataWriter.class.getName(), DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, this.modelDirectory.getPath(), SumBasicAnnotator.PARAM_TOKEN_FIELD, this.tokenField.name(), SumBasicAnnotator.PARAM_STOPWORDS_URI, stopwordsFile.toURI())); // Save off xmis for re-reading builder.add(AnalysisEngineFactory.createEngineDescription( XmiWriter.class, XmiWriter.PARAM_OUTPUT_DIRECTORY, xmiDirectory.getPath())); return builder; }
builder.add(TokenAnnotator.getDescription()); // tokenization stage
FilesCollectionReader.getCollectionReader(inputFileOrDir), // one CAS per input file
SentenceAnnotator.getDescription(),
TokenAnnotator.getDescription(),
PosTaggerAnnotator.getDescription(),
DefaultSnowballStemmer.getDescription("English"),          // Snowball stemmer configured for English
builder.add(TokenAnnotator.getDescription()); // Tokenization
builder.add(DefaultSnowballStemmer.getDescription("English")); // Stemming
public static void main(String[] args) throws Exception { Options options = new Options(); options.parseOptions(args); // a reader that loads the URIs of the text file CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(options.textFile)); // assemble the classification pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // annotators that identify sentences, tokens and part-of-speech tags in the text aggregate.add(SentenceAnnotator.getDescription()); aggregate.add(TokenAnnotator.getDescription()); aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to classify on the new texts aggregate.add(AnalysisEngineFactory.createPrimitiveDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, new File(options.modelDirectory, "model.jar"))); // a very simple annotator that just prints out any named entities we found aggregate.add(AnalysisEngineFactory.createPrimitiveDescription(PrintNamedEntityMentions.class)); // run the classification pipeline on the new texts SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription()); }
builder.add(TokenAnnotator.getDescription()); // Tokenization
builder.add(DefaultSnowballStemmer.getDescription("English")); // Stemming
builder.add(TokenAnnotator.getDescription()); // Tokenization
builder.add(DefaultSnowballStemmer.getDescription("English")); // Stemming
public static void main(String[] args) throws Exception { Options options = CliFactory.parseArguments(Options.class, args); // a reader that loads the URIs of the text file CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(options.getTextFile())); // assemble the classification pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // annotators that identify sentences, tokens and part-of-speech tags in the text aggregate.add(SentenceAnnotator.getDescription()); aggregate.add(TokenAnnotator.getDescription()); aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to classify on the new texts aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, JarClassifierBuilder.getModelJarFile(options.getModelDirectory()))); // a very simple annotator that just prints out any named entities we found aggregate.add(AnalysisEngineFactory.createEngineDescription(PrintNamedEntityMentions.class)); // run the classification pipeline on the new texts SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription()); }
builder.add(TokenAnnotator.getDescription()); // tokenization stage
builder.add(DefaultSnowballStemmer.getDescription("English")); // English Snowball stemming
aggregate.add(TokenAnnotator.getDescription()); // tokenization stage
aggregate.add(PosTaggerAnnotator.getDescription()); // part-of-speech tagging stage
aggregate.add(AnalysisEngineFactory.createEngineDescription( // call continues on the following lines
builder.add(TokenAnnotator.getDescription()); // tokenization stage
builder.add(DefaultSnowballStemmer.getDescription("English")); // English Snowball stemming
builder.add(TokenAnnotator.getDescription()); // Tokenization
builder.add(DefaultSnowballStemmer.getDescription("English")); // Stemming
aggregate.add(TokenAnnotator.getDescription()); // tokenization stage
aggregate.add(PosTaggerAnnotator.getDescription()); // part-of-speech tagging stage
aggregate.add(AnalysisEngineFactory.createPrimitiveDescription( // call continues on the following lines