/**
 * Processes one CAS by delegating directly to the superclass implementation.
 *
 * <p>Declared {@code synchronized}, presumably to serialize access to
 * non-thread-safe state in the superclass — NOTE(review): confirm.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException if superclass processing fails
 */
@Override
public synchronized void process(JCas jCas) throws AnalysisEngineProcessException {
  super.process(jCas);
}
/**
 * Entry point: runs a full NLP pipeline (text loading, sentence segmentation,
 * tokenization, POS tagging, parsing) over every file in a directory.
 *
 * @param args args[0] is the directory containing the input files
 * @throws Exception if any pipeline component fails
 */
public static void main(String[] args) throws Exception {
  // Fix: fail with a usage message instead of an ArrayIndexOutOfBoundsException
  // when the required directory argument is missing.
  if (args.length < 1) {
    System.err.println("usage: <files-directory>");
    System.exit(1);
  }
  File filesDirectory = new File(args[0]);
  SimplePipeline.runPipeline(
      UriCollectionReader.getCollectionReaderFromDirectory(filesDirectory),
      UriToDocumentTextAnnotator.getDescription(),
      SentenceAnnotator.getDescription(),
      TokenAnnotator.getDescription(),
      PosTaggerAnnotator.getDescription(),
      ParserAnnotator.getDescription());
}
}
uriReader, UriToDocumentTextAnnotator.getDescription(), SentenceAnnotator.getDescription(), TokenAnnotator.getDescription(), PosTaggerAnnotator.getDescription(), // reader plus text-loading, sentence, token, and POS stages; argument list continues on the next line — enclosing call not visible here
/**
 * Entry point: tokenizes and POS-tags every file in the input directory and
 * writes one token annotation per line to the configured output file.
 *
 * @param args command-line arguments parsed into {@code Options}
 * @throws Exception if option parsing or any pipeline component fails
 */
public static void main(String[] args) throws Exception {
  Options options = CliFactory.parseArguments(Options.class, args);

  // One CAS per file URI found in the input directory.
  CollectionReader collectionReader =
      UriCollectionReader.getCollectionReaderFromDirectory(options.getInputDirectory());

  // Processing stages, in order: load text, then segment and tag it.
  AnalysisEngineDescription textLoader = UriToDocumentTextAnnotator.getDescription();
  AnalysisEngineDescription sentenceSegmenter = SentenceAnnotator.getDescription();
  AnalysisEngineDescription wordTokenizer = TokenAnnotator.getDescription();
  AnalysisEngineDescription partOfSpeechTagger = PosTaggerAnnotator.getDescription();

  // Writer configured to emit Token annotations via TokenAnnotationWriter.
  AnalysisEngineDescription tokenLineWriter =
      AnalysisEngineFactory.createEngineDescription(
          LineWriter.class,
          LineWriter.PARAM_OUTPUT_FILE_NAME, options.getOutputFile(),
          LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME, Token.class.getName(),
          LineWriter.PARAM_ANNOTATION_WRITER_CLASS_NAME,
          TokenAnnotationWriter.class.getName());

  SimplePipeline.runPipeline(
      collectionReader,
      textLoader,
      sentenceSegmenter,
      wordTokenizer,
      partOfSpeechTagger,
      tokenLineWriter);
  System.out.println("results written to " + options.getOutputFile());
}
/**
 * Thin pass-through: hands the CAS to the superclass for processing.
 *
 * <p>The {@code synchronized} modifier forces callers to take turns on this
 * instance; the state it protects is not visible in this block — TODO confirm.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException propagated from the superclass
 */
@Override
public synchronized void process(JCas jCas) throws AnalysisEngineProcessException {
  super.process(jCas);
}
public AggregateBuilder buildTrainingAggregate() throws ResourceInitializationException { AggregateBuilder builder = new AggregateBuilder(); builder.add(UriToDocumentTextAnnotator.getDescription()); // NLP pre-processing components builder.add(SentenceAnnotator.getDescription()); builder.add(TokenAnnotator.getDescription()); builder.add(PosTaggerAnnotator.getDescription()); builder.add(DefaultSnowballStemmer.getDescription("English")); // This will extract the features for summarization builder.add(AnalysisEngineFactory.createEngineDescription( SumBasicAnnotator.class, DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, SumBasicDataWriter.class.getName(), DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, this.modelDirectory.getPath(), SumBasicAnnotator.PARAM_TOKEN_FIELD, this.tokenField.name(), SumBasicAnnotator.PARAM_STOPWORDS_URI, stopwordsFile.toURI())); // Save off xmis for re-reading builder.add(AnalysisEngineFactory.createEngineDescription( XmiWriter.class, XmiWriter.PARAM_OUTPUT_DIRECTORY, xmiDirectory.getPath())); return builder; }
/**
 * Forwards CAS processing to the superclass unchanged.
 *
 * <p>Method-level {@code synchronized} serializes concurrent calls on this
 * instance; what it guards is defined outside this block — NOTE(review): verify.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException if the delegated processing fails
 */
@Override
public synchronized void process(JCas jCas) throws AnalysisEngineProcessException {
  super.process(jCas);
}
/**
 * Entry point: segments every file in the input directory into sentences and
 * writes one sentence annotation per line to the configured output file.
 *
 * @param args command-line arguments parsed into {@code Options}
 * @throws Exception if option parsing or any pipeline component fails
 */
public static void main(String[] args) throws Exception {
  Options options = CliFactory.parseArguments(Options.class, args);

  // One CAS per file URI in the input directory.
  CollectionReader reader =
      UriCollectionReader.getCollectionReaderFromDirectory(options.getInputDirectory());

  // Assemble the stages: load text, split sentences, write them out.
  AggregateBuilder pipeline = new AggregateBuilder();
  pipeline.add(UriToDocumentTextAnnotator.getDescription());
  pipeline.add(SentenceAnnotator.getDescription());
  pipeline.add(AnalysisEngineFactory.createEngineDescription(
      LineWriter.class,
      LineWriter.PARAM_OUTPUT_FILE_NAME, options.getOutputFile(),
      LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME, Sentence.class.getName()));

  SimplePipeline.runPipeline(reader, pipeline.createAggregateDescription());
  System.out.println("results written to " + options.getOutputFile());
}
}
builder.add(SentenceAnnotator.getDescription()); // add the sentence-annotation stage; enclosing method not visible in this chunk
SentenceAnnotator.getDescription(), TokenAnnotator.getDescription(), PosTaggerAnnotator.getDescription(), // sentence, token, and POS stages; part of a larger argument list — enclosing call not visible here
builder.add(SentenceAnnotator.getDescription()); // Sentence segmentation stage of the pipeline
public static void main(String[] args) throws Exception { Options options = CliFactory.parseArguments(Options.class, args); // a reader that loads the URIs of the text file CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(options.getTextFile())); // assemble the classification pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // annotators that identify sentences, tokens and part-of-speech tags in the text aggregate.add(SentenceAnnotator.getDescription()); aggregate.add(TokenAnnotator.getDescription()); aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to classify on the new texts aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, JarClassifierBuilder.getModelJarFile(options.getModelDirectory()))); // a very simple annotator that just prints out any named entities we found aggregate.add(AnalysisEngineFactory.createEngineDescription(PrintNamedEntityMentions.class)); // run the classification pipeline on the new texts SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription()); }
aggregate.add(SentenceAnnotator.getDescription()); aggregate.add(TokenAnnotator.getDescription()); aggregate.add(PosTaggerAnnotator.getDescription()); // pre-processing stages: sentences, tokens, POS tags; enclosing method not visible in this chunk
builder.add(SentenceAnnotator.getDescription()); builder.add(TokenAnnotator.getDescription()); builder.add(DefaultSnowballStemmer.getDescription("English")); // sentence, token, and English Snowball-stemming stages; enclosing method not visible in this chunk
builder.add(SentenceAnnotator.getDescription()); // Sentence segmentation stage of the pipeline