/**
 * Assembles an aggregate analysis engine description from a sequence of component definitions.
 * <p>
 * Each element of {@code pipeline} is turned into a description via
 * {@code buildComponent(stageId, phase, element)}, where {@code phase} is the 1-based position
 * of the element in iteration order, and the results are added to the aggregate in that order.
 *
 * @param stageId
 *          identifier forwarded unchanged to {@code buildComponent} for every element
 * @param pipeline
 *          the component definitions, consumed in iteration order
 * @param fcd
 *          the flow controller description handed to the {@link AggregateBuilder} constructor
 * @return the aggregate description produced by the builder
 * @throws Exception
 *           if building a component or creating the aggregate description fails
 */
private AnalysisEngineDescription buildPipeline(int stageId, Iterable<AnyObject> pipeline,
    FlowControllerDescription fcd) throws Exception {
  // The first two constructor arguments are deliberately passed as null, exactly as before.
  AggregateBuilder aggregate = new AggregateBuilder(null, null, fcd);
  int phaseNumber = 0;
  for (AnyObject componentSpec : pipeline) {
    phaseNumber++; // phases are numbered starting at 1
    aggregate.add(buildComponent(stageId, phaseNumber, componentSpec));
  }
  return aggregate.createAggregateDescription();
}
/**
 * Convenience factory that instantiates an AnalysisEngine whose default view is mapped to the
 * given view name.
 * <p>
 * The description is wrapped in a fresh {@link AggregateBuilder} together with a sofa mapping
 * from {@link CAS#NAME_DEFAULT_SOFA} to {@code viewName}, and the aggregate is then
 * instantiated.
 *
 * @param analysisEngineDescription
 *          the analysis engine description from which the engine is instantiated
 * @param viewName
 *          the view name to map the default view to
 * @return an aggregate analysis engine consisting of a single component whose default view is
 *         mapped to the view named by {@code viewName}
 * @throws ResourceInitializationException
 *           if a failure occurred during production of the resource
 * @see <a href="package-summary.html#InstancesVsDescriptors">Why are descriptors better than
 *      component instances?</a>
 * @see AggregateBuilder
 */
public static AnalysisEngine createEngine(AnalysisEngineDescription analysisEngineDescription,
    String viewName) throws ResourceInitializationException {
  AggregateBuilder singleComponentAggregate = new AggregateBuilder();
  singleComponentAggregate.add(analysisEngineDescription, CAS.NAME_DEFAULT_SOFA, viewName);
  return singleComponentAggregate.createAggregate();
}
/**
 * Creates an aggregate builder whose only component is an {@link XMIReader} configured with
 * this instance's {@code xmiDirectory}.
 *
 * @return a new builder; callers may append further components to it
 * @throws UIMAException
 *           if the XMIReader description cannot be created
 */
protected AggregateBuilder getXMIReadingPreprocessorAggregateBuilder() throws UIMAException {
  AggregateBuilder preprocessing = new AggregateBuilder();
  preprocessing.add(AnalysisEngineFactory.createEngineDescription(
      XMIReader.class,
      XMIReader.PARAM_XMI_DIRECTORY, this.xmiDirectory));
  return preprocessing;
}
/**
 * Builds a single aggregate {@link AnalysisEngineDescription} containing all given descriptions
 * in the order they were passed.
 * <p>
 * The aggregate uses a {@link FixedFlowController} whose
 * {@link FixedFlowController#PARAM_ACTION_AFTER_CAS_MULTIPLIER} parameter is set to
 * {@code "drop"}. This is intended for use with a filter such as {@link CasFilter_ImplBase}:
 * all subsequent analysis engines will only see the documents that have passed the filter.
 *
 * @param aEngines
 *          {@link AnalysisEngineDescription}s that should be aggregated
 * @return a single {@link AnalysisEngineDescription} aggregating all the input engines
 * @throws ResourceInitializationException
 *           if the flow controller description or the aggregate description cannot be created
 */
public static AnalysisEngineDescription createAggregateBuilderDescription(
    AnalysisEngineDescription... aEngines) throws ResourceInitializationException {
  AggregateBuilder aggregate = new AggregateBuilder();
  aggregate.setFlowControllerDescription(
      FlowControllerFactory.createFlowControllerDescription(
          FixedFlowController.class,
          FixedFlowController.PARAM_ACTION_AFTER_CAS_MULTIPLIER, "drop"));

  for (AnalysisEngineDescription engineDescription : aEngines) {
    aggregate.add(engineDescription);
  }

  return aggregate.createAggregateDescription();
}
@Override protected void train(CollectionReader collectionReader, File directory) throws Exception { AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(SetGoldConfidence.class, SetGoldConfidence.PARAM_GOLD_VIEW, GOLD_VIEW_NAME)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(MarkableSalienceAnnotator.createDataWriterDescription( LibLinearBooleanOutcomeDataWriter.class, directory ))); SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate()); // s=0 -> logistic regression with L2-norm (gives probabilistic outputs) String[] optArray = new String[]{ "-s", "0", "-c", "1", "-w1", "1"}; JarClassifierBuilder.trainAndPackage(directory, optArray); }
private static void writeAggregateDescriptions( AggregateBuilder preprocessing, AnalysisEngineDescription ClearNLPDepParserDesc, AnalysisEngineDescription ClearNLPSRLDesc, File outputRoot, String aggregateSuffix) throws ResourceInitializationException, FileNotFoundException, SAXException, IOException { // Append Dependency Parser into aggregate and write description file preprocessing.add(ClearNLPDepParserDesc); preprocessing.createAggregateDescription().toXML(new FileOutputStream(new File(outputRoot, DEP_NAME + aggregateSuffix))); // Append SRL Parser into aggregate and write description file preprocessing.add(ClearNLPSRLDesc); preprocessing.createAggregateDescription().toXML(new FileOutputStream(new File(outputRoot, SRL_NAME + aggregateSuffix))); }
/**
 * Prepares the pipeline description used by {@link #run} and {@link #run(String)}.
 * <p>
 * The aggregate description is (re)created only when none exists yet or when the pipeline has
 * been flagged as changed; otherwise the existing description is kept. When rebuilding, all
 * entries of the main description list are added first, followed by all entries of the end
 * description list. The changed flag is cleared in either case.
 * Use of this method is order-specific.
 *
 * @return this PipelineBuilder
 * @throws IOException if the pipeline could not be built
 * @throws UIMAException if the pipeline could not be built
 */
public PipelineBuilder build() throws IOException, UIMAException {
   final boolean rebuildNeeded = _analysisEngineDesc == null || _pipelineChanged;
   if ( rebuildNeeded ) {
      final AggregateBuilder aggregate = new AggregateBuilder();
      _descList.forEach( description -> aggregate.add( description ) );
      _descEndList.forEach( description -> aggregate.add( description ) );
      _analysisEngineDesc = aggregate.createAggregateDescription();
   }
   _pipelineChanged = false;
   return this;
}
if(!skipWrite){ AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class)); aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ViewCreatorAnnotator.class, ViewCreatorAnnotator.PARAM_VIEW_NAME, "Baseline")); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphAnnotator.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RelationPropagator.class)); aggregateBuilder.add(EventAnnotator.createAnnotatorDescription()); aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/timeannotator/model.jar")); aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/doctimerel/model.jar")); if(this.goldMarkables){ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyGoldMarkablesInChains.class)); }else{ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class)); // aggregateBuilder.add(CopyFromGold.getDescription(/*Markable.class,*/ CoreferenceRelation.class, CollectionTextRelation.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(MarkableHeadTreeCreator.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyCoreferenceRelations.class, CopyCoreferenceRelations.PARAM_GOLD_VIEW, GOLD_VIEW_NAME)); 
aggregateBuilder.add(MarkableSalienceAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/salience/model.jar")); if(this.evalType == EVAL_SYSTEM.MENTION_PAIR){ aggregateBuilder.add(EventCoreferenceAnnotator.createDataWriterDescription(
@Override public void extract() throws Exception { Collection<File> xmis = FileUtils.listFiles( this.xmiDirectory, new UriCollectionReader.RejectSystemFiles(), new UriCollectionReader.RejectSystemDirectories()); // This simply runs the model and tags the extracted sentences in the CAS and writes the output // to a file if specified. AggregateBuilder builder = buildExtractAggregate(); // Run preprocessing and tfidf counts analyzer SimplePipeline.runPipeline(this.getCollectionReader(xmis), builder.createAggregateDescription()); }
public static void main(String[] args) throws Exception { List<File> files = new ArrayList<>(); // notes have the same names as the directories in which they exist for(File anaforaNoteDir : ANAFORA_ANNOTATIONS_DIR.listFiles()) { String noteFileName = anaforaNoteDir.getName(); String noteFullPath = anaforaNoteDir.getAbsolutePath() + "/" + noteFileName; files.add(new File(noteFullPath)); } CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files); AnalysisEngine engine = getXMIWritingPreprocessorAggregateBuilder().createAggregate(); SimplePipeline.runPipeline(reader, engine); }
/**
 * Builds the tokenized preprocessing pipeline for ClearNLP.
 * <p>
 * The builder contains, in order, the descriptions loaded from
 * {@code SIMPLE_SEGMENTER_PATH} and {@code LVG_BASE_TOKEN_ANNOTATOR_PATH}.
 *
 * @return a new aggregate builder holding the two loaded descriptions
 * @throws InvalidXMLException
 *           if a descriptor cannot be parsed
 * @throws IOException
 *           if a descriptor cannot be read
 */
public static AggregateBuilder getTokenizedAggregateBuilder()
    throws InvalidXMLException, IOException {
  AggregateBuilder tokenizingPipeline = new AggregateBuilder();
  tokenizingPipeline.add(loadDescription(SIMPLE_SEGMENTER_PATH));
  tokenizingPipeline.add(loadDescription(LVG_BASE_TOKEN_ANNOTATOR_PATH));
  return tokenizingPipeline;
}
@Override protected void train(CollectionReader collectionReader, File directory) throws Exception { AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(SetGoldConfidence.class, SetGoldConfidence.PARAM_GOLD_VIEW, GOLD_VIEW_NAME)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(MarkableSalienceAnnotator.createDataWriterDescription( LibLinearBooleanOutcomeDataWriter.class, directory ))); SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate()); // s=0 -> logistic regression with L2-norm (gives probabilistic outputs) String[] optArray = new String[]{ "-s", "0", "-c", "1", "-w1", "1"}; JarClassifierBuilder.trainAndPackage(directory, optArray); }
private static void writeAggregateDescriptions( AggregateBuilder preprocessing, AnalysisEngineDescription ClearNLPDepParserDesc, AnalysisEngineDescription ClearNLPSRLDesc, File outputRoot, String aggregateSuffix) throws ResourceInitializationException, FileNotFoundException, SAXException, IOException { // Append Dependency Parser into aggregate and write description file preprocessing.add(ClearNLPDepParserDesc); preprocessing.createAggregateDescription().toXML(new FileOutputStream(new File(outputRoot, DEP_NAME + aggregateSuffix))); // Append SRL Parser into aggregate and write description file preprocessing.add(ClearNLPSRLDesc); preprocessing.createAggregateDescription().toXML(new FileOutputStream(new File(outputRoot, SRL_NAME + aggregateSuffix))); }
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class)); aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription()); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphAnnotator.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RelationPropagator.class)); aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/timeannotator/model.jar")); aggregateBuilder.add(EventAnnotator.createAnnotatorDescription()); aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/doctimerel/model.jar")); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CoreferenceChainScoringOutput.class, ConfigParameterConstants.PARAM_OUTPUTDIR, this.outputDirectory + goldOut, GOLD_VIEW_NAME)); if(this.goldMarkables){ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyGoldMarkablesInChains.class)); //CopyFromGold.getDescription(Markable.class)); }else{ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class)); // aggregateBuilder.add(CopyFromGold.getDescription(/*Markable.class,*/ CoreferenceRelation.class, CollectionTextRelation.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class)); aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(MarkableHeadTreeCreator.class)); aggregateBuilder.add(MarkableSalienceAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/salience/model.jar")); if(this.evalType == 
EVAL_SYSTEM.MENTION_PAIR){ aggregateBuilder.add(EventCoreferenceAnnotator.createAnnotatorDescription(directory.getAbsolutePath() + File.separator + "model.jar")); }else if(this.evalType == EVAL_SYSTEM.MENTION_CLUSTER){
@Override protected void train() throws Exception { AggregateBuilder builder = this.buildTrainingAggregate(); // Run preprocessing and tfidf counts analyzer SimplePipeline.runPipeline( this.getCollectionReader(items), builder.createAggregateDescription()); // For more details on the training arguments refer to SumBasicModel String[] trainingArgs = { "--max-num-sentences", Integer.toString(this.numSentences), "--seen-words-prob", Double.toString(this.seenWordsProbability), "--composition-function", this.cfType.toString() }; JarClassifierBuilder.trainAndPackage(this.modelDirectory, trainingArgs); }
public static void main(String[] args) throws Exception { List<File> files = new ArrayList<>(); // notes have the same names as the directories in which they exist for(File anaforaNoteDir : ANAFORA_ANNOTATIONS_DIR.listFiles()) { String noteFileName = anaforaNoteDir.getName(); String noteFullPath = anaforaNoteDir.getAbsolutePath() + "/" + noteFileName; files.add(new File(noteFullPath)); } CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files); AnalysisEngine engine = getXMIWritingPreprocessorAggregateBuilder().createAggregate(); SimplePipeline.runPipeline(reader, engine); }
public static AnalysisEngineDescription getMentionClusterCoreferenceDescription() throws ResourceInitializationException { AggregateBuilder builder = new AggregateBuilder(); // Add markables using syntax: (nouns and pronouns) builder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class)); builder.add(getMentionClusterResolverDescription()); return builder.createAggregateDescription(); }
AggregateBuilder builder = new AggregateBuilder(); builder.add(org.cleartk.opennlp.tools.SentenceAnnotator.getDescription()); builder.add(TokenAnnotator.getDescription()); builder.add(DefaultSnowballStemmer.getDescription("English")); builder.add(org.cleartk.opennlp.tools.ParserAnnotator.getDescription()); builder.add(org.cleartk.timeml.time.TimeAnnotator.FACTORY.getAnnotatorDescription()); builder.add(TimeTypeAnnotator.FACTORY.getAnnotatorDescription()); builder.add(EventAnnotator.FACTORY.getAnnotatorDescription()); builder.add(EventTenseAnnotator.FACTORY.getAnnotatorDescription()); builder.add(EventAspectAnnotator.FACTORY.getAnnotatorDescription()); builder.add(EventClassAnnotator.FACTORY.getAnnotatorDescription()); builder.add(EventPolarityAnnotator.FACTORY.getAnnotatorDescription()); builder.add(EventModalityAnnotator.FACTORY.getAnnotatorDescription()); builder.add(AnalysisEngineFactory.createEngineDescription(AddEmptyDCT.class)); builder.add(TemporalLinkEventToDocumentCreationTimeAnnotator.FACTORY.getAnnotatorDescription()); builder.add(TemporalLinkEventToSameSentenceTimeAnnotator.FACTORY.getAnnotatorDescription()); builder.add(TemporalLinkEventToSubordinatedEventAnnotator.FACTORY.getAnnotatorDescription()); AnalysisEngine aggregateEngine = builder.createAggregate(); JCas sourceCas = createJCas(); sourceCas.setDocumentText(documentText); ViewUriUtil.setURI(sourceCas, new URI("")); aggregateEngine.process(sourceCas); String timeMlXml = TempEval2007Writer.toTimeML(sourceCas);
/**
 * Builds the tokenized preprocessing pipeline for ClearNLP.
 * <p>
 * The builder contains, in order, the descriptions loaded from
 * {@code SIMPLE_SEGMENTER_PATH} and {@code LVG_BASE_TOKEN_ANNOTATOR_PATH}.
 *
 * @return a new aggregate builder holding the two loaded descriptions
 * @throws InvalidXMLException
 *           if a descriptor cannot be parsed
 * @throws IOException
 *           if a descriptor cannot be read
 */
public static AggregateBuilder getTokenizedAggregateBuilder()
    throws InvalidXMLException, IOException {
  AggregateBuilder tokenizingPipeline = new AggregateBuilder();
  tokenizingPipeline.add(loadDescription(SIMPLE_SEGMENTER_PATH));
  tokenizingPipeline.add(loadDescription(LVG_BASE_TOKEN_ANNOTATOR_PATH));
  return tokenizingPipeline;
}
/**
 * Evaluates the salience model stored in {@code directory} against pseudo-gold markables.
 * <p>
 * The pipeline is the preprocessor aggregate extended, in order, with the polarity annotator,
 * {@link DocumentIDPrinter}, {@link DeterministicMarkableAnnotator},
 * {@link RemovePersonMarkables}, a {@link ViewCreatorAnnotator} for the {@code "PseudoGold"}
 * view, {@link CreatePseudoGoldMarkables} (from {@code GOLD_VIEW_NAME} into
 * {@code "PseudoGold"}) and the {@link MarkableSalienceAnnotator} loaded from
 * {@code model.jar} inside {@code directory}. For every processed CAS, the {@link Markable}s
 * of the {@code "PseudoGold"} view are compared with those of the default view.
 *
 * @param collectionReader
 *          source of the test documents
 * @param directory
 *          directory containing {@code model.jar}
 * @return the statistics accumulated over all processed documents
 * @throws Exception
 *           if building or running the pipeline fails
 */
@Override
protected AnnotationStatistics<Boolean> test(CollectionReader collectionReader, File directory)
    throws Exception {
  final String pseudoGoldViewName = "PseudoGold";

  AggregateBuilder evaluationPipeline = this.getPreprocessorAggregateBuilder();
  evaluationPipeline.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
  evaluationPipeline.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class));
  evaluationPipeline.add(
      AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class));
  evaluationPipeline.add(
      AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class));
  evaluationPipeline.add(
      AnalysisEngineFactory.createEngineDescription(
          ViewCreatorAnnotator.class,
          ViewCreatorAnnotator.PARAM_VIEW_NAME, pseudoGoldViewName));
  evaluationPipeline.add(
      AnalysisEngineFactory.createEngineDescription(
          CreatePseudoGoldMarkables.class,
          CreatePseudoGoldMarkables.PARAM_GOLD_VIEW, GOLD_VIEW_NAME,
          CreatePseudoGoldMarkables.PARAM_PSEUDO_GOLD_VIEW, pseudoGoldViewName));
  evaluationPipeline.add(
      MarkableSalienceAnnotator.createAnnotatorDescription(
          directory.getAbsolutePath() + File.separator + "model.jar"));

  AnnotationStatistics<Boolean> statistics = new AnnotationStatistics<>();

  Iterator<JCas> processedCases =
      new JCasIterator(collectionReader, evaluationPipeline.createAggregate());
  while (processedCases.hasNext()) {
    JCas processed = processedCases.next();
    JCas pseudoGold = processed.getView(pseudoGoldViewName);
    JCas system = processed.getView(CAS.NAME_DEFAULT_SOFA);
    statistics.add(
        JCasUtil.select(pseudoGold, Markable.class),
        JCasUtil.select(system, Markable.class),
        AnnotationStatistics.<Markable>annotationToSpan(),
        mapConfidenceToBoolean());
  }

  return statistics;
}