/**
 * Read mention-relation annotations -- including coreference -- from ERE corpus.
 * <p>
 * Convenience constructor: delegates to the four-argument constructor, supplying a
 * {@link TokenizerTextAnnotationBuilder} that wraps a default-constructed {@link StatefulTokenizer}.
 *
 * @param ereCorpus the ERE corpus release (values from
 *        {@link edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREDocumentReader.EreCorpus})
 * @param corpusRoot forwarded unchanged to the delegated constructor; presumably the data root
 *        directory for the ERE corpus to be processed -- TODO confirm
 * @param throwExceptionOnXmlParseFailure if 'true', throws exception if xml parser encounters e.g. mismatched
 *        open/close tags
 * @throws Exception propagated from the delegated constructor
 */
public EREMentionRelationReader(EreCorpus ereCorpus, String corpusRoot, boolean throwExceptionOnXmlParseFailure) throws Exception {
    this(ereCorpus, new TokenizerTextAnnotationBuilder(new StatefulTokenizer()), corpusRoot, throwExceptionOnXmlParseFailure);
}
// Set up per-item working state: pick the i-th sentence and its type, tokenize the sentence,
// and keep the token strings in a resizable list.
// NOTE(review): a new StatefulTokenizer is constructed every time this code runs; if this sits inside
// a loop over i, consider whether one instance could be reused -- confirm the tokenizer's state semantics first.
StatefulTokenizer statefulTokenizer = new StatefulTokenizer();
String sentence = sentences.get(i);
// Entry of `types` at the same index as the sentence.
String type = types.get(i);
// The first component of the returned pair (String[]) is used below as the token strings.
// presumably the IntPair[] component holds per-token offsets -- TODO confirm
Pair<String[], IntPair[]> tokenizedSentence = statefulTokenizer.tokenizeSentence(sentence);
// Arrays.asList is a fixed-size view, so copy it into a LinkedList to get a list that can grow/shrink.
List<String> curTokens = new LinkedList<>(Arrays.asList(tokenizedSentence.getFirst()));
int firstArgStart = 0;
/**
 * Creates a reader over an ERE English corpus using the default tokenization: delegates to the
 * four-argument constructor with a {@link TokenizerTextAnnotationBuilder} wrapping a
 * default-constructed {@link StatefulTokenizer}.
 * <p>
 * NOTE(review): this Javadoc used to describe reading "mention-relation annotations -- including
 * coreference", which looks copied from the mention-relation reader; presumably this class reads
 * event annotations (per its name) -- confirm.
 *
 * @param ereCorpus the ERE corpus release (values from {@link EreCorpus})
 * @param corpusRoot the data root directory for the ERE corpus to be processed
 * @param throwExceptionOnXmlParseFailure if 'true', throws exception if xml parser encounters e.g. mismatched
 *        open/close tags
 * @throws Exception propagated from the delegated constructor
 */
public EREEventReader(EreCorpus ereCorpus, String corpusRoot, boolean throwExceptionOnXmlParseFailure) throws Exception {
    this(ereCorpus, new TokenizerTextAnnotationBuilder(new StatefulTokenizer()), corpusRoot, throwExceptionOnXmlParseFailure);
}
/**
 * Creates a reader over an ERE English corpus using the default tokenization: delegates to the
 * four-argument constructor with a {@link TokenizerTextAnnotationBuilder} wrapping a
 * default-constructed {@link StatefulTokenizer}.
 * <p>
 * NOTE(review): this Javadoc used to describe reading "mention-relation annotations -- including
 * coreference", which looks copied from the mention-relation reader; presumably this class reads
 * event annotations (per its name) -- confirm.
 *
 * @param ereCorpus the ERE corpus release (values from {@link EreCorpus})
 * @param corpusRoot the data root directory for the ERE corpus to be processed
 * @param throwExceptionOnXmlParseFailure if 'true', throws exception if xml parser encounters e.g. mismatched
 *        open/close tags
 * @throws Exception propagated from the delegated constructor
 */
public EREEventReader(EreCorpus ereCorpus, String corpusRoot, boolean throwExceptionOnXmlParseFailure) throws Exception {
    this(ereCorpus, new TokenizerTextAnnotationBuilder(new StatefulTokenizer()), corpusRoot, throwExceptionOnXmlParseFailure);
}
/**
 * Read mention-relation annotations -- including coreference -- from ERE corpus.
 * <p>
 * Convenience constructor: delegates to the four-argument constructor, supplying a
 * {@link TokenizerTextAnnotationBuilder} that wraps a default-constructed {@link StatefulTokenizer}.
 *
 * @param ereCorpus the ERE corpus release (values from
 *        {@link edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREDocumentReader.EreCorpus})
 * @param corpusRoot forwarded unchanged to the delegated constructor; presumably the data root
 *        directory for the ERE corpus to be processed -- TODO confirm
 * @param throwExceptionOnXmlParseFailure if 'true', throws exception if xml parser encounters e.g. mismatched
 *        open/close tags
 * @throws Exception propagated from the delegated constructor
 */
public EREMentionRelationReader(EreCorpus ereCorpus, String corpusRoot, boolean throwExceptionOnXmlParseFailure) throws Exception {
    this(ereCorpus, new TokenizerTextAnnotationBuilder(new StatefulTokenizer()), corpusRoot, throwExceptionOnXmlParseFailure);
}
/**
 * Initialises {@code tab} with a {@link TokenizerTextAnnotationBuilder} backed by a
 * default-constructed {@link StatefulTokenizer}.
 */
public EnglishTokenizer() {
    StatefulTokenizer defaultTokenizer = new StatefulTokenizer();
    tab = new TokenizerTextAnnotationBuilder(defaultTokenizer);
}
/**
 * Initialises {@code tab} with a {@link TokenizerTextAnnotationBuilder} backed by a
 * default-constructed {@link StatefulTokenizer}.
 */
public EnglishTokenizer() {
    StatefulTokenizer defaultTokenizer = new StatefulTokenizer();
    tab = new TokenizerTextAnnotationBuilder(defaultTokenizer);
}
/**
 * Reads Named Entity -- and possibly nominal mention -- annotation from an English ERE-format corpus.
 * <p>
 * Convenience constructor: delegates to the six-argument constructor, supplying a
 * {@link TokenizerTextAnnotationBuilder} that wraps a default-constructed {@link StatefulTokenizer}.
 *
 * @param ereCorpus forwarded unchanged to the delegated constructor; presumably identifies the ERE
 *        corpus release being read -- TODO confirm
 * @param corpusRoot forwarded unchanged to the delegated constructor; presumably the data root
 *        directory of the corpus -- TODO confirm
 * @param throwExceptionOnXmlParseFailure forwarded unchanged to the delegated constructor; presumably
 *        makes xml parse failures (e.g. mismatched open/close tags) raise an exception -- TODO confirm
 * @param addNominalMentions a flag that if true, indicates that nominal mentions should be read,
 *        and that the view created should be named {@code ViewNames.MENTION_ERE}.
 * @param addFillers if 'true', indicates that non-coreferable mentions should be added.
 * @throws Exception propagated from the delegated constructor
 */
public ERENerReader(EreCorpus ereCorpus, String corpusRoot, boolean throwExceptionOnXmlParseFailure, boolean addNominalMentions, boolean addFillers) throws Exception {
    this(ereCorpus, new TokenizerTextAnnotationBuilder(new StatefulTokenizer()), corpusRoot, throwExceptionOnXmlParseFailure, addNominalMentions, addFillers);
}
/**
 * Reads Named Entity -- and possibly nominal mention -- annotation from an English ERE-format corpus.
 * <p>
 * Convenience constructor: delegates to the six-argument constructor, supplying a
 * {@link TokenizerTextAnnotationBuilder} that wraps a default-constructed {@link StatefulTokenizer}.
 *
 * @param ereCorpus forwarded unchanged to the delegated constructor; presumably identifies the ERE
 *        corpus release being read -- TODO confirm
 * @param corpusRoot forwarded unchanged to the delegated constructor; presumably the data root
 *        directory of the corpus -- TODO confirm
 * @param throwExceptionOnXmlParseFailure forwarded unchanged to the delegated constructor; presumably
 *        makes xml parse failures (e.g. mismatched open/close tags) raise an exception -- TODO confirm
 * @param addNominalMentions a flag that if true, indicates that nominal mentions should be read,
 *        and that the view created should be named {@code ViewNames.MENTION_ERE}.
 * @param addFillers if 'true', indicates that non-coreferable mentions should be added.
 * @throws Exception propagated from the delegated constructor
 */
public ERENerReader(EreCorpus ereCorpus, String corpusRoot, boolean throwExceptionOnXmlParseFailure, boolean addNominalMentions, boolean addFillers) throws Exception {
    this(ereCorpus, new TokenizerTextAnnotationBuilder(new StatefulTokenizer()), corpusRoot, throwExceptionOnXmlParseFailure, addNominalMentions, addFillers);
}
/**
 * Builds an {@link XmlTextAnnotationMaker} for reading an ERE format English corpus, using a
 * {@link TokenizerTextAnnotationBuilder} over a default-constructed {@link StatefulTokenizer}.
 *
 * @param ereCorpus which ERE release is being processed -- affects which tag blocks are marked
 * @param throwExceptionOnXmlParseFail if 'true', throw an exception if xml parser fails
 * @return an XmlTextAnnotationMaker configured for English ERE.
 */
public static XmlTextAnnotationMaker buildXmlTextAnnotationMaker(EreCorpus ereCorpus,
        boolean throwExceptionOnXmlParseFail) {
    StatefulTokenizer englishTokenizer = new StatefulTokenizer();
    // Kept as a TextAnnotationBuilder-typed local so the same three-argument overload is chosen.
    TextAnnotationBuilder englishBuilder = new TokenizerTextAnnotationBuilder(englishTokenizer);
    return buildXmlTextAnnotationMaker(englishBuilder, ereCorpus, throwExceptionOnXmlParseFail);
}
/**
 * Builds an {@link XmlTextAnnotationMaker} for reading an ERE format English corpus, using a
 * {@link TokenizerTextAnnotationBuilder} over a default-constructed {@link StatefulTokenizer}.
 *
 * @param ereCorpus which ERE release is being processed -- affects which tag blocks are marked
 * @param throwExceptionOnXmlParseFail if 'true', throw an exception if xml parser fails
 * @return an XmlTextAnnotationMaker configured for English ERE.
 */
public static XmlTextAnnotationMaker buildXmlTextAnnotationMaker(EreCorpus ereCorpus,
        boolean throwExceptionOnXmlParseFail) {
    StatefulTokenizer englishTokenizer = new StatefulTokenizer();
    // Kept as a TextAnnotationBuilder-typed local so the same three-argument overload is chosen.
    TextAnnotationBuilder englishBuilder = new TokenizerTextAnnotationBuilder(englishTokenizer);
    return buildXmlTextAnnotationMaker(englishBuilder, ereCorpus, throwExceptionOnXmlParseFail);
}
/** * assumes files are all from a single source directory, and that no extraneous files are included in that directory. * * @param corpusName * @param sourceDirectory * @throws IOException */ public XmlFragmentWhitespacingDocumentReader(String corpusName, String sourceDirectory, String sourceFileExtension, String annotationFileExtension) throws Exception { super(CorpusReaderConfigurator.buildResourceManager(corpusName, sourceDirectory, sourceDirectory, sourceFileExtension, annotationFileExtension)); taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer()); numFiles = 0; numTextAnnotations = 0; }
/** * assumes files are all from a single source directory, and that no extraneous files are included in that directory. * * @param corpusName * @param sourceDirectory * @throws IOException */ public XmlFragmentWhitespacingDocumentReader(String corpusName, String sourceDirectory, String sourceFileExtension, String annotationFileExtension) throws Exception { super(CorpusReaderConfigurator.buildResourceManager(corpusName, sourceDirectory, sourceDirectory, sourceFileExtension, annotationFileExtension)); taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer()); numFiles = 0; numTextAnnotations = 0; }
public TextAnnotation runNER(String s) { TextAnnotationBuilder tab; boolean splitOnHyphens = false; tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(splitOnHyphens, false)); TextAnnotation ta = tab.createTextAnnotation("001", "001", s); try { co.getView(ta); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return ta; }
/**
 * Creates a TACReader configured for English: the superclass receives the configuration from
 * {@code TACReader.buildTACConfig(corpusRoot, Language.English)} and an xml text annotation maker
 * built over a {@link TokenizerTextAnnotationBuilder} wrapping a default-constructed
 * {@link StatefulTokenizer}.
 *
 * @param corpusRoot passed to {@code buildTACConfig}; presumably the corpus root directory -- TODO confirm
 * @param throwExceptionOnXmlParseFail forwarded to {@code buildXmlTextAnnotationMaker}; presumably
 *        controls whether xml parse failures raise an exception -- TODO confirm
 * @throws Exception propagated from configuration building or the superclass constructor
 */
public TACReader(String corpusRoot, boolean throwExceptionOnXmlParseFail) throws Exception {
    super(TACReader.buildTACConfig(corpusRoot, Language.English), buildXmlTextAnnotationMaker(new TokenizerTextAnnotationBuilder(new StatefulTokenizer()), throwExceptionOnXmlParseFail));
}
/**
 * Creates a TACReader configured for English: the superclass receives the configuration from
 * {@code TACReader.buildTACConfig(corpusRoot, Language.English)} and an xml text annotation maker
 * built over a {@link TokenizerTextAnnotationBuilder} wrapping a default-constructed
 * {@link StatefulTokenizer}.
 *
 * @param corpusRoot passed to {@code buildTACConfig}; presumably the corpus root directory -- TODO confirm
 * @param throwExceptionOnXmlParseFail forwarded to {@code buildXmlTextAnnotationMaker}; presumably
 *        controls whether xml parse failures raise an exception -- TODO confirm
 * @throws Exception propagated from configuration building or the superclass constructor
 */
public TACReader(String corpusRoot, boolean throwExceptionOnXmlParseFail) throws Exception {
    super(TACReader.buildTACConfig(corpusRoot, Language.English), buildXmlTextAnnotationMaker(new TokenizerTextAnnotationBuilder(new StatefulTokenizer()), throwExceptionOnXmlParseFail));
}
/**
 * Assembles a {@link BasicAnnotatorService} from a {@link TokenizerTextAnnotationBuilder}
 * (over a {@code StatefulTokenizer(false, false)}), the annotators returned by
 * {@code buildAnnotators()}, and the supplied resource manager.
 *
 * @param rm non-default config options
 * @return AnnotatorService with specified NLP components
 * @throws IOException declared by this method; presumably raised while loading annotator
 *         resources -- TODO confirm
 * @throws AnnotatorException declared by this method; presumably raised by annotator or service
 *         construction -- TODO confirm
 */
public static BasicAnnotatorService buildPipeline(ResourceManager rm) throws IOException, AnnotatorException {
    StatefulTokenizer tokenizer = new StatefulTokenizer(false, false);
    TextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(tokenizer);
    Map<String, Annotator> annotatorsByView = buildAnnotators();
    return new BasicAnnotatorService(builder, annotatorsByView, rm);
}
/**
 * Builds the preprocessing pipeline: merges {@code rm} with the default
 * {@link AnnotatorServiceConfigurator} configuration and creates a {@link BasicAnnotatorService}
 * with POS, lemma, shallow-parse and dependency annotators over a
 * {@code StatefulTokenizer(true, false)}-based text annotation builder.
 * <p>
 * Construction is best-effort: if any component fails to initialise, the error is logged
 * (message and full stack trace) and {@code annotator} is not assigned by this constructor.
 *
 * @param rm configuration merged with the defaults; presumably its values take precedence over
 *        the defaults -- TODO confirm against {@code Configurator.mergeProperties}
 */
public Preprocessor(ResourceManager rm) {
    ResourceManager fullRm = Configurator.mergeProperties(new AnnotatorServiceConfigurator().getDefaultConfig(), rm);
    try {
        TextAnnotationBuilder taBldr = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(true, false));
        Map<String, Annotator> annotators = new HashMap<>();
        annotators.put(ViewNames.POS, new POSAnnotator());
        annotators.put(ViewNames.LEMMA, new IllinoisLemmatizer());
        annotators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
        annotators.put(ViewNames.DEPENDENCY, new DepAnnotator());
        annotator = new BasicAnnotatorService(taBldr, annotators, fullRm);
    } catch (Exception e) {
        // Pass the throwable as the trailing argument so the logger records the stack trace and
        // cause chain, not just the (possibly null) message.
        logger.error("Unable to create preprocessor. \n{}", e.getMessage(), e);
    }
}
/**
 * Reads the file at {@code filepath} line by line; for each line builds a {@link TextAnnotation},
 * runs the {@link Preprocessor} and the {@link DepAnnotator} on it, and prints the annotator's
 * view to standard output. A line whose annotation fails with an {@link AnnotatorException} has
 * its stack trace printed and processing continues with the next line.
 *
 * @param filepath path of the text file to annotate, one input per line
 * @throws IOException if the file cannot be opened
 */
private static void annotate(String filepath) throws IOException {
    DepAnnotator annotator = new DepAnnotator();
    TextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(true, false));
    Preprocessor preprocessor = new Preprocessor();
    // Files.lines keeps the file open until the stream is closed, so scope it with
    // try-with-resources to release the handle even if processing throws.
    try (java.util.stream.Stream<String> lines = Files.lines(Paths.get(filepath))) {
        lines.forEach(line -> {
            TextAnnotation ta = taBuilder.createTextAnnotation(line);
            try {
                preprocessor.annotate(ta);
                annotator.addView(ta);
                System.out.println(ta.getView(annotator.getViewName()).toString());
            } catch (AnnotatorException e) {
                // Best effort per line: report and move on to the next line.
                e.printStackTrace();
            }
        });
    }
}
/**
 * Builds the preprocessing pipeline: merges {@code rm} with the default
 * {@link AnnotatorServiceConfigurator} configuration and creates a {@link BasicAnnotatorService}
 * with POS, lemma, shallow-parse and dependency annotators over a
 * {@code StatefulTokenizer(true, false)}-based text annotation builder.
 * <p>
 * Construction is best-effort: if any component fails to initialise, the error is logged
 * (message and full stack trace) and {@code annotator} is not assigned by this constructor.
 *
 * @param rm configuration merged with the defaults; presumably its values take precedence over
 *        the defaults -- TODO confirm against {@code Configurator.mergeProperties}
 */
public Preprocessor(ResourceManager rm) {
    ResourceManager fullRm = Configurator.mergeProperties(new AnnotatorServiceConfigurator().getDefaultConfig(), rm);
    try {
        TextAnnotationBuilder taBldr = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(true, false));
        Map<String, Annotator> annotators = new HashMap<>();
        annotators.put(ViewNames.POS, new POSAnnotator());
        annotators.put(ViewNames.LEMMA, new IllinoisLemmatizer());
        annotators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
        annotators.put(ViewNames.DEPENDENCY, new DepAnnotator());
        annotator = new BasicAnnotatorService(taBldr, annotators, fullRm);
    } catch (Exception e) {
        // Pass the throwable as the trailing argument so the logger records the stack trace and
        // cause chain, not just the (possibly null) message.
        logger.error("Unable to create preprocessor. \n{}", e.getMessage(), e);
    }
}