/**
 * Creates an annotator whose extraction rules are loaded from the given files.
 * A new TokensRegex environment is created for the rules, and verbose output is off.
 *
 * @param files rule files passed to {@code CoreMapExpressionExtractor.createExtractorFromFiles}
 */
public TokensRegexAnnotator(String... files) {
  verbose = false;
  env = TokenSequencePattern.getNewEnv();
  extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, files);
}
public static Env getNewEnv() { Env env = TokenSequencePattern.getNewEnv(); // Do case insensitive matching env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); initEnv(env); return env; }
/**
 * Reads class bindings from an env file and registers them in TokensRegex environments.
 * Each line is split on {@code ###} and must have one of two shapes:
 * <ul>
 *   <li>{@code label###name###className}: binds {@code name} to the class in the
 *       environment stored under {@code label}, creating that environment if needed;</li>
 *   <li>{@code name###className}: binds {@code name} to the class in {@code globalEnv}.</li>
 * </ul>
 *
 * @param s location of the env file, as accepted by {@code IOUtils.readLines}
 * @param env per-label environments; a new one is added for each label not yet present
 * @param globalEnv environment that receives the two-field bindings
 * @throws ClassNotFoundException if a class name on some line cannot be loaded
 * @throws RuntimeException if a line has neither two nor three fields
 */
private static void readClassesInEnv(String s, Map<String, Env> env, Env globalEnv) throws ClassNotFoundException {
  for (String line : IOUtils.readLines(s)) {
    String[] toks = line.split("###");
    if (toks.length == 3) {
      String label = toks[0];
      String name = toks[1];
      // Class.forName either returns a class or throws; it never yields null
      Class<?> c = Class.forName(toks[2]);
      Env labelEnv = env.get(label);
      if (labelEnv == null) {
        labelEnv = TokenSequencePattern.getNewEnv();
        env.put(label, labelEnv);
      }
      labelEnv.bind(name, c);
    } else if (toks.length == 2) {
      String name = toks[0];
      Class<?> c = Class.forName(toks[1]);
      globalEnv.bind(name, c);
    } else {
      // Name the file and the offending line so a malformed entry can be located
      throw new RuntimeException("Ill formed env file! Expected 2 or 3 fields separated by ### in "
          + s + " but found " + toks.length + " in line: " + line);
    }
  }
}
public KBPTokensregexExtractor(String tokensregexDir, boolean verbose) { if (verbose) logger.log("Creating TokensRegexExtractor"); // Create extractors for (RelationType rel : RelationType.values()) { String relFileNameComponent = rel.canonicalName.replaceAll(":", "_"); String path = tokensregexDir + File.separator + relFileNameComponent.replaceAll("/", "SLASH") + ".rules"; if (IOUtils.existsInClasspathOrFileSystem(path)) { List<String> listFiles = new ArrayList<>(); listFiles.add(tokensregexDir + File.separator + "defs.rules"); listFiles.add(path); if (verbose) logger.log("Rule files for relation " + rel + " is " + path); Env env = TokenSequencePattern.getNewEnv(); env.bind("collapseExtractionRules", true); env.bind("verbose", verbose); CoreMapExpressionExtractor extr = CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags(); rules.put(rel, extr); } } }
// Choose the flags from ignoreCaseEntry: when it is true, both flag sets combine
// CASE_INSENSITIVE with UNICODE_CASE (Pattern constants for patternFlags, NodePattern
// constants for stringMatchFlags); otherwise both are 0. A new environment is then
// created and given these as its default string-pattern and string-match flags.
int patternFlags = ignoreCaseEntry? Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE:0; int stringMatchFlags = ignoreCaseEntry? (NodePattern.CASE_INSENSITIVE | NodePattern.UNICODE_CASE):0; Env env = TokenSequencePattern.getNewEnv(); env.setDefaultStringPatternFlags(patternFlags); env.setDefaultStringMatchFlags(stringMatchFlags);
// Store a newly created TokensRegex environment under this label.
// NOTE(review): env is presumably a map from label to Env here -- confirm against the enclosing method.
env.put(label, TokenSequencePattern.getNewEnv());
private void initEnv() { env = TokenSequencePattern.getNewEnv(); env.setDefaultTokensAnnotationKey(CoreAnnotations.NumerizedTokensAnnotation.class); // Do case insensitive matching env.setDefaultStringMatchFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); try { Units.registerUnits(env, options.unitsFilename); } catch (IOException ex) { throw new RuntimeException("Error loading units from " + options.unitsFilename, ex); } try { UnitPrefix.registerPrefixes(env, options.prefixFilename); } catch (IOException ex) { throw new RuntimeException("Error loading prefixes from " + options.prefixFilename, ex); } env.bind("options", options); env.bind("numcomptype", CoreAnnotations.NumericCompositeTypeAnnotation.class); env.bind("numcompvalue", CoreAnnotations.NumericCompositeValueAnnotation.class); }
/**
 * Creates an annotator configured from properties.
 * All property names are prefixed with {@code name + '.'} unless {@code name} is null.
 * Properties read:
 * <ul>
 *   <li>{@code rules}: rule files; when none are given the extractor is left {@code null}</li>
 *   <li>{@code caseInsensitive}: when true, default string matching ignores case</li>
 *   <li>{@code verbose}: defaults to false</li>
 *   <li>{@code setTokenOffsets}, {@code extractWithTokens}, {@code flatten}: override the
 *       corresponding fields of {@code options}, keeping their current values as defaults</li>
 *   <li>{@code matchedExpressionsAnnotationKey}: annotation key looked up through the environment</li>
 * </ul>
 *
 * @param name annotator name used as the property prefix; may be null for no prefix
 * @param props properties to read the configuration from
 * @throws RuntimeException if {@code matchedExpressionsAnnotationKey} is set but cannot be resolved
 */
public TokensRegexAnnotator(String name, Properties props) {
  String prefix = (name == null) ? "" : name + '.';
  String[] files = PropertiesUtils.getStringArray(props, prefix + "rules");
  env = TokenSequencePattern.getNewEnv();
  env.bind("options", options);
  if (PropertiesUtils.getBool(props, prefix + "caseInsensitive")) {
    System.err.println("using case insensitive!");
    // String-match flags use the NodePattern constants for both operands
    env.setDefaultStringMatchFlags(NodePattern.CASE_INSENSITIVE | NodePattern.UNICODE_CASE);
    env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
  }
  // Treat a missing rules array the same as an empty one instead of failing on files.length
  if (files != null && files.length != 0) {
    extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, files);
  } else {
    extractor = null;
  }
  verbose = PropertiesUtils.getBool(props, prefix + "verbose", false);
  options.setTokenOffsets = PropertiesUtils.getBool(props, prefix + "setTokenOffsets", options.setTokenOffsets);
  options.extractWithTokens = PropertiesUtils.getBool(props, prefix + "extractWithTokens", options.extractWithTokens);
  options.flatten = PropertiesUtils.getBool(props, prefix + "flatten", options.flatten);
  String matchedExpressionsAnnotationKeyName = props.getProperty(prefix + "matchedExpressionsAnnotationKey");
  if (matchedExpressionsAnnotationKeyName != null) {
    options.matchedExpressionsAnnotationKey = EnvLookup.lookupAnnotationKeyWithClassname(env, matchedExpressionsAnnotationKeyName);
    if (options.matchedExpressionsAnnotationKey == null) {
      String propName = prefix + "matchedExpressionsAnnotationKey";
      throw new RuntimeException("Cannot determine annotation key for " + propName + '=' + matchedExpressionsAnnotationKeyName);
    }
  }
}
.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules);
private void initEnv() env = TokenSequencePattern.getNewEnv(); env.setDefaultResultsAnnotationExtractor(TimeExpression.TimeExpressionConverter); env.setDefaultTokensAnnotationKey(CoreAnnotations.NumerizedTokensAnnotation.class);
/**
 * Creates an annotator whose extraction rules are loaded from the given files.
 * A new TokensRegex environment is created for the rules, and verbose output is off.
 *
 * @param files rule files passed to {@code CoreMapExpressionExtractor.createExtractorFromFiles}
 */
public TokensRegexAnnotator(String... files) {
  verbose = false;
  env = TokenSequencePattern.getNewEnv();
  extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, files);
}
/**
 * Creates an annotator whose extraction rules are loaded from the given files,
 * using a newly created TokensRegex environment.
 *
 * @param files rule files passed to {@code CoreMapExpressionExtractor.createExtractorFromFiles}
 */
public TokensRegexAnnotator(String... files) {
  this.env = TokenSequencePattern.getNewEnv();
  this.extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, files);
}
public static TokenSequencePattern.Env getNewEnv() { TokenSequencePattern.Env env = TokenSequencePattern.getNewEnv(); // Do case insensitive matching env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE); initEnv(env); return env; }
public static Env getNewEnv() { Env env = TokenSequencePattern.getNewEnv(); // Do case insensitive matching env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); initEnv(env); return env; }
public static Env getNewEnv() { Env env = TokenSequencePattern.getNewEnv(); // Do case insensitive matching env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE); initEnv(env); return env; }
/**
 * Reads class bindings from an env file and registers them in TokensRegex environments.
 * Each line is split on {@code ###} and must have one of two shapes:
 * <ul>
 *   <li>{@code label###name###className}: binds {@code name} to the class in the
 *       environment stored under {@code label}, creating that environment if needed;</li>
 *   <li>{@code name###className}: binds {@code name} to the class in {@code globalEnv}.</li>
 * </ul>
 *
 * @param s location of the env file, as accepted by {@code IOUtils.readLines}
 * @param env per-label environments; a new one is added for each label not yet present
 * @param globalEnv environment that receives the two-field bindings
 * @throws ClassNotFoundException if a class name on some line cannot be loaded
 * @throws RuntimeException if a line has neither two nor three fields
 */
private static void readClassesInEnv(String s, Map<String, Env> env, Env globalEnv) throws ClassNotFoundException {
  for (String line : IOUtils.readLines(s)) {
    String[] toks = line.split("###");
    if (toks.length == 3) {
      String label = toks[0];
      String name = toks[1];
      // Class.forName either returns a class or throws; it never yields null
      Class<?> c = Class.forName(toks[2]);
      Env labelEnv = env.get(label);
      if (labelEnv == null) {
        labelEnv = TokenSequencePattern.getNewEnv();
        env.put(label, labelEnv);
      }
      labelEnv.bind(name, c);
    } else if (toks.length == 2) {
      String name = toks[0];
      Class<?> c = Class.forName(toks[1]);
      globalEnv.bind(name, c);
    } else {
      // Name the file and the offending line so a malformed entry can be located
      throw new RuntimeException("Ill formed env file! Expected 2 or 3 fields separated by ### in "
          + s + " but found " + toks.length + " in line: " + line);
    }
  }
}
public KBPTokensregexExtractor(String tokensregexDir, boolean verbose) { if (verbose) logger.log("Creating TokensRegexExtractor"); // Create extractors for (RelationType rel : RelationType.values()) { String relFileNameComponent = rel.canonicalName.replaceAll(":", "_"); String path = tokensregexDir + File.separator + relFileNameComponent.replaceAll("/", "SLASH") + ".rules"; if (IOUtils.existsInClasspathOrFileSystem(path)) { List<String> listFiles = new ArrayList<>(); listFiles.add(tokensregexDir + File.separator + "defs.rules"); listFiles.add(path); if (verbose) logger.log("Rule files for relation " + rel + " is " + path); Env env = TokenSequencePattern.getNewEnv(); env.bind("collapseExtractionRules", true); env.bind("verbose", verbose); CoreMapExpressionExtractor extr = CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags(); rules.put(rel, extr); } } }
/**
 * Creates an annotator configured from properties.
 * All property names are prefixed with {@code name + "."} unless {@code name} is null.
 * Properties read:
 * <ul>
 *   <li>{@code rules}: rule files; required</li>
 *   <li>{@code verbose}: keeps the current value of {@code verbose} when absent</li>
 *   <li>{@code setTokenOffsets}, {@code extractWithTokens}, {@code flatten}: override the
 *       corresponding fields of {@code options}, keeping their current values as defaults</li>
 *   <li>{@code matchedExpressionsAnnotationKey}: annotation key looked up through the environment</li>
 * </ul>
 *
 * @param name annotator name used as the property prefix; may be null for no prefix
 * @param props properties to read the configuration from
 * @throws RuntimeException if no rule files are specified, or if
 *         {@code matchedExpressionsAnnotationKey} is set but cannot be resolved
 */
public TokensRegexAnnotator(String name, Properties props) {
  final String prefix;
  if (name == null) {
    prefix = "";
  } else {
    prefix = name + ".";
  }

  String[] files = PropertiesUtils.getStringArray(props, prefix + "rules");
  boolean hasRules = files != null && files.length != 0;
  if (!hasRules) {
    throw new RuntimeException("No rules specified for TokensRegexAnnotator " + name + ", check " + prefix + "rules property");
  }

  env = TokenSequencePattern.getNewEnv();
  env.bind("options", options);
  extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, files);

  verbose = PropertiesUtils.getBool(props, prefix + "verbose", verbose);
  options.setTokenOffsets = PropertiesUtils.getBool(props, prefix + "setTokenOffsets", options.setTokenOffsets);
  options.extractWithTokens = PropertiesUtils.getBool(props, prefix + "extractWithTokens", options.extractWithTokens);
  options.flatten = PropertiesUtils.getBool(props, prefix + "flatten", options.flatten);

  // The annotation key is optional; nothing more to do when it is not configured.
  String keyProperty = prefix + "matchedExpressionsAnnotationKey";
  String keyName = props.getProperty(keyProperty);
  if (keyName == null) {
    return;
  }
  options.matchedExpressionsAnnotationKey = EnvLookup.lookupAnnotationKey(env, keyName);
  if (options.matchedExpressionsAnnotationKey == null) {
    throw new RuntimeException("Cannot determine annotation key for " + keyProperty + "=" + keyName);
  }
}
private void initEnv() { env = TokenSequencePattern.getNewEnv(); env.setDefaultTokensAnnotationKey(CoreAnnotations.NumerizedTokensAnnotation.class); // Do case insensitive matching env.setDefaultStringMatchFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); try { Units.registerUnits(env, options.unitsFilename); } catch (IOException ex) { throw new RuntimeException("Error loading units from " + options.unitsFilename, ex); } try { UnitPrefix.registerPrefixes(env, options.prefixFilename); } catch (IOException ex) { throw new RuntimeException("Error loading prefixes from " + options.prefixFilename, ex); } env.bind("options", options); env.bind("numcomptype", CoreAnnotations.NumericCompositeTypeAnnotation.class); env.bind("numcompvalue", CoreAnnotations.NumericCompositeValueAnnotation.class); }
/**
 * Creates an annotator configured from properties.
 * All property names are prefixed with {@code name + '.'} unless {@code name} is null.
 * Properties read:
 * <ul>
 *   <li>{@code rules}: rule files; when none are given the extractor is left {@code null}</li>
 *   <li>{@code caseInsensitive}: when true, default string matching ignores case</li>
 *   <li>{@code verbose}: defaults to false</li>
 *   <li>{@code setTokenOffsets}, {@code extractWithTokens}, {@code flatten}: override the
 *       corresponding fields of {@code options}, keeping their current values as defaults</li>
 *   <li>{@code matchedExpressionsAnnotationKey}: annotation key looked up through the environment</li>
 * </ul>
 *
 * @param name annotator name used as the property prefix; may be null for no prefix
 * @param props properties to read the configuration from
 * @throws RuntimeException if {@code matchedExpressionsAnnotationKey} is set but cannot be resolved
 */
public TokensRegexAnnotator(String name, Properties props) {
  String prefix = (name == null) ? "" : name + '.';
  String[] files = PropertiesUtils.getStringArray(props, prefix + "rules");
  env = TokenSequencePattern.getNewEnv();
  env.bind("options", options);
  if (PropertiesUtils.getBool(props, prefix + "caseInsensitive")) {
    System.err.println("using case insensitive!");
    // String-match flags use the NodePattern constants for both operands
    env.setDefaultStringMatchFlags(NodePattern.CASE_INSENSITIVE | NodePattern.UNICODE_CASE);
    env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
  }
  // Treat a missing rules array the same as an empty one instead of failing on files.length
  if (files != null && files.length != 0) {
    extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, files);
  } else {
    extractor = null;
  }
  verbose = PropertiesUtils.getBool(props, prefix + "verbose", false);
  options.setTokenOffsets = PropertiesUtils.getBool(props, prefix + "setTokenOffsets", options.setTokenOffsets);
  options.extractWithTokens = PropertiesUtils.getBool(props, prefix + "extractWithTokens", options.extractWithTokens);
  options.flatten = PropertiesUtils.getBool(props, prefix + "flatten", options.flatten);
  String matchedExpressionsAnnotationKeyName = props.getProperty(prefix + "matchedExpressionsAnnotationKey");
  if (matchedExpressionsAnnotationKeyName != null) {
    options.matchedExpressionsAnnotationKey = EnvLookup.lookupAnnotationKeyWithClassname(env, matchedExpressionsAnnotationKeyName);
    if (options.matchedExpressionsAnnotationKey == null) {
      String propName = prefix + "matchedExpressionsAnnotationKey";
      throw new RuntimeException("Cannot determine annotation key for " + propName + '=' + matchedExpressionsAnnotationKeyName);
    }
  }
}