public static void addSubstringRegexPipes(List<String> usedPipeNames, List<Pipe> pipes) throws Exception { usedPipeNames.add("Substring regexes"); // "thalamic" and nuclie are probably in the 1-grams for (String substring : new String[] { "cortic", "cerebel" }) { pipes.add(new RegexMatches(substring + "Regex", compile(".*" + substring + ".*", CASE_INSENSITIVE))); } }
/**
 * Appends one {@code RegexMatches} pipe per non-blank line of {@code file}.
 *
 * <p>Each line is trimmed and embedded in the case-insensitive pattern
 * {@code "(" + line + ".{1,3})"}, i.e. the line's text followed by one to three further
 * characters. The line is inserted verbatim, so any regex metacharacters it contains are
 * interpreted as regex syntax.
 *
 * <p>Lines that are empty after trimming are skipped: they would otherwise produce
 * {@code (.{1,3})}, which matches every token of one to three characters.
 *
 * @param pipes list that receives the newly created pipes, in file order
 * @param file file whose lines supply the prefixes; read via {@code linesFrom} on its absolute path
 * @param name feature name given to every pipe created here
 * @throws IOException declared by the existing signature (presumably raised by
 *     {@code linesFrom} when the file cannot be read -- confirm against that helper)
 */
public static void addPrefixPipes(List<Pipe> pipes, File file, String name) throws IOException {
    for (String line : linesFrom(file.getAbsolutePath())) {
        String prefix = line.trim();
        if (prefix.length() == 0) {
            // A blank line carries no prefix; the resulting pattern would match any short token.
            continue;
        }
        pipes.add(new RegexMatches(name, compile("(" + prefix + ".{1,3})", CASE_INSENSITIVE)));
    }
}
/**
 * Feeds {@code data} through a serial pipeline containing several {@code RegexMatches}
 * features and checks that the resulting instance list holds 19 instances.
 */
public void testTwo () {
    // Stages run in array order; PrintInput sits just before the final conversion stage.
    Pipe[] stages = new Pipe[] {
        new CharSequence2TokenSequence (),
        new TokenSequenceLowercase (),
        new RegexMatches ("vowel", Pattern.compile ("[aeiou]")),
        new RegexMatches ("firsthalf", Pattern.compile ("[a-m]")),
        new RegexMatches ("secondhalf", Pattern.compile ("[n-z]")),
        new RegexMatches ("length2", Pattern.compile ("..")),
        new RegexMatches ("length3", Pattern.compile ("...")),
        new PrintInput (),
        new TokenSequence2TokenInstances()
    };
    Pipe p = new SerialPipes (stages);

    InstanceList ilist = new InstanceList (p);
    ilist.addThruPipe (new StringArrayIterator(data));

    // Checked twice on purpose: the language-level assert only fires under -ea,
    // while assertTrue always runs.
    int instanceCount = ilist.size();
    assert (instanceCount == 19) : "list size = " + instanceCount;
    assertTrue (instanceCount == 19);
}
/**
 * Feeds {@code data} through a serial pipeline containing several {@code RegexMatches}
 * features and checks that the resulting instance list holds 19 instances.
 */
public void testTwo () {
    // Stages run in array order; PrintInput sits just before the final conversion stage.
    Pipe[] stages = new Pipe[] {
        new CharSequence2TokenSequence (),
        new TokenSequenceLowercase (),
        new RegexMatches ("vowel", Pattern.compile ("[aeiou]")),
        new RegexMatches ("firsthalf", Pattern.compile ("[a-m]")),
        new RegexMatches ("secondhalf", Pattern.compile ("[n-z]")),
        new RegexMatches ("length2", Pattern.compile ("..")),
        new RegexMatches ("length3", Pattern.compile ("...")),
        new PrintInput (),
        new TokenSequence2TokenInstances()
    };
    Pipe p = new SerialPipes (stages);

    InstanceList ilist = new InstanceList (p);
    ilist.addThruPipe (new StringArrayIterator(data));

    // Checked twice on purpose: the language-level assert only fires under -ea,
    // while assertTrue always runs.
    int instanceCount = ilist.size();
    assert (instanceCount == 19) : "list size = " + instanceCount;
    assertTrue (instanceCount == 19);
}
pipes.add(new TokenTextCharSuffix("SUFFIX=", 3)); pipes.add(new TokenTextCharNGrams("NGRAM=", new int[] { 2, 3 })); pipes.add(new RegexMatches("ALL_CAPS_REGEX", Pattern.compile(TextUtil.ALL_CAPS_REGEX))); pipes.add(new RegexMatches("ALPHA_NUMERIC_REGEX", Pattern.compile(TextUtil.ALPHA_NUMERIC_REGEX))); pipes.add(new RegexMatches("CAPS_MIX_REGEX", Pattern.compile(TextUtil.CAPS_MIX_REGEX))); pipes.add(new RegexMatches("EMAIL_REGEX", Pattern.compile(TextUtil.EMAIL_REGEX))); pipes.add(new RegexMatches("END_DASH_REGEX", Pattern.compile(TextUtil.END_DASH_REGEX))); pipes.add(new RegexMatches("EXP_NUMBER_REGEX", Pattern.compile(TextUtil.EXP_NUMBER_REGEX))); pipes.add(new RegexMatches("FLOATING_POINT_NUMBER_REGEX", Pattern.compile(TextUtil.FLOATING_POINT_NUMBER_REGEX))); pipes.add(new RegexMatches("FOUR_CAPS_REGEX", Pattern.compile(TextUtil.FOUR_CAPS_REGEX))); pipes.add(new RegexMatches("FOUR_DIGITS_REGEX", Pattern.compile(TextUtil.FOUR_DIGITS_REGEX))); pipes.add(new RegexMatches("HAS_DASH_REGEX", Pattern.compile(TextUtil.HAS_DASH_REGEX))); pipes.add(new RegexMatches("HAS_DIGIT_REGEX", Pattern.compile(TextUtil.HAS_DIGIT_REGEX))); pipes.add(new RegexMatches("HEX_REGEX", Pattern.compile(TextUtil.HEX_REGEX))); pipes.add(new RegexMatches("HTML_REGEX", Pattern.compile(TextUtil.HTML_REGEX))); pipes.add(new RegexMatches("IN_PARENTHESES_REGEX", Pattern.compile(TextUtil.IN_PARENTHESES_REGEX))); pipes.add(new RegexMatches("INIT_CAPS_ALPHA_REGEX", Pattern.compile(TextUtil.INIT_CAPS_ALPHA_REGEX))); pipes.add(new RegexMatches("INIT_CAPS_REGEX", Pattern.compile(TextUtil.INIT_CAPS_REGEX))); pipes.add(new RegexMatches("INIT_DASH_REGEX", Pattern.compile(TextUtil.INIT_DASH_REGEX))); pipes.add(new RegexMatches("IP_REGEX", Pattern.compile(TextUtil.IP_REGEX))); pipes.add(new RegexMatches("NEGATIVE_INTEGER_REGEX", Pattern.compile(TextUtil.NEGATIVE_INTEGER_REGEX))); pipes.add(new RegexMatches("ONE_CAP_REGEX", Pattern.compile(TextUtil.ONE_CAP_REGEX))); pipes.add(new RegexMatches("ONE_DIGIT_REGEX", 
Pattern.compile(TextUtil.ONE_DIGIT_REGEX))); pipes.add(new RegexMatches("POSITIVE_INTEGER_REGEX", Pattern.compile(TextUtil.POSITIVE_INTEGER_REGEX))); pipes.add(new RegexMatches("PUNCTUATION_REGEX", Pattern.compile(TextUtil.PUNCTUATION_REGEX))); pipes.add(new RegexMatches("ROMAN_NUMBER_CAPITAL_REGEX", Pattern.compile(TextUtil.ROMAN_NUMBER_CAPITAL_REGEX))); pipes.add(new RegexMatches("ROMAN_NUMBER_SMALL_REGEX", Pattern.compile(TextUtil.ROMAN_NUMBER_SMALL_REGEX))); pipes.add(new RegexMatches("SINGLE_INITIAL_REGEX", Pattern.compile(TextUtil.SINGLE_INITIAL_REGEX))); pipes.add(new RegexMatches("THREE_CAPS_REGEX", Pattern.compile(TextUtil.THREE_CAPS_REGEX)));
/**
 * Builds a feature pipeline and runs the gzip-compressed training file through it.
 *
 * <p>The pipeline ends with a {@code SequencePrintingPipe} that writes to the file
 * {@code test.out} (relative to the working directory). Input is read in groups of lines
 * separated by lines matching {@code ^\s*$}.
 *
 * <p>Both the output writer and the input stream are released even when building the
 * pipeline or processing the input fails.
 *
 * @param trainingFilename path of the gzip-compressed training file
 * @throws IOException if {@code test.out} cannot be opened, or the training file cannot be
 *     opened, is not valid gzip data, or cannot be closed
 */
public TestCRFPipe(String trainingFilename) throws IOException {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    PrintWriter out = new PrintWriter("test.out");
    try {
        // Offsets handed to OffsetConjunctions: previous token, next token,
        // and the pair of the two previous tokens.
        int[][] conjunctions = new int[3][];
        conjunctions[0] = new int[] { -1 };
        conjunctions[1] = new int[] { 1 };
        conjunctions[2] = new int[] { -2, -1 };

        pipes.add(new SimpleTaggerSentence2TokenSequence());
        //pipes.add(new FeaturesInWindow("PREV-", -1, 1));
        //pipes.add(new FeaturesInWindow("NEXT-", 1, 2));
        pipes.add(new OffsetConjunctions(conjunctions));
        pipes.add(new TokenTextCharSuffix("C1=", 1));
        pipes.add(new TokenTextCharSuffix("C2=", 2));
        pipes.add(new TokenTextCharSuffix("C3=", 3));
        pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
        pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
        pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
        pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile("\\$.*")));
        pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
        pipes.add(new TokenSequence2FeatureVectorSequence());
        pipes.add(new SequencePrintingPipe(out));

        Pipe pipe = new SerialPipes(pipes);
        InstanceList trainingInstances = new InstanceList(pipe);

        // Open the file on its own so it can be closed even if the gzip header is invalid.
        FileInputStream fileIn = new FileInputStream(trainingFilename);
        try {
            BufferedReader reader =
                new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn)));
            // NOTE(review): the meaning of the trailing `true` is defined by LineGroupIterator,
            // which is not visible here -- confirm against that class.
            trainingInstances.addThruPipe(
                new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
            // Closing the outermost reader closes the whole decorator chain.
            reader.close();
        } finally {
            // Has no effect when the reader was already closed above.
            fileIn.close();
        }
    } finally {
        out.close();
    }
}
/**
 * Builds a feature pipeline and runs the gzip-compressed training file through it.
 *
 * <p>The pipeline ends with a {@code SequencePrintingPipe} that writes to the file
 * {@code test.out} (relative to the working directory). Input is read in groups of lines
 * separated by lines matching {@code ^\s*$}.
 *
 * <p>Both the output writer and the input stream are released even when building the
 * pipeline or processing the input fails.
 *
 * @param trainingFilename path of the gzip-compressed training file
 * @throws IOException if {@code test.out} cannot be opened, or the training file cannot be
 *     opened, is not valid gzip data, or cannot be closed
 */
public TestCRFPipe(String trainingFilename) throws IOException {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    PrintWriter out = new PrintWriter("test.out");
    try {
        // Offsets handed to OffsetConjunctions: previous token, next token,
        // and the pair of the two previous tokens.
        int[][] conjunctions = new int[3][];
        conjunctions[0] = new int[] { -1 };
        conjunctions[1] = new int[] { 1 };
        conjunctions[2] = new int[] { -2, -1 };

        pipes.add(new SimpleTaggerSentence2TokenSequence());
        //pipes.add(new FeaturesInWindow("PREV-", -1, 1));
        //pipes.add(new FeaturesInWindow("NEXT-", 1, 2));
        pipes.add(new OffsetConjunctions(conjunctions));
        pipes.add(new TokenTextCharSuffix("C1=", 1));
        pipes.add(new TokenTextCharSuffix("C2=", 2));
        pipes.add(new TokenTextCharSuffix("C3=", 3));
        pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
        pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
        pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
        pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile("\\$.*")));
        pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
        pipes.add(new TokenSequence2FeatureVectorSequence());
        pipes.add(new SequencePrintingPipe(out));

        Pipe pipe = new SerialPipes(pipes);
        InstanceList trainingInstances = new InstanceList(pipe);

        // Open the file on its own so it can be closed even if the gzip header is invalid.
        FileInputStream fileIn = new FileInputStream(trainingFilename);
        try {
            BufferedReader reader =
                new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn)));
            // NOTE(review): the meaning of the trailing `true` is defined by LineGroupIterator,
            // which is not visible here -- confirm against that class.
            trainingInstances.addThruPipe(
                new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
            // Closing the outermost reader closes the whole decorator chain.
            reader.close();
        } finally {
            // Has no effect when the reader was already closed above.
            fileIn.close();
        }
    } finally {
        out.close();
    }
}
/**
 * Builds a feature pipeline and runs the gzip-compressed training file through it.
 *
 * <p>The pipeline ends with a {@code SequencePrintingPipe} that writes to the file
 * {@code test.out} (relative to the working directory). Input is read in groups of lines
 * separated by lines matching {@code ^\s*$}.
 *
 * <p>Both the output writer and the input stream are released even when building the
 * pipeline or processing the input fails.
 *
 * @param trainingFilename path of the gzip-compressed training file
 * @throws IOException if {@code test.out} cannot be opened, or the training file cannot be
 *     opened, is not valid gzip data, or cannot be closed
 */
public TestCRFPipe(String trainingFilename) throws IOException {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    PrintWriter out = new PrintWriter("test.out");
    try {
        // Offsets handed to OffsetConjunctions: previous token, next token,
        // and the pair of the two previous tokens.
        int[][] conjunctions = new int[3][];
        conjunctions[0] = new int[] { -1 };
        conjunctions[1] = new int[] { 1 };
        conjunctions[2] = new int[] { -2, -1 };

        pipes.add(new SimpleTaggerSentence2TokenSequence());
        //pipes.add(new FeaturesInWindow("PREV-", -1, 1));
        //pipes.add(new FeaturesInWindow("NEXT-", 1, 2));
        pipes.add(new OffsetConjunctions(conjunctions));
        pipes.add(new TokenTextCharSuffix("C1=", 1));
        pipes.add(new TokenTextCharSuffix("C2=", 2));
        pipes.add(new TokenTextCharSuffix("C3=", 3));
        pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
        pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
        pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
        pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile("\\$.*")));
        pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
        pipes.add(new TokenSequence2FeatureVectorSequence());
        pipes.add(new SequencePrintingPipe(out));

        Pipe pipe = new SerialPipes(pipes);
        InstanceList trainingInstances = new InstanceList(pipe);

        // Open the file on its own so it can be closed even if the gzip header is invalid.
        FileInputStream fileIn = new FileInputStream(trainingFilename);
        try {
            BufferedReader reader =
                new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn)));
            // NOTE(review): the meaning of the trailing `true` is defined by LineGroupIterator,
            // which is not visible here -- confirm against that class.
            trainingInstances.addThruPipe(
                new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
            // Closing the outermost reader closes the whole decorator chain.
            reader.close();
        } finally {
            // Has no effect when the reader was already closed above.
            fileIn.close();
        }
    } finally {
        out.close();
    }
}
/** Pipes added based on experience with full text */ private static void addFullTextPipes(List<String> usedPipeNames, List<Pipe> pipes) { // blabla 24 24 pipes.add(new LongRegexSpaced("digit_then_other_then_digit", Pattern .compile("\\d+[^\\d]+\\d+"), 2, 4)); // 30 mM K SO , 5 mM MgCl 6H O, 10 mM 24 24 22 HEPES pipes.add(new LongRegexSpaced( "digit_then_other_then_digit_then_other_then_digit", Pattern .compile(".*\\d+[^\\d\\n]+\\d+[^\\d\\n]+\\d+.*"), 4, 9)); // n 19 // n 5 pipes.add(new LongRegexSpaced("n_space_digit", Pattern .compile("n \\d+"), 2, 2)); pipes.add(new LongRegexSpaced("parenthesis_n_space_digit_parenthesis", Pattern.compile("\\( n \\d+ \\)"), 3, 4)); pipes.add(new LongRegexSpaced("n_space_digit_parenthesis", Pattern .compile("n \\d+ \\)"), 3, 4)); pipes.add(new LongRegexSpaced("parenthesis_n_space_digit", Pattern .compile("\\( n \\d+"), 3, 4)); // Fig is never found in any lexicon pipes.add(new RegexMatches("Figure", Pattern.compile(".*Fig.*"))); }
pipeParam.add(new RegexMatches("INITLOWCAPS_ANYTHING_NONUMBER", Pattern .compile("[" + UNICODE_LOWER + "][" + UNICODE_UPPER + "][^0-9]*"))); pipeParam.add(new RegexMatches("INITLOWCAPS_ANYTHING_WITHNUMBER", Pattern.compile("[" + UNICODE_LOWER + "][" + UNICODE_UPPER + "].*[0-9].*"))); pipeParam.add(new RegexMatches("INITCAPS", Pattern.compile("[" + UNICODE_UPPER + "].*"))); pipeParam.add(new RegexMatches("INITCAPSALPHA", Pattern.compile("[" + UNICODE_UPPER + "][" + UNICODE_LOWER + "].*"))); pipeParam.add(new RegexMatches("ALLCAPS", Pattern.compile("[" + UNICODE_UPPER + "]+"))); pipeParam.add(new RegexMatches("CAPSMIX", Pattern.compile("[" + UNICODE_UPPER + UNICODE_LOWER + "]+"))); pipeParam .add(new RegexMatches("HASDIGIT", Pattern.compile(".*[0-9].*"))); pipeParam .add(new RegexMatches("SINGLEDIGIT", Pattern.compile("[0-9]"))); pipeParam.add(new RegexMatches("DOUBLEDIGIT", Pattern .compile("[0-9][0-9]"))); pipeParam.add(new RegexMatches("NATURALNUMBER", Pattern .compile("[0-9]+"))); pipeParam.add(new RegexMatches("REALNUMBER", Pattern .compile("[-0-9]+[.,]+[0-9.,]+"))); pipeParam.add(new RegexMatches("HASDASH", Pattern.compile(".*-.*"))); pipeParam.add(new RegexMatches("INITDASH", Pattern.compile("-.*"))); pipeParam.add(new RegexMatches("ENDDASH", Pattern.compile(".*-"))); pipeParam.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[" + UNICODE_UPPER + UNICODE_LOWER + "].*[0-9].*"))); pipeParam.add(new RegexMatches("ALPHANUMERIC", Pattern .compile(".*[0-9].*[" + UNICODE_UPPER + UNICODE_LOWER + "].*")));
pipeParam.add(new RegexMatches("INITLOWCAPS_ANYTHING_NONUMBER", Pattern .compile("[" + UNICODE_LOWER + "][" + UNICODE_UPPER + "][^0-9]*"))); pipeParam.add(new RegexMatches("INITLOWCAPS_ANYTHING_WITHNUMBER", Pattern.compile("[" + UNICODE_LOWER + "][" + UNICODE_UPPER + "].*[0-9].*"))); pipeParam.add(new RegexMatches("INITCAPS", Pattern.compile("[" + UNICODE_UPPER + "].*"))); pipeParam.add(new RegexMatches("INITCAPSALPHA", Pattern.compile("[" + UNICODE_UPPER + "][" + UNICODE_LOWER + "].*"))); pipeParam.add(new RegexMatches("ALLCAPS", Pattern.compile("[" + UNICODE_UPPER + "]+"))); pipeParam.add(new RegexMatches("CAPSMIX", Pattern.compile("[" + UNICODE_UPPER + UNICODE_LOWER + "]+"))); pipeParam .add(new RegexMatches("HASDIGIT", Pattern.compile(".*[0-9].*"))); pipeParam .add(new RegexMatches("SINGLEDIGIT", Pattern.compile("[0-9]"))); pipeParam.add(new RegexMatches("DOUBLEDIGIT", Pattern .compile("[0-9][0-9]"))); pipeParam.add(new RegexMatches("NATURALNUMBER", Pattern .compile("[0-9]+"))); pipeParam.add(new RegexMatches("REALNUMBER", Pattern .compile("[-0-9]+[.,]+[0-9.,]+"))); pipeParam.add(new RegexMatches("HASDASH", Pattern.compile(".*-.*"))); pipeParam.add(new RegexMatches("INITDASH", Pattern.compile("-.*"))); pipeParam.add(new RegexMatches("ENDDASH", Pattern.compile(".*-"))); pipeParam.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[" + UNICODE_UPPER + UNICODE_LOWER + "].*[0-9].*"))); pipeParam.add(new RegexMatches("ALPHANUMERIC", Pattern .compile(".*[0-9].*[" + UNICODE_UPPER + UNICODE_LOWER + "].*")));
pipeParam.add(new RegexMatches("INITLOWCAPS_ANYTHING_NONUMBER", Pattern .compile("[" + UNICODE_LOWER + "][" + UNICODE_UPPER + "][^0-9]*"))); pipeParam.add(new RegexMatches("INITLOWCAPS_ANYTHING_WITHNUMBER", Pattern.compile("[" + UNICODE_LOWER + "][" + UNICODE_UPPER + "].*[0-9].*"))); pipeParam.add(new RegexMatches("INITCAPS", Pattern.compile("[" + UNICODE_UPPER + "].*"))); pipeParam.add(new RegexMatches("INITCAPSALPHA", Pattern.compile("[" + UNICODE_UPPER + "][" + UNICODE_LOWER + "].*"))); pipeParam.add(new RegexMatches("ALLCAPS", Pattern.compile("[" + UNICODE_UPPER + "]+"))); pipeParam.add(new RegexMatches("CAPSMIX", Pattern.compile("[" + UNICODE_UPPER + UNICODE_LOWER + "]+"))); pipeParam .add(new RegexMatches("HASDIGIT", Pattern.compile(".*[0-9].*"))); pipeParam .add(new RegexMatches("SINGLEDIGIT", Pattern.compile("[0-9]"))); pipeParam.add(new RegexMatches("DOUBLEDIGIT", Pattern .compile("[0-9][0-9]"))); pipeParam.add(new RegexMatches("NATURALNUMBER", Pattern .compile("[0-9]+"))); pipeParam.add(new RegexMatches("REALNUMBER", Pattern .compile("[-0-9]+[.,]+[0-9.,]+"))); pipeParam.add(new RegexMatches("HASDASH", Pattern.compile(".*-.*"))); pipeParam.add(new RegexMatches("INITDASH", Pattern.compile("-.*"))); pipeParam.add(new RegexMatches("ENDDASH", Pattern.compile(".*-"))); pipeParam.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[" + UNICODE_UPPER + UNICODE_LOWER + "].*[0-9].*")));
pipes.add(new TokenTextCharSuffix("C2=", 2)); pipes.add(new TokenTextCharSuffix("C3=", 3)); pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*"))); pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*"))); pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*"))); pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile(".*\\$.*"))); pipes.add(new TokenFirstPosition("FIRSTTOKEN")); pipes.add(new TokenSequence2FeatureVectorSequence());
pipes.add(new TokenTextCharSuffix("C2=", 2)); pipes.add(new TokenTextCharSuffix("C3=", 3)); pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*"))); pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*"))); pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*"))); pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile(".*\\$.*"))); pipes.add(new TokenFirstPosition("FIRSTTOKEN")); pipes.add(new TokenSequence2FeatureVectorSequence());
pipes.add(new TokenTextCharSuffix("C2=", 2)); pipes.add(new TokenTextCharSuffix("C3=", 3)); pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*"))); pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*"))); pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*"))); pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile(".*\\$.*"))); pipes.add(new TokenFirstPosition("FIRSTTOKEN")); pipes.add(new TokenSequence2FeatureVectorSequence());
new RegexMatches( "SingleLetter", Pattern.compile( "[A-Za-z]" ) ), new RegexMatches( "AllCaps", Pattern.compile( ALLCAPS ) ), new RegexMatches( "AllLower", Pattern.compile( ALLLOWER ) ), new RegexMatches( "InitCaps", Pattern.compile( INITCAPS ) ), new RegexMatches( "MixedCase", Pattern.compile( MIXEDCASE ) ), new RegexMatches( "MixedNum", Pattern.compile( MIXEDNUM ) ), new RegexMatches( "EndSentPunc", Pattern.compile( ENDSENTENCE ) ), new RegexMatches( "Punc", Pattern.compile( PUNCTUATION ) ), new RegexMatches( "Bracket", Pattern.compile( BRACKET ) ), new RegexMatches( "Ordinal", Pattern.compile( ORDINAL, Pattern.CASE_INSENSITIVE ) ), new RegexMatches( "10^3n", Pattern.compile( ILLION, Pattern.CASE_INSENSITIVE ) ), new LongRegexMatches( "Numeric", Pattern.compile( DECIMAL ), 1, 3 ), new LongRegexMatches( "BigNumber", Pattern.compile( COMMA_DECIMAL ), 1, 7 ), new LongRegexMatches( "kmbNumber", Pattern.compile( DECIMAL + ILLION, Pattern.CASE_INSENSITIVE ), 1, 4 ), new RegexMatches( "kmbMixed", Pattern.compile( MIXED_ILLION, Pattern.CASE_INSENSITIVE ) ), new LongRegexMatches( "Dollars", Pattern.compile( "[$](" + RANGE + "|" + DECIMAL + "|" + COMMA_DECIMAL + "|" + DECIMAL + ILLION + "|" + MIXED_ILLION + ")", Pattern.CASE_INSENSITIVE ), 2, 8 ), new RegexMatches( "NumberWord", Pattern.compile( NUMBER_WORD, Pattern.CASE_INSENSITIVE ) ), new RegexMatches( "Currency", Pattern.compile( CURRENCY, Pattern.CASE_INSENSITIVE ) ), new LongRegexMatches( "MoneyWords", Pattern.compile( MONEYWORDS, Pattern.CASE_INSENSITIVE ), 2, 4 ), new RegexMatches( "MixedAmPm", Pattern.compile( MIXED_AMPM, Pattern.CASE_INSENSITIVE ) ), new LongRegexMatches( "TimeNum", Pattern.compile( TIMENUM ), 3, 5 ),
new RegexMatches("SingleLetter", Pattern.compile("[A-Za-z]")), new RegexMatches("AllCaps", Pattern.compile(ALLCAPS)), new RegexMatches("AllLower", Pattern.compile(ALLLOWER)), new RegexMatches("InitCaps", Pattern.compile(INITCAPS)), new RegexMatches("MixedCase", Pattern.compile(MIXEDCASE)), new RegexMatches("MixedNum", Pattern.compile(MIXEDNUM)), new RegexMatches("EndSentPunc", Pattern.compile(ENDSENTENCE)), new RegexMatches("Punc", Pattern.compile(PUNCTUATION)), new RegexMatches("Bracket", Pattern.compile(BRACKET)), new RegexMatches("Ordinal", Pattern.compile (ORDINAL, Pattern.CASE_INSENSITIVE)), new RegexMatches("10^3n", Pattern.compile (ILLION, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("Numeric", Pattern.compile(DECIMAL), 1, 3), new LongRegexMatches("kmbNumber", Pattern.compile (DECIMAL+ILLION, Pattern.CASE_INSENSITIVE), 1, 4), new RegexMatches("kmbMixed", Pattern.compile (MIXED_ILLION, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("Dollars", Pattern.compile new RegexMatches("NumberWord", Pattern.compile (NUMBER_WORD, Pattern.CASE_INSENSITIVE)), new RegexMatches("Currency", Pattern.compile (CURRENCY, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("MoneyWords", Pattern.compile new RegexMatches("MixedAmPm", Pattern.compile
new RegexMatches("SingleLetter", Pattern.compile("[A-Za-z]")), new RegexMatches("AllCaps", Pattern.compile(ALLCAPS)), new RegexMatches("AllLower", Pattern.compile(ALLLOWER)), new RegexMatches("InitCaps", Pattern.compile(INITCAPS)), new RegexMatches("MixedCase", Pattern.compile(MIXEDCASE)), new RegexMatches("MixedNum", Pattern.compile(MIXEDNUM)), new RegexMatches("EndSentPunc", Pattern.compile(ENDSENTENCE)), new RegexMatches("Punc", Pattern.compile(PUNCTUATION)), new RegexMatches("Bracket", Pattern.compile(BRACKET)), new RegexMatches("Ordinal", Pattern.compile (ORDINAL, Pattern.CASE_INSENSITIVE)), new RegexMatches("10^3n", Pattern.compile (ILLION, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("Numeric", Pattern.compile(DECIMAL), 1, 3), new LongRegexMatches("kmbNumber", Pattern.compile (DECIMAL+ILLION, Pattern.CASE_INSENSITIVE), 1, 4), new RegexMatches("kmbMixed", Pattern.compile (MIXED_ILLION, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("Dollars", Pattern.compile new RegexMatches("NumberWord", Pattern.compile (NUMBER_WORD, Pattern.CASE_INSENSITIVE)), new RegexMatches("Currency", Pattern.compile (CURRENCY, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("MoneyWords", Pattern.compile new RegexMatches("MixedAmPm", Pattern.compile
new RegexMatches("SingleLetter", Pattern.compile("[A-Za-z]")), new RegexMatches("AllCaps", Pattern.compile(ALLCAPS)), new RegexMatches("AllLower", Pattern.compile(ALLLOWER)), new RegexMatches("InitCaps", Pattern.compile(INITCAPS)), new RegexMatches("MixedCase", Pattern.compile(MIXEDCASE)), new RegexMatches("MixedNum", Pattern.compile(MIXEDNUM)), new RegexMatches("EndSentPunc", Pattern.compile(ENDSENTENCE)), new RegexMatches("Punc", Pattern.compile(PUNCTUATION)), new RegexMatches("Bracket", Pattern.compile(BRACKET)), new RegexMatches("Ordinal", Pattern.compile (ORDINAL, Pattern.CASE_INSENSITIVE)), new RegexMatches("10^3n", Pattern.compile (ILLION, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("Numeric", Pattern.compile(DECIMAL), 1, 3), new LongRegexMatches("kmbNumber", Pattern.compile (DECIMAL+ILLION, Pattern.CASE_INSENSITIVE), 1, 4), new RegexMatches("kmbMixed", Pattern.compile (MIXED_ILLION, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("Dollars", Pattern.compile new RegexMatches("NumberWord", Pattern.compile (NUMBER_WORD, Pattern.CASE_INSENSITIVE)), new RegexMatches("Currency", Pattern.compile (CURRENCY, Pattern.CASE_INSENSITIVE)), new LongRegexMatches("MoneyWords", Pattern.compile new RegexMatches("MixedAmPm", Pattern.compile
new RegexMatches ("INITCAP", Pattern.compile (CAPS+".*")), new RegexMatches ("CAPITALIZED", Pattern.compile (CAPS+LOW+"*")), new RegexMatches ("ALLCAPS", Pattern.compile (CAPS+"+")), new RegexMatches ("MIXEDCAPS", Pattern.compile ("[A-Z][a-z]+[A-Z][A-Za-z]*")), new RegexMatches ("CONTAINSDIGITS", Pattern.compile (".*[0-9].*")), new RegexMatches ("ALLDIGITS", Pattern.compile ("[0-9]+")), new RegexMatches ("NUMERICAL", Pattern.compile ("[-0-9]+[\\.,]+[0-9\\.,]+")), new RegexMatches ("MULTIDOTS", Pattern.compile ("\\.\\.+")), new RegexMatches ("ENDSINDOT", Pattern.compile ("[^\\.]+.*\\.")), new RegexMatches ("CONTAINSDASH", Pattern.compile (ALPHANUM+"+-"+ALPHANUM+"*")), new RegexMatches ("ACRO", Pattern.compile ("[A-Z][A-Z\\.]*\\.[A-Z\\.]*")), new RegexMatches ("LONELYINITIAL", Pattern.compile (CAPS+"\\.")), new RegexMatches ("SINGLECHAR", Pattern.compile (ALPHA)), new RegexMatches ("CAPLETTER", Pattern.compile ("[A-Z]")), new RegexMatches ("PUNC", Pattern.compile (PUNT)), new RegexMatches ("QUOTE", Pattern.compile (QUOTE)),