options.add(ranges.union(lcaseLettersToUpper).difference(lcaseLetters)); int bestComplexity = complexityWordFolded(ranges); int complexity = complexityWordFolded(option); if (complexity < bestComplexity) { bestComplexity = complexity; return new Charset(best, ieExplicits);
/**
 * Scores {@code ranges} by taking the cheaper of two candidates: the helper's
 * score for {@code ranges} itself, and one plus the helper's score for the
 * complement of {@code ranges} within {@code CharRanges.ALL_CODE_UNITS}.
 *
 * <p>NOTE(review): the extra 1 on the complemented candidate presumably
 * accounts for the cost of expressing the negation; confirm against
 * {@code complexityWordFoldedHelper}.
 *
 * @param ranges the character ranges to score
 * @return the smaller of the two candidate scores
 */
private static int complexityWordFolded(CharRanges ranges) {
  int directCost = complexityWordFoldedHelper(ranges);
  CharRanges complement = CharRanges.ALL_CODE_UNITS.difference(ranges);
  int complementCost = 1 + complexityWordFoldedHelper(complement);
  return Math.min(directCost, complementCost);
}
options.add(ranges.union(lcaseLettersToUpper).difference(lcaseLetters)); int bestComplexity = complexityWordFolded(ranges); int complexity = complexityWordFolded(option); if (complexity < bestComplexity) { bestComplexity = complexity; return new Charset(best, ieExplicits);
/**
 * Decomposes this charset, choosing between decomposing {@code ranges}
 * directly and decomposing its complement within
 * {@code CharRanges.ALL_CODE_UNITS}.
 *
 * <p>When {@code ieExplicits} is non-empty and does not overlap the
 * complement, only {@code ranges} is decomposed; when it does not overlap
 * {@code ranges}, only the complement is decomposed. Otherwise both forms are
 * decomposed and the one with the lower {@code complexity()} is returned,
 * with ties going to the form built from {@code ranges}.
 *
 * <p>NOTE(review): the boolean passed to {@code decompose(CharRanges, boolean)}
 * presumably marks the result as inverted; confirm against that overload.
 *
 * @return the chosen decomposition
 */
DecomposedCharset decompose() {
  CharRanges complement = CharRanges.ALL_CODE_UNITS.difference(ranges);
  boolean hasExplicits = !ieExplicits.isEmpty();

  // All explicits fall inside ranges: only the direct form is considered.
  if (hasExplicits && complement.intersection(ieExplicits).isEmpty()) {
    return decompose(ranges, false);
  }
  // All explicits fall inside the complement: only that form is considered.
  if (hasExplicits && ranges.intersection(ieExplicits).isEmpty()) {
    return decompose(complement, true);
  }

  // No constraint applies, so build both forms and keep the cheaper one.
  DecomposedCharset direct = decompose(ranges, false);
  DecomposedCharset viaComplement = decompose(complement, true);
  if (direct.complexity() <= viaComplement.complexity()) {
    return direct;
  }
  return viaComplement;
}
/**
 * Decomposes this charset, choosing between decomposing {@code ranges}
 * directly and decomposing its complement within
 * {@code CharRanges.ALL_CODE_UNITS}.
 *
 * <p>When {@code ieExplicits} is non-empty and does not overlap the
 * complement, only {@code ranges} is decomposed; when it does not overlap
 * {@code ranges}, only the complement is decomposed. Otherwise both forms are
 * decomposed and the one with the lower {@code complexity()} is returned,
 * with ties going to the form built from {@code ranges}.
 *
 * <p>NOTE(review): the boolean passed to {@code decompose(CharRanges, boolean)}
 * presumably marks the result as inverted; confirm against that overload.
 *
 * @return the chosen decomposition
 */
DecomposedCharset decompose() {
  CharRanges complement = CharRanges.ALL_CODE_UNITS.difference(ranges);
  boolean hasExplicits = !ieExplicits.isEmpty();

  // All explicits fall inside ranges: only the direct form is considered.
  if (hasExplicits && complement.intersection(ieExplicits).isEmpty()) {
    return decompose(ranges, false);
  }
  // All explicits fall inside the complement: only that form is considered.
  if (hasExplicits && ranges.intersection(ieExplicits).isEmpty()) {
    return decompose(complement, true);
  }

  // No constraint applies, so build both forms and keep the cheaper one.
  DecomposedCharset direct = decompose(ranges, false);
  DecomposedCharset viaComplement = decompose(complement, true);
  if (direct.complexity() <= viaComplement.complexity()) {
    return direct;
  }
  return viaComplement;
}
/**
 * Appends the source form of this charset to {@code sb}.
 *
 * <p>If {@code ranges} equals {@code DOT_CHARSET.ranges}, a single
 * {@code '.'} is appended; otherwise the work is delegated to the result of
 * {@link #decompose()}.
 *
 * @param sb the builder that receives the source text
 */
@Override
protected void appendSourceCode(StringBuilder sb) {
  boolean matchesDot = DOT_CHARSET.ranges.equals(ranges);
  if (matchesDot) {
    sb.append('.');
  } else {
    decompose().appendSourceCode(sb);
  }
}
@Override public boolean isCaseSensitive() { // We could test // !ranges.equals(CaseCanonicalize.expandToAllMatched(ranges)) // but we get better optimizations by leaving the 'i' flag on in most // cases. // Check whether skipping all the character groups that are known // case-insensitive leaves us with something that matches the above // definition. CharRanges withoutNamedGroups = decompose().ranges; return !withoutNamedGroups.equals( CaseCanonicalize.expandToAllMatched(withoutNamedGroups)); }
@Override public boolean isCaseSensitive() { // We could test // !ranges.equals(CaseCanonicalize.expandToAllMatched(ranges)) // but we get better optimizations by leaving the 'i' flag on in most // cases. // Check whether skipping all the character groups that are known // case-insensitive leaves us with something that matches the above // definition. CharRanges withoutNamedGroups = decompose().ranges; return !withoutNamedGroups.equals( CaseCanonicalize.expandToAllMatched(withoutNamedGroups)); }
/**
 * Appends the source form of this charset to {@code sb}.
 *
 * <p>If {@code ranges} equals {@code DOT_CHARSET.ranges}, a single
 * {@code '.'} is appended; otherwise the work is delegated to the result of
 * {@link #decompose()}.
 *
 * @param sb the builder that receives the source text
 */
@Override
protected void appendSourceCode(StringBuilder sb) {
  boolean matchesDot = DOT_CHARSET.ranges.equals(ranges);
  if (matchesDot) {
    sb.append('.');
  } else {
    decompose().appendSourceCode(sb);
  }
}
/**
 * Scores {@code ranges} by taking the cheaper of two candidates: the helper's
 * score for {@code ranges} itself, and one plus the helper's score for the
 * complement of {@code ranges} within {@code CharRanges.ALL_CODE_UNITS}.
 *
 * <p>NOTE(review): the extra 1 on the complemented candidate presumably
 * accounts for the cost of expressing the negation; confirm against
 * {@code complexityWordFoldedHelper}.
 *
 * @param ranges the character ranges to score
 * @return the smaller of the two candidate scores
 */
private static int complexityWordFolded(CharRanges ranges) {
  int directCost = complexityWordFoldedHelper(ranges);
  CharRanges complement = CharRanges.ALL_CODE_UNITS.difference(ranges);
  int complementCost = 1 + complexityWordFoldedHelper(complement);
  return Math.min(directCost, complementCost);
}