/**
 * Wraps the given token stream in a {@code PatternReplaceFilter} built from this
 * factory's {@code pattern}, {@code replacement} and {@code replaceAll} settings.
 *
 * @param input the token stream to wrap
 * @return a new filter over {@code input}
 */
@Override
public PatternReplaceFilter create(TokenStream input) {
  final PatternReplaceFilter replacing =
      new PatternReplaceFilter(input, pattern, replacement, replaceAll);
  return replacing;
}
} // closes the enclosing class
/**
 * Creates a {@code PatternTokenizer} that splits the input using the configured
 * {@code pattern} and {@code group}.
 *
 * @param factory the attribute factory handed to the new tokenizer
 * @return a new tokenizer
 */
@Override
public PatternTokenizer create(final AttributeFactory factory) {
  final PatternTokenizer tokenizer = new PatternTokenizer(factory, pattern, group);
  return tokenizer;
}
} // closes the enclosing class
/**
 * Creates a {@code SimplePatternSplitTokenizer} from the given attribute factory
 * and this factory's {@code dfa}.
 *
 * @param factory the attribute factory handed to the new tokenizer
 * @return a new tokenizer
 */
@Override
public SimplePatternSplitTokenizer create(final AttributeFactory factory) {
  final SimplePatternSplitTokenizer splitter = new SimplePatternSplitTokenizer(factory, dfa);
  return splitter;
}
} // closes the enclosing class
/**
 * Wraps the given token stream in a {@code PatternCaptureGroupTokenFilter} built
 * from this factory's {@code preserveOriginal} flag and {@code pattern}.
 *
 * @param input the token stream to wrap
 * @return a new filter over {@code input}
 */
@Override
public PatternCaptureGroupTokenFilter create(TokenStream input) {
  final PatternCaptureGroupTokenFilter capturing =
      new PatternCaptureGroupTokenFilter(input, preserveOriginal, pattern);
  return capturing;
}
} // closes the enclosing class
/**
 * Wraps the given reader in a {@code PatternReplaceCharFilter} built from this
 * factory's {@code pattern} and {@code replacement}.
 *
 * @param input the reader to wrap
 * @return a new char filter over {@code input}
 */
@Override
public CharFilter create(Reader input) {
  final CharFilter replacing = new PatternReplaceCharFilter(pattern, replacement, input);
  return replacing;
}
/** Creates a new PatternTokenizerFactory */ public PatternTokenizerFactory(Map<String,String> args) { super(args); pattern = getPattern(args, PATTERN); group = getInt(args, GROUP, -1); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
/** Creates a new PatternReplaceCharFilterFactory */ public PatternReplaceCharFilterFactory(Map<String, String> args) { super(args); pattern = getPattern(args, "pattern"); replacement = get(args, "replacement", ""); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
@Override public int read(char[] cbuf, int off, int len) throws IOException { // Buffer all input on the first call. if (transformedInput == null) { fill(); } return transformedInput.read(cbuf, off, len); }
/**
 * Creates a {@code SimplePatternTokenizer} from the given attribute factory and
 * this factory's {@code dfa}.
 *
 * @param factory the attribute factory handed to the new tokenizer
 * @return a new tokenizer
 */
@Override
public SimplePatternTokenizer create(final AttributeFactory factory) {
  final SimplePatternTokenizer tokenizer = new SimplePatternTokenizer(factory, dfa);
  return tokenizer;
}
} // closes the enclosing class
public PatternCaptureGroupFilterFactory(Map<String,String> args) { super(args); pattern = getPattern(args, "pattern"); preserveOriginal = args.containsKey("preserve_original") ? Boolean.parseBoolean(args.get("preserve_original")) : true; } @Override
/**
 * Resets this stream for a new pass over the input: calls {@code super.reset()},
 * then {@code fillBuffer(input)}, then re-targets {@code matcher} at {@code str}
 * and sets {@code index} back to 0.
 * (Presumably {@code fillBuffer} loads the input into {@code str}; confirm in
 * {@code fillBuffer}.)
 *
 * @throws IOException if the superclass reset or {@code fillBuffer} fails
 */
@Override public void reset() throws IOException { super.reset(); fillBuffer(input); matcher.reset(str); index = 0; }
/**
 * Finishes the stream: after {@code super.end()}, sets both the start and end
 * offset to {@code correctOffset(str.length())}.
 *
 * @throws IOException if {@code super.end()} fails
 */
@Override
public void end() throws IOException {
  super.end();
  final int rawEnd = str.length();
  final int finalOffset = correctOffset(rawEnd);
  offsetAtt.setOffset(finalOffset, finalOffset);
}
/**
 * Finalizes the current token's attributes: sets the term length to
 * {@code tokenUpto} and the offsets to the corrected values of
 * {@code offsetStart} and {@code offsetStart + tokenUpto}.
 *
 * @param offsetStart uncorrected start offset of the token
 */
private void fillToken(int offsetStart) {
  termAtt.setLength(tokenUpto);

  final int correctedStart = correctOffset(offsetStart);
  final int correctedEnd = correctOffset(offsetStart + tokenUpto);
  offsetAtt.setOffset(correctedStart, correctedEnd);
}
/**
 * Reads the next code point by pulling one UTF-16 code unit from
 * {@code nextCodeUnit()} and, if that unit is a high surrogate, a second one.
 *
 * @return the next code point, or -1 if {@code nextCodeUnit()} reports end of input
 * @throws IOException if reading a code unit fails
 */
private int nextCodePoint() throws IOException {
  final int lead = nextCodeUnit();
  if (lead == -1 || !Character.isHighSurrogate((char) lead)) {
    // End of input, or a unit that stands on its own.
    return lead;
  }

  final int trail = nextCodeUnit();
  if (trail == -1) {
    // Input ended right after a high surrogate. Casting -1 to char would feed
    // 0xFFFF into Character.toCodePoint, which does not validate its arguments
    // and would yield a bogus value; return the lone surrogate instead.
    return lead;
  }
  return Character.toCodePoint((char) lead, (char) trail);
}
} // closes the enclosing class
/**
 * Reads the next code point by pulling one UTF-16 code unit from
 * {@code nextCodeUnit()} and, if that unit is a high surrogate, a second one.
 *
 * @return the next code point, or -1 if {@code nextCodeUnit()} reports end of input
 * @throws IOException if reading a code unit fails
 */
private int nextCodePoint() throws IOException {
  final int lead = nextCodeUnit();
  if (lead == -1 || !Character.isHighSurrogate((char) lead)) {
    // End of input, or a unit that stands on its own.
    return lead;
  }

  final int trail = nextCodeUnit();
  if (trail == -1) {
    // Input ended right after a high surrogate. Casting -1 to char would feed
    // 0xFFFF into Character.toCodePoint, which does not validate its arguments
    // and would yield a bogus value; return the lone surrogate instead.
    return lead;
  }
  return Character.toCodePoint((char) lead, (char) trail);
}
} // closes the enclosing class
private int nextCodeUnit() throws IOException { int result; if (pendingUpto < pendingLimit) { result = pendingChars[pendingUpto++]; if (pendingUpto == pendingLimit) { // We used up the pending buffer pendingUpto = 0; pendingLimit = 0; } appendToToken((char) result); offset++; } else if (bufferLimit == -1) { return -1; } else { assert bufferNextRead <= bufferLimit: "bufferNextRead=" + bufferNextRead + " bufferLimit=" + bufferLimit; if (bufferNextRead == bufferLimit) { bufferLimit = input.read(buffer, 0, buffer.length); if (bufferLimit == -1) { return -1; } bufferNextRead = 0; } result = buffer[bufferNextRead++]; offset++; appendToToken((char) result); } return result; }
private int nextCodeUnit() throws IOException { int result; if (pendingUpto < pendingLimit) { result = pendingChars[pendingUpto++]; if (pendingUpto == pendingLimit) { // We used up the pending buffer pendingUpto = 0; pendingLimit = 0; } appendToToken((char) result); offset++; } else if (bufferLimit == -1) { return -1; } else { assert bufferNextRead <= bufferLimit: "bufferNextRead=" + bufferNextRead + " bufferLimit=" + bufferLimit; if (bufferNextRead == bufferLimit) { bufferLimit = input.read(buffer, 0, buffer.length); if (bufferLimit == -1) { return -1; } bufferNextRead = 0; } result = buffer[bufferNextRead++]; offset++; appendToToken((char) result); } return result; }
/**
 * Finishes the stream: after {@code super.end()}, sets both the start and end
 * offset to the corrected value of {@code offset} plus the number of entries
 * still unread in the pending buffer ({@code pendingLimit - pendingUpto}).
 *
 * @throws IOException if {@code super.end()} fails
 */
@Override
public void end() throws IOException {
  super.end();
  final int rawEnd = offset + pendingLimit - pendingUpto;
  final int finalOffset = correctOffset(rawEnd);
  offsetAtt.setOffset(finalOffset, finalOffset);
}
/**
 * Reads a single character by delegating to {@code transformedInput}. If
 * {@code transformedInput} is still {@code null}, {@code fill()} is invoked first
 * (it is expected to set {@code transformedInput}; confirm in {@code fill}).
 *
 * @return whatever {@code transformedInput.read()} returns
 * @throws IOException if filling or reading fails
 */
@Override
public int read() throws IOException {
  final boolean notYetBuffered = (transformedInput == null);
  if (notYetBuffered) {
    fill();
  }
  final int next = transformedInput.read();
  return next;
}
/**
 * Finishes the stream: after {@code super.end()}, sets both the start and end
 * offset to the corrected value of {@code offset} plus the number of entries
 * still unread in the pending buffer ({@code pendingLimit - pendingUpto}).
 *
 * @throws IOException if {@code super.end()} fails
 */
@Override
public void end() throws IOException {
  super.end();
  final int rawEnd = offset + pendingLimit - pendingUpto;
  final int finalOffset = correctOffset(rawEnd);
  offsetAtt.setOffset(finalOffset, finalOffset);
}