Refine search
/**
 * Searches {@code srcBytes} for the pattern and points {@code outPtr} at the matched bytes.
 *
 * <p>The matcher is created with bounds {@code 0} and {@code range}; the search itself is
 * started at {@code offset} and also limited by {@code range}.
 *
 * @param srcBytes the bytes to search
 * @param offset   position at which the search starts
 * @param range    bound passed both to the matcher and to the search
 * @param outPtr   receives the matched slice of {@code srcBytes}, or
 *                 {@code ByteUtil.EMPTY_BYTE_ARRAY} when nothing matched
 * @return {@code true} when the search returned a non-negative result
 */
private boolean substr(byte[] srcBytes, int offset, int range, ImmutableBytesWritable outPtr) {
    final Matcher m = pattern.matcher(srcBytes, 0, range);

    // A negative search result means "no match": hand back an empty value.
    if (m.search(offset, range, Option.DEFAULT) < 0) {
        outPtr.set(ByteUtil.EMPTY_BYTE_ARRAY);
        return false;
    }

    final int matchBegin = m.getBegin();
    outPtr.set(srcBytes, matchBegin, m.getEnd() - matchBegin);
    return true;
}
/**
 * Times repeated searches of a pattern against a string and prints the results to stderr.
 *
 * <p>A header line is printed first; then, for each of the {@code warmup} rounds, the pattern is
 * searched {@code times} times (a fresh matcher per search) and the elapsed wall-clock time of
 * the round is printed in milliseconds.
 *
 * @param _reg   the regular expression source
 * @param _str   the text to search
 * @param warmup number of timed rounds
 * @param times  number of searches per round
 * @throws Exception declared by the signature; nothing in this body throws it explicitly
 */
protected void bench(String _reg, String _str, int warmup, int times) throws Exception {
    final byte[] patternBytes = _reg.getBytes();
    final byte[] input = _str.getBytes();
    final Regex regex = new Regex(patternBytes, 0, patternBytes.length,
            Option.DEFAULT, ASCIIEncoding.INSTANCE, Syntax.DEFAULT);

    System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times");

    for (int round = 0; round < warmup; round++) {
        final long startedAt = System.currentTimeMillis();
        for (int remaining = times; remaining > 0; remaining--) {
            regex.matcher(input, 0, input.length).search(0, input.length, Option.NONE);
        }
        final long elapsed = System.currentTimeMillis() - startedAt;
        System.err.println(": " + elapsed + "ms");
    }
}
/**
 * Runs the compiled expression against {@code text} and collects one {@code Match} per named
 * group.
 *
 * @param text the text to search; it is encoded as UTF-8 before matching
 * @return {@code null} when the search result is {@code -1} (no match); an empty list when the
 *         expression matched but declares no named groups; otherwise one entry per named
 *         back-reference reported by the expression
 * @throws InterruptedException when the search result equals {@code INTERRUPTED}
 */
public List<Match> matches(String text) throws InterruptedException {
    final byte[] utf8 = text.getBytes(StandardCharsets.UTF_8);
    final Matcher matcher = compiledExpression.matcher(utf8);
    final int searchResult = matcher.search(0, utf8.length, Option.MULTILINE);

    if (searchResult == -1) {
        return null;
    }
    if (searchResult == INTERRUPTED) {
        throw new InterruptedException();
    }

    final List<Match> found = new ArrayList<>();
    if (compiledExpression.numberOfNames() == 0) {
        return found;
    }

    final Region region = matcher.getEagerRegion();
    final Iterator<NameEntry> names = compiledExpression.namedBackrefIterator();
    while (names.hasNext()) {
        final NameEntry entry = names.next();
        final String groupName = extractString(entry.name, entry.nameP, entry.nameEnd);
        final int[] groupNumbers = entry.getBackRefs();
        found.add(match(groupName, region, utf8, groupNumbers));
    }
    return found;
}
/**
 * Block form of the in-place single substitution: searches this string for {@code pattern},
 * yields the matched text to {@code block}, and delegates the actual replacement to
 * {@code subBangCommon}.
 *
 * @param context the current thread context; its back-reference is updated
 * @param pattern the compiled regular expression to search for
 * @param block   yielded the matched substring; its result (converted with
 *                {@code objAsString}) becomes the replacement
 * @return the result of {@code subBangCommon} on a match; otherwise the value returned by
 *         setting the back-reference to nil
 */
private IRubyObject subBangIter(ThreadContext context, Regex pattern, Block block) {
    final int start = value.getBegin();
    final int range = start + value.getRealSize();
    final Matcher matcher = pattern.matcher(value.getUnsafeBytes(), start, range);

    final int found = RubyRegexp.matcherSearch(context.runtime, matcher, start, range, Option.NONE);
    if (found < 0) {
        return context.setBackRef(context.runtime.getNil());
    }

    frozenCheck(true);

    // Snapshot the backing array and size before yielding; they are handed to modifyCheck
    // once the block has returned.
    final byte[] bytesBeforeYield = value.getUnsafeBytes();
    final int sizeBeforeYield = value.getRealSize();

    final RubyMatchData match = RubyRegexp.createMatchData(context, this, matcher, pattern);
    context.setBackRef(match);

    final int matchBegin = matcher.getBegin();
    final RubyString matched = makeShared(context.runtime, matchBegin, matcher.getEnd() - matchBegin);
    final RubyString repl = objAsString(context, block.yield(context, matched));

    modifyCheck(bytesBeforeYield, sizeBeforeYield);
    frozenCheck(true);
    // NOTE(review): the back-reference is written again after the yield, presumably because
    // the block may have replaced it - confirm.
    context.setBackRef(match);

    return subBangCommon(context, pattern, matcher, repl, repl.flags);
}
@Override public int compareTo(byte[] value, int offset, int length) { // Use subsequence match instead of full sequence match to adhere to the // principle of least surprise. Matcher m = pattern.matcher(value); return m.search(offset, length, pattern.getOptions()) < 0 ? 1 : 0; }
/**
 * Searches {@code str} for the pattern, starting at position {@code from}.
 *
 * <p>The string is converted with {@code String.getBytes()} (platform default charset) and
 * {@code from} is passed to the search as-is.
 *
 * @param str  the text to search
 * @param from position handed to the search as its start
 * @return the matcher's eager region when the search result is non-negative, otherwise
 *         {@code null}
 */
public Region match(String str, int from) {
    final byte[] input = str.getBytes();
    final int inputEnd = input.length;
    final Matcher m = this.pattern.matcher(input, 0, inputEnd);

    final boolean found = m.search(from, inputEnd, 0) >= 0;
    return found ? m.getEagerRegion() : null;
}
int begin = value.getBegin(); int range = begin + value.getRealSize(); final Matcher matcher = prepared.matcher(bytes, begin, range); if (pattern.numberOfCaptures() == 0) { while (RubyRegexp.matcherSearch(runtime, matcher, begin + end, range, Option.NONE) >= 0) { end = positionEnd(matcher, enc, begin, range); RubyString substr = makeShared19(runtime, matcher.getBegin(), matcher.getEnd() - matcher.getBegin()); substr.infectBy(tuFlags); ary.append(substr);
/**
 * Performs one scan step and returns the matched substring.
 *
 * <p>The search starts at {@code matcher.value + value.begin}. After a match,
 * {@code matcher.value} is set to the position the next step should resume from: the match
 * end, or - for an empty match - the match end advanced by the encoded length of the byte at
 * that position (by one when the match end is not below {@code value.realSize}).
 *
 * @param regex   the regexp being scanned for; it supplies the encoding and is passed to
 *                {@code infectBy} on the result
 * @param matcher the matcher to search with; its {@code value} field holds the resume position
 * @param range   bound passed to the search
 * @return the matched substring, or {@code null} when the search result is negative
 */
private IRubyObject scanOnceNG(RubyRegexp regex, Matcher matcher, int range) {
    if (matcher.search(matcher.value + value.begin, range, Option.NONE) < 0) {
        return null;
    }

    final int matchEnd = matcher.getEnd();
    final int matchBegin = matcher.getBegin();

    int resumeAt = matchEnd;
    if (matchBegin == matchEnd) {
        // Empty match: step forward so the next search does not find the same position again.
        if (value.realSize > matchEnd) {
            resumeAt += regex.getPattern().getEncoding().length(value.bytes[value.begin + matchEnd]);
        } else {
            resumeAt += 1;
        }
    }
    matcher.value = resumeAt;

    return substr(matchBegin, matchEnd - matchBegin).infectBy(regex);
}
static RubyString regsub19(RubyString str, RubyString src, Matcher matcher, Regex pattern) { Region regs = matcher.getRegion(); case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (pattern.noNameGroupIsActive(Syntax.RUBY)) { no = c - '0'; break; no = pattern.nameToBackrefNumber(bytes, name, nameEnd, regs); } catch (JOniException je) { throw str.getRuntime().newIndexError(je.getMessage()); break; case '`': val.cat( srcbs.getUnsafeBytes(), srcbs.getBegin(), matcher.getBegin(), srcEnc); continue; case '\'': val.cat(srcbs.getUnsafeBytes(), srcbs.getBegin() + matcher.getEnd(), srcbs.getRealSize() - matcher.getEnd(), srcEnc); continue; case '+': if (regs == null) { if (matcher.getBegin() == -1) { no = 0; continue; if (no != 0 || matcher.getBegin() == -1) continue; val.cat(srcbs.getUnsafeBytes(), srcbs.getBegin() + matcher.getBegin(), matcher.getEnd() - matcher.getBegin(), srcEnc);
/**
 * Looks for an encoding declaration in {@code encodingLine} and, when one is found, passes
 * the text of capture group 1 to {@code setEncoding}.
 *
 * <p>The search uses {@code encodingRegexp} with {@code Option.IGNORECASE}. When the search
 * result is negative the method returns without calling {@code setEncoding}.
 *
 * @param encodingLine the line to inspect
 * @throws IOException declared by the signature
 */
protected void handleFileEncodingComment(ByteList encodingLine) throws IOException {
    final byte[] lineBytes = encodingLine.getUnsafeBytes();
    final int lineStart = encodingLine.getBegin();
    final int lineEnd = lineStart + encodingLine.getRealSize();

    final Matcher matcher = encodingRegexp.matcher(lineBytes, lineStart, lineEnd);
    final int found = RubyRegexp.matcherSearch(
            parserSupport.getConfiguration().getRuntime(), matcher, lineStart, lineEnd, Option.IGNORECASE);
    if (found < 0) {
        return;
    }

    // Capture group 1 holds the text that is handed to setEncoding.
    final int nameBegin = matcher.getRegion().beg[1];
    final int nameEnd = matcher.getRegion().end[1];
    setEncoding(new ByteList(lineBytes, nameBegin, nameEnd - nameBegin));
}
/**
 * Non-block form of scan: repeatedly searches with {@code matcher} and collects every match
 * into an array.
 *
 * <p>When {@code pattern} has no capture groups each element is the matched substring;
 * otherwise each element is whatever {@code populateCapturesForScan} returns for that match.
 * Afterwards the back-reference is set to match data built from the matcher when at least one
 * element was collected, and to nil otherwise.
 *
 * @param context the current thread context; its back-reference is updated
 * @param pattern the compiled regular expression
 * @param matcher the matcher to search with
 * @param enc     encoding passed to {@code positionEnd}
 * @param begin   base position; each search starts at {@code begin + end}, where {@code end}
 *                is the value returned by {@code positionEnd} for the previous match
 * @param range   bound passed to each search
 * @param tuFlags flags passed to {@code infectBy} on the results and on the match data
 * @return the array of collected results (possibly empty)
 */
private IRubyObject scanNoIter(ThreadContext context, Regex pattern, Matcher matcher, Encoding enc, int begin, int range, int tuFlags) {
    final Ruby runtime = context.runtime;
    final RubyArray results = runtime.newArray();
    final boolean wholeMatchOnly = pattern.numberOfCaptures() == 0;

    int end = 0;
    while (RubyRegexp.matcherSearch(runtime, matcher, begin + end, range, Option.NONE) >= 0) {
        end = positionEnd(matcher, enc, begin, range);
        if (wholeMatchOnly) {
            final int matchBegin = matcher.getBegin();
            final RubyString piece = makeShared(runtime, matchBegin, matcher.getEnd() - matchBegin);
            piece.infectBy(tuFlags);
            results.append(piece);
        } else {
            results.append(populateCapturesForScan(runtime, matcher, range, tuFlags, false));
        }
    }

    if (results.size() <= 0) {
        context.setBackRef(runtime.getNil());
    } else {
        final RubyMatchData match = RubyRegexp.createMatchData(context, this, matcher, pattern);
        match.infectBy(tuFlags);
        context.setBackRef(match);
    }
    return results;
}
/**
 * Performs one scan step and returns an array holding the capture groups of the match.
 *
 * <p>The search starts at {@code matcher.value + value.begin}. After a match,
 * {@code matcher.value} is set to the position the next step should resume from: the end of
 * group 0, or - for an empty match - that end advanced by the encoded length of the byte at
 * that position (by one when the end is not below {@code value.realSize}).
 *
 * @param regex   the regexp being scanned for; it supplies the encoding and is passed to
 *                {@code infectBy} on every captured substring
 * @param matcher the matcher to search with; its {@code value} field holds the resume position
 * @param range   bound passed to the search
 * @return an array with one entry per group from 1 upwards (nil for a group whose begin is
 *         {@code -1}), or {@code null} when the search result is negative
 */
private IRubyObject scanOnce(RubyRegexp regex, Matcher matcher, int range) {
    if (matcher.search(matcher.value + value.begin, range, Option.NONE) < 0) {
        return null;
    }

    final Region region = matcher.getRegion();
    final int matchEnd = region.end[0];

    int resumeAt = matchEnd;
    if (region.beg[0] == matchEnd) {
        // Empty match: step forward so the next search does not find the same position again.
        if (value.realSize > matchEnd) {
            resumeAt += regex.getPattern().getEncoding().length(value.bytes[value.begin + matchEnd]);
        } else {
            resumeAt += 1;
        }
    }
    matcher.value = resumeAt;

    // The array is created with numRegs as its argument although group 0 is not stored.
    final RubyArray captures = getRuntime().newArray(region.numRegs);
    for (int group = 1; group < region.numRegs; group++) {
        final int groupBegin = region.beg[group];
        if (groupBegin != -1) {
            captures.append(substr(groupBegin, region.end[group] - groupBegin).infectBy(regex));
        } else {
            captures.append(getRuntime().getNil());
        }
    }
    return captures;
}
/**
 * Searches from the given start position and records the outcome.
 *
 * @param start position to search from; it is converted with {@code byteIndex} before being
 *              passed to the underlying matcher
 * @return whatever {@code update} returns for the search result
 */
@Override
public boolean find(int start) {
    final int searchResult = matcher.search(byteIndex(start), byteLength, Option.NONE);
    return update(searchResult);
}
if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { int preEnd = enc.stepBack(bytes, str, end, end, 1); if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; s = start = str; prev = -1; msaInit(option, start); if (matchCheck(end, s, prev, interrupt)) return match(s); return mismatch(); msaInit(option, origStart); if (Config.USE_CEC) { int offset = Math.min(start, range) - str; if ((end - start) < regex.thresholdLength) return mismatch(); if (!forwardSearchRange(bytes, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev if (s < low) { s = low; if (matchCheck(origRange, s, prev, interrupt)) return match(s); // ??? prev = s; s += enc.length(bytes, s, end);
/**
 * Runs the interruptible regexp operation selected by the {@code match} flag.
 *
 * @param context the current thread context (not used by this body)
 * @param matcher the matcher to operate on
 * @return the result of {@code matchInterruptible} when {@code match} is set, otherwise the
 *         result of {@code searchInterruptible}; both are called with {@code start},
 *         {@code range} and {@code option}
 * @throws InterruptedException propagated from the interruptible matcher call
 */
@Override
public Integer run(ThreadContext context, Matcher matcher) throws InterruptedException {
    if (match) {
        return matcher.matchInterruptible(start, range, option);
    }
    return matcher.searchInterruptible(start, range, option);
}
private final int mismatch() { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (msaBestLen >= 0) { int s = msaBestS; return match(s); } } // falls through finish: return FAILED; }