/**
 * Computes the <i>offset info</i> of the next text line, where a line is
 * terminated by '\r', '\n' or '\r\n'.
 * <p>
 * This is the unfiltered variant: it forwards to the one-argument overload
 * with a {@code null} argument. For results produced this way
 * {@link OffsetInfo#isStartsWithMatch()} always returns true.
 *
 * @return offset info for the next line; callers treat {@code null} as the
 *         end of the input
 * @throws IOException
 *             propagated from the delegated call
 */
public OffsetInfo nextOffsetInfo() throws IOException {
    return nextOffsetInfo(null);
}
/**
 * Creates a demarcator that reads from the given {@link InputStream}.
 * <p>
 * Every demarcated token has to fit within {@code maxDataSize}; a token that
 * does not fit causes an exception to be raised.
 *
 * @param is
 *            stream the data is read from
 * @param maxDataSize
 *            upper bound on the size of a single demarcated token
 * @param initialBufferSize
 *            size of the byte buffer allocated up front
 */
AbstractDemarcator(InputStream is, int maxDataSize, int initialBufferSize) {
    // Arguments are checked before any state is touched.
    this.validate(is, maxDataSize, initialBufferSize);

    this.maxDataSize = maxDataSize;
    this.initialBufferSize = initialBufferSize;
    this.buffer = new byte[initialBufferSize];
    this.is = is;
}
/**
 * Creates a demarcator that reads characters from the given {@link Reader}.
 * <p>
 * Every demarcated token has to fit within {@code maxDataSize}; a token that
 * does not fit causes an exception to be raised.
 *
 * @param reader
 *            character source the data is read from
 * @param maxDataSize
 *            upper bound on the size of a single demarcated token
 * @param initialBufferSize
 *            size of the char buffer allocated up front
 */
AbstractTextDemarcator(Reader reader, int maxDataSize, int initialBufferSize) {
    // Arguments are checked before any state is touched.
    this.validate(reader, maxDataSize, initialBufferSize);

    this.maxDataSize = maxDataSize;
    this.buffer = new char[initialBufferSize];
    this.reader = reader;
}
try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) { while ((messageContent = demarcator.nextToken()) != null) { publish(flowFile, messageKey, messageContent, topic, tracker);
final List<NonThreadSafeCircularBuffer> circularBuffers = new ArrayList<>(); for (final byte[] stopper : stoppers) { final NonThreadSafeCircularBuffer circularBuffer = new NonThreadSafeCircularBuffer(stopper); if (stopper.length > longest) { longest = stopper.length; if (circ.addAndCompare((byte) next)) { final int bytesToCopy = longest - circ.getByteArray().length; for (int i = 0; i < bytesToCopy; i++) { final int oldestByte = longestBuffer.getOldestByte(); if (oldestByte != -1) { out.write(oldestByte); longestBuffer.addAndCompare((byte) 0); return circ.getByteArray(); if (longestBuffer.isFilled()) { out.write(longestBuffer.getOldestByte());
OffsetInfo previousOffsetInfo = null; long lastCrlfLength = 0; while ((offsetInfo = demarcator.nextOffsetInfo(startsWithFilter)) != null) { lastCrlfLength = offsetInfo.getCrlfLength(); if (startsWithFilter != null && !offsetInfo.isStartsWithMatch()) { if (offsetInfo.getCrlfLength() != -1) { previousOffsetInfo = offsetInfo; if (length + offsetInfo.getLength() > this.maxSplitSize) { throw new IllegalStateException( "Computing header resulted in header size being > MAX split size of " + this.maxSplitSize + "."); } else { length += offsetInfo.getLength(); actualLineCount++; if (actualLineCount == splitMaxLineCount) {
circularBuffers.add(new NonThreadSafeCircularBuffer(stopper)); if (circ.addAndCompare((byte) next)) { return circ.getByteArray();
length += remainderSplitInfo.remaningOffsetInfo.getLength(); actualLineCount++; while ((offsetInfo = demarcator.nextOffsetInfo()) != null) { lastCrlfLength = offsetInfo.getCrlfLength(); if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) { trailingCrlfLength += offsetInfo.getCrlfLength(); trailingLineCount++; } else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) { trailingCrlfLength = 0; // non-empty line came in, thus resetting counter if (length + offsetInfo.getLength() + startingLength > this.maxSplitSize) { if (length == 0) { // single line per split length += offsetInfo.getLength(); actualLineCount++; } else { length += offsetInfo.getLength(); actualLineCount++; if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) {
while (data == null && this.availableBytesLength != -1) { if (this.index >= this.availableBytesLength) { this.fill(); this.index = i + 1; int size = Math.max(1, this.index - this.mark); offsetInfo = new OffsetInfo(this.offset, size, delimiterSize); this.offset += size; if (startsWith != null) { data = this.extractDataToken(size); int size = this.index - this.mark; if (size > 0) { offsetInfo = new OffsetInfo(this.offset, size, delimiterSize); this.offset += size; data = this.extractDataToken(this.index - this.mark); offsetInfo.setStartsWithMatch(false); } else { for (int i = 0; i < startsWith.length; i++) { byte sB = startsWith[i]; if (sB != data[i]) { offsetInfo.setStartsWithMatch(false); break;
/**
 * Copies the content of {@code in} to {@code out} line by line, placing the
 * replacement value in front of every line.
 *
 * @param in
 *            content to read lines from
 * @param out
 *            destination for the rewritten content
 * @throws IOException
 *             if reading a line or writing the result fails
 */
@Override
public void process(final InputStream in, final OutputStream out) throws IOException {
    try (final LineDemarcator lines = new LineDemarcator(in, charset, maxBufferSize, 8192);
            final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, charset))) {
        // nextLine() signals the end of the content with null.
        for (String current = lines.nextLine(); current != null; current = lines.nextLine()) {
            final String prefixed = replacementValue.concat(current);
            writer.write(prefixed);
        }
    }
}
});
while (token == null && this.availableBytesLength != -1) { if (this.index >= this.availableBytesLength) { this.fill(); this.index = i + 1; int size = this.index - this.mark - this.delimiterBytes.length; token = this.extractDataToken(size); this.mark = this.index; j = 0; token = this.extractDataToken(this.index - this.mark);
/**
 * Constructs a new instance.
 *
 * @param is
 *            instance of {@link InputStream} representing the data
 * @param delimiterBytes
 *            byte array holding the delimiter used to split the input
 *            stream. Can be 'null'. NOTE: 'null' is allowed only for
 *            convenience and consistency; without a delimiter this class
 *            is no different than a BufferedReader which reads the entire
 *            stream into a byte array, and there may be more efficient
 *            ways to do that (if that is the case).
 * @param maxDataSize
 *            maximum size of data derived from the input stream. This
 *            means that neither the {@link InputStream} nor its individual
 *            tokens (if a delimiter is used) can ever be greater than this
 *            size.
 * @param initialBufferSize
 *            initial size of the buffer used to buffer the
 *            {@link InputStream} or its parts (if a delimiter is used) to
 *            create its byte[] representation. Must be a positive integer.
 *            The buffer will grow automatically as needed up to
 *            Integer.MAX_VALUE.
 */
public StreamDemarcator(InputStream is, byte[] delimiterBytes, int maxDataSize, int initialBufferSize) {
    super(is, maxDataSize, initialBufferSize);
    // The delimiter is checked before it is stored.
    validate(delimiterBytes);
    this.delimiterBytes = delimiterBytes;
}
while (this.availableBytesLength != -1) { if (this.index >= this.availableBytesLength) { this.fill();
try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) { while ((messageContent = demarcator.nextToken()) != null) { publish(flowFile, messageKey, messageContent, topic, tracker);
@Override public void process(final InputStream in, final OutputStream out) throws IOException { try (final LineDemarcator demarcator = new LineDemarcator(in, charset, maxBufferSize, 8192); final BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset))) { String line; while ((line = demarcator.nextLine()) != null) { // We need to determine what line ending was used and use that after our replacement value. lineEndingBuilder.setLength(0); for (int i = line.length() - 1; i >= 0; i--) { final char c = line.charAt(i); if (c == '\r' || c == '\n') { lineEndingBuilder.append(c); } else { break; } } bw.write(replacementValue); // Preserve original line endings. Reverse string because we iterated over original line ending in reverse order, appending to builder. // So if builder has multiple characters, they are now reversed from the original string's ordering. bw.write(lineEndingBuilder.reverse().toString()); } } } });
try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) { while ((messageContent = demarcator.nextToken()) != null) { publish(flowFile, messageKey, messageContent, topic, tracker);
@Override public void process(final InputStream in, final OutputStream out) throws IOException { try (final LineDemarcator demarcator = new LineDemarcator(in, charset, maxBufferSize, 8192); final BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset))) { String oneLine; while (null != (oneLine = demarcator.nextLine())) { // we need to find the first carriage return or new-line so that we can append the new value // before the line separate. However, we don't want to do this using a regular expression due // to performance concerns. So we will find the first occurrence of either \r or \n and use // that to insert the replacement value. boolean foundNewLine = false; for (int i = 0; i < oneLine.length(); i++) { final char c = oneLine.charAt(i); if (foundNewLine) { bw.write(c); continue; } if (c == '\r' || c == '\n') { bw.write(replacementValue); foundNewLine = true; } bw.write(c); } if (!foundNewLine) { bw.write(replacementValue); } } } } });
try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) { while ((messageContent = demarcator.nextToken()) != null) { publish(flowFile, messageKey, messageContent, topic, tracker);
/**
 * Copies the content of {@code in} to {@code out} line by line, substituting
 * the replacement value (written literally) for every match of the search
 * pattern within a line. Lines without a match are copied unchanged.
 *
 * @param in
 *            content to read lines from
 * @param out
 *            destination for the rewritten content
 * @throws IOException
 *             if reading a line or writing the result fails
 */
@Override
public void process(final InputStream in, final OutputStream out) throws IOException {
    try (final LineDemarcator lines = new LineDemarcator(in, charset, maxBufferSize, initialBufferSize);
            final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, charset))) {
        for (String current = lines.nextLine(); current != null; current = lines.nextLine()) {
            final Matcher matcher = searchPattern.matcher(current);

            // Index of the first character of the line not yet written out.
            int copiedUpTo = 0;
            while (matcher.find()) {
                final int matchStart = matcher.start();
                writer.write(current, copiedUpTo, matchStart - copiedUpTo);
                writer.write(replacementValue);
                copiedUpTo = matcher.end();
            }

            // Remainder after the last match; with no match at all
            // copiedUpTo is still 0, so this emits the whole line.
            writer.write(current, copiedUpTo, current.length() - copiedUpTo);
        }
    }
}
});
try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) { while ((messageContent = demarcator.nextToken()) != null) { publish(flowFile, messageKey, messageContent, topic, tracker);