@Override public void run() { //TODO: AAA, upgrade to non blocking collectUntilEndOfStream(this); }
@Override
public void shutdown() {
    // On shutdown, publish the end-of-data marker to the output ring so
    // consumers know no further fragments will arrive.
    endOfData(outputRing);
}
public void run() { //read from the byte stream which is already chunked by lines readData(this, inputRing, outputRing); }
// NOTE(review): incomplete fragment — these are interior case arms of a switch on the
// extracted field type; the switch header, the first `case` label, and the enclosing
// method are outside the visible span. This appears to be the *split* variant: the
// ASCII/BYTES arms take two (offset,length) pairs, presumably for fields that wrap
// the end of the backing buffer — confirm against the full file.
writeUInt(typeExtractor, output); break; case TypeExtractor.TYPE_SINT: writeInt(typeExtractor, output); break; case TypeExtractor.TYPE_ULONG: writeULong(typeExtractor, output); break; case TypeExtractor.TYPE_SLONG: writeLong(typeExtractor, output); break; case TypeExtractor.TYPE_ASCII: writeASCIISplit(data, offset1, length1, offset2, length2, output); break; case TypeExtractor.TYPE_BYTES: writeBytesSplit(data, offset1, length1, offset2, length2, output); break; case TypeExtractor.TYPE_DECIMAL: writeDecimal(typeExtractor, output); break; case TypeExtractor.TYPE_NULL: writeNull(output); break;
// NOTE(review): incomplete fragment — interior case arms of a switch on the extracted
// field type; the switch header and first `case` label are outside the visible span.
// This is the *contiguous* variant: ASCII/BYTES arms take a single (offset,length),
// i.e. the field does not wrap the backing buffer. Mirrors the split variant above.
writeUInt(typeExtractor, output); break; case TypeExtractor.TYPE_SINT: writeInt(typeExtractor, output); break; case TypeExtractor.TYPE_ULONG: writeULong(typeExtractor, output); break; case TypeExtractor.TYPE_SLONG: writeLong(typeExtractor, output); break; case TypeExtractor.TYPE_ASCII: writeASCII(data, offset, length, output); break; case TypeExtractor.TYPE_BYTES: writeBytes(data, offset, length, output); break; case TypeExtractor.TYPE_DECIMAL: writeDecimal(typeExtractor, output); break; case TypeExtractor.TYPE_NULL: writeNull(output); break;
// NOTE(review): incomplete fragment — body of a test/driver method whose header is not
// visible. It wires a LineSplitterByteBufferStage into a FieldSplitterStage through
// linesRing and runs each stage serially (startup -> blockingRun()/run() -> shutdown)
// rather than via a scheduler.
LineSplitterByteBufferStage lineSplitter = new LineSplitterByteBufferStage(gm, data, linesRing); lineSplitter.startup(); lineSplitter.blockingRun(); lineSplitter.shutdown(); FieldSplitterStage fieldSplitter = new FieldSplitterStage(gm, linesRing, fieldsRing); fieldSplitter.startup(); fieldSplitter.run(); fieldSplitter.shutdown();
// NOTE(review): incomplete fragment — body of a test/driver method whose header is not
// visible. Runs the line splitter serially, then walks the produced lines with a
// byte visitor (buildLineTestingVisitor() is defined outside this span).
LineSplitterByteBufferStage lineSplitter = new LineSplitterByteBufferStage(gm, data, linesRing); lineSplitter.startup(); lineSplitter.run(); lineSplitter.shutdown(); RingStreams.visitBytes(linesRing, buildLineTestingVisitor());
/**
 * Blocking variant of run(): keeps parsing the active ByteBuffer until the
 * parse position reaches its limit, then resets parser state, flushes any
 * batched writes on the output ring, and requests shutdown of this stage.
 */
public void blockingRun() {
    do {
        shutdownPosition = parseSingleByteBuffer(this, activeByteBuffer);
    } while (shutdownPosition < activeByteBuffer.limit());
    resetForNextByteBuffer(this);
    Pipe.publishAllBatchedWrites(outputRing);
    requestShutdown();
}
// NOTE(review): mangled fragment — the try block contains a bare
// `throw new RuntimeException(e1)` with no visible catch clause, no visible
// binding for e1, and no assignment to outputFileChannel. Presumably the
// original opened the channel inside
//   try { outputFileChannel = ...; } catch (Exception e1) { throw new RuntimeException(e1); }
// — confirm against the full file before relying on this code.
LineSplitterByteBufferStage lineSplitter = new LineSplitterByteBufferStage(gm, data, linesRing); FieldSplitterStage fieldSplitter = new FieldSplitterStage(gm, linesRing, fieldsRing); MetaMessagesToCSVStage csvBuilderStage = new MetaMessagesToCSVStage(gm, fieldsRing, flatFileRing); FileChannel outputFileChannel; try { throw new RuntimeException(e1); FileWriteStage fileWriter = new FileWriteStage(gm, flatFileRing, outputFileChannel);
@Override public void run() { if (hasRun) { requestShutdown(); return; do { startPos = pos; resetForNextByteBuffer(this); bytesRead = parseSingleByteBuffer(this, map); } while (bytesRead<map.limit()); shutdownPosition = bytesRead; requestShutdown(); log.trace("shutdown the line splitter"); hasRun = true;
// NOTE(review): incomplete fragment — the opening `if` matching the dangling
// `} else {` is outside the visible span. The two consumeBytes calls appear to be
// the contiguous (pos,len) path versus the wrapped path (len1 bytes at pos plus the
// remaining len-len1 bytes from index 0) — confirm against the full file.
byte[] data = byteBackingArray(meta, inputRing); beginningOfLine(outputRing); consumeBytes(stage.typeExtractor, outputRing, data, pos, len); } else { consumeBytes(stage.typeExtractor, outputRing, data, pos, len1, 0, len - len1); endOfLine(outputRing);
// NOTE(review): incomplete test fragment — the loop variable t and the arrays
// last/results referenced by the assertion are declared in lines missing from this
// span; generateCVSData and mismatchAt are also defined elsewhere.
ByteBuffer data = generateCVSData(21); int dataSize = data.limit(); LineSplitterByteBufferStage lineSplitter = new LineSplitterByteBufferStage(gm, data, linesRing); assertTrue("Missed on "+t+" vs "+(t+1)+" at idx"+mismatchAt(last,results)+"\n", Arrays.equals(last, results));
private static void consumeField(int fieldIdx, TypeExtractor typeExtractor, Pipe output, byte[] data, int offset, int length) { TypeExtractor.resetFieldSum(typeExtractor); TypeExtractor.appendContent(typeExtractor, data, offset, offset+length); //NOTE: in this case fieldIdx is not used however it holds the column number starting with zero // System.err.println("xxxxx "+new String(data,offset,length)); writeMetaMessage(typeExtractor, data, offset, length, output); //TODO: As an alternate implementation we can // * Open a message of type X in beginningOfLine() ByteBuffer.addMessageIDx(x) // * For every call including zero lookup the type and use they ByteBuffer.add XXX // * change endOfData to use RingBuffer.publishEOF(ring); // * publish in the endOfLine() method. //TODO: The best approach would be to have these code generated from the template file - see JavaPoet See YF // using the high level API to write would allow us to write the fields in any order that they arrive. // WAIT: may not need code generation. If we pass in array of strings that represent the fields in order // Then on startup convert those strings to an array of LOCs // then on parse use fieldIdx to look up the LOC and type to do the "right thing" // the switch would be a conditional that could be removed by code generation.... but only if needed. }
private static void consumeBytes(TypeExtractor typeExtractor, Pipe output, byte[] data, int offset, int length) { int fieldIdx = 0; byte prevB = 0; int quoteCount = 0; int i = offset; int fieldStart = offset; int lineStop = length+offset; while (i<lineStop) { if (',' == data[i] && '\\'!=prevB && (quoteCount&1)==0) { //send the new field as a message up to this point. consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, i-fieldStart); fieldStart = i+1; } prevB = data[i++]; quoteCount += quoter[0xFF&prevB]; } //last field at the end of length consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, lineStop-fieldStart); }
/**
 * Stage that splits the bytes of a single source ByteBuffer into lines
 * published on the output ring.
 *
 * @param graphManager     graph this stage is registered with
 * @param sourceByteBuffer buffer whose contents will be split into lines
 * @param outputRing       _out_ Resulting byte buffer ring.
 */
public LineSplitterByteBufferStage(GraphManager graphManager, ByteBuffer sourceByteBuffer, Pipe outputRing) {
    super(graphManager, NONE, outputRing);
    this.activeByteBuffer = sourceByteBuffer;
    this.outputRing = outputRing;
    // This stage only understands the simple RAW_BYTES message layout.
    if (Pipe.from(outputRing) != RawDataSchema.FROM) {
        throw new UnsupportedOperationException("This class can only be used with the very simple RAW_BYTES catalog of messages.");
    }
    stepSize = RawDataSchema.FROM.fragDataSize[0];
    //NOTE: this block has constants that could be moved up and out
    quoter = new byte[256]; // all zeros by default...
    quoter['"'] = 1;        // ...except the quote character itself
    if (outputRing.maxVarLen < 1) {
        throw new UnsupportedOperationException();
    }
    resetForNextByteBuffer(this);
}
// NOTE(review): incomplete fragment — opening of an end-of-line check; the body of
// the if, and the definitions of isEOL and b, are outside the visible span.
if (isEOL(b) ) {
/**
 * Non-blocking scheduler pass: parses what it can of the active ByteBuffer;
 * once the parse position has reached the buffer's limit it resets parser
 * state, flushes batched writes on the output ring, and requests shutdown.
 */
@Override
public void run() {
    shutdownPosition = parseSingleByteBuffer(this, activeByteBuffer);
    boolean fullyConsumed = shutdownPosition >= activeByteBuffer.limit();
    if (fullyConsumed) {
        resetForNextByteBuffer(this);
        Pipe.publishAllBatchedWrites(outputRing);
        requestShutdown();
    }
}
private static void consumeField(int fieldIdx, TypeExtractor typeExtractor, Pipe output, byte[] data, int offset1, int length1, int offset2, int length2) { TypeExtractor.resetFieldSum(typeExtractor); assert(length1>=0) : "bad length "+length1; TypeExtractor.appendContent(typeExtractor, data, offset1, offset1+length1); assert(length2>=0) : "bad length "+length2; TypeExtractor.appendContent(typeExtractor, data, offset2, offset2+length2); //NOTE: in this case fieldIdx is not used however it holds the column number starting with zero writeMetaMessage(typeExtractor, data, offset1, length1, offset2, length2, output); }
// NOTE(review): this line appears to be merge/diff residue — several variants of the
// comma-split logic (with and without the quote-count check, contiguous and two-segment
// consumeField calls) are concatenated, with dangling `} else {` branches that have no
// matching opening braces. It is not valid Java as written; recover the intended single
// variant from version control before use.
if (',' == data[i] && '\\'!=prevB) { consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, i-fieldStart); fieldStart = i+1; consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, i-fieldStart); } else { consumeField(fieldIdx++, typeExtractor, output, data, lastStart, lastStop-lastStart, fieldStart, i-fieldStart); consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, lineStop-fieldStart); } else { consumeField(fieldIdx++, typeExtractor, output, data, lastStart, lastStop-lastStart, fieldStart, lineStop-fieldStart); if (',' == data[i] && '\\'!=prevB && (quoteCount&1)==0) { consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, i-fieldStart); fieldStart = i+1; consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, lineStop-fieldStart);