@Override public void run() { //TODO: AAA, upgrade to non blocking collectUntilEndOfStream(this); }
@Override
public void shutdown() {
    // On shutdown, publish the end-of-data marker to the output ring so
    // consumers know no further fragments will arrive.
    endOfData(outputRing);
}
public void run() { //read from the byte stream which is already chunked by lines readData(this, inputRing, outputRing); }
// NOTE(review): incomplete fragment — these are interior case arms of a switch on the
// extracted field type; the switch header, the first `case` label, and the enclosing
// method are outside the visible span. This appears to be the *split* variant: the
// ASCII/BYTES arms take two (offset,length) pairs, presumably for fields that wrap
// the end of the backing buffer — confirm against the full file.
writeUInt(typeExtractor, output); break; case TypeExtractor.TYPE_SINT: writeInt(typeExtractor, output); break; case TypeExtractor.TYPE_ULONG: writeULong(typeExtractor, output); break; case TypeExtractor.TYPE_SLONG: writeLong(typeExtractor, output); break; case TypeExtractor.TYPE_ASCII: writeASCIISplit(data, offset1, length1, offset2, length2, output); break; case TypeExtractor.TYPE_BYTES: writeBytesSplit(data, offset1, length1, offset2, length2, output); break; case TypeExtractor.TYPE_DECIMAL: writeDecimal(typeExtractor, output); break; case TypeExtractor.TYPE_NULL: writeNull(output); break;
// NOTE(review): incomplete fragment — interior case arms of a switch on the extracted
// field type; the switch header and first `case` label are outside the visible span.
// This is the *contiguous* variant: ASCII/BYTES arms take a single (offset,length),
// i.e. the field does not wrap the backing buffer. Mirrors the split variant above.
writeUInt(typeExtractor, output); break; case TypeExtractor.TYPE_SINT: writeInt(typeExtractor, output); break; case TypeExtractor.TYPE_ULONG: writeULong(typeExtractor, output); break; case TypeExtractor.TYPE_SLONG: writeLong(typeExtractor, output); break; case TypeExtractor.TYPE_ASCII: writeASCII(data, offset, length, output); break; case TypeExtractor.TYPE_BYTES: writeBytes(data, offset, length, output); break; case TypeExtractor.TYPE_DECIMAL: writeDecimal(typeExtractor, output); break; case TypeExtractor.TYPE_NULL: writeNull(output); break;
// NOTE(review): incomplete fragment — body of a test/driver method whose header is not
// visible. It wires a LineSplitterByteBufferStage into a FieldSplitterStage through
// linesRing and runs each stage serially (startup -> blockingRun()/run() -> shutdown)
// rather than via a scheduler.
LineSplitterByteBufferStage lineSplitter = new LineSplitterByteBufferStage(gm, data, linesRing); lineSplitter.startup(); lineSplitter.blockingRun(); lineSplitter.shutdown(); FieldSplitterStage fieldSplitter = new FieldSplitterStage(gm, linesRing, fieldsRing); fieldSplitter.startup(); fieldSplitter.run(); fieldSplitter.shutdown();
// NOTE(review): incomplete fragment — body of a test/driver method whose header is not
// visible. Runs the line splitter serially, then walks the produced lines with a
// byte visitor (buildLineTestingVisitor() is defined outside this span).
LineSplitterByteBufferStage lineSplitter = new LineSplitterByteBufferStage(gm, data, linesRing); lineSplitter.startup(); lineSplitter.run(); lineSplitter.shutdown(); RingStreams.visitBytes(linesRing, buildLineTestingVisitor());
/**
 * Blocking variant of run(): keeps parsing the active ByteBuffer until the
 * parse position reaches its limit, then resets parser state, flushes any
 * batched writes on the output ring, and requests shutdown of this stage.
 */
public void blockingRun() {
    do {
        shutdownPosition = parseSingleByteBuffer(this, activeByteBuffer);
    } while (shutdownPosition < activeByteBuffer.limit());
    resetForNextByteBuffer(this);
    Pipe.publishAllBatchedWrites(outputRing);
    requestShutdown();
}
// NOTE(review): mangled fragment — the try block contains a bare
// `throw new RuntimeException(e1)` with no visible catch clause, no visible
// binding for e1, and no assignment to outputFileChannel. Presumably the
// original opened the channel inside
//   try { outputFileChannel = ...; } catch (Exception e1) { throw new RuntimeException(e1); }
// — confirm against the full file before relying on this code.
LineSplitterByteBufferStage lineSplitter = new LineSplitterByteBufferStage(gm, data, linesRing); FieldSplitterStage fieldSplitter = new FieldSplitterStage(gm, linesRing, fieldsRing); MetaMessagesToCSVStage csvBuilderStage = new MetaMessagesToCSVStage(gm, fieldsRing, flatFileRing); FileChannel outputFileChannel; try { throw new RuntimeException(e1); FileWriteStage fileWriter = new FileWriteStage(gm, flatFileRing, outputFileChannel);
@Override public void run() { if (hasRun) { requestShutdown(); return; do { startPos = pos; resetForNextByteBuffer(this); bytesRead = parseSingleByteBuffer(this, map); } while (bytesRead<map.limit()); shutdownPosition = bytesRead; requestShutdown(); log.trace("shutdown the line splitter"); hasRun = true;
// NOTE(review): incomplete fragment — the opening `if` matching the dangling
// `} else {` is outside the visible span. The two consumeBytes calls appear to be
// the contiguous (pos,len) path versus the wrapped path (len1 bytes at pos plus the
// remaining len-len1 bytes from index 0) — confirm against the full file.
byte[] data = byteBackingArray(meta, inputRing); beginningOfLine(outputRing); consumeBytes(stage.typeExtractor, outputRing, data, pos, len); } else { consumeBytes(stage.typeExtractor, outputRing, data, pos, len1, 0, len - len1); endOfLine(outputRing);
// NOTE(review): incomplete test fragment — the loop variable t and the arrays
// last/results referenced by the assertion are declared in lines missing from this
// span; generateCVSData and mismatchAt are also defined elsewhere.
ByteBuffer data = generateCVSData(21); int dataSize = data.limit(); LineSplitterByteBufferStage lineSplitter = new LineSplitterByteBufferStage(gm, data, linesRing); assertTrue("Missed on "+t+" vs "+(t+1)+" at idx"+mismatchAt(last,results)+"\n", Arrays.equals(last, results));
private static void consumeField(int fieldIdx, TypeExtractor typeExtractor, Pipe output, byte[] data, int offset, int length) { TypeExtractor.resetFieldSum(typeExtractor); TypeExtractor.appendContent(typeExtractor, data, offset, offset+length); //NOTE: in this case fieldIdx is not used however it holds the column number starting with zero // System.err.println("xxxxx "+new String(data,offset,length)); writeMetaMessage(typeExtractor, data, offset, length, output); //TODO: As an alternate implementation we can // * Open a message of type X in beginningOfLine() ByteBuffer.addMessageIDx(x) // * For every call including zero lookup the type and use they ByteBuffer.add XXX // * change endOfData to use RingBuffer.publishEOF(ring); // * publish in the endOfLine() method. //TODO: The best approach would be to have these code generated from the template file - see JavaPoet See YF // using the high level API to write would allow us to write the fields in any order that they arrive. // WAIT: may not need code generation. If we pass in array of strings that represent the fields in order // Then on startup convert those strings to an array of LOCs // then on parse use fieldIdx to look up the LOC and type to do the "right thing" // the switch would be a conditional that could be removed by code generation.... but only if needed. }
private static void consumeBytes(TypeExtractor typeExtractor, Pipe output, byte[] data, int offset, int length) { int fieldIdx = 0; byte prevB = 0; int quoteCount = 0; int i = offset; int fieldStart = offset; int lineStop = length+offset; while (i<lineStop) { if (',' == data[i] && '\\'!=prevB && (quoteCount&1)==0) { //send the new field as a message up to this point. consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, i-fieldStart); fieldStart = i+1; } prevB = data[i++]; quoteCount += quoter[0xFF&prevB]; } //last field at the end of length consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, lineStop-fieldStart); }
/**
 * Stage that splits the bytes of a single source ByteBuffer into lines
 * published on the output ring.
 *
 * @param graphManager     graph this stage is registered with
 * @param sourceByteBuffer buffer whose contents will be split into lines
 * @param outputRing       _out_ Resulting byte buffer ring.
 */
public LineSplitterByteBufferStage(GraphManager graphManager, ByteBuffer sourceByteBuffer, Pipe outputRing) {
    super(graphManager, NONE, outputRing);
    this.activeByteBuffer = sourceByteBuffer;
    this.outputRing = outputRing;
    // This stage only understands the simple RAW_BYTES message layout.
    if (Pipe.from(outputRing) != RawDataSchema.FROM) {
        throw new UnsupportedOperationException("This class can only be used with the very simple RAW_BYTES catalog of messages.");
    }
    stepSize = RawDataSchema.FROM.fragDataSize[0];
    //NOTE: this block has constants that could be moved up and out
    quoter = new byte[256]; // all zeros by default...
    quoter['"'] = 1;        // ...except the quote character itself
    if (outputRing.maxVarLen < 1) {
        throw new UnsupportedOperationException();
    }
    resetForNextByteBuffer(this);
}
// NOTE(review): incomplete fragment — opening of an end-of-line check; the body of
// the if, and the definitions of isEOL and b, are outside the visible span.
if (isEOL(b) ) {
/**
 * Non-blocking scheduler pass: parses what it can of the active ByteBuffer;
 * once the parse position has reached the buffer's limit it resets parser
 * state, flushes batched writes on the output ring, and requests shutdown.
 */
@Override
public void run() {
    shutdownPosition = parseSingleByteBuffer(this, activeByteBuffer);
    boolean fullyConsumed = shutdownPosition >= activeByteBuffer.limit();
    if (fullyConsumed) {
        resetForNextByteBuffer(this);
        Pipe.publishAllBatchedWrites(outputRing);
        requestShutdown();
    }
}
private static void consumeField(int fieldIdx, TypeExtractor typeExtractor, Pipe output, byte[] data, int offset1, int length1, int offset2, int length2) { TypeExtractor.resetFieldSum(typeExtractor); assert(length1>=0) : "bad length "+length1; TypeExtractor.appendContent(typeExtractor, data, offset1, offset1+length1); assert(length2>=0) : "bad length "+length2; TypeExtractor.appendContent(typeExtractor, data, offset2, offset2+length2); //NOTE: in this case fieldIdx is not used however it holds the column number starting with zero writeMetaMessage(typeExtractor, data, offset1, length1, offset2, length2, output); }
// NOTE(review): this line appears to be merge/diff residue — several variants of the
// comma-split logic (with and without the quote-count check, contiguous and two-segment
// consumeField calls) are concatenated, with dangling `} else {` branches that have no
// matching opening braces. It is not valid Java as written; recover the intended single
// variant from version control before use.
if (',' == data[i] && '\\'!=prevB) { consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, i-fieldStart); fieldStart = i+1; consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, i-fieldStart); } else { consumeField(fieldIdx++, typeExtractor, output, data, lastStart, lastStop-lastStart, fieldStart, i-fieldStart); consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, lineStop-fieldStart); } else { consumeField(fieldIdx++, typeExtractor, output, data, lastStart, lastStop-lastStart, fieldStart, lineStop-fieldStart); if (',' == data[i] && '\\'!=prevB && (quoteCount&1)==0) { consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, i-fieldStart); fieldStart = i+1; consumeField(fieldIdx++, typeExtractor, output, data, fieldStart, lineStop-fieldStart);