/**
 * Supplies the comparator used for {@code byte[]} values.
 *
 * @param type the class a comparator is requested for; not consulted by this implementation
 * @return a newly created {@link BytesComparator} on every call
 */
@Override
public Comparator<byte[]> getComparator( Class<byte[]> type )
  {
  Comparator<byte[]> bytesComparator = new BytesComparator();

  return bytesComparator;
  }
}
/**
 * Compares the next length-prefixed value of each stream directly on the streams' backing buffers.
 * <p>
 * For each stream the length is obtained with {@code readLen} at the current position, the stream is
 * advanced past the value, and the two payload ranges are then handed to
 * {@code WritableComparator.compareBytes}.
 *
 * @param lhsStream stream positioned at the left-hand value; advanced by the value length plus 4
 * @param rhsStream stream positioned at the right-hand value; advanced by the value length plus 4
 * @return the result of {@code WritableComparator.compareBytes} over the two payload ranges
 */
@Override
public int compare( BufferedInputStream lhsStream, BufferedInputStream rhsStream )
  {
  // the payload begins this many bytes after the stream position
  // NOTE(review): presumably the width of the length prefix decoded by readLen - confirm
  final int prefixSize = 4;

  // left-hand side: capture buffer and offset, then move the stream past the value
  byte[] leftBuffer = lhsStream.getBuffer();
  int leftStart = lhsStream.getPosition();
  int leftLength = readLen( leftBuffer, leftStart );

  lhsStream.skip( leftLength + prefixSize );

  // right-hand side: same steps
  byte[] rightBuffer = rhsStream.getBuffer();
  int rightStart = rhsStream.getPosition();
  int rightLength = readLen( rightBuffer, rightStart );

  rhsStream.skip( rightLength + prefixSize );

  int leftPayload = leftStart + prefixSize;
  int rightPayload = rightStart + prefixSize;

  return WritableComparator.compareBytes( leftBuffer, leftPayload, leftLength, rightBuffer, rightPayload, rightLength );
  }
/**
 * Compares the next length-prefixed value of each stream directly on the streams' backing buffers.
 * <p>
 * For each stream the length is obtained with {@code readLen} at the current position, the stream is
 * advanced past the value, and the two payload ranges are then handed to
 * {@code WritableComparator.compareBytes}.
 *
 * @param lhsStream stream positioned at the left-hand value; advanced by the value length plus 4
 * @param rhsStream stream positioned at the right-hand value; advanced by the value length plus 4
 * @return the result of {@code WritableComparator.compareBytes} over the two payload ranges
 */
@Override
public int compare( BufferedInputStream lhsStream, BufferedInputStream rhsStream )
  {
  // the payload begins this many bytes after the stream position
  // NOTE(review): presumably the width of the length prefix decoded by readLen - confirm
  final int prefixSize = 4;

  // left-hand side: capture buffer and offset, then move the stream past the value
  byte[] leftBuffer = lhsStream.getBuffer();
  int leftStart = lhsStream.getPosition();
  int leftLength = readLen( leftBuffer, leftStart );

  lhsStream.skip( leftLength + prefixSize );

  // right-hand side: same steps
  byte[] rightBuffer = rhsStream.getBuffer();
  int rightStart = rhsStream.getPosition();
  int rightLength = readLen( rightBuffer, rightStart );

  rhsStream.skip( rightLength + prefixSize );

  int leftPayload = leftStart + prefixSize;
  int rightPayload = rightStart + prefixSize;

  return WritableComparator.compareBytes( leftBuffer, leftPayload, leftLength, rightBuffer, rightPayload, rightLength );
  }
/**
 * Supplies the comparator used for {@code byte[]} values.
 *
 * @param type the class a comparator is requested for; not consulted by this implementation
 * @return a newly created {@link BytesComparator} on every call
 */
@Override
public Comparator<byte[]> getComparator( Class<byte[]> type )
  {
  Comparator<byte[]> bytesComparator = new BytesComparator();

  return bytesComparator;
  }
}
// Register a new BytesComparator as the comparator for the "value" field of valueFields.
// NOTE(review): presumably needed because "value" carries raw byte[] data - confirm against the caller.
valueFields.setComparator( "value", new BytesComparator() );
// Register a new BytesComparator as the comparator for the "value" field of valueFields.
// NOTE(review): presumably needed because "value" carries raw byte[] data - confirm against the caller.
valueFields.setComparator( "value", new BytesComparator() );
@Test public void testSimpleGroupOnBytes() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new Each( pipe, new InsertRawBytes( new Fields( "bytes" ), "inserted text as bytes", true, true ), Fields.ALL ); Fields bytes = new Fields( "bytes" ); bytes.setComparator( "bytes", new BytesComparator() ); pipe = new GroupBy( pipe, bytes ); pipe = new Every( pipe, new Count(), new Fields( "bytes", "count" ) ); Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "grouponbytes" ), SinkMode.REPLACE ); Map<Object, Object> properties = getProperties(); TupleSerializationProps.addSerialization( properties, BytesSerialization.class.getName() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); // 10 unique counts }
@Test public void testSimpleGroupOnBytes() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new Each( pipe, new InsertRawBytes( new Fields( "bytes" ), "inserted text as bytes", true, true ), Fields.ALL ); Fields bytes = new Fields( "bytes" ); bytes.setComparator( "bytes", new BytesComparator() ); pipe = new GroupBy( pipe, bytes ); pipe = new Every( pipe, new Count(), new Fields( "bytes", "count" ) ); Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "grouponbytes" ), SinkMode.REPLACE ); Map<Object, Object> properties = getProperties(); TupleSerializationProps.addSerialization( properties, BytesSerialization.class.getName() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); // 10 unique counts }