/**
 * Scans the elements of the given entry in order and returns the position of the first element
 * whose match result equals {@code negateMatch}.
 * <p>
 * Each element is read as a String; a {@code null} element is matched as the empty string.
 *
 * @param matcher the matcher that is reset against each element value
 * @param input   the entry whose elements are tested
 * @return the zero-based position of the first qualifying element, or -1 if there is none
 */
protected int matchEachElementPos( Matcher matcher, TupleEntry input )
  {
  for( int index = 0; index < input.size(); index++ )
    {
    String element = input.getString( index );
    String value = element == null ? "" : element;

    matcher.reset( value );

    boolean matchFound = matcher.find();

    if( LOG.isDebugEnabled() )
      LOG.debug( "pattern: " + getPatternString() + ", matches: " + matchFound + ", element: '" + value + "'" );

    if( matchFound == negateMatch )
      return index;
    }

  // no element produced a match result equal to negateMatch
  return -1;
  }
/** * Method getValue ... * * @param functionCall of FunctionCall<Context> * @return String */ protected String getValue( FunctionCall<Context> functionCall ) { // if one argument if( functionCall.getArguments().size() == 1 ) return functionCall.getArguments().getString( 0 ); // if many arguments Iterator<String> values = functionCall.getArguments().asIterableOf( String.class ).iterator(); StringBuilder result = new StringBuilder(); while( values.hasNext() ) { String next = values.next(); if( next != null ) result.append( next ); } return result.toString(); }
/**
 * Emits the single value carried by the outgoing tuple to the output collector, using a
 * {@code null} key.
 *
 * @param fp the current flow process (not used here)
 * @param sc the sink call supplying the outgoing entry and the output collector
 * @throws IOException declared for the collector write
 * @throws RuntimeException if the outgoing tuple does not hold exactly one element
 */
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc) throws IOException {
  final TupleEntry outgoing = sc.getOutgoingEntry();
  final int arity = outgoing.size();

  if (arity != 1) {
    throw new RuntimeException(
        "ParquetValueScheme expects tuples with an arity of exactly 1, but found " + outgoing.getFields());
  }

  // the sole element is the record to write
  final T record = (T) outgoing.getObject(0);
  sc.getOutput().collect(null, record);
}
/**
 * Increments the running count held in position 0 of the context tuple, subject to the
 * configured {@code include} mode.
 * <ul>
 * <li>{@code ALL}: every call is counted.</li>
 * <li>{@code NO_NULLS}: the call is skipped when every argument value is null.</li>
 * <li>{@code ONLY_NULLS}: the call is skipped unless every argument value is null.</li>
 * </ul>
 *
 * @param flowProcess the current flow process (not used here)
 * @param args        the argument values for this call
 * @param context     the running count, or {@code null} on the first call
 * @return the context tuple, created with a count of zero if none was given
 */
@Override
public Tuple aggregate( FlowProcess flowProcess, TupleEntry args, Tuple context )
  {
  Tuple counter = context != null ? context : new Tuple( 0L );

  switch( include )
    {
    case NO_NULLS:
      if( Tuples.frequency( args, null ) == args.size() )
        return counter;

      break;

    case ONLY_NULLS:
      if( Tuples.frequency( args, null ) != args.size() )
        return counter;

      break;

    case ALL:
    default:
      break;
    }

  long previous = counter.getLong( 0 );

  counter.set( 0, previous + 1L );

  return counter;
  }
/**
 * Writes the one value held by the outgoing tuple to the output collector with a {@code null} key.
 *
 * @param fp the current flow process (not used here)
 * @param sc the sink call that provides the outgoing entry and the output collector
 * @throws IOException declared for the collector write
 * @throws RuntimeException if the outgoing tuple size is anything other than 1
 */
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<? extends JobConf> fp, SinkCall<Object[], OutputCollector> sc)
    throws IOException {
  final TupleEntry entry = sc.getOutgoingEntry();

  // reject anything that is not a single-element tuple before touching the collector
  final boolean singleValued = entry.size() == 1;
  if (!singleValued) {
    throw new RuntimeException(
        "ParquetValueScheme expects tuples with an arity of exactly 1, but found " + entry.getFields());
  }

  final T payload = (T) entry.getObject(0);
  final OutputCollector collector = sc.getOutput();
  collector.collect(null, payload);
}
/**
 * Reads the last argument of the given call, coerced with {@code getCoercibleType()}, and
 * returns the result of passing it through {@code deepCopy}.
 *
 * @param operationCall the current call; it is cast to a {@code FunctionCall} to reach its arguments
 * @return the copy of the last argument value
 */
protected Node getArgument( OperationCall<?> operationCall )
  {
  FunctionCall<Object> functionCall = (FunctionCall<Object>) operationCall;
  TupleEntry arguments = functionCall.getArguments();

  // the value of interest is always in the final argument position
  int lastPos = arguments.size() - 1;
  Node lastArgument = (Node) arguments.getObject( lastPos, getCoercibleType() );

  return deepCopy( lastArgument );
  }
}
/**
 * Forwards the single element of the outgoing tuple to the output collector, keyed by {@code null}.
 *
 * @param fp the current flow process (not used here)
 * @param sc the sink call giving access to the outgoing entry and the output collector
 * @throws IOException declared for the collector write
 * @throws RuntimeException if the outgoing tuple does not contain exactly one element
 */
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<? extends JobConf> fp, SinkCall<Object[], OutputCollector> sc)
    throws IOException {
  final TupleEntry outgoingEntry = sc.getOutgoingEntry();

  if (outgoingEntry.size() == 1) {
    final T element = (T) outgoingEntry.getObject(0);
    final OutputCollector sinkOutput = sc.getOutput();
    sinkOutput.collect(null, element);
    return;
  }

  // any other arity cannot be mapped to a single value
  throw new RuntimeException(
      "ParquetValueScheme expects tuples with an arity of exactly 1, but found "
          + outgoingEntry.getFields());
}
/**
 * Collects the lone value of the outgoing tuple into the output collector under a {@code null} key.
 *
 * @param fp the current flow process (not used here)
 * @param sc the sink call from which the outgoing entry and the output collector are taken
 * @throws IOException declared for the collector write
 * @throws RuntimeException if the outgoing tuple size differs from 1
 */
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc) throws IOException {
  final TupleEntry current = sc.getOutgoingEntry();

  // guard: exactly one element is required
  if (current.size() != 1) {
    final String message =
        "ParquetValueScheme expects tuples with an arity of exactly 1, but found " + current.getFields();
    throw new RuntimeException(message);
  }

  final Object raw = current.getObject(0);
  final T typed = (T) raw;

  final OutputCollector target = sc.getOutput();
  target.collect(null, typed);
}
public void operate( FlowProcess flowProcess, BufferCall<Integer> bufferCall ) { bufferCall.setContext( bufferCall.getContext() + 1 ); TupleEntry group = bufferCall.getGroup(); // if( !group.getFields().equals( groupFields ) ) // throw new RuntimeException( "group fields do not match: " + group.getFields() + " != " + groupFields ); if( group.size() != groupFields.size() ) throw new RuntimeException( "group tuple size not fields size" ); if( group.size() == 0 ) throw new RuntimeException( "group tuple size is zero" ); boolean allAreNull = true; for( Object o : group.getTuple() ) { if( o != null ) allAreNull = false; } if( !nullsAreOK && allAreNull ) throw new RuntimeException( "group tuple value is null" ); Iterator<TupleEntry> iterator = bufferCall.getArgumentsIterator(); while( iterator.hasNext() ) bufferCall.getOutputCollector().add( iterator.next() ); } }
/**
 * Drains the given iterator, asserting the size and (optionally) the textual form of every
 * entry, then asserts the total number of entries seen.
 * <p>
 * The iterator is always closed, even when one of the per-entry assertions fails, so a failing
 * check does not leave it open.
 *
 * @param iterator  the entries to validate; closed by this method
 * @param numTuples the expected number of entries
 * @param tupleSize the expected size of every entry, or -1 to skip the size check
 * @param regex     a pattern the {@code toString()} of every tuple must fully match, or
 *                  {@code null} to skip the pattern check
 * @throws RuntimeException wrapping the IOException if closing the iterator fails after a
 *                          successful iteration
 */
public static void validateLength( TupleEntryIterator iterator, int numTuples, int tupleSize, Pattern regex )
  {
  int count = 0;
  boolean completed = false;

  try
    {
    while( iterator.hasNext() )
      {
      TupleEntry tupleEntry = iterator.next();

      if( tupleSize != -1 )
        assertEquals( "wrong number of elements", tupleSize, tupleEntry.size() );

      if( regex != null )
        {
        // render the tuple once; it is needed for both the message and the match
        String line = tupleEntry.getTuple().toString();

        assertTrue( "regex: " + regex + " does not match: " + line, regex.matcher( line ).matches() );
        }

      count++;
      }

    completed = true;
    }
  finally
    {
    try
      {
      iterator.close();
      }
    catch( IOException exception )
      {
      // only surface the close failure when iteration succeeded; otherwise the
      // assertion or iteration failure already in flight is the one worth reporting
      if( completed )
        throw new RuntimeException( exception );
      }
    }

  assertEquals( "wrong number of lines", numTuples, count );
  }
/**
 * Copies the node found in the first argument into the result node of this call and emits
 * the result.
 * <p>
 * When more arguments are present than the threshold (2 if {@code isInto()} is true, 1 otherwise),
 * {@code resetTransforms} is invoked with the arguments and the context fields before the copy.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall the current function call
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall<Context> functionCall )
  {
  TupleEntry arguments = functionCall.getArguments();
  Node fromNode = (Node) arguments.getObject( 0, getCoercibleType() );
  Context context = functionCall.getContext();

  int argumentCount = arguments.size();
  int threshold = isInto() ? 2 : 1;

  // extra arguments are handed to resetTransforms before copying
  if( argumentCount > threshold )
    resetTransforms( arguments, context.fields );

  Node resultNode = getResultNode( functionCall );

  copier.copy( fromNode, resultNode );

  context.result.set( 0, resultNode );
  functionCall.getOutputCollector().add( context.result );
  }
/**
 * Verifies that iterating a TupleEntry as Strings yields the same formatted date string for
 * every position, whether the value was stored as a Date, a long timestamp, or a String.
 * <p>
 * The first three fields are typed with a DateType built from the same pattern used to format
 * the expected string; the fourth is a plain String field.
 */
@Test
public void testCoerceIterable()
  {
  final String pattern = "dd/MMM/yyyy:HH:mm:ss:SSS Z";
  final SimpleDateFormat dateFormat = new SimpleDateFormat( pattern );

  CoercibleType coercible = new DateType( pattern, TimeZone.getDefault() );

  Date date = new Date();
  String stringDate = dateFormat.format( date );

  Tuple tuple = Tuple.size( 4 );
  Fields fields = Fields.size( 4 ).applyTypes( coercible, coercible, coercible, String.class );

  TupleEntry results = new TupleEntry( fields, tuple );

  results.setObject( 0, date );
  results.setLong( 1, date.getTime() );
  results.setString( 2, stringDate );
  results.setString( 3, stringDate );

  Iterable<String> iterable = results.asIterableOf( String.class );

  int count = 0;

  for( String s : iterable )
    {
    assertEquals( stringDate, s );
    count++;
    }

  // expected value first, matching the argument order of the assertion above
  assertEquals( results.size(), count );
  }
public void operate( FlowProcess flowProcess, BufferCall<TupleEntryCollector> bufferCall ) { if( bufferCall.getJoinerClosure() != null ) throw new IllegalStateException( "joiner closure should be null" ); if( insertHeader ) bufferCall.getOutputCollector().add( new Tuple( value ) ); Iterator<TupleEntry> iterator = bufferCall.getArgumentsIterator(); while( iterator.hasNext() ) { TupleEntry arguments = iterator.next(); // must be called if( expectedSize != -1 && arguments.size() != expectedSize ) throw new RuntimeException( "arguments wrong size" ); if( path != null ) bufferCall.getContext().add( arguments ); if( value != null ) bufferCall.getOutputCollector().add( new Tuple( value ) ); else bufferCall.getOutputCollector().add( arguments ); // copy } if( insertFooter ) bufferCall.getOutputCollector().add( new Tuple( value ) ); iterator.hasNext(); // regression }